1/*
2 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26/**
27 * \file ir_to_mesa.cpp
28 *
29 * Translate GLSL IR to Mesa's gl_program representation.
30 */
31
32#include <stdio.h>
33#include "main/macros.h"
34#include "main/mtypes.h"
35#include "main/shaderapi.h"
36#include "main/shaderobj.h"
37#include "main/uniforms.h"
38#include "main/glspirv.h"
39#include "compiler/glsl/ast.h"
40#include "compiler/glsl/ir.h"
41#include "compiler/glsl/ir_expression_flattening.h"
42#include "compiler/glsl/ir_visitor.h"
43#include "compiler/glsl/ir_optimization.h"
44#include "compiler/glsl/ir_uniform.h"
45#include "compiler/glsl/glsl_parser_extras.h"
46#include "compiler/glsl_types.h"
47#include "compiler/glsl/linker.h"
48#include "compiler/glsl/program.h"
49#include "compiler/glsl/shader_cache.h"
50#include "compiler/glsl/string_to_uint_map.h"
51#include "program/prog_instruction.h"
52#include "program/prog_optimize.h"
53#include "program/prog_print.h"
54#include "program/program.h"
55#include "program/prog_parameter.h"
56
57
58static int swizzle_for_size(int size);
59
60namespace {
61
62class src_reg;
63class dst_reg;
64
65/**
66 * This struct is a corresponding struct to Mesa prog_src_register, with
67 * wider fields.
68 */
69class src_reg {
70public:
71   src_reg(gl_register_file file, int index, const glsl_type *type)
72   {
73      this->file = file;
74      this->index = index;
75      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
76	 this->swizzle = swizzle_for_size(type->vector_elements);
77      else
78	 this->swizzle = SWIZZLE_XYZW;
79      this->negate = 0;
80      this->reladdr = NULL;
81   }
82
83   src_reg()
84   {
85      this->file = PROGRAM_UNDEFINED;
86      this->index = 0;
87      this->swizzle = 0;
88      this->negate = 0;
89      this->reladdr = NULL;
90   }
91
92   explicit src_reg(dst_reg reg);
93
94   gl_register_file file; /**< PROGRAM_* from Mesa */
95   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
96   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
97   int negate; /**< NEGATE_XYZW mask from mesa */
98   /** Register index should be offset by the integer in this reg. */
99   src_reg *reladdr;
100};
101
102class dst_reg {
103public:
104   dst_reg(gl_register_file file, int writemask)
105   {
106      this->file = file;
107      this->index = 0;
108      this->writemask = writemask;
109      this->reladdr = NULL;
110   }
111
112   dst_reg()
113   {
114      this->file = PROGRAM_UNDEFINED;
115      this->index = 0;
116      this->writemask = 0;
117      this->reladdr = NULL;
118   }
119
120   explicit dst_reg(src_reg reg);
121
122   gl_register_file file; /**< PROGRAM_* from Mesa */
123   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
124   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
125   /** Register index should be offset by the integer in this reg. */
126   src_reg *reladdr;
127};
128
129} /* anonymous namespace */
130
131src_reg::src_reg(dst_reg reg)
132{
133   this->file = reg.file;
134   this->index = reg.index;
135   this->swizzle = SWIZZLE_XYZW;
136   this->negate = 0;
137   this->reladdr = reg.reladdr;
138}
139
140dst_reg::dst_reg(src_reg reg)
141{
142   this->file = reg.file;
143   this->index = reg.index;
144   this->writemask = WRITEMASK_XYZW;
145   this->reladdr = reg.reladdr;
146}
147
148namespace {
149
/**
 * One instruction in the list built during codegen.
 *
 * This mirrors Mesa's prog_instruction closely enough (opcode, one dst,
 * up to three srcs, texture fields) that the final translation to
 * prog_instruction is mostly field-for-field.
 */
class ir_to_mesa_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction)

   enum prog_opcode op;
   dst_reg dst;
   src_reg src[3];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   bool saturate;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   GLboolean tex_shadow;
};
164
/**
 * Record of where an ir_variable lives: a register file plus a base index.
 */
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file; /* PROGRAM_* register file holding the variable */
   int index;             /* base register index within that file */
   ir_variable *var; /* variable that maps to this, if any */
};
177
/**
 * Bookkeeping for a GLSL function signature that gets emitted as Mesa IR.
 */
class function_entry : public exec_node {
public:
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
210
/**
 * Visitor that walks a shader's GLSL IR and appends equivalent Mesa IR
 * (ir_to_mesa_instruction) to the \c instructions list.
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Next unallocated PROGRAM_TEMPORARY register index. */
   int next_temp;

   variable_storage *find_variable_storage(const ir_variable *var);

   src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   /** Register holding the result of the most recently visited rvalue. */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0, src_reg src1);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst,
			        src_reg src0, src_reg src1, src_reg src2);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
				    dst_reg dst,
				    src_reg src0,
				    src_reg src1,
				    unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0, src_reg src1);

   bool try_emit_mad(ir_expression *ir,
			  int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
				 int mul_operand);

   void emit_swz(ir_expression *ir);

   void emit_equality_comparison(ir_expression *ir, enum prog_opcode op,
                                 dst_reg dst,
                                 const src_reg &src0, const src_reg &src1);

   /* x != y is computed as -abs(x - y) < 0, hence SLT against zero
    * (see emit_equality_comparison).
    */
   inline void emit_sne(ir_expression *ir, dst_reg dst,
                        const src_reg &src0, const src_reg &src1)
   {
      emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1);
   }

   /* x == y is computed as -abs(x - y) >= 0, hence SGE against zero. */
   inline void emit_seq(ir_expression *ir, dst_reg dst,
                        const src_reg &src0, const src_reg &src1)
   {
      emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1);
   }

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** ralloc context that owns the emitted instructions. */
   void *mem_ctx;
};
331
332} /* anonymous namespace */
333
/* Shared placeholder operands used when an instruction takes fewer than
 * three sources or has no destination.
 */
static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/* Mesa's single address register, written via OPCODE_ARL for reladdr. */
static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
339
340static int
341swizzle_for_size(int size)
342{
343   static const int size_swizzles[4] = {
344      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
345      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
346      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
347      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
348   };
349
350   assert((size >= 1) && (size <= 4));
351   return size_swizzles[size - 1];
352}
353
/**
 * Core emit: appends one instruction with up to three sources, resolving
 * any relative addressing first.  Returns the new instruction so callers
 * can tweak fields (writemask, saturate, ...).
 */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst,
			 src_reg src0, src_reg src1, src_reg src2)
{
   ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
   int num_reladdr = 0;

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL;
   num_reladdr += src0.reladdr != NULL;
   num_reladdr += src1.reladdr != NULL;
   num_reladdr += src2.reladdr != NULL;

   /* Each call decrements num_reladdr; only the last reladdr operand left
    * keeps using the address register directly (see reladdr_to_temp).
    */
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr) {
      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = ir;

   this->instructions.push_tail(inst);

   return inst;
}
392
393
/** Convenience overload: two-source instruction. */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(ir, op, dst, src0, src1, undef_src);
}
400
/** Convenience overload: one-source instruction. */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst, src_reg src0)
{
   /* A one-source ALU instruction writing no channels would be a no-op. */
   assert(dst.writemask != 0);
   return emit(ir, op, dst, src0, undef_src, undef_src);
}
408
/** Convenience overload: zero-operand instruction (e.g. flow control). */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
{
   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
414
415ir_to_mesa_instruction *
416ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
417			    dst_reg dst, src_reg src0, src_reg src1,
418			    unsigned elements)
419{
420   static const enum prog_opcode dot_opcodes[] = {
421      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
422   };
423
424   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
425}
426
427/**
428 * Emits Mesa scalar opcodes to produce unique answers across channels.
429 *
430 * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
431 * channel determines the result across all channels.  So to do a vec4
432 * of this operation, we want to emit a scalar per source channel used
433 * to produce dest channels.
434 */
void
ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst,
				src_reg orig_src0, src_reg orig_src1)
{
   int i, j;
   /* Channels outside the writemask are treated as already produced. */
   int done_mask = ~dst.writemask;

   /* Mesa RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      ir_to_mesa_instruction *inst;
      src_reg src0 = orig_src0;
      src_reg src1 = orig_src1;

      if (done_mask & this_mask)
	 continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
	 /* If there is another enabled component in the destination that is
	  * derived from the same inputs, generate its value on this pass as
	  * well.
	  */
	 if (!(done_mask & (1 << j)) &&
	     GET_SWZ(src0.swizzle, j) == src0_swiz &&
	     GET_SWZ(src1.swizzle, j) == src1_swiz) {
	    this_mask |= (1 << j);
	 }
      }
      /* Replicate the chosen source channel into every lane so the scalar
       * opcode computes from it regardless of which dst channels we write.
       */
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
				   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
				  src1_swiz, src1_swiz);

      inst = emit(ir, op, dst, src0, src1);
      inst->dst.writemask = this_mask;
      done_mask |= this_mask;
   }
}
479
480void
481ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
482			        dst_reg dst, src_reg src0)
483{
484   src_reg undef = undef_src;
485
486   undef.swizzle = SWIZZLE_XXXX;
487
488   emit_scalar(ir, op, dst, src0, undef);
489}
490
491src_reg
492ir_to_mesa_visitor::src_reg_for_float(float val)
493{
494   src_reg src(PROGRAM_CONSTANT, -1, NULL);
495
496   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
497					  (const gl_constant_value *)&val, 1, &src.swizzle);
498
499   return src;
500}
501
/**
 * Number of vec4 register slots a value of \p type occupies.
 */
static int
type_size(const struct glsl_type *type)
{
   return type->count_vec4_slots(false, false);
}
507
508/**
509 * In the initial pass of codegen, we assign temporary numbers to
510 * intermediate results.  (not SSA -- variable assignments will reuse
511 * storage).  Actual register allocation for the Mesa VM occurs in a
512 * pass over the Mesa IR later.
513 */
514src_reg
515ir_to_mesa_visitor::get_temp(const glsl_type *type)
516{
517   src_reg src;
518
519   src.file = PROGRAM_TEMPORARY;
520   src.index = next_temp;
521   src.reladdr = NULL;
522   next_temp += type_size(type);
523
524   if (type->is_array() || type->is_struct()) {
525      src.swizzle = SWIZZLE_NOOP;
526   } else {
527      src.swizzle = swizzle_for_size(type->vector_elements);
528   }
529   src.negate = 0;
530
531   return src;
532}
533
534variable_storage *
535ir_to_mesa_visitor::find_variable_storage(const ir_variable *var)
536{
537   foreach_in_list(variable_storage, entry, &this->variables) {
538      if (entry->var == var)
539	 return entry;
540   }
541
542   return NULL;
543}
544
/**
 * Handles variable declarations.  Only built-in ("gl_") uniforms need work
 * here: their backing GL state is registered in the STATE file, and copied
 * into a temporary when the state layout doesn't match how the shader will
 * reference it.
 */
void
ir_to_mesa_visitor::visit(ir_variable *ir)
{
   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->get_state_slots();
      assert(slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->get_num_state_slots(); i++) {
	 if (slots[i].swizzle != SWIZZLE_XYZW) {
	    break;
	 }
      }

      variable_storage *storage;
      dst_reg dst;
      if (i == ir->get_num_state_slots()) {
	 /* We'll set the index later. */
	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
	 this->variables.push_tail(storage);

	 dst = undef_dst;
      } else {
	 /* The variable_storage constructor allocates slots based on the size
	  * of the type.  However, this had better match the number of state
	  * elements that we're going to copy into the new temporary.
	  */
	 assert((int) ir->get_num_state_slots() == type_size(ir->type));

	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
						 this->next_temp);
	 this->variables.push_tail(storage);
	 this->next_temp += type_size(ir->type);

	 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
      }


      /* Register each state slot and, in the temporary case, copy it in. */
      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
	 int index = _mesa_add_state_reference(this->prog->Parameters,
					       slots[i].tokens);

	 if (storage->file == PROGRAM_STATE_VAR) {
	    if (storage->index == -1) {
	       storage->index = index;
	    } else {
	       /* Slots of one variable must land contiguously. */
	       assert(index == storage->index + (int)i);
	    }
	 } else {
	    src_reg src(PROGRAM_STATE_VAR, index, NULL);
	    src.swizzle = slots[i].swizzle;
	    emit(ir, OPCODE_MOV, dst, src);
	    /* even a float takes up a whole vec4 reg in a struct/array. */
	    dst.index++;
	 }
      }

      if (storage->file == PROGRAM_TEMPORARY &&
	  dst.index != storage->index + (int) ir->get_num_state_slots()) {
	 linker_error(this->shader_program,
		      "failed to load builtin uniform `%s' "
		      "(%d/%d regs loaded)\n",
		      ir->name, dst.index - storage->index,
		      type_size(ir->type));
      }
   }
}
618
619void
620ir_to_mesa_visitor::visit(ir_loop *ir)
621{
622   emit(NULL, OPCODE_BGNLOOP);
623
624   visit_exec_list(&ir->body_instructions, this);
625
626   emit(NULL, OPCODE_ENDLOOP);
627}
628
629void
630ir_to_mesa_visitor::visit(ir_loop_jump *ir)
631{
632   switch (ir->mode) {
633   case ir_loop_jump::jump_break:
634      emit(NULL, OPCODE_BRK);
635      break;
636   case ir_loop_jump::jump_continue:
637      emit(NULL, OPCODE_CONT);
638      break;
639   }
640}
641
642
643void
644ir_to_mesa_visitor::visit(ir_function_signature *ir)
645{
646   assert(0);
647   (void)ir;
648}
649
650void
651ir_to_mesa_visitor::visit(ir_function *ir)
652{
653   /* Ignore function bodies other than main() -- we shouldn't see calls to
654    * them since they should all be inlined before we get to ir_to_mesa.
655    */
656   if (strcmp(ir->name, "main") == 0) {
657      const ir_function_signature *sig;
658      exec_list empty;
659
660      sig = ir->matching_signature(NULL, &empty, false);
661
662      assert(sig);
663
664      foreach_in_list(ir_instruction, ir, &sig->body) {
665	 ir->accept(this);
666      }
667   }
668}
669
670bool
671ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
672{
673   int nonmul_operand = 1 - mul_operand;
674   src_reg a, b, c;
675
676   ir_expression *expr = ir->operands[mul_operand]->as_expression();
677   if (!expr || expr->operation != ir_binop_mul)
678      return false;
679
680   expr->operands[0]->accept(this);
681   a = this->result;
682   expr->operands[1]->accept(this);
683   b = this->result;
684   ir->operands[nonmul_operand]->accept(this);
685   c = this->result;
686
687   this->result = get_temp(ir->type);
688   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
689
690   return true;
691}
692
693/**
694 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
695 *
696 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
697 * implemented using multiplication, and logical-or is implemented using
698 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
699 * As result, the logical expression (a & !b) can be rewritten as:
700 *
701 *     - a * !b
702 *     - a * (1 - b)
703 *     - (a * 1) - (a * b)
704 *     - a + -(a * b)
705 *     - a + (a * -b)
706 *
707 * This final expression can be implemented as a single MAD(a, -b, a)
708 * instruction.
709 */
710bool
711ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
712{
713   const int other_operand = 1 - try_operand;
714   src_reg a, b;
715
716   ir_expression *expr = ir->operands[try_operand]->as_expression();
717   if (!expr || expr->operation != ir_unop_logic_not)
718      return false;
719
720   ir->operands[other_operand]->accept(this);
721   a = this->result;
722   expr->operands[0]->accept(this);
723   b = this->result;
724
725   b.negate = ~b.negate;
726
727   this->result = get_temp(ir->type);
728   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
729
730   return true;
731}
732
733void
734ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
735				    src_reg *reg, int *num_reladdr)
736{
737   if (!reg->reladdr)
738      return;
739
740   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
741
742   if (*num_reladdr != 1) {
743      src_reg temp = get_temp(glsl_type::vec4_type);
744
745      emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
746      *reg = temp;
747   }
748
749   (*num_reladdr)--;
750}
751
/**
 * Emits an ir_quadop_vector expression as a single OPCODE_SWZ.
 *
 * Walks each operand down to either a constant (-1, 0, or 1) or a
 * component of one common variable, building the extended swizzle and
 * negate masks as it goes.
 */
void
ir_to_mesa_visitor::emit_swz(ir_expression *ir)
{
   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
    * This means that each of the operands is either an immediate value of -1,
    * 0, or 1, or is a component from one source register (possibly with
    * negation).
    */
   uint8_t components[4] = { 0 };
   bool negate[4] = { false };
   ir_variable *var = NULL;

   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
      ir_rvalue *op = ir->operands[i];

      assert(op->type->is_scalar());

      /* Peel wrappers (negation, swizzles) until we bottom out at a
       * constant or a variable dereference.
       */
      while (op != NULL) {
	 switch (op->ir_type) {
	 case ir_type_constant: {

	    assert(op->type->is_scalar());

	    const ir_constant *const c = op->as_constant();
	    if (c->is_one()) {
	       components[i] = SWIZZLE_ONE;
	    } else if (c->is_zero()) {
	       components[i] = SWIZZLE_ZERO;
	    } else if (c->is_negative_one()) {
	       /* -1 is expressed as negated SWIZZLE_ONE. */
	       components[i] = SWIZZLE_ONE;
	       negate[i] = true;
	    } else {
	       assert(!"SWZ constant must be 0.0 or 1.0.");
	    }

	    op = NULL;
	    break;
	 }

	 case ir_type_dereference_variable: {
	    ir_dereference_variable *const deref =
	       (ir_dereference_variable *) op;

	    /* All variable references must resolve to one source variable. */
	    assert((var == NULL) || (deref->var == var));
	    components[i] = SWIZZLE_X;
	    var = deref->var;
	    op = NULL;
	    break;
	 }

	 case ir_type_expression: {
	    ir_expression *const expr = (ir_expression *) op;

	    /* Only negation wrappers are expected here. */
	    assert(expr->operation == ir_unop_neg);
	    negate[i] = true;

	    op = expr->operands[0];
	    break;
	 }

	 case ir_type_swizzle: {
	    ir_swizzle *const swiz = (ir_swizzle *) op;

	    components[i] = swiz->mask.x;
	    op = swiz->val;
	    break;
	 }

	 default:
	    assert(!"Should not get here.");
	    return;
	 }
      }
   }

   assert(var != NULL);

   ir_dereference_variable *const deref =
      new(mem_ctx) ir_dereference_variable(var);

   this->result.file = PROGRAM_UNDEFINED;
   deref->accept(this);
   if (this->result.file == PROGRAM_UNDEFINED) {
      printf("Failed to get tree for expression operand:\n");
      deref->print();
      printf("\n");
      exit(1);
   }

   src_reg src;

   src = this->result;
   src.swizzle = MAKE_SWIZZLE4(components[0],
			       components[1],
			       components[2],
			       components[3]);
   src.negate = ((unsigned(negate[0]) << 0)
		 | (unsigned(negate[1]) << 1)
		 | (unsigned(negate[2]) << 2)
		 | (unsigned(negate[3]) << 3));

   /* Storage for our result.  Ideally for an assignment we'd be using the
    * actual storage for the result here, instead.
    */
   const src_reg result_src = get_temp(ir->type);
   dst_reg result_dst = dst_reg(result_src);

   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   emit(ir, OPCODE_SWZ, result_dst, src);
   this->result = result_src;
}
868
/**
 * Emits (in)equality of \p src0 and \p src1 as `-abs(src0 - src1) OP 0`,
 * where \p op is OPCODE_SGE for == and OPCODE_SLT for != (see
 * emit_seq/emit_sne).
 */
void
ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir,
                                             enum prog_opcode op,
                                             dst_reg dst,
                                             const src_reg &src0,
                                             const src_reg &src1)
{
   src_reg difference;
   src_reg abs_difference = get_temp(glsl_type::vec4_type);
   const src_reg zero = src_reg_for_float(0.0);

   /* x == y is equivalent to -abs(x-y) >= 0.  Since all of the code that
    * consumes the generated IR is pretty dumb, take special care when one
    * of the operands is zero.
    *
    * Similarly, x != y is equivalent to -abs(x-y) < 0.
    */
   if (src0.file == zero.file &&
       src0.index == zero.index &&
       src0.swizzle == zero.swizzle) {
      difference = src1;
   } else if (src1.file == zero.file &&
              src1.index == zero.index &&
              src1.swizzle == zero.swizzle) {
      difference = src0;
   } else {
      difference = get_temp(glsl_type::vec4_type);

      /* Compute src1 - src0 via ADD with src0 negated. */
      src_reg tmp_src = src0;
      tmp_src.negate = ~tmp_src.negate;

      emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1);
   }

   emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference);

   /* Negate via the source modifier rather than an extra instruction. */
   abs_difference.negate = ~abs_difference.negate;
   emit(ir, op, dst, abs_difference, zero);
}
908
909void
910ir_to_mesa_visitor::visit(ir_expression *ir)
911{
912   unsigned int operand;
913   src_reg op[ARRAY_SIZE(ir->operands)];
914   src_reg result_src;
915   dst_reg result_dst;
916
917   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
918    */
919   if (ir->operation == ir_binop_add) {
920      if (try_emit_mad(ir, 1))
921	 return;
922      if (try_emit_mad(ir, 0))
923	 return;
924   }
925
926   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
927    */
928   if (ir->operation == ir_binop_logic_and) {
929      if (try_emit_mad_for_and_not(ir, 1))
930	 return;
931      if (try_emit_mad_for_and_not(ir, 0))
932	 return;
933   }
934
935   if (ir->operation == ir_quadop_vector) {
936      this->emit_swz(ir);
937      return;
938   }
939
940   for (operand = 0; operand < ir->num_operands; operand++) {
941      this->result.file = PROGRAM_UNDEFINED;
942      ir->operands[operand]->accept(this);
943      if (this->result.file == PROGRAM_UNDEFINED) {
944	 printf("Failed to get tree for expression operand:\n");
945         ir->operands[operand]->print();
946         printf("\n");
947	 exit(1);
948      }
949      op[operand] = this->result;
950
951      /* Matrix expression operands should have been broken down to vector
952       * operations already.
953       */
954      assert(!ir->operands[operand]->type->is_matrix());
955   }
956
957   int vector_elements = ir->operands[0]->type->vector_elements;
958   if (ir->operands[1]) {
959      vector_elements = MAX2(vector_elements,
960			     ir->operands[1]->type->vector_elements);
961   }
962
963   this->result.file = PROGRAM_UNDEFINED;
964
965   /* Storage for our result.  Ideally for an assignment we'd be using
966    * the actual storage for the result here, instead.
967    */
968   result_src = get_temp(ir->type);
969   /* convenience for the emit functions below. */
970   result_dst = dst_reg(result_src);
971   /* Limit writes to the channels that will be used by result_src later.
972    * This does limit this temp's use as a temporary for multi-instruction
973    * sequences.
974    */
975   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
976
977   switch (ir->operation) {
978   case ir_unop_logic_not:
979      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
980       * older GPUs implement SEQ using multiple instructions (i915 uses two
981       * SGE instructions and a MUL instruction).  Since our logic values are
982       * 0.0 and 1.0, 1-x also implements !x.
983       */
984      op[0].negate = ~op[0].negate;
985      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
986      break;
987   case ir_unop_neg:
988      op[0].negate = ~op[0].negate;
989      result_src = op[0];
990      break;
991   case ir_unop_abs:
992      emit(ir, OPCODE_ABS, result_dst, op[0]);
993      break;
994   case ir_unop_sign:
995      emit(ir, OPCODE_SSG, result_dst, op[0]);
996      break;
997   case ir_unop_rcp:
998      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
999      break;
1000
1001   case ir_unop_exp2:
1002      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
1003      break;
1004   case ir_unop_exp:
1005      assert(!"not reached: should be handled by exp_to_exp2");
1006      break;
1007   case ir_unop_log:
1008      assert(!"not reached: should be handled by log_to_log2");
1009      break;
1010   case ir_unop_log2:
1011      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
1012      break;
1013   case ir_unop_sin:
1014      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
1015      break;
1016   case ir_unop_cos:
1017      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
1018      break;
1019
1020   case ir_unop_dFdx:
1021      emit(ir, OPCODE_DDX, result_dst, op[0]);
1022      break;
1023   case ir_unop_dFdy:
1024      emit(ir, OPCODE_DDY, result_dst, op[0]);
1025      break;
1026
1027   case ir_unop_saturate: {
1028      ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
1029                                          result_dst, op[0]);
1030      inst->saturate = true;
1031      break;
1032   }
1033
1034   case ir_binop_add:
1035      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1036      break;
1037   case ir_binop_sub:
1038      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
1039      break;
1040
1041   case ir_binop_mul:
1042      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1043      break;
1044   case ir_binop_div:
1045      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1046      break;
1047   case ir_binop_mod:
1048      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
1049      assert(ir->type->is_integer_32());
1050      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1051      break;
1052
1053   case ir_binop_less:
1054      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1055      break;
1056   case ir_binop_gequal:
1057      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1058      break;
1059   case ir_binop_equal:
1060      emit_seq(ir, result_dst, op[0], op[1]);
1061      break;
1062   case ir_binop_nequal:
1063      emit_sne(ir, result_dst, op[0], op[1]);
1064      break;
1065   case ir_binop_all_equal:
1066      /* "==" operator producing a scalar boolean. */
1067      if (ir->operands[0]->type->is_vector() ||
1068	  ir->operands[1]->type->is_vector()) {
1069	 src_reg temp = get_temp(glsl_type::vec4_type);
1070         emit_sne(ir, dst_reg(temp), op[0], op[1]);
1071
1072	 /* After the dot-product, the value will be an integer on the
1073	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
1074	  */
1075	 emit_dp(ir, result_dst, temp, temp, vector_elements);
1076
1077	 /* Negating the result of the dot-product gives values on the range
1078	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
1079	  * achieved using SGE.
1080	  */
1081	 src_reg sge_src = result_src;
1082	 sge_src.negate = ~sge_src.negate;
1083	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
1084      } else {
1085         emit_seq(ir, result_dst, op[0], op[1]);
1086      }
1087      break;
1088   case ir_binop_any_nequal:
1089      /* "!=" operator producing a scalar boolean. */
1090      if (ir->operands[0]->type->is_vector() ||
1091	  ir->operands[1]->type->is_vector()) {
1092	 src_reg temp = get_temp(glsl_type::vec4_type);
1093         if (ir->operands[0]->type->is_boolean() &&
1094             ir->operands[1]->as_constant() &&
1095             ir->operands[1]->as_constant()->is_zero()) {
1096            temp = op[0];
1097         } else {
1098            emit_sne(ir, dst_reg(temp), op[0], op[1]);
1099         }
1100
1101	 /* After the dot-product, the value will be an integer on the
1102	  * range [0,4].  Zero stays zero, and positive values become 1.0.
1103	  */
1104	 ir_to_mesa_instruction *const dp =
1105	    emit_dp(ir, result_dst, temp, temp, vector_elements);
1106	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1107	    /* The clamping to [0,1] can be done for free in the fragment
1108	     * shader with a saturate.
1109	     */
1110	    dp->saturate = true;
1111	 } else {
1112	    /* Negating the result of the dot-product gives values on the range
1113	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1114	     * achieved using SLT.
1115	     */
1116	    src_reg slt_src = result_src;
1117	    slt_src.negate = ~slt_src.negate;
1118	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1119	 }
1120      } else {
1121         emit_sne(ir, result_dst, op[0], op[1]);
1122      }
1123      break;
1124
1125   case ir_binop_logic_xor:
1126      emit_sne(ir, result_dst, op[0], op[1]);
1127      break;
1128
1129   case ir_binop_logic_or: {
1130      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1131         /* After the addition, the value will be an integer on the
1132          * range [0,2].  Zero stays zero, and positive values become 1.0.
1133          */
1134         ir_to_mesa_instruction *add =
1135            emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1136	 add->saturate = true;
1137      } else {
1138         /* The Boolean arguments are stored as float 0.0 and 1.0.  If either
1139          * value is 1.0, the result of the logcal-or should be 1.0.  If both
1140          * values are 0.0, the result should be 0.0.  This is exactly what
1141          * MAX does.
1142          */
1143         emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1144      }
1145      break;
1146   }
1147
1148   case ir_binop_logic_and:
1149      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1150      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1151      break;
1152
1153   case ir_binop_dot:
1154      assert(ir->operands[0]->type->is_vector());
1155      assert(ir->operands[0]->type == ir->operands[1]->type);
1156      emit_dp(ir, result_dst, op[0], op[1],
1157	      ir->operands[0]->type->vector_elements);
1158      break;
1159
1160   case ir_unop_sqrt:
1161      /* sqrt(x) = x * rsq(x). */
1162      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1163      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
1164      /* For incoming channels <= 0, set the result to 0. */
1165      op[0].negate = ~op[0].negate;
1166      emit(ir, OPCODE_CMP, result_dst,
1167			  op[0], result_src, src_reg_for_float(0.0));
1168      break;
1169   case ir_unop_rsq:
1170      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1171      break;
1172   case ir_unop_i2f:
1173   case ir_unop_u2f:
1174   case ir_unop_b2f:
1175   case ir_unop_b2i:
1176   case ir_unop_i2u:
1177   case ir_unop_u2i:
1178      /* Mesa IR lacks types, ints are stored as truncated floats. */
1179      result_src = op[0];
1180      break;
1181   case ir_unop_f2i:
1182   case ir_unop_f2u:
1183      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1184      break;
1185   case ir_unop_f2b:
1186   case ir_unop_i2b:
1187      emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0));
1188      break;
1189   case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
1190   case ir_unop_bitcast_f2u:
1191   case ir_unop_bitcast_i2f:
1192   case ir_unop_bitcast_u2f:
1193      break;
1194   case ir_unop_trunc:
1195      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1196      break;
1197   case ir_unop_ceil:
1198      op[0].negate = ~op[0].negate;
1199      emit(ir, OPCODE_FLR, result_dst, op[0]);
1200      result_src.negate = ~result_src.negate;
1201      break;
1202   case ir_unop_floor:
1203      emit(ir, OPCODE_FLR, result_dst, op[0]);
1204      break;
1205   case ir_unop_fract:
1206      emit(ir, OPCODE_FRC, result_dst, op[0]);
1207      break;
1208   case ir_unop_pack_snorm_2x16:
1209   case ir_unop_pack_snorm_4x8:
1210   case ir_unop_pack_unorm_2x16:
1211   case ir_unop_pack_unorm_4x8:
1212   case ir_unop_pack_half_2x16:
1213   case ir_unop_pack_double_2x32:
1214   case ir_unop_unpack_snorm_2x16:
1215   case ir_unop_unpack_snorm_4x8:
1216   case ir_unop_unpack_unorm_2x16:
1217   case ir_unop_unpack_unorm_4x8:
1218   case ir_unop_unpack_half_2x16:
1219   case ir_unop_unpack_double_2x32:
1220   case ir_unop_bitfield_reverse:
1221   case ir_unop_bit_count:
1222   case ir_unop_find_msb:
1223   case ir_unop_find_lsb:
1224   case ir_unop_d2f:
1225   case ir_unop_f2d:
1226   case ir_unop_d2i:
1227   case ir_unop_i2d:
1228   case ir_unop_d2u:
1229   case ir_unop_u2d:
1230   case ir_unop_d2b:
1231   case ir_unop_frexp_sig:
1232   case ir_unop_frexp_exp:
1233      assert(!"not supported");
1234      break;
1235   case ir_binop_min:
1236      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
1237      break;
1238   case ir_binop_max:
1239      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1240      break;
1241   case ir_binop_pow:
1242      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
1243      break;
1244
1245      /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
1246       * hardware backends have no way to avoid Mesa IR generation
1247       * even if they don't use it, we need to emit "something" and
1248       * continue.
1249       */
1250   case ir_binop_lshift:
1251   case ir_binop_rshift:
1252   case ir_binop_bit_and:
1253   case ir_binop_bit_xor:
1254   case ir_binop_bit_or:
1255      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1256      break;
1257
1258   case ir_unop_bit_not:
1259   case ir_unop_round_even:
1260      emit(ir, OPCODE_MOV, result_dst, op[0]);
1261      break;
1262
1263   case ir_binop_ubo_load:
1264      assert(!"not supported");
1265      break;
1266
1267   case ir_triop_lrp:
1268      /* ir_triop_lrp operands are (x, y, a) while
1269       * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
1270       */
1271      emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
1272      break;
1273
1274   case ir_triop_csel:
1275      /* We assume that boolean true and false are 1.0 and 0.0.  OPCODE_CMP
1276       * selects src1 if src0 is < 0, src2 otherwise.
1277       */
1278      op[0].negate = ~op[0].negate;
1279      emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]);
1280      break;
1281
1282   case ir_binop_vector_extract:
1283   case ir_triop_fma:
1284   case ir_triop_bitfield_extract:
1285   case ir_triop_vector_insert:
1286   case ir_quadop_bitfield_insert:
1287   case ir_binop_ldexp:
1288   case ir_binop_carry:
1289   case ir_binop_borrow:
1290   case ir_binop_abs_sub:
1291   case ir_binop_add_sat:
1292   case ir_binop_sub_sat:
1293   case ir_binop_avg:
1294   case ir_binop_avg_round:
1295   case ir_binop_mul_32x16:
1296   case ir_binop_imul_high:
1297   case ir_unop_interpolate_at_centroid:
1298   case ir_binop_interpolate_at_offset:
1299   case ir_binop_interpolate_at_sample:
1300   case ir_unop_dFdx_coarse:
1301   case ir_unop_dFdx_fine:
1302   case ir_unop_dFdy_coarse:
1303   case ir_unop_dFdy_fine:
1304   case ir_unop_subroutine_to_int:
1305   case ir_unop_get_buffer_size:
1306   case ir_unop_bitcast_u642d:
1307   case ir_unop_bitcast_i642d:
1308   case ir_unop_bitcast_d2u64:
1309   case ir_unop_bitcast_d2i64:
1310   case ir_unop_i642i:
1311   case ir_unop_u642i:
1312   case ir_unop_i642u:
1313   case ir_unop_u642u:
1314   case ir_unop_i642b:
1315   case ir_unop_i642f:
1316   case ir_unop_u642f:
1317   case ir_unop_i642d:
1318   case ir_unop_u642d:
1319   case ir_unop_i2i64:
1320   case ir_unop_u2i64:
1321   case ir_unop_b2i64:
1322   case ir_unop_f2i64:
1323   case ir_unop_d2i64:
1324   case ir_unop_i2u64:
1325   case ir_unop_u2u64:
1326   case ir_unop_f2u64:
1327   case ir_unop_d2u64:
1328   case ir_unop_u642i64:
1329   case ir_unop_i642u64:
1330   case ir_unop_pack_int_2x32:
1331   case ir_unop_unpack_int_2x32:
1332   case ir_unop_pack_uint_2x32:
1333   case ir_unop_unpack_uint_2x32:
1334   case ir_unop_pack_sampler_2x32:
1335   case ir_unop_unpack_sampler_2x32:
1336   case ir_unop_pack_image_2x32:
1337   case ir_unop_unpack_image_2x32:
1338   case ir_unop_atan:
1339   case ir_binop_atan2:
1340   case ir_unop_clz:
1341   case ir_unop_f162f:
1342   case ir_unop_f2f16:
1343   case ir_unop_f2fmp:
1344   case ir_unop_f162b:
1345   case ir_unop_b2f16:
1346   case ir_unop_i2i:
1347   case ir_unop_i2imp:
1348   case ir_unop_u2u:
1349   case ir_unop_u2ump:
1350      assert(!"not supported");
1351      break;
1352
1353   case ir_unop_ssbo_unsized_array_length:
1354   case ir_unop_implicitly_sized_array_length:
1355   case ir_quadop_vector:
1356      /* This operation should have already been handled.
1357       */
1358      assert(!"Should not get here.");
1359      break;
1360   }
1361
1362   this->result = result_src;
1363}
1364
1365
1366void
1367ir_to_mesa_visitor::visit(ir_swizzle *ir)
1368{
1369   src_reg src;
1370   int i;
1371   int swizzle[4] = {0};
1372
1373   /* Note that this is only swizzles in expressions, not those on the left
1374    * hand side of an assignment, which do write masking.  See ir_assignment
1375    * for that.
1376    */
1377
1378   ir->val->accept(this);
1379   src = this->result;
1380   assert(src.file != PROGRAM_UNDEFINED);
1381   assert(ir->type->vector_elements > 0);
1382
1383   for (i = 0; i < 4; i++) {
1384      if (i < ir->type->vector_elements) {
1385	 switch (i) {
1386	 case 0:
1387	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1388	    break;
1389	 case 1:
1390	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1391	    break;
1392	 case 2:
1393	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1394	    break;
1395	 case 3:
1396	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1397	    break;
1398	 }
1399      } else {
1400	 /* If the type is smaller than a vec4, replicate the last
1401	  * channel out.
1402	  */
1403	 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1404      }
1405   }
1406
1407   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1408
1409   this->result = src;
1410}
1411
1412void
1413ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1414{
1415   variable_storage *entry = find_variable_storage(ir->var);
1416   ir_variable *var = ir->var;
1417
1418   if (!entry) {
1419      switch (var->data.mode) {
1420      case ir_var_uniform:
1421	 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1422					       var->data.param_index);
1423	 this->variables.push_tail(entry);
1424	 break;
1425      case ir_var_shader_in:
1426	 /* The linker assigns locations for varyings and attributes,
1427	  * including deprecated builtins (like gl_Color),
1428	  * user-assigned generic attributes (glBindVertexLocation),
1429	  * and user-defined varyings.
1430	  */
1431	 assert(var->data.location != -1);
1432         entry = new(mem_ctx) variable_storage(var,
1433                                               PROGRAM_INPUT,
1434                                               var->data.location);
1435         break;
1436      case ir_var_shader_out:
1437	 assert(var->data.location != -1);
1438         entry = new(mem_ctx) variable_storage(var,
1439                                               PROGRAM_OUTPUT,
1440                                               var->data.location);
1441	 break;
1442      case ir_var_system_value:
1443         entry = new(mem_ctx) variable_storage(var,
1444                                               PROGRAM_SYSTEM_VALUE,
1445                                               var->data.location);
1446         break;
1447      case ir_var_auto:
1448      case ir_var_temporary:
1449	 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1450					       this->next_temp);
1451	 this->variables.push_tail(entry);
1452
1453	 next_temp += type_size(var->type);
1454	 break;
1455      }
1456
1457      if (!entry) {
1458	 printf("Failed to make storage for %s\n", var->name);
1459	 exit(1);
1460      }
1461   }
1462
1463   this->result = src_reg(entry->file, entry->index, var->type);
1464}
1465
1466void
1467ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1468{
1469   ir_constant *index;
1470   src_reg src;
1471   int element_size = type_size(ir->type);
1472
1473   index = ir->array_index->constant_expression_value(ralloc_parent(ir));
1474
1475   ir->array->accept(this);
1476   src = this->result;
1477
1478   if (index) {
1479      src.index += index->value.i[0] * element_size;
1480   } else {
1481      /* Variable index array dereference.  It eats the "vec4" of the
1482       * base of the array and an index that offsets the Mesa register
1483       * index.
1484       */
1485      ir->array_index->accept(this);
1486
1487      src_reg index_reg;
1488
1489      if (element_size == 1) {
1490	 index_reg = this->result;
1491      } else {
1492	 index_reg = get_temp(glsl_type::float_type);
1493
1494	 emit(ir, OPCODE_MUL, dst_reg(index_reg),
1495	      this->result, src_reg_for_float(element_size));
1496      }
1497
1498      /* If there was already a relative address register involved, add the
1499       * new and the old together to get the new offset.
1500       */
1501      if (src.reladdr != NULL)  {
1502	 src_reg accum_reg = get_temp(glsl_type::float_type);
1503
1504	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1505	      index_reg, *src.reladdr);
1506
1507	 index_reg = accum_reg;
1508      }
1509
1510      src.reladdr = ralloc(mem_ctx, src_reg);
1511      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1512   }
1513
1514   /* If the type is smaller than a vec4, replicate the last channel out. */
1515   if (ir->type->is_scalar() || ir->type->is_vector())
1516      src.swizzle = swizzle_for_size(ir->type->vector_elements);
1517   else
1518      src.swizzle = SWIZZLE_NOOP;
1519
1520   this->result = src;
1521}
1522
1523void
1524ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1525{
1526   unsigned int i;
1527   const glsl_type *struct_type = ir->record->type;
1528   int offset = 0;
1529
1530   ir->record->accept(this);
1531
1532   assert(ir->field_idx >= 0);
1533   for (i = 0; i < struct_type->length; i++) {
1534      if (i == (unsigned) ir->field_idx)
1535	 break;
1536      offset += type_size(struct_type->fields.structure[i].type);
1537   }
1538
1539   /* If the type is smaller than a vec4, replicate the last channel out. */
1540   if (ir->type->is_scalar() || ir->type->is_vector())
1541      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1542   else
1543      this->result.swizzle = SWIZZLE_NOOP;
1544
1545   this->result.index += offset;
1546}
1547
1548/**
1549 * We want to be careful in assignment setup to hit the actual storage
1550 * instead of potentially using a temporary like we might with the
1551 * ir_dereference handler.
1552 */
1553static dst_reg
1554get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1555{
1556   /* The LHS must be a dereference.  If the LHS is a variable indexed array
1557    * access of a vector, it must be separated into a series conditional moves
1558    * before reaching this point (see ir_vec_index_to_cond_assign).
1559    */
1560   assert(ir->as_dereference());
1561   ir_dereference_array *deref_array = ir->as_dereference_array();
1562   if (deref_array) {
1563      assert(!deref_array->array->type->is_vector());
1564   }
1565
1566   /* Use the rvalue deref handler for the most part.  We'll ignore
1567    * swizzles in it and write swizzles using writemask, though.
1568    */
1569   ir->accept(v);
1570   return dst_reg(v->result);
1571}
1572
1573/* Calculate the sampler index and also calculate the base uniform location
1574 * for struct members.
1575 */
1576static void
1577calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref,
1578                     unsigned *offset, unsigned *array_elements,
1579                     unsigned *location)
1580{
1581   if (deref->ir_type == ir_type_dereference_variable)
1582      return;
1583
1584   switch (deref->ir_type) {
1585   case ir_type_dereference_array: {
1586      ir_dereference_array *deref_arr = deref->as_dereference_array();
1587
1588      void *mem_ctx = ralloc_parent(deref_arr);
1589      ir_constant *array_index =
1590         deref_arr->array_index->constant_expression_value(mem_ctx);
1591
1592      if (!array_index) {
1593	 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
1594	  * while GLSL 1.30 requires that the array indices be
1595	  * constant integer expressions.  We don't expect any driver
1596	  * to actually work with a really variable array index, so
1597	  * all that would work would be an unrolled loop counter that ends
1598	  * up being constant above.
1599	  */
1600         ralloc_strcat(&prog->data->InfoLog,
1601		       "warning: Variable sampler array index unsupported.\n"
1602		       "This feature of the language was removed in GLSL 1.20 "
1603		       "and is unlikely to be supported for 1.10 in Mesa.\n");
1604      } else {
1605         *offset += array_index->value.u[0] * *array_elements;
1606      }
1607
1608      *array_elements *= deref_arr->array->type->length;
1609
1610      calc_sampler_offsets(prog, deref_arr->array->as_dereference(),
1611                           offset, array_elements, location);
1612      break;
1613   }
1614
1615   case ir_type_dereference_record: {
1616      ir_dereference_record *deref_record = deref->as_dereference_record();
1617      unsigned field_index = deref_record->field_idx;
1618      *location +=
1619         deref_record->record->type->struct_location_offset(field_index);
1620      calc_sampler_offsets(prog, deref_record->record->as_dereference(),
1621                           offset, array_elements, location);
1622      break;
1623   }
1624
1625   default:
1626      unreachable("Invalid deref type");
1627      break;
1628   }
1629}
1630
1631static int
1632get_sampler_uniform_value(class ir_dereference *sampler,
1633                          struct gl_shader_program *shader_program,
1634                          const struct gl_program *prog)
1635{
1636   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
1637   ir_variable *var = sampler->variable_referenced();
1638   unsigned location = var->data.location;
1639   unsigned array_elements = 1;
1640   unsigned offset = 0;
1641
1642   calc_sampler_offsets(shader_program, sampler, &offset, &array_elements,
1643                        &location);
1644
1645   assert(shader_program->data->UniformStorage[location].opaque[shader].active);
1646   return shader_program->data->UniformStorage[location].opaque[shader].index +
1647          offset;
1648}
1649
1650/**
1651 * Process the condition of a conditional assignment
1652 *
1653 * Examines the condition of a conditional assignment to generate the optimal
1654 * first operand of a \c CMP instruction.  If the condition is a relational
1655 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1656 * used as the source for the \c CMP instruction.  Otherwise the comparison
1657 * is processed to a boolean result, and the boolean result is used as the
1658 * operand to the CMP instruction.
1659 */
1660bool
1661ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1662{
1663   ir_rvalue *src_ir = ir;
1664   bool negate = true;
1665   bool switch_order = false;
1666
1667   ir_expression *const expr = ir->as_expression();
1668   if ((expr != NULL) && (expr->num_operands == 2)) {
1669      bool zero_on_left = false;
1670
1671      if (expr->operands[0]->is_zero()) {
1672	 src_ir = expr->operands[1];
1673	 zero_on_left = true;
1674      } else if (expr->operands[1]->is_zero()) {
1675	 src_ir = expr->operands[0];
1676	 zero_on_left = false;
1677      }
1678
1679      /*      a is -  0  +            -  0  +
1680       * (a <  0)  T  F  F  ( a < 0)  T  F  F
1681       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
1682       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
1683       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
1684       *
1685       * Note that exchanging the order of 0 and 'a' in the comparison simply
1686       * means that the value of 'a' should be negated.
1687       */
1688      if (src_ir != ir) {
1689	 switch (expr->operation) {
1690	 case ir_binop_less:
1691	    switch_order = false;
1692	    negate = zero_on_left;
1693	    break;
1694
1695	 case ir_binop_gequal:
1696	    switch_order = true;
1697	    negate = zero_on_left;
1698	    break;
1699
1700	 default:
1701	    /* This isn't the right kind of comparison afterall, so make sure
1702	     * the whole condition is visited.
1703	     */
1704	    src_ir = ir;
1705	    break;
1706	 }
1707      }
1708   }
1709
1710   src_ir->accept(this);
1711
1712   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1713    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
1714    * choose which value OPCODE_CMP produces without an extra instruction
1715    * computing the condition.
1716    */
1717   if (negate)
1718      this->result.negate = ~this->result.negate;
1719
1720   return switch_order;
1721}
1722
1723void
1724ir_to_mesa_visitor::visit(ir_assignment *ir)
1725{
1726   dst_reg l;
1727   src_reg r;
1728   int i;
1729
1730   ir->rhs->accept(this);
1731   r = this->result;
1732
1733   l = get_assignment_lhs(ir->lhs, this);
1734
1735   /* FINISHME: This should really set to the correct maximal writemask for each
1736    * FINISHME: component written (in the loops below).  This case can only
1737    * FINISHME: occur for matrices, arrays, and structures.
1738    */
1739   if (ir->write_mask == 0) {
1740      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1741      l.writemask = WRITEMASK_XYZW;
1742   } else if (ir->lhs->type->is_scalar()) {
1743      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1744       * FINISHME: W component of fragment shader output zero, work correctly.
1745       */
1746      l.writemask = WRITEMASK_XYZW;
1747   } else {
1748      int swizzles[4];
1749      int first_enabled_chan = 0;
1750      int rhs_chan = 0;
1751
1752      assert(ir->lhs->type->is_vector());
1753      l.writemask = ir->write_mask;
1754
1755      for (int i = 0; i < 4; i++) {
1756	 if (l.writemask & (1 << i)) {
1757	    first_enabled_chan = GET_SWZ(r.swizzle, i);
1758	    break;
1759	 }
1760      }
1761
1762      /* Swizzle a small RHS vector into the channels being written.
1763       *
1764       * glsl ir treats write_mask as dictating how many channels are
1765       * present on the RHS while Mesa IR treats write_mask as just
1766       * showing which channels of the vec4 RHS get written.
1767       */
1768      for (int i = 0; i < 4; i++) {
1769	 if (l.writemask & (1 << i))
1770	    swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1771	 else
1772	    swizzles[i] = first_enabled_chan;
1773      }
1774      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1775				swizzles[2], swizzles[3]);
1776   }
1777
1778   assert(l.file != PROGRAM_UNDEFINED);
1779   assert(r.file != PROGRAM_UNDEFINED);
1780
1781   if (ir->condition) {
1782      const bool switch_order = this->process_move_condition(ir->condition);
1783      src_reg condition = this->result;
1784
1785      for (i = 0; i < type_size(ir->lhs->type); i++) {
1786	 if (switch_order) {
1787	    emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1788	 } else {
1789	    emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1790	 }
1791
1792	 l.index++;
1793	 r.index++;
1794      }
1795   } else {
1796      for (i = 0; i < type_size(ir->lhs->type); i++) {
1797	 emit(ir, OPCODE_MOV, l, r);
1798	 l.index++;
1799	 r.index++;
1800      }
1801   }
1802}
1803
1804
1805void
1806ir_to_mesa_visitor::visit(ir_constant *ir)
1807{
1808   src_reg src;
1809   GLfloat stack_vals[4] = { 0 };
1810   GLfloat *values = stack_vals;
1811   unsigned int i;
1812
1813   /* Unfortunately, 4 floats is all we can get into
1814    * _mesa_add_unnamed_constant.  So, make a temp to store an
1815    * aggregate constant and move each constant value into it.  If we
1816    * get lucky, copy propagation will eliminate the extra moves.
1817    */
1818
1819   if (ir->type->is_struct()) {
1820      src_reg temp_base = get_temp(ir->type);
1821      dst_reg temp = dst_reg(temp_base);
1822
1823      for (i = 0; i < ir->type->length; i++) {
1824         ir_constant *const field_value = ir->get_record_field(i);
1825	 int size = type_size(field_value->type);
1826
1827	 assert(size > 0);
1828
1829	 field_value->accept(this);
1830	 src = this->result;
1831
1832         for (unsigned j = 0; j < (unsigned int)size; j++) {
1833	    emit(ir, OPCODE_MOV, temp, src);
1834
1835	    src.index++;
1836	    temp.index++;
1837	 }
1838      }
1839      this->result = temp_base;
1840      return;
1841   }
1842
1843   if (ir->type->is_array()) {
1844      src_reg temp_base = get_temp(ir->type);
1845      dst_reg temp = dst_reg(temp_base);
1846      int size = type_size(ir->type->fields.array);
1847
1848      assert(size > 0);
1849
1850      for (i = 0; i < ir->type->length; i++) {
1851	 ir->const_elements[i]->accept(this);
1852	 src = this->result;
1853	 for (int j = 0; j < size; j++) {
1854	    emit(ir, OPCODE_MOV, temp, src);
1855
1856	    src.index++;
1857	    temp.index++;
1858	 }
1859      }
1860      this->result = temp_base;
1861      return;
1862   }
1863
1864   if (ir->type->is_matrix()) {
1865      src_reg mat = get_temp(ir->type);
1866      dst_reg mat_column = dst_reg(mat);
1867
1868      for (i = 0; i < ir->type->matrix_columns; i++) {
1869	 assert(ir->type->is_float());
1870	 values = &ir->value.f[i * ir->type->vector_elements];
1871
1872	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1873	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1874						(gl_constant_value *) values,
1875						ir->type->vector_elements,
1876						&src.swizzle);
1877	 emit(ir, OPCODE_MOV, mat_column, src);
1878
1879	 mat_column.index++;
1880      }
1881
1882      this->result = mat;
1883      return;
1884   }
1885
1886   src.file = PROGRAM_CONSTANT;
1887   switch (ir->type->base_type) {
1888   case GLSL_TYPE_FLOAT:
1889      values = &ir->value.f[0];
1890      break;
1891   case GLSL_TYPE_UINT:
1892      for (i = 0; i < ir->type->vector_elements; i++) {
1893	 values[i] = ir->value.u[i];
1894      }
1895      break;
1896   case GLSL_TYPE_INT:
1897      for (i = 0; i < ir->type->vector_elements; i++) {
1898	 values[i] = ir->value.i[i];
1899      }
1900      break;
1901   case GLSL_TYPE_BOOL:
1902      for (i = 0; i < ir->type->vector_elements; i++) {
1903	 values[i] = ir->value.b[i];
1904      }
1905      break;
1906   default:
1907      assert(!"Non-float/uint/int/bool constant");
1908   }
1909
1910   this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1911   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1912						   (gl_constant_value *) values,
1913						   ir->type->vector_elements,
1914						   &this->result.swizzle);
1915}
1916
void
ir_to_mesa_visitor::visit(ir_call *)
{
   /* Function calls are expected to have been inlined by an earlier lowering
    * pass, so reaching this visitor is a compiler bug.
    */
   assert(!"ir_to_mesa: All function calls should have been inlined by now.");
}
1922
1923void
1924ir_to_mesa_visitor::visit(ir_texture *ir)
1925{
1926   src_reg result_src, coord, lod_info, projector, dx, dy;
1927   dst_reg result_dst, coord_dst;
1928   ir_to_mesa_instruction *inst = NULL;
1929   prog_opcode opcode = OPCODE_NOP;
1930
1931   if (ir->op == ir_txs)
1932      this->result = src_reg_for_float(0.0);
1933   else
1934      ir->coordinate->accept(this);
1935
1936   /* Put our coords in a temp.  We'll need to modify them for shadow,
1937    * projection, or LOD, so the only case we'd use it as-is is if
1938    * we're doing plain old texturing.  Mesa IR optimization should
1939    * handle cleaning up our mess in that case.
1940    */
1941   coord = get_temp(glsl_type::vec4_type);
1942   coord_dst = dst_reg(coord);
1943   emit(ir, OPCODE_MOV, coord_dst, this->result);
1944
1945   if (ir->projector) {
1946      ir->projector->accept(this);
1947      projector = this->result;
1948   }
1949
1950   /* Storage for our result.  Ideally for an assignment we'd be using
1951    * the actual storage for the result here, instead.
1952    */
1953   result_src = get_temp(glsl_type::vec4_type);
1954   result_dst = dst_reg(result_src);
1955
1956   switch (ir->op) {
1957   case ir_tex:
1958   case ir_txs:
1959      opcode = OPCODE_TEX;
1960      break;
1961   case ir_txb:
1962      opcode = OPCODE_TXB;
1963      ir->lod_info.bias->accept(this);
1964      lod_info = this->result;
1965      break;
1966   case ir_txf:
1967      /* Pretend to be TXL so the sampler, coordinate, lod are available */
1968   case ir_txl:
1969      opcode = OPCODE_TXL;
1970      ir->lod_info.lod->accept(this);
1971      lod_info = this->result;
1972      break;
1973   case ir_txd:
1974      opcode = OPCODE_TXD;
1975      ir->lod_info.grad.dPdx->accept(this);
1976      dx = this->result;
1977      ir->lod_info.grad.dPdy->accept(this);
1978      dy = this->result;
1979      break;
1980   case ir_txf_ms:
1981      assert(!"Unexpected ir_txf_ms opcode");
1982      break;
1983   case ir_lod:
1984      assert(!"Unexpected ir_lod opcode");
1985      break;
1986   case ir_tg4:
1987      assert(!"Unexpected ir_tg4 opcode");
1988      break;
1989   case ir_query_levels:
1990      assert(!"Unexpected ir_query_levels opcode");
1991      break;
1992   case ir_samples_identical:
1993      unreachable("Unexpected ir_samples_identical opcode");
1994   case ir_texture_samples:
1995      unreachable("Unexpected ir_texture_samples opcode");
1996   }
1997
1998   const glsl_type *sampler_type = ir->sampler->type;
1999
2000   if (ir->projector) {
2001      if (opcode == OPCODE_TEX) {
2002	 /* Slot the projector in as the last component of the coord. */
2003	 coord_dst.writemask = WRITEMASK_W;
2004	 emit(ir, OPCODE_MOV, coord_dst, projector);
2005	 coord_dst.writemask = WRITEMASK_XYZW;
2006	 opcode = OPCODE_TXP;
2007      } else {
2008	 src_reg coord_w = coord;
2009	 coord_w.swizzle = SWIZZLE_WWWW;
2010
2011	 /* For the other TEX opcodes there's no projective version
2012	  * since the last slot is taken up by lod info.  Do the
2013	  * projective divide now.
2014	  */
2015	 coord_dst.writemask = WRITEMASK_W;
2016	 emit(ir, OPCODE_RCP, coord_dst, projector);
2017
2018	 /* In the case where we have to project the coordinates "by hand,"
2019	  * the shadow comparator value must also be projected.
2020	  */
2021	 src_reg tmp_src = coord;
2022	 if (ir->shadow_comparator) {
2023	    /* Slot the shadow value in as the second to last component of the
2024	     * coord.
2025	     */
2026	    ir->shadow_comparator->accept(this);
2027
2028	    tmp_src = get_temp(glsl_type::vec4_type);
2029	    dst_reg tmp_dst = dst_reg(tmp_src);
2030
2031	    /* Projective division not allowed for array samplers. */
2032	    assert(!sampler_type->sampler_array);
2033
2034	    tmp_dst.writemask = WRITEMASK_Z;
2035	    emit(ir, OPCODE_MOV, tmp_dst, this->result);
2036
2037	    tmp_dst.writemask = WRITEMASK_XY;
2038	    emit(ir, OPCODE_MOV, tmp_dst, coord);
2039	 }
2040
2041	 coord_dst.writemask = WRITEMASK_XYZ;
2042	 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2043
2044	 coord_dst.writemask = WRITEMASK_XYZW;
2045	 coord.swizzle = SWIZZLE_XYZW;
2046      }
2047   }
2048
2049   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2050    * comparator was put in the correct place (and projected) by the code,
2051    * above, that handles by-hand projection.
2052    */
2053   if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) {
2054      /* Slot the shadow value in as the second to last component of the
2055       * coord.
2056       */
2057      ir->shadow_comparator->accept(this);
2058
2059      /* XXX This will need to be updated for cubemap array samplers. */
2060      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2061          sampler_type->sampler_array) {
2062         coord_dst.writemask = WRITEMASK_W;
2063      } else {
2064         coord_dst.writemask = WRITEMASK_Z;
2065      }
2066
2067      emit(ir, OPCODE_MOV, coord_dst, this->result);
2068      coord_dst.writemask = WRITEMASK_XYZW;
2069   }
2070
2071   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2072      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2073      coord_dst.writemask = WRITEMASK_W;
2074      emit(ir, OPCODE_MOV, coord_dst, lod_info);
2075      coord_dst.writemask = WRITEMASK_XYZW;
2076   }
2077
2078   if (opcode == OPCODE_TXD)
2079      inst = emit(ir, opcode, result_dst, coord, dx, dy);
2080   else
2081      inst = emit(ir, opcode, result_dst, coord);
2082
2083   if (ir->shadow_comparator)
2084      inst->tex_shadow = GL_TRUE;
2085
2086   inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program,
2087                                             prog);
2088
2089   switch (sampler_type->sampler_dimensionality) {
2090   case GLSL_SAMPLER_DIM_1D:
2091      inst->tex_target = (sampler_type->sampler_array)
2092	 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2093      break;
2094   case GLSL_SAMPLER_DIM_2D:
2095      inst->tex_target = (sampler_type->sampler_array)
2096	 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2097      break;
2098   case GLSL_SAMPLER_DIM_3D:
2099      inst->tex_target = TEXTURE_3D_INDEX;
2100      break;
2101   case GLSL_SAMPLER_DIM_CUBE:
2102      inst->tex_target = TEXTURE_CUBE_INDEX;
2103      break;
2104   case GLSL_SAMPLER_DIM_RECT:
2105      inst->tex_target = TEXTURE_RECT_INDEX;
2106      break;
2107   case GLSL_SAMPLER_DIM_BUF:
2108      assert(!"FINISHME: Implement ARB_texture_buffer_object");
2109      break;
2110   case GLSL_SAMPLER_DIM_EXTERNAL:
2111      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2112      break;
2113   default:
2114      assert(!"Should not get here.");
2115   }
2116
2117   this->result = result_src;
2118}
2119
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   /* Non-void functions should have been inlined.  We may still emit RETs
    * from main() unless the EmitNoMainReturn option is set.
    */
   assert(!ir->get_value());
   emit(ir, OPCODE_RET);
}
2129
void
ir_to_mesa_visitor::visit(ir_discard *ir)
{
   /* An unconditional discard is handled as "discard if true": give the IR
    * node a constant-true condition.  Note this rewrites the IR in place.
    */
   if (!ir->condition)
      ir->condition = new(mem_ctx) ir_constant(true);

   ir->condition->accept(this);
   /* Flip the negate bits on every channel of the condition before handing
    * it to KIL (presumably KIL discards on negative components, so a true
    * condition must come out negative — confirm against OPCODE_KIL's
    * definition).
    */
   this->result.negate = ~this->result.negate;
   emit(ir, OPCODE_KIL, undef_dst, this->result);
}
2140
void
ir_to_mesa_visitor::visit(ir_demote *ir)
{
   /* demote (GL_EXT_demote_to_helper_invocation) has no Mesa IR opcode;
    * it must never reach this backend.
    */
   assert(!"demote statement unsupported");
}
2146
void
ir_to_mesa_visitor::visit(ir_if *ir)
{
   ir_to_mesa_instruction *if_inst;

   /* Evaluate the condition; its value lands in this->result. */
   ir->condition->accept(this);
   assert(this->result.file != PROGRAM_UNDEFINED);

   if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);

   /* NOTE(review): if emit() already appends the new instruction to
    * this->instructions (its definition is not visible in this hunk), this
    * push_tail would be a redundant second insertion — verify against
    * emit()'s implementation.
    */
   this->instructions.push_tail(if_inst);

   visit_exec_list(&ir->then_instructions, this);

   /* Only emit an ELSE block when there is something in it; the IF/ENDIF
    * pairing is resolved later by set_branchtargets().
    */
   if (!ir->else_instructions.is_empty()) {
      emit(ir->condition, OPCODE_ELSE);
      visit_exec_list(&ir->else_instructions, this);
   }

   emit(ir->condition, OPCODE_ENDIF);
}
2168
void
ir_to_mesa_visitor::visit(ir_emit_vertex *)
{
   /* EmitVertex() only occurs in geometry shaders, which this backend
    * does not translate.
    */
   assert(!"Geometry shaders not supported.");
}
2174
void
ir_to_mesa_visitor::visit(ir_end_primitive *)
{
   /* EndPrimitive() only occurs in geometry shaders, which this backend
    * does not translate.
    */
   assert(!"Geometry shaders not supported.");
}
2180
void
ir_to_mesa_visitor::visit(ir_barrier *)
{
   /* barrier() only occurs in tessellation/compute shaders, which this
    * backend does not translate.
    */
   unreachable("GLSL barrier() not supported.");
}
2186
2187ir_to_mesa_visitor::ir_to_mesa_visitor()
2188{
2189   result.file = PROGRAM_UNDEFINED;
2190   next_temp = 1;
2191   next_signature_id = 1;
2192   current_function = NULL;
2193   mem_ctx = ralloc_context(NULL);
2194   ctx = NULL;
2195   prog = NULL;
2196   shader_program = NULL;
2197   options = NULL;
2198}
2199
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
   /* Releases everything allocated out of the visitor's ralloc context
    * (instructions, ACP arrays, etc.) in one call.
    */
   ralloc_free(mem_ctx);
}
2204
2205static struct prog_src_register
2206mesa_src_reg_from_ir_src_reg(src_reg reg)
2207{
2208   struct prog_src_register mesa_reg;
2209
2210   mesa_reg.File = reg.file;
2211   assert(reg.index < (1 << INST_INDEX_BITS));
2212   mesa_reg.Index = reg.index;
2213   mesa_reg.Swizzle = reg.swizzle;
2214   mesa_reg.RelAddr = reg.reladdr != NULL;
2215   mesa_reg.Negate = reg.negate;
2216
2217   return mesa_reg;
2218}
2219
/**
 * Fill in the BranchTarget field of all flow-control instructions after
 * code generation: IF/ELSE/ENDIF pairs, loop begin/end pairs, BRK/CONT
 * targets, and CAL targets (mapping function signature ids to instruction
 * offsets).
 */
static void
set_branchtargets(ir_to_mesa_visitor *v,
		  struct prog_instruction *mesa_instructions,
		  int num_instructions)
{
   int if_count = 0, loop_count = 0;
   int *if_stack, *loop_stack;
   int if_stack_pos = 0, loop_stack_pos = 0;
   int i, j;

   /* First pass: count IFs and loops so the tracking stacks can be sized,
    * and mark every BRK/CONT target as unresolved (-1).
    */
   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
	 if_count++;
	 break;
      case OPCODE_BGNLOOP:
	 loop_count++;
	 break;
      case OPCODE_BRK:
      case OPCODE_CONT:
	 mesa_instructions[i].BranchTarget = -1;
	 break;
      default:
	 break;
      }
   }

   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);

   /* Second pass: resolve targets using the stacks to track the innermost
    * open IF and loop.
    */
   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
	 if_stack[if_stack_pos] = i;
	 if_stack_pos++;
	 break;
      case OPCODE_ELSE:
	 /* Point the open IF at this ELSE, then make the ELSE the open
	  * entry so ENDIF links to it.
	  */
	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
	 if_stack[if_stack_pos - 1] = i;
	 break;
      case OPCODE_ENDIF:
	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
	 if_stack_pos--;
	 break;
      case OPCODE_BGNLOOP:
	 loop_stack[loop_stack_pos] = i;
	 loop_stack_pos++;
	 break;
      case OPCODE_ENDLOOP:
	 loop_stack_pos--;
	 /* Rewrite any breaks/conts at this nesting level (haven't
	  * already had a BranchTarget assigned) to point to the end
	  * of the loop.
	  */
	 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
	    if (mesa_instructions[j].Opcode == OPCODE_BRK ||
		mesa_instructions[j].Opcode == OPCODE_CONT) {
	       if (mesa_instructions[j].BranchTarget == -1) {
		  mesa_instructions[j].BranchTarget = i;
	       }
	    }
	 }
	 /* The loop ends point at each other. */
	 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
	 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
	 break;
      case OPCODE_CAL:
	 /* CAL's BranchTarget was emitted as a signature id; replace it
	  * with the callee's actual instruction offset.
	  */
	 foreach_in_list(function_entry, entry, &v->function_signatures) {
	    if (entry->sig_id == mesa_instructions[i].BranchTarget) {
	       mesa_instructions[i].BranchTarget = entry->inst;
	       break;
	    }
	 }
	 break;
      default:
	 break;
      }
   }
}
2299
2300static void
2301print_program(struct prog_instruction *mesa_instructions,
2302	      ir_instruction **mesa_instruction_annotation,
2303	      int num_instructions)
2304{
2305   ir_instruction *last_ir = NULL;
2306   int i;
2307   int indent = 0;
2308
2309   for (i = 0; i < num_instructions; i++) {
2310      struct prog_instruction *mesa_inst = mesa_instructions + i;
2311      ir_instruction *ir = mesa_instruction_annotation[i];
2312
2313      fprintf(stdout, "%3d: ", i);
2314
2315      if (last_ir != ir && ir) {
2316	 int j;
2317
2318	 for (j = 0; j < indent; j++) {
2319	    fprintf(stdout, " ");
2320	 }
2321	 ir->print();
2322	 printf("\n");
2323	 last_ir = ir;
2324
2325	 fprintf(stdout, "     "); /* line number spacing. */
2326      }
2327
2328      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2329					    PROG_PRINT_DEBUG, NULL);
2330   }
2331}
2332
namespace {

/**
 * program_resource_visitor that appends gl_program_parameter entries for
 * one uniform variable, one entry set per leaf field.
 *
 * The per-field work is done in add_uniform_to_shader::visit_field(),
 * defined after this class.
 */
class add_uniform_to_shader : public program_resource_visitor {
public:
   add_uniform_to_shader(struct gl_context *ctx,
                         struct gl_shader_program *shader_program,
			 struct gl_program_parameter_list *params)
      : ctx(ctx), shader_program(shader_program), params(params), idx(-1),
        var(NULL)
   {
      /* empty */
   }

   void process(ir_variable *var)
   {
      /* idx is written by the first visit_field() callback; -1 marks it
       * as not-yet-assigned.
       */
      this->idx = -1;
      this->var = var;
      this->program_resource_visitor::process(var,
                                         ctx->Const.UseSTD430AsDefaultPacking);
      /* Record where this uniform's parameters begin in the list. */
      var->data.param_index = this->idx;
   }

private:
   virtual void visit_field(const glsl_type *type, const char *name,
                            bool row_major, const glsl_type *record_type,
                            const enum glsl_interface_packing packing,
                            bool last_field);

   struct gl_context *ctx;
   struct gl_shader_program *shader_program;
   struct gl_program_parameter_list *params; /* list being filled in */
   int idx;          /* base parameter index of the uniform in progress */
   ir_variable *var; /* uniform variable currently being processed */
};

} /* anonymous namespace */
2369
2370void
2371add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
2372                                   bool /* row_major */,
2373                                   const glsl_type * /* record_type */,
2374                                   const enum glsl_interface_packing,
2375                                   bool /* last_field */)
2376{
2377   /* opaque types don't use storage in the param list unless they are
2378    * bindless samplers or images.
2379    */
2380   if (type->contains_opaque() && !var->data.bindless)
2381      return;
2382
2383   /* Add the uniform to the param list */
2384   assert(_mesa_lookup_parameter_index(params, name) < 0);
2385   int index = _mesa_lookup_parameter_index(params, name);
2386
2387   unsigned num_params = type->arrays_of_arrays_size();
2388   num_params = MAX2(num_params, 1);
2389   num_params *= type->without_array()->matrix_columns;
2390
2391   bool is_dual_slot = type->without_array()->is_dual_slot();
2392   if (is_dual_slot)
2393      num_params *= 2;
2394
2395   _mesa_reserve_parameter_storage(params, num_params, num_params);
2396   index = params->NumParameters;
2397
2398   if (ctx->Const.PackedDriverUniformStorage) {
2399      for (unsigned i = 0; i < num_params; i++) {
2400         unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
2401         unsigned comps = type->without_array()->vector_elements * dmul;
2402         if (is_dual_slot) {
2403            if (i & 0x1)
2404               comps -= 4;
2405            else
2406               comps = 4;
2407         }
2408
2409         _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps,
2410                             type->gl_type, NULL, NULL, false);
2411      }
2412   } else {
2413      for (unsigned i = 0; i < num_params; i++) {
2414         _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4,
2415                             type->gl_type, NULL, NULL, true);
2416      }
2417   }
2418
2419   /* The first part of the uniform that's processed determines the base
2420    * location of the whole uniform (for structures).
2421    */
2422   if (this->idx < 0)
2423      this->idx = index;
2424
2425   /* Each Parameter will hold the index to the backing uniform storage.
2426    * This avoids relying on names to match parameters and uniform
2427    * storages later when associating uniform storage.
2428    */
2429   unsigned location = -1;
2430   ASSERTED const bool found =
2431      shader_program->UniformHash->get(location, params->Parameters[index].Name);
2432   assert(found);
2433
2434   for (unsigned i = 0; i < num_params; i++) {
2435      struct gl_program_parameter *param = &params->Parameters[index + i];
2436      param->UniformStorageIndex = location;
2437      param->MainUniformStorageIndex = params->Parameters[this->idx].UniformStorageIndex;
2438   }
2439}
2440
2441/**
2442 * Generate the program parameters list for the user uniforms in a shader
2443 *
2444 * \param shader_program Linked shader program.  This is only used to
2445 *                       emit possible link errors to the info log.
2446 * \param sh             Shader whose uniforms are to be processed.
2447 * \param params         Parameter list to be filled in.
2448 */
2449void
2450_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx,
2451                                            struct gl_shader_program
2452					    *shader_program,
2453					    struct gl_linked_shader *sh,
2454					    struct gl_program_parameter_list
2455					    *params)
2456{
2457   add_uniform_to_shader add(ctx, shader_program, params);
2458
2459   foreach_in_list(ir_instruction, node, sh->ir) {
2460      ir_variable *var = node->as_variable();
2461
2462      if ((var == NULL) || (var->data.mode != ir_var_uniform)
2463	  || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
2464	 continue;
2465
2466      add.process(var);
2467   }
2468}
2469
2470void
2471_mesa_associate_uniform_storage(struct gl_context *ctx,
2472                                struct gl_shader_program *shader_program,
2473                                struct gl_program *prog)
2474{
2475   struct gl_program_parameter_list *params = prog->Parameters;
2476   gl_shader_stage shader_type = prog->info.stage;
2477
2478   _mesa_disallow_parameter_storage_realloc(params);
2479
2480   /* After adding each uniform to the parameter list, connect the storage for
2481    * the parameter with the tracking structure used by the API for the
2482    * uniform.
2483    */
2484   unsigned last_location = unsigned(~0);
2485   for (unsigned i = 0; i < params->NumParameters; i++) {
2486      if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2487         continue;
2488
2489      unsigned location = params->Parameters[i].UniformStorageIndex;
2490
2491      struct gl_uniform_storage *storage =
2492         &shader_program->data->UniformStorage[location];
2493
2494      /* Do not associate any uniform storage to built-in uniforms */
2495      if (storage->builtin)
2496         continue;
2497
2498      if (location != last_location) {
2499         enum gl_uniform_driver_format format = uniform_native;
2500         unsigned columns = 0;
2501
2502         int dmul;
2503         if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm) {
2504            dmul = storage->type->vector_elements * sizeof(float);
2505         } else {
2506            dmul = 4 * sizeof(float);
2507         }
2508
2509         switch (storage->type->base_type) {
2510         case GLSL_TYPE_UINT64:
2511            if (storage->type->vector_elements > 2)
2512               dmul *= 2;
2513            FALLTHROUGH;
2514         case GLSL_TYPE_UINT:
2515         case GLSL_TYPE_UINT16:
2516         case GLSL_TYPE_UINT8:
2517            assert(ctx->Const.NativeIntegers);
2518            format = uniform_native;
2519            columns = 1;
2520            break;
2521         case GLSL_TYPE_INT64:
2522            if (storage->type->vector_elements > 2)
2523               dmul *= 2;
2524            FALLTHROUGH;
2525         case GLSL_TYPE_INT:
2526         case GLSL_TYPE_INT16:
2527         case GLSL_TYPE_INT8:
2528            format =
2529               (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2530            columns = 1;
2531            break;
2532         case GLSL_TYPE_DOUBLE:
2533            if (storage->type->vector_elements > 2)
2534               dmul *= 2;
2535            FALLTHROUGH;
2536         case GLSL_TYPE_FLOAT:
2537         case GLSL_TYPE_FLOAT16:
2538            format = uniform_native;
2539            columns = storage->type->matrix_columns;
2540            break;
2541         case GLSL_TYPE_BOOL:
2542            format = uniform_native;
2543            columns = 1;
2544            break;
2545         case GLSL_TYPE_SAMPLER:
2546         case GLSL_TYPE_IMAGE:
2547         case GLSL_TYPE_SUBROUTINE:
2548            format = uniform_native;
2549            columns = 1;
2550            break;
2551         case GLSL_TYPE_ATOMIC_UINT:
2552         case GLSL_TYPE_ARRAY:
2553         case GLSL_TYPE_VOID:
2554         case GLSL_TYPE_STRUCT:
2555         case GLSL_TYPE_ERROR:
2556         case GLSL_TYPE_INTERFACE:
2557         case GLSL_TYPE_FUNCTION:
2558            assert(!"Should not get here.");
2559            break;
2560         }
2561
2562         unsigned pvo = params->Parameters[i].ValueOffset;
2563         _mesa_uniform_attach_driver_storage(storage, dmul * columns, dmul,
2564                                             format,
2565                                             &params->ParameterValues[pvo]);
2566
2567         /* When a bindless sampler/image is bound to a texture/image unit, we
2568          * have to overwrite the constant value by the resident handle
2569          * directly in the constant buffer before the next draw. One solution
2570          * is to keep track a pointer to the base of the data.
2571          */
2572         if (storage->is_bindless && (prog->sh.NumBindlessSamplers ||
2573                                      prog->sh.NumBindlessImages)) {
2574            unsigned array_elements = MAX2(1, storage->array_elements);
2575
2576            for (unsigned j = 0; j < array_elements; ++j) {
2577               unsigned unit = storage->opaque[shader_type].index + j;
2578
2579               if (storage->type->without_array()->is_sampler()) {
2580                  assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers);
2581                  prog->sh.BindlessSamplers[unit].data =
2582                     &params->ParameterValues[pvo] + 4 * j;
2583               } else if (storage->type->without_array()->is_image()) {
2584                  assert(unit >= 0 && unit < prog->sh.NumBindlessImages);
2585                  prog->sh.BindlessImages[unit].data =
2586                     &params->ParameterValues[pvo] + 4 * j;
2587               }
2588            }
2589         }
2590
2591         /* After attaching the driver's storage to the uniform, propagate any
2592          * data from the linker's backing store.  This will cause values from
2593          * initializers in the source code to be copied over.
2594          */
2595         unsigned array_elements = MAX2(1, storage->array_elements);
2596         if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm &&
2597             (storage->is_bindless || !storage->type->contains_opaque())) {
2598            const int dmul = storage->type->is_64bit() ? 2 : 1;
2599            const unsigned components =
2600               storage->type->vector_elements *
2601               storage->type->matrix_columns;
2602
2603            for (unsigned s = 0; s < storage->num_driver_storage; s++) {
2604               gl_constant_value *uni_storage = (gl_constant_value *)
2605                  storage->driver_storage[s].data;
2606               memcpy(uni_storage, storage->storage,
2607                      sizeof(storage->storage[0]) * components *
2608                      array_elements * dmul);
2609            }
2610         } else {
2611            _mesa_propagate_uniforms_to_driver_storage(storage, 0,
2612                                                       array_elements);
2613         }
2614
2615	      last_location = location;
2616      }
2617   }
2618}
2619
/**
 * Reserve \p required_space extra parameter slots up front, then attach
 * uniform storage via _mesa_associate_uniform_storage().
 */
void
_mesa_ensure_and_associate_uniform_storage(struct gl_context *ctx,
                              struct gl_shader_program *shader_program,
                              struct gl_program *prog, unsigned required_space)
{
   /* Avoid reallocation of the program parameter list, because the uniform
    * storage is only associated with the original parameter list.
    */
   _mesa_reserve_parameter_storage(prog->Parameters, required_space,
                                   required_space);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog);
}
2637
2638/*
2639 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2640 * channels for copy propagation and updates following instructions to
2641 * use the original versions.
2642 *
2643 * The ir_to_mesa_visitor lazily produces code assuming that this pass
2644 * will occur.  As an example, a TXP production before this pass:
2645 *
2646 * 0: MOV TEMP[1], INPUT[4].xyyy;
2647 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2648 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2649 *
2650 * and after:
2651 *
2652 * 0: MOV TEMP[1], INPUT[4].xyyy;
2653 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2654 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2655 *
2656 * which allows for dead code elimination on TEMP[1]'s writes.
2657 */
void
ir_to_mesa_visitor::copy_propagate(void)
{
   /* acp ("available copy propagation") holds, for each temporary-register
    * channel (indexed acp[temp * 4 + channel]), the MOV instruction that
    * last wrote that channel and is still a valid copy source.  acp_level
    * records the if-nesting depth at which each entry was added, so entries
    * made inside a conditional block can be invalidated when it closes.
    */
   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
						    ir_to_mesa_instruction *,
						    this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
      assert(inst->dst.file != PROGRAM_TEMPORARY
	     || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
	 ir_to_mesa_instruction *first = NULL;
	 bool good = true;
	 int acp_base = inst->src[r].index * 4;

	 /* Only non-relative temporary reads can be propagated. */
	 if (inst->src[r].file != PROGRAM_TEMPORARY ||
	     inst->src[r].reladdr)
	    continue;

	 /* See if we can find entries in the ACP consisting of MOVs
	  * from the same src register for all the swizzled channels
	  * of this src register reference.
	  */
	 for (int i = 0; i < 4; i++) {
	    int src_chan = GET_SWZ(inst->src[r].swizzle, i);
	    ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];

	    if (!copy_chan) {
	       good = false;
	       break;
	    }

	    assert(acp_level[acp_base + src_chan] <= level);

	    if (!first) {
	       first = copy_chan;
	    } else {
	       /* All channels must come from the same source register. */
	       if (first->src[0].file != copy_chan->src[0].file ||
		   first->src[0].index != copy_chan->src[0].index) {
		  good = false;
		  break;
	       }
	    }
	 }

	 if (good) {
	    /* We've now validated that we can copy-propagate to
	     * replace this src register reference.  Do it.
	     */
	    inst->src[r].file = first->src[0].file;
	    inst->src[r].index = first->src[0].index;

	    /* Compose swizzles: route each referenced channel through the
	     * corresponding MOV's source swizzle.
	     */
	    int swizzle = 0;
	    for (int i = 0; i < 4; i++) {
	       int src_chan = GET_SWZ(inst->src[r].swizzle, i);
	       ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
	       swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
			   (3 * i));
	    }
	    inst->src[r].swizzle = swizzle;
	 }
      }

      /* Second, update the ACP to reflect what this instruction changes. */
      switch (inst->op) {
      case OPCODE_BGNLOOP:
      case OPCODE_ENDLOOP:
	 /* End of a basic block, clear the ACP entirely. */
	 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
	 break;

      case OPCODE_IF:
	 ++level;
	 break;

      case OPCODE_ENDIF:
      case OPCODE_ELSE:
	 /* Clear all channels written inside the block from the ACP, but
	  * leaving those that were not touched.
	  */
	 for (int r = 0; r < this->next_temp; r++) {
	    for (int c = 0; c < 4; c++) {
	       if (!acp[4 * r + c])
		  continue;

	       if (acp_level[4 * r + c] >= level)
		  acp[4 * r + c] = NULL;
	    }
	 }
	 if (inst->op == OPCODE_ENDIF)
	    --level;
	 break;

      default:
	 /* Continuing the block, clear any written channels from
	  * the ACP.
	  */
	 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
	    /* Any temporary might be written, so no copy propagation
	     * across this instruction.
	     */
	    memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
	 } else if (inst->dst.file == PROGRAM_OUTPUT &&
		    inst->dst.reladdr) {
	    /* Any output might be written, so no copy propagation
	     * from outputs across this instruction.
	     */
	    for (int r = 0; r < this->next_temp; r++) {
	       for (int c = 0; c < 4; c++) {
		  if (!acp[4 * r + c])
		     continue;

		  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
		     acp[4 * r + c] = NULL;
	       }
	    }
	 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
		    inst->dst.file == PROGRAM_OUTPUT) {
	    /* Clear where it's used as dst. */
	    if (inst->dst.file == PROGRAM_TEMPORARY) {
	       for (int c = 0; c < 4; c++) {
		  if (inst->dst.writemask & (1 << c)) {
		     acp[4 * inst->dst.index + c] = NULL;
		  }
	       }
	    }

	    /* Clear where it's used as src. */
	    for (int r = 0; r < this->next_temp; r++) {
	       for (int c = 0; c < 4; c++) {
		  if (!acp[4 * r + c])
		     continue;

		  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

		  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
		      acp[4 * r + c]->src[0].index == inst->dst.index &&
		      inst->dst.writemask & (1 << src_chan))
		  {
		     acp[4 * r + c] = NULL;
		  }
	       }
	    }
	 }
	 break;
      }

      /* If this is a copy, add it to the ACP. */
      if (inst->op == OPCODE_MOV &&
	  inst->dst.file == PROGRAM_TEMPORARY &&
	  !(inst->dst.file == inst->src[0].file &&
	    inst->dst.index == inst->src[0].index) &&
	  !inst->dst.reladdr &&
	  !inst->saturate &&
	  !inst->src[0].reladdr &&
	  !inst->src[0].negate) {
	 for (int i = 0; i < 4; i++) {
	    if (inst->dst.writemask & (1 << i)) {
	       acp[4 * inst->dst.index + i] = inst;
	       acp_level[4 * inst->dst.index + i] = level;
	    }
	 }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
2829
2830
2831/**
2832 * Convert a shader's GLSL IR into a Mesa gl_program.
2833 */
2834static struct gl_program *
2835get_mesa_program(struct gl_context *ctx,
2836                 struct gl_shader_program *shader_program,
2837		 struct gl_linked_shader *shader)
2838{
2839   ir_to_mesa_visitor v;
2840   struct prog_instruction *mesa_instructions, *mesa_inst;
2841   ir_instruction **mesa_instruction_annotation;
2842   int i;
2843   struct gl_program *prog;
2844   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
2845   const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
2846   struct gl_shader_compiler_options *options =
2847         &ctx->Const.ShaderCompilerOptions[shader->Stage];
2848
2849   validate_ir_tree(shader->ir);
2850
2851   prog = shader->Program;
2852   prog->Parameters = _mesa_new_parameter_list();
2853   v.ctx = ctx;
2854   v.prog = prog;
2855   v.shader_program = shader_program;
2856   v.options = options;
2857
2858   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
2859					       prog->Parameters);
2860
2861   /* Emit Mesa IR for main(). */
2862   visit_exec_list(shader->ir, &v);
2863   v.emit(NULL, OPCODE_END);
2864
2865   prog->arb.NumTemporaries = v.next_temp;
2866
2867   unsigned num_instructions = v.instructions.length();
2868
2869   mesa_instructions = rzalloc_array(prog, struct prog_instruction,
2870                                     num_instructions);
2871   mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
2872					      num_instructions);
2873
2874   v.copy_propagate();
2875
2876   /* Convert ir_mesa_instructions into prog_instructions.
2877    */
2878   mesa_inst = mesa_instructions;
2879   i = 0;
2880   foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
2881      mesa_inst->Opcode = inst->op;
2882      if (inst->saturate)
2883	 mesa_inst->Saturate = GL_TRUE;
2884      mesa_inst->DstReg.File = inst->dst.file;
2885      mesa_inst->DstReg.Index = inst->dst.index;
2886      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
2887      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
2888      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
2889      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
2890      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
2891      mesa_inst->TexSrcUnit = inst->sampler;
2892      mesa_inst->TexSrcTarget = inst->tex_target;
2893      mesa_inst->TexShadow = inst->tex_shadow;
2894      mesa_instruction_annotation[i] = inst->ir;
2895
2896      /* Set IndirectRegisterFiles. */
2897      if (mesa_inst->DstReg.RelAddr)
2898         prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
2899
2900      /* Update program's bitmask of indirectly accessed register files */
2901      for (unsigned src = 0; src < 3; src++)
2902         if (mesa_inst->SrcReg[src].RelAddr)
2903            prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
2904
2905      switch (mesa_inst->Opcode) {
2906      case OPCODE_IF:
2907	 if (options->MaxIfDepth == 0) {
2908	    linker_warning(shader_program,
2909			   "Couldn't flatten if-statement.  "
2910			   "This will likely result in software "
2911			   "rasterization.\n");
2912	 }
2913	 break;
2914      case OPCODE_BGNLOOP:
2915	 if (options->EmitNoLoops) {
2916	    linker_warning(shader_program,
2917			   "Couldn't unroll loop.  "
2918			   "This will likely result in software "
2919			   "rasterization.\n");
2920	 }
2921	 break;
2922      case OPCODE_CONT:
2923	 if (options->EmitNoCont) {
2924	    linker_warning(shader_program,
2925			   "Couldn't lower continue-statement.  "
2926			   "This will likely result in software "
2927			   "rasterization.\n");
2928	 }
2929	 break;
2930      case OPCODE_ARL:
2931         prog->arb.NumAddressRegs = 1;
2932	 break;
2933      default:
2934	 break;
2935      }
2936
2937      mesa_inst++;
2938      i++;
2939
2940      if (!shader_program->data->LinkStatus)
2941         break;
2942   }
2943
2944   if (!shader_program->data->LinkStatus) {
2945      goto fail_exit;
2946   }
2947
2948   set_branchtargets(&v, mesa_instructions, num_instructions);
2949
2950   if (ctx->_Shader->Flags & GLSL_DUMP) {
2951      fprintf(stderr, "\n");
2952      fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
2953	      shader_program->Name);
2954      _mesa_print_ir(stderr, shader->ir, NULL);
2955      fprintf(stderr, "\n");
2956      fprintf(stderr, "\n");
2957      fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
2958	      shader_program->Name);
2959      print_program(mesa_instructions, mesa_instruction_annotation,
2960		    num_instructions);
2961      fflush(stderr);
2962   }
2963
2964   prog->arb.Instructions = mesa_instructions;
2965   prog->arb.NumInstructions = num_instructions;
2966
2967   /* Setting this to NULL prevents a possible double free in the fail_exit
2968    * path (far below).
2969    */
2970   mesa_instructions = NULL;
2971
2972   do_set_program_inouts(shader->ir, prog, shader->Stage);
2973
2974   prog->ShadowSamplers = shader->shadow_samplers;
2975   prog->ExternalSamplersUsed = gl_external_samplers(prog);
2976   _mesa_update_shader_textures_used(shader_program, prog);
2977
2978   /* Set the gl_FragDepth layout. */
2979   if (target == GL_FRAGMENT_PROGRAM_ARB) {
2980      prog->info.fs.depth_layout = shader_program->FragDepthLayout;
2981   }
2982
2983   _mesa_optimize_program(prog, prog);
2984
2985   /* This has to be done last.  Any operation that can cause
2986    * prog->ParameterValues to get reallocated (e.g., anything that adds a
2987    * program constant) has to happen before creating this linkage.
2988    */
2989   _mesa_associate_uniform_storage(ctx, shader_program, prog);
2990   if (!shader_program->data->LinkStatus) {
2991      goto fail_exit;
2992   }
2993
2994   return prog;
2995
2996fail_exit:
2997   ralloc_free(mesa_instructions);
2998   _mesa_reference_program(ctx, &shader->Program, NULL);
2999   return NULL;
3000}
3001
3002extern "C" {
3003
3004/**
3005 * Link a shader.
3006 * Called via ctx->Driver.LinkShader()
3007 * This actually involves converting GLSL IR into Mesa gl_programs with
3008 * code lowering and other optimizations.
3009 */
3010GLboolean
3011_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3012{
3013   assert(prog->data->LinkStatus);
3014
3015   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
3016      if (prog->_LinkedShaders[i] == NULL)
3017	 continue;
3018
3019      bool progress;
3020      exec_list *ir = prog->_LinkedShaders[i]->ir;
3021      const struct gl_shader_compiler_options *options =
3022            &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];
3023
3024      do {
3025	 progress = false;
3026
3027	 /* Lowering */
3028	 do_mat_op_to_vec(ir);
3029	 lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
3030				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
3031				 | MUL64_TO_MUL_AND_MUL_HIGH
3032				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
3033
3034	 progress = do_common_optimization(ir, true, true,
3035                                           options, ctx->Const.NativeIntegers)
3036	   || progress;
3037
3038	 progress = lower_quadop_vector(ir, true) || progress;
3039
3040	 if (options->MaxIfDepth == 0)
3041	    progress = lower_discard(ir) || progress;
3042
3043	 progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
3044                                            options->MaxIfDepth) || progress;
3045
3046	 /* If there are forms of indirect addressing that the driver
3047	  * cannot handle, perform the lowering pass.
3048	  */
3049	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
3050	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
3051	   progress =
3052	     lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
3053						 options->EmitNoIndirectInput,
3054						 options->EmitNoIndirectOutput,
3055						 options->EmitNoIndirectTemp,
3056						 options->EmitNoIndirectUniform)
3057	     || progress;
3058
3059	 progress = do_vec_index_to_cond_assign(ir) || progress;
3060         progress = lower_vector_insert(ir, true) || progress;
3061      } while (progress);
3062
3063      validate_ir_tree(ir);
3064   }
3065
3066   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
3067      struct gl_program *linked_prog;
3068
3069      if (prog->_LinkedShaders[i] == NULL)
3070	 continue;
3071
3072      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
3073
3074      if (linked_prog) {
3075         _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]);
3076
3077         if (!ctx->Driver.ProgramStringNotify(ctx,
3078                                              _mesa_shader_stage_to_program(i),
3079                                              linked_prog)) {
3080            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
3081                                    NULL);
3082            return GL_FALSE;
3083         }
3084      }
3085   }
3086
3087   build_program_resource_list(ctx, prog, false);
3088   return prog->data->LinkStatus;
3089}
3090
3091/**
3092 * Link a GLSL shader program.  Called via glLinkProgram().
3093 */
3094void
3095_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3096{
3097   unsigned int i;
3098   bool spirv = false;
3099
3100   _mesa_clear_shader_program_data(ctx, prog);
3101
3102   prog->data = _mesa_create_shader_program_data();
3103
3104   prog->data->LinkStatus = LINKING_SUCCESS;
3105
3106   for (i = 0; i < prog->NumShaders; i++) {
3107      if (!prog->Shaders[i]->CompileStatus) {
3108	 linker_error(prog, "linking with uncompiled/unspecialized shader");
3109      }
3110
3111      if (!i) {
3112         spirv = (prog->Shaders[i]->spirv_data != NULL);
3113      } else if (spirv && !prog->Shaders[i]->spirv_data) {
3114         /* The GL_ARB_gl_spirv spec adds a new bullet point to the list of
3115          * reasons LinkProgram can fail:
3116          *
3117          *    "All the shader objects attached to <program> do not have the
3118          *     same value for the SPIR_V_BINARY_ARB state."
3119          */
3120         linker_error(prog,
3121                      "not all attached shaders have the same "
3122                      "SPIR_V_BINARY_ARB state");
3123      }
3124   }
3125   prog->data->spirv = spirv;
3126
3127   if (prog->data->LinkStatus) {
3128      if (!spirv)
3129         link_shaders(ctx, prog);
3130      else
3131         _mesa_spirv_link_shaders(ctx, prog);
3132   }
3133
3134   /* If LinkStatus is LINKING_SUCCESS, then reset sampler validated to true.
3135    * Validation happens via the LinkShader call below. If LinkStatus is
3136    * LINKING_SKIPPED, then SamplersValidated will have been restored from the
3137    * shader cache.
3138    */
3139   if (prog->data->LinkStatus == LINKING_SUCCESS) {
3140      prog->SamplersValidated = GL_TRUE;
3141   }
3142
3143   if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) {
3144      prog->data->LinkStatus = LINKING_FAILURE;
3145   }
3146
3147   if (prog->data->LinkStatus != LINKING_FAILURE)
3148      _mesa_create_program_resource_hash(prog);
3149
3150   /* Return early if we are loading the shader from on-disk cache */
3151   if (prog->data->LinkStatus == LINKING_SKIPPED)
3152      return;
3153
3154   if (ctx->_Shader->Flags & GLSL_DUMP) {
3155      if (!prog->data->LinkStatus) {
3156	 fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name);
3157      }
3158
3159      if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) {
3160	 fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name);
3161         fprintf(stderr, "%s\n", prog->data->InfoLog);
3162      }
3163   }
3164
3165#ifdef ENABLE_SHADER_CACHE
3166   if (prog->data->LinkStatus)
3167      shader_cache_write_program_metadata(ctx, prog);
3168#endif
3169}
3170
3171} /* extern "C" */
3172