1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "util/u_prim.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_exec.h"
47#include "tgsi/tgsi_info.h"
48#include "tgsi/tgsi_parse.h"
49#include "tgsi/tgsi_util.h"
50#include "tgsi/tgsi_scan.h"
51#include "tgsi/tgsi_strings.h"
52#include "lp_bld_tgsi_action.h"
53#include "lp_bld_type.h"
54#include "lp_bld_const.h"
55#include "lp_bld_arit.h"
56#include "lp_bld_bitarit.h"
57#include "lp_bld_gather.h"
58#include "lp_bld_init.h"
59#include "lp_bld_logic.h"
60#include "lp_bld_swizzle.h"
61#include "lp_bld_flow.h"
62#include "lp_bld_quad.h"
63#include "lp_bld_tgsi.h"
64#include "lp_bld_limits.h"
65#include "lp_bld_debug.h"
66#include "lp_bld_printf.h"
67#include "lp_bld_sample.h"
68#include "lp_bld_struct.h"
69
70/* SM 4.0 says that subroutines can nest 32 deep and
71 * we need one more for our main function */
72#define LP_MAX_NUM_FUNCS 33
73
74#define DUMP_GS_EMITS 0
75
76/*
77 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
78 * instruction.
79 *
80 * TODO:
81 * - take execution masks in consideration
82 * - debug control-flow instructions
83 */
84#define DEBUG_EXECUTION 0
85
86
87/*
88 * Emit code to print a register value.
89 */
90static void
91emit_dump_reg(struct gallivm_state *gallivm,
92              unsigned file,
93              unsigned index,
94              unsigned chan,
95              LLVMValueRef value)
96{
97   char buf[32];
98
99   util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
100                 tgsi_file_name(file),
101                 index, "xyzw"[chan]);
102
103   lp_build_print_value(gallivm, buf, value);
104}
105
106/*
107 * Return the context for the current function.
108 * (always 'main', if shader doesn't do any function calls)
109 */
110static inline struct function_ctx *
111func_ctx(struct lp_exec_mask *mask)
112{
113   assert(mask->function_stack_size > 0);
114   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
115   return &mask->function_stack[mask->function_stack_size - 1];
116}
117
118/*
119 * Returns true if we're in a loop.
120 * It's global, meaning that it returns true even if there's
121 * no loop inside the current function, but we were inside
122 * a loop inside another function, from which this one was called.
123 */
124static inline boolean
125mask_has_loop(struct lp_exec_mask *mask)
126{
127   int i;
128   for (i = mask->function_stack_size - 1; i >= 0; --i) {
129      const struct function_ctx *ctx = &mask->function_stack[i];
130      if (ctx->loop_stack_size > 0)
131         return TRUE;
132   }
133   return FALSE;
134}
135
136/*
137 * Returns true if we're inside a switch statement.
138 * It's global, meaning that it returns true even if there's
139 * no switch in the current function, but we were inside
140 * a switch inside another function, from which this one was called.
141 */
142static inline boolean
143mask_has_switch(struct lp_exec_mask *mask)
144{
145   int i;
146   for (i = mask->function_stack_size - 1; i >= 0; --i) {
147      const struct function_ctx *ctx = &mask->function_stack[i];
148      if (ctx->switch_stack_size > 0)
149         return TRUE;
150   }
151   return FALSE;
152}
153
154/*
155 * Returns true if we're inside a conditional.
156 * It's global, meaning that it returns true even if there's
157 * no conditional in the current function, but we were inside
158 * a conditional inside another function, from which this one was called.
159 */
160static inline boolean
161mask_has_cond(struct lp_exec_mask *mask)
162{
163   int i;
164   for (i = mask->function_stack_size - 1; i >= 0; --i) {
165      const struct function_ctx *ctx = &mask->function_stack[i];
166      if (ctx->cond_stack_size > 0)
167         return TRUE;
168   }
169   return FALSE;
170}
171
172
173/*
174 * Initialize a function context at the specified index.
175 */
176static void
177lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
178{
179   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
180   LLVMBuilderRef builder = mask->bld->gallivm->builder;
181   struct function_ctx *ctx =  &mask->function_stack[function_idx];
182
183   ctx->cond_stack_size = 0;
184   ctx->loop_stack_size = 0;
185   ctx->switch_stack_size = 0;
186
187   if (function_idx == 0) {
188      ctx->ret_mask = mask->ret_mask;
189   }
190
191   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
192                                       int_type, "looplimiter");
193   LLVMBuildStore(
194      builder,
195      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
196      ctx->loop_limiter);
197}
198
199static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
200{
201   mask->bld = bld;
202   mask->has_mask = FALSE;
203   mask->ret_in_main = FALSE;
204   /* For the main function */
205   mask->function_stack_size = 1;
206
207   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
208   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
209         mask->cond_mask = mask->switch_mask =
210         LLVMConstAllOnes(mask->int_vec_type);
211
212   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
213                                 sizeof(mask->function_stack[0]));
214   lp_exec_mask_function_init(mask, 0);
215}
216
217static void
218lp_exec_mask_fini(struct lp_exec_mask *mask)
219{
220   FREE(mask->function_stack);
221}
222
223static void lp_exec_mask_update(struct lp_exec_mask *mask)
224{
225   LLVMBuilderRef builder = mask->bld->gallivm->builder;
226   boolean has_loop_mask = mask_has_loop(mask);
227   boolean has_cond_mask = mask_has_cond(mask);
228   boolean has_switch_mask = mask_has_switch(mask);
229   boolean has_ret_mask = mask->function_stack_size > 1 ||
230         mask->ret_in_main;
231
232   if (has_loop_mask) {
233      /*for loops we need to update the entire mask at runtime */
234      LLVMValueRef tmp;
235      assert(mask->break_mask);
236      tmp = LLVMBuildAnd(builder,
237                         mask->cont_mask,
238                         mask->break_mask,
239                         "maskcb");
240      mask->exec_mask = LLVMBuildAnd(builder,
241                                     mask->cond_mask,
242                                     tmp,
243                                     "maskfull");
244   } else
245      mask->exec_mask = mask->cond_mask;
246
247   if (has_switch_mask) {
248      mask->exec_mask = LLVMBuildAnd(builder,
249                                     mask->exec_mask,
250                                     mask->switch_mask,
251                                     "switchmask");
252   }
253
254   if (has_ret_mask) {
255      mask->exec_mask = LLVMBuildAnd(builder,
256                                     mask->exec_mask,
257                                     mask->ret_mask,
258                                     "callmask");
259   }
260
261   mask->has_mask = (has_cond_mask ||
262                     has_loop_mask ||
263                     has_switch_mask ||
264                     has_ret_mask);
265}
266
267static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
268                                   LLVMValueRef val)
269{
270   LLVMBuilderRef builder = mask->bld->gallivm->builder;
271   struct function_ctx *ctx = func_ctx(mask);
272
273   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
274      ctx->cond_stack_size++;
275      return;
276   }
277   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
278      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
279   }
280   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
281   assert(LLVMTypeOf(val) == mask->int_vec_type);
282   mask->cond_mask = LLVMBuildAnd(builder,
283                                  mask->cond_mask,
284                                  val,
285                                  "");
286   lp_exec_mask_update(mask);
287}
288
289static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
290{
291   LLVMBuilderRef builder = mask->bld->gallivm->builder;
292   struct function_ctx *ctx = func_ctx(mask);
293   LLVMValueRef prev_mask;
294   LLVMValueRef inv_mask;
295
296   assert(ctx->cond_stack_size);
297   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
298      return;
299   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
300   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
301      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
302   }
303
304   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
305
306   mask->cond_mask = LLVMBuildAnd(builder,
307                                  inv_mask,
308                                  prev_mask, "");
309   lp_exec_mask_update(mask);
310}
311
312static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
313{
314   struct function_ctx *ctx = func_ctx(mask);
315   assert(ctx->cond_stack_size);
316   --ctx->cond_stack_size;
317   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
318      return;
319   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
320   lp_exec_mask_update(mask);
321}
322
/*
 * Open a TGSI BGNLOOP: push the current loop state onto the per-function
 * loop stack, set up the break mask storage and emit the loop header block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Beyond the nesting limit only count levels so ENDLOOP stays paired. */
   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   /* Remember what 'break' meant outside this loop (loop vs. switch). */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   /* Save the enclosing loop's state so ENDLOOP can restore it. */
   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* break_mask lives in memory so it survives across loop iterations */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
355
/*
 * Handle a TGSI BRK: depending on the innermost construct this clears
 * lanes from either the loop break mask or the switch mask.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Lanes currently active leave the loop: drop them from break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Break inside a switch. If the next instruction ends the case
       * (ENDSWITCH or CASE), the break applies to all active lanes. */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* jump back to where the deferred default was entered from */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* all lanes break: nothing else in the switch executes */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         /* conditional break: remove only the currently active lanes */
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
406
407static void lp_exec_continue(struct lp_exec_mask *mask)
408{
409   LLVMBuilderRef builder = mask->bld->gallivm->builder;
410   LLVMValueRef exec_mask = LLVMBuildNot(builder,
411                                         mask->exec_mask,
412                                         "");
413
414   mask->cont_mask = LLVMBuildAnd(builder,
415                                  mask->cont_mask,
416                                  exec_mask, "");
417
418   lp_exec_mask_update(mask);
419}
420
421
/*
 * Close a TGSI ENDLOOP: emit the back-edge branch (taken while any lane
 * is still active and the iteration limiter hasn't expired) and pop the
 * loop state saved by lp_exec_bgnloop.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* integer wide enough to view the whole exec mask as one scalar */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* Levels past the nesting limit pushed nothing; just unwind the count. */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* loop again while any lane is active and the limiter hasn't expired */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   /* pop the state saved by lp_exec_bgnloop */
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
501
/*
 * Open a TGSI SWITCH: save the enclosing switch state and start with an
 * all-zero switch mask (no case has matched yet).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Beyond the nesting limit only count levels so ENDSWITCH stays paired. */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* Remember what 'break' meant outside this switch (loop vs. switch). */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Save the enclosing switch's state so ENDSWITCH can restore it. */
   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* no lanes execute until a matching CASE (or DEFAULT) is reached */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
532
/*
 * Close a TGSI ENDSWITCH. If a DEFAULT statement was skipped earlier
 * (not last, no fallthrough into it), jump back and execute it now with
 * the default mask; otherwise pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Levels past the nesting limit pushed nothing; just unwind the count. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes the lanes no case has matched */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* just finished re-executing the deferred default: sanity check */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* restore the enclosing switch's state saved by lp_exec_switch */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
584
/*
 * Handle a TGSI CASE: enable the lanes whose switch value equals caseval,
 * keeping lanes that fell through from the previous case enabled too.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   /* Beyond the nesting limit no switch state was pushed; emit nothing. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* accumulate which lanes any case matched, for DEFAULT handling */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* keep fallthrough lanes (current switch_mask) enabled as well */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
609
610/*
611 * Analyse default statement in a switch.
612 * \return true if default is last statement, false otherwise
613 * \param default_pc_start contains pc of instruction to jump to
614 *                         if default wasn't last but there's no
615 *                         fallthrough into default.
616 */
617static boolean default_analyse_is_last(struct lp_exec_mask *mask,
618                                       struct lp_build_tgsi_context * bld_base,
619                                       int *default_pc_start)
620{
621   unsigned pc = bld_base->pc;
622   struct function_ctx *ctx = func_ctx(mask);
623   int curr_switch_stack = ctx->switch_stack_size;
624
625   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
626      return false;
627   }
628
629   /* skip over case statements which are together with default */
630   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
631      pc++;
632   }
633
634   while (pc != ~0u && pc < bld_base->num_instructions) {
635      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
636      switch (opcode) {
637      case TGSI_OPCODE_CASE:
638         if (curr_switch_stack == ctx->switch_stack_size) {
639            *default_pc_start = pc - 1;
640            return false;
641         }
642         break;
643      case TGSI_OPCODE_SWITCH:
644         curr_switch_stack++;
645         break;
646      case TGSI_OPCODE_ENDSWITCH:
647         if (curr_switch_stack == ctx->switch_stack_size) {
648            *default_pc_start = pc - 1;
649            return true;
650         }
651         curr_switch_stack--;
652         break;
653      default:
654         ; /* nothing */
655      }
656      pc++;
657   }
658   /* should never arrive here */
659   assert(0);
660   return true;
661}
662
/*
 * Handle a TGSI DEFAULT statement. Straightforward when default is the
 * last statement of the switch; otherwise its execution may need to be
 * deferred until ENDSWITCH (see lp_exec_endswitch).
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   /* Beyond the nesting limit no switch state was pushed; emit nothing. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes the lanes no case has matched */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* plus any lanes falling through from the previous case */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
728
729
730/* stores val into an address pointed to by dst_ptr.
731 * mask->exec_mask is used to figure out which bits of val
732 * should be stored into the address
733 * (0 means don't store this bit, 1 means do store).
734 */
735static void lp_exec_mask_store(struct lp_exec_mask *mask,
736                               struct lp_build_context *bld_store,
737                               LLVMValueRef val,
738                               LLVMValueRef dst_ptr)
739{
740   LLVMBuilderRef builder = mask->bld->gallivm->builder;
741   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
742
743   assert(lp_check_value(bld_store->type, val));
744   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
745   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
746          LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
747
748   if (exec_mask) {
749      LLVMValueRef res, dst;
750
751      dst = LLVMBuildLoad(builder, dst_ptr, "");
752      res = lp_build_select(bld_store, exec_mask, val, dst);
753      LLVMBuildStore(builder, res, dst_ptr);
754   } else
755      LLVMBuildStore(builder, val, dst_ptr);
756}
757
758static void lp_exec_mask_call(struct lp_exec_mask *mask,
759                              int func,
760                              int *pc)
761{
762   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
763      return;
764   }
765
766   lp_exec_mask_function_init(mask, mask->function_stack_size);
767   mask->function_stack[mask->function_stack_size].pc = *pc;
768   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
769   mask->function_stack_size++;
770   *pc = func;
771}
772
/*
 * Handle a TGSI RET: when returning from main with no open control flow,
 * simply stop execution; otherwise clear the returning lanes from the
 * return mask.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   /* remove the currently active lanes from the return mask */
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
807
/*
 * Handle a TGSI BGNSUB: nothing to do here — the function context was
 * already pushed by lp_exec_mask_call when the CAL was executed.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
811
/*
 * Handle a TGSI ENDSUB: pop the function context pushed by
 * lp_exec_mask_call, restoring the caller's pc and return mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   /* ctx is the context of the subroutine we are leaving */
   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
827
828
/*
 * Return an LLVM pointer to the given channel of a TEMP or OUTPUT register.
 * With indirect addressing enabled for the file the registers live in one
 * flat array (four channels per register); otherwise each channel has its
 * own variable.
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* flat layout: reg[index].chan is element index*4 + chan */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      /* array-typed storage needs a leading zero index in the GEP */
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
871
872
873/**
874 * Return pointer to a temporary register channel (src or dest).
875 * Note that indirect addressing cannot be handled here.
876 * \param index  which temporary register
877 * \param chan  which channel of the temp register.
878 */
879LLVMValueRef
880lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
881             unsigned index,
882             unsigned chan)
883{
884   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
885}
886
887/**
888 * Return pointer to a output register channel (src or dest).
889 * Note that indirect addressing cannot be handled here.
890 * \param index  which output register
891 * \param chan  which channel of the output register.
892 */
893LLVMValueRef
894lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
895               unsigned index,
896               unsigned chan)
897{
898   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
899}
900
901/*
902 * If we have indirect addressing in outputs copy our alloca array
903 * to the outputs slots specified by the caller to make sure
904 * our outputs are delivered consistently via the same interface.
905 */
906static void
907gather_outputs(struct lp_build_tgsi_soa_context * bld)
908{
909   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
910      unsigned index, chan;
911      assert(bld->bld_base.info->num_outputs <=
912             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
913      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
914         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
915            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
916         }
917      }
918   }
919}
920
921/**
922 * Gather vector.
923 * XXX the lp_build_gather() function should be capable of doing this
924 * with a little work.
925 */
926static LLVMValueRef
927build_gather(struct lp_build_tgsi_context *bld_base,
928             LLVMValueRef base_ptr,
929             LLVMValueRef indexes,
930             LLVMValueRef overflow_mask,
931             LLVMValueRef indexes2)
932{
933   struct gallivm_state *gallivm = bld_base->base.gallivm;
934   LLVMBuilderRef builder = gallivm->builder;
935   struct lp_build_context *uint_bld = &bld_base->uint_bld;
936   struct lp_build_context *bld = &bld_base->base;
937   LLVMValueRef res;
938   unsigned i;
939
940   if (indexes2)
941      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
942   else
943      res = bld->undef;
944   /*
945    * overflow_mask is a vector telling us which channels
946    * in the vector overflowed. We use the overflow behavior for
947    * constant buffers which is defined as:
948    * Out of bounds access to constant buffer returns 0 in all
949    * components. Out of bounds behavior is always with respect
950    * to the size of the buffer bound at that slot.
951    */
952
953   if (overflow_mask) {
954      /*
955       * We avoid per-element control flow here (also due to llvm going crazy,
956       * though I suspect it's better anyway since overflow is likely rare).
957       * Note that since we still fetch from buffers even if num_elements was
958       * zero (in this case we'll fetch from index zero) the jit func callers
959       * MUST provide valid fake constant buffers of size 4x32 (the values do
960       * not matter), otherwise we'd still need (not per element though)
961       * control flow.
962       */
963      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
964      if (indexes2)
965         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
966   }
967
968   /*
969    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
970    */
971   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
972      LLVMValueRef si, di;
973      LLVMValueRef index;
974      LLVMValueRef scalar_ptr, scalar;
975
976      di = lp_build_const_int32(bld->gallivm, i);
977      if (indexes2)
978         si = lp_build_const_int32(bld->gallivm, i >> 1);
979      else
980         si = di;
981
982      if (indexes2 && (i & 1)) {
983         index = LLVMBuildExtractElement(builder,
984                                         indexes2, si, "");
985      } else {
986         index = LLVMBuildExtractElement(builder,
987                                         indexes, si, "");
988      }
989      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
990                                &index, 1, "gather_ptr");
991      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
992
993      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
994   }
995
996   if (overflow_mask) {
997      if (indexes2) {
998         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
999         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
1000                                       bld_base->dbl_bld.int_vec_type, "");
1001         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1002                               bld_base->dbl_bld.zero, res);
1003      } else
1004         res = lp_build_select(bld, overflow_mask, bld->zero, res);
1005   }
1006
1007   return res;
1008}
1009
1010
1011/**
1012 * Scatter/store vector.
1013 */
1014static void
1015emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1016                  LLVMValueRef base_ptr,
1017                  LLVMValueRef indexes,
1018                  LLVMValueRef values,
1019                  struct lp_exec_mask *mask)
1020{
1021   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1022   LLVMBuilderRef builder = gallivm->builder;
1023   unsigned i;
1024   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1025
1026   /*
1027    * Loop over elements of index_vec, store scalar value.
1028    */
1029   for (i = 0; i < bld->bld_base.base.type.length; i++) {
1030      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1031      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1032      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1033      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1034      LLVMValueRef scalar_pred = pred ?
1035         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1036
1037      if (0)
1038         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1039                         ii, val, index, scalar_ptr);
1040
1041      if (scalar_pred) {
1042         LLVMValueRef real_val, dst_val;
1043         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1044         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1045         LLVMBuildStore(builder, real_val, scalar_ptr);
1046      }
1047      else {
1048         LLVMBuildStore(builder, val, scalar_ptr);
1049      }
1050   }
1051}
1052
1053
1054/**
1055 * Read the current value of the ADDR register, convert the floats to
1056 * ints, add the base index and return the vector of offsets.
1057 * The offsets will be used to index into the constant buffer or
1058 * temporary register file.
1059 */
1060static LLVMValueRef
1061get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1062                   unsigned reg_file, unsigned reg_index,
1063                   const struct tgsi_ind_register *indirect_reg,
1064                   int index_limit)
1065{
1066   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1067   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1068   /* always use X component of address register */
1069   unsigned swizzle = indirect_reg->Swizzle;
1070   LLVMValueRef base;
1071   LLVMValueRef rel;
1072   LLVMValueRef max_index;
1073   LLVMValueRef index;
1074
1075   assert(bld->indirect_files & (1 << reg_file));
1076
1077   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1078
1079   assert(swizzle < 4);
1080   switch (indirect_reg->File) {
1081   case TGSI_FILE_ADDRESS:
1082      rel = LLVMBuildLoad(builder,
1083                          bld->addr[indirect_reg->Index][swizzle],
1084                          "load addr reg");
1085      /* ADDR LLVM values already have LLVM integer type. */
1086      break;
1087   case TGSI_FILE_TEMPORARY:
1088      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1089      rel = LLVMBuildLoad(builder, rel, "load temp reg");
1090      /* TEMP LLVM values always have LLVM float type, but for indirection, the
1091       * value actually stored is expected to be an integer */
1092      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1093      break;
1094   default:
1095      assert(0);
1096      rel = uint_bld->zero;
1097   }
1098
1099   index = lp_build_add(uint_bld, base, rel);
1100
1101   /*
1102    * emit_fetch_constant handles constant buffer overflow so this code
1103    * is pointless for them.
1104    * Furthermore the D3D10 spec in section 6.5 says:
1105    * If the constant buffer bound to a slot is larger than the size
1106    * declared in the shader for that slot, implementations are allowed
1107    * to return incorrect data (not necessarily 0) for indices that are
1108    * larger than the declared size but smaller than the buffer size.
1109    */
1110   if (reg_file != TGSI_FILE_CONSTANT) {
1111      assert(index_limit >= 0);
1112      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1113                                         uint_bld->type, index_limit);
1114
1115      assert(!uint_bld->type.sign);
1116      index = lp_build_min(uint_bld, index, max_index);
1117   }
1118
1119   return index;
1120}
1121
1122static struct lp_build_context *
1123stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1124	       enum tgsi_opcode_type stype)
1125{
1126   struct lp_build_context *bld_fetch;
1127
1128   switch (stype) {
1129   case TGSI_TYPE_FLOAT:
1130   case TGSI_TYPE_UNTYPED:
1131      bld_fetch = &bld_base->base;
1132      break;
1133   case TGSI_TYPE_UNSIGNED:
1134      bld_fetch = &bld_base->uint_bld;
1135      break;
1136   case TGSI_TYPE_SIGNED:
1137      bld_fetch = &bld_base->int_bld;
1138      break;
1139   case TGSI_TYPE_DOUBLE:
1140      bld_fetch = &bld_base->dbl_bld;
1141      break;
1142   case TGSI_TYPE_UNSIGNED64:
1143      bld_fetch = &bld_base->uint64_bld;
1144      break;
1145   case TGSI_TYPE_SIGNED64:
1146      bld_fetch = &bld_base->int64_bld;
1147      break;
1148   case TGSI_TYPE_VOID:
1149   default:
1150      assert(0);
1151      bld_fetch = NULL;
1152      break;
1153   }
1154   return bld_fetch;
1155}
1156
1157static LLVMValueRef
1158get_soa_array_offsets(struct lp_build_context *uint_bld,
1159                      LLVMValueRef indirect_index,
1160                      unsigned chan_index,
1161                      boolean need_perelement_offset)
1162{
1163   struct gallivm_state *gallivm = uint_bld->gallivm;
1164   LLVMValueRef chan_vec =
1165      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1166   LLVMValueRef length_vec =
1167      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1168   LLVMValueRef index_vec;
1169
1170   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1171   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1172   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1173   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1174
1175   if (need_perelement_offset) {
1176      LLVMValueRef pixel_offsets;
1177      unsigned i;
1178     /* build pixel offset vector: {0, 1, 2, 3, ...} */
1179      pixel_offsets = uint_bld->undef;
1180      for (i = 0; i < uint_bld->type.length; i++) {
1181         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1182         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1183                                                ii, ii, "");
1184      }
1185      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1186   }
1187   return index_vec;
1188}
1189
/**
 * Fetch a TGSI_FILE_CONSTANT source operand.
 * Indirect accesses gather one scalar per lane (with D3D10 out-of-bounds
 * zeroing); direct accesses load once and broadcast to all lanes.
 * \param reg         constant register reference (may be 2D and/or indirect)
 * \param stype       requested operand type; the result is bitcast to it
 * \param swizzle_in  low 16 bits: channel; high 16 bits: second channel
 *                    for 64-bit types
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      /* 2D reference: the dimension selects the constant buffer slot. */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      /* Per-lane indirect index: gather one scalar per lane. */
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* Second index vector addressing the high-half channel. */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      /* Direct index: load scalar(s) once, then broadcast. */
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
         /* Non-adjacent channel pair: load the two 32-bit halves
          * separately and place them in lanes 0/1 of a double-length
          * float vector (bitcast to the 64-bit type at the end). */
         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
        /* Adjacent 64-bit pair or 32-bit scalar: re-type the pointer so a
         * single load covers the whole value, then broadcast. */
        if (stype == TGSI_TYPE_DOUBLE) {
           LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
           bld_broad = &bld_base->dbl_bld;
        } else if (stype == TGSI_TYPE_UNSIGNED64) {
           LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
           bld_broad = &bld_base->uint64_bld;
        } else if (stype == TGSI_TYPE_SIGNED64) {
           LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
           bld_broad = &bld_base->int64_bld;
        }
        scalar = LLVMBuildLoad(builder, scalar_ptr, "");
        res = lp_build_broadcast_scalar(bld_broad, scalar);
      }

   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      /* Reinterpret the float vector as the requested operand type. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1308
1309/**
1310 * Fetch 64-bit values from two separate channels.
1311 * 64-bit values are stored split across two channels, like xy and zw.
1312 * This function creates a set of vec_length*2 floats,
1313 * extracts the values from the two channels,
1314 * puts them in the correct place, then casts to vec_length 64-bits.
1315 */
1316static LLVMValueRef
1317emit_fetch_64bit(
1318   struct lp_build_tgsi_context * bld_base,
1319   enum tgsi_opcode_type stype,
1320   LLVMValueRef input,
1321   LLVMValueRef input2)
1322{
1323   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1324   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1325   LLVMBuilderRef builder = gallivm->builder;
1326   LLVMValueRef res;
1327   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1328   int i;
1329   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
1330   int len = bld_base->base.type.length * 2;
1331   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
1332
1333   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1334      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1335      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1336   }
1337   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1338
1339   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1340}
1341
/**
 * Fetch a TGSI_FILE_IMMEDIATE source operand.
 * Immediates either live in an alloca array (when indirectly addressed or
 * when bld->use_immediates_array is set) or as SSA values in
 * bld->immediates.
 * \param swizzle_in  low 16 bits: channel; high 16 bits: second channel
 *                    for 64-bit types
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                              indirect_index,
                                              swizzle_in >> 16,
                                              FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct access into the alloca array. */
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* Load the second channel and combine into 64-bit values. */
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Immediates kept as SSA values: just pick the channel. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      /* Reinterpret as the requested operand type. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1421
/**
 * Fetch a TGSI_FILE_INPUT source operand.
 * Indirect accesses gather from the inputs alloca array; direct accesses
 * either load from the array (when inputs are indirectly addressed
 * anywhere in the shader) or use the SSA values in bld->inputs.
 * \param swizzle_in  low 16 bits: channel; high 16 bits: second channel
 *                    for 64-bit types
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         /* Second offset vector for the high half of 64-bit values. */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle_in >> 16,
                                           TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs are indirectly addressed somewhere in the shader, so
          * they live in the alloca array: load directly. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* Load the second channel and combine into 64-bit values. */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Inputs kept as SSA values: just pick the channel. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      /* Reinterpret as the requested operand type. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1501
1502
1503static LLVMValueRef
1504emit_fetch_gs_input(
1505   struct lp_build_tgsi_context * bld_base,
1506   const struct tgsi_full_src_register * reg,
1507   enum tgsi_opcode_type stype,
1508   unsigned swizzle_in)
1509{
1510   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1511   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1512   const struct tgsi_shader_info *info = bld->bld_base.info;
1513   LLVMBuilderRef builder = gallivm->builder;
1514   LLVMValueRef attrib_index = NULL;
1515   LLVMValueRef vertex_index = NULL;
1516   unsigned swizzle = swizzle_in & 0xffff;
1517   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1518   LLVMValueRef res;
1519
1520   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1521      /* This is really a system value not a regular input */
1522      assert(!reg->Register.Indirect);
1523      assert(!reg->Dimension.Indirect);
1524      res = bld->system_values.prim_id;
1525      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1526         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1527      }
1528      return res;
1529   }
1530
1531   if (reg->Register.Indirect) {
1532      /*
1533       * XXX: this is possibly not quite the right value, since file_max may be
1534       * larger than the max attrib index, due to it being the max of declared
1535       * inputs AND the max vertices per prim (which is 6 for tri adj).
1536       * It should however be safe to use (since we always allocate
1537       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1538       */
1539      int index_limit = info->file_max[reg->Register.File];
1540      attrib_index = get_indirect_index(bld,
1541                                        reg->Register.File,
1542                                        reg->Register.Index,
1543                                        &reg->Indirect,
1544                                        index_limit);
1545   } else {
1546      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1547   }
1548
1549   if (reg->Dimension.Indirect) {
1550      /*
1551       * A fixed 6 should do as well (which is what we allocate).
1552       */
1553      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1554      vertex_index = get_indirect_index(bld,
1555                                        reg->Register.File,
1556                                        reg->Dimension.Index,
1557                                        &reg->DimIndirect,
1558                                        index_limit);
1559   } else {
1560      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1561   }
1562
1563   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1564                                    reg->Dimension.Indirect,
1565                                    vertex_index,
1566                                    reg->Register.Indirect,
1567                                    attrib_index,
1568                                    swizzle_index);
1569
1570   assert(res);
1571   if (tgsi_type_is_64bit(stype)) {
1572      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1573      LLVMValueRef res2;
1574      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1575                                        reg->Dimension.Indirect,
1576                                        vertex_index,
1577                                        reg->Register.Indirect,
1578                                        attrib_index,
1579                                        swizzle_index);
1580      assert(res2);
1581      res = emit_fetch_64bit(bld_base, stype, res, res2);
1582   } else if (stype == TGSI_TYPE_UNSIGNED) {
1583      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1584   } else if (stype == TGSI_TYPE_SIGNED) {
1585      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1586   }
1587
1588   return res;
1589}
1590
1591static LLVMValueRef
1592emit_fetch_temporary(
1593   struct lp_build_tgsi_context * bld_base,
1594   const struct tgsi_full_src_register * reg,
1595   enum tgsi_opcode_type stype,
1596   unsigned swizzle_in)
1597{
1598   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1599   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1600   LLVMBuilderRef builder = gallivm->builder;
1601   LLVMValueRef res;
1602   unsigned swizzle = swizzle_in & 0xffff;
1603
1604   if (reg->Register.Indirect) {
1605      LLVMValueRef indirect_index;
1606      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1607      LLVMValueRef temps_array;
1608      LLVMTypeRef fptr_type;
1609
1610      indirect_index = get_indirect_index(bld,
1611                                          reg->Register.File,
1612                                          reg->Register.Index,
1613                                          &reg->Indirect,
1614                                          bld->bld_base.info->file_max[reg->Register.File]);
1615
1616      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1617                                        indirect_index,
1618                                        swizzle,
1619                                        TRUE);
1620      if (tgsi_type_is_64bit(stype)) {
1621               index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1622                                                  indirect_index,
1623                                                  swizzle_in >> 16,
1624                                                  TRUE);
1625      }
1626
1627      /* cast temps_array pointer to float* */
1628      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1629      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1630
1631      /* Gather values from the temporary register array */
1632      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1633   }
1634   else {
1635      LLVMValueRef temp_ptr;
1636      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1637      res = LLVMBuildLoad(builder, temp_ptr, "");
1638
1639      if (tgsi_type_is_64bit(stype)) {
1640         LLVMValueRef temp_ptr2, res2;
1641
1642         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1643         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1644         res = emit_fetch_64bit(bld_base, stype, res, res2);
1645      }
1646   }
1647
1648   if (stype == TGSI_TYPE_SIGNED ||
1649       stype == TGSI_TYPE_UNSIGNED ||
1650       stype == TGSI_TYPE_DOUBLE ||
1651       stype == TGSI_TYPE_SIGNED64 ||
1652       stype == TGSI_TYPE_UNSIGNED64) {
1653      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1654      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1655   }
1656
1657   return res;
1658}
1659
1660static LLVMValueRef
1661emit_fetch_system_value(
1662   struct lp_build_tgsi_context * bld_base,
1663   const struct tgsi_full_src_register * reg,
1664   enum tgsi_opcode_type stype,
1665   unsigned swizzle_in)
1666{
1667   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1668   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1669   const struct tgsi_shader_info *info = bld->bld_base.info;
1670   LLVMBuilderRef builder = gallivm->builder;
1671   LLVMValueRef res;
1672   enum tgsi_opcode_type atype; // Actual type of the value
1673
1674   assert(!reg->Register.Indirect);
1675
1676   switch (info->system_value_semantic_name[reg->Register.Index]) {
1677   case TGSI_SEMANTIC_INSTANCEID:
1678      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1679      atype = TGSI_TYPE_UNSIGNED;
1680      break;
1681
1682   case TGSI_SEMANTIC_VERTEXID:
1683      res = bld->system_values.vertex_id;
1684      atype = TGSI_TYPE_UNSIGNED;
1685      break;
1686
1687   case TGSI_SEMANTIC_VERTEXID_NOBASE:
1688      res = bld->system_values.vertex_id_nobase;
1689      atype = TGSI_TYPE_UNSIGNED;
1690      break;
1691
1692   case TGSI_SEMANTIC_BASEVERTEX:
1693      res = bld->system_values.basevertex;
1694      atype = TGSI_TYPE_UNSIGNED;
1695      break;
1696
1697   case TGSI_SEMANTIC_PRIMID:
1698      res = bld->system_values.prim_id;
1699      atype = TGSI_TYPE_UNSIGNED;
1700      break;
1701
1702   case TGSI_SEMANTIC_INVOCATIONID:
1703      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1704      atype = TGSI_TYPE_UNSIGNED;
1705      break;
1706
1707   default:
1708      assert(!"unexpected semantic in emit_fetch_system_value");
1709      res = bld_base->base.zero;
1710      atype = TGSI_TYPE_FLOAT;
1711      break;
1712   }
1713
1714   if (atype != stype) {
1715      if (stype == TGSI_TYPE_FLOAT) {
1716         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1717      } else if (stype == TGSI_TYPE_UNSIGNED) {
1718         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1719      } else if (stype == TGSI_TYPE_SIGNED) {
1720         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1721      }
1722   }
1723
1724   return res;
1725}
1726
1727/**
1728 * Register fetch with derivatives.
1729 */
1730static void
1731emit_fetch_deriv(
1732   struct lp_build_tgsi_soa_context *bld,
1733   LLVMValueRef src,
1734   LLVMValueRef *res,
1735   LLVMValueRef *ddx,
1736   LLVMValueRef *ddy)
1737{
1738   if (res)
1739      *res = src;
1740
1741   /* TODO: use interpolation coeffs for inputs */
1742
1743   if (ddx)
1744      *ddx = lp_build_ddx(&bld->bld_base.base, src);
1745
1746   if (ddy)
1747      *ddy = lp_build_ddy(&bld->bld_base.base, src);
1748}
1749
1750/**
1751 * store an array of vec-length 64-bit into two arrays of vec_length floats
1752 * i.e.
1753 * value is d0, d1, d2, d3 etc.
1754 * each 64-bit has high and low pieces x, y
1755 * so gets stored into the separate channels as:
1756 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1757 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1758 */
1759static void
1760emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1761                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1762                      LLVMValueRef value)
1763{
1764   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1765   struct gallivm_state *gallivm = bld_base->base.gallivm;
1766   LLVMBuilderRef builder = gallivm->builder;
1767   struct lp_build_context *float_bld = &bld_base->base;
1768   unsigned i;
1769   LLVMValueRef temp, temp2;
1770   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1771   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1772
1773   for (i = 0; i < bld_base->base.type.length; i++) {
1774      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1775      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1776   }
1777
1778   temp = LLVMBuildShuffleVector(builder, value,
1779                                 LLVMGetUndef(LLVMTypeOf(value)),
1780                                 LLVMConstVector(shuffles,
1781                                                 bld_base->base.type.length),
1782                                 "");
1783   temp2 = LLVMBuildShuffleVector(builder, value,
1784                                  LLVMGetUndef(LLVMTypeOf(value)),
1785                                  LLVMConstVector(shuffles2,
1786                                                  bld_base->base.type.length),
1787                                  "");
1788
1789   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1790   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1791}
1792
1793/**
1794 * Register store.
1795 */
1796static void
1797emit_store_chan(
1798   struct lp_build_tgsi_context *bld_base,
1799   const struct tgsi_full_instruction *inst,
1800   unsigned index,
1801   unsigned chan_index,
1802   LLVMValueRef value)
1803{
1804   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1805   struct gallivm_state *gallivm = bld_base->base.gallivm;
1806   LLVMBuilderRef builder = gallivm->builder;
1807   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1808   struct lp_build_context *float_bld = &bld_base->base;
1809   struct lp_build_context *int_bld = &bld_base->int_bld;
1810   LLVMValueRef indirect_index = NULL;
1811   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1812
1813   /*
1814    * Apply saturation.
1815    *
1816    * It is always assumed to be float.
1817    */
1818   if (inst->Instruction.Saturate) {
1819      assert(dtype == TGSI_TYPE_FLOAT ||
1820             dtype == TGSI_TYPE_UNTYPED);
1821      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1822      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1823   }
1824
1825   if (reg->Register.Indirect) {
1826      /*
1827       * Currently the mesa/st doesn't generate indirect stores
1828       * to 64-bit values, it normally uses MOV to do indirect stores.
1829       */
1830      assert(!tgsi_type_is_64bit(dtype));
1831      indirect_index = get_indirect_index(bld,
1832                                          reg->Register.File,
1833                                          reg->Register.Index,
1834                                          &reg->Indirect,
1835                                          bld->bld_base.info->file_max[reg->Register.File]);
1836   } else {
1837      assert(reg->Register.Index <=
1838                             bld_base->info->file_max[reg->Register.File]);
1839   }
1840
1841   if (DEBUG_EXECUTION) {
1842      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1843   }
1844
1845   switch( reg->Register.File ) {
1846   case TGSI_FILE_OUTPUT:
1847      /* Outputs are always stored as floats */
1848      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1849
1850      if (reg->Register.Indirect) {
1851         LLVMValueRef index_vec;  /* indexes into the output registers */
1852         LLVMValueRef outputs_array;
1853         LLVMTypeRef fptr_type;
1854
1855         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1856                                           indirect_index,
1857                                           chan_index,
1858                                           TRUE);
1859
1860         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1861         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1862
1863         /* Scatter store values into output registers */
1864         emit_mask_scatter(bld, outputs_array, index_vec, value,
1865                           &bld->exec_mask);
1866      }
1867      else {
1868         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1869                                                  chan_index);
1870
1871         if (tgsi_type_is_64bit(dtype)) {
1872            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1873                                                      chan_index + 1);
1874            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1875                                  value);
1876         } else
1877            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1878      }
1879      break;
1880
1881   case TGSI_FILE_TEMPORARY:
1882      /* Temporaries are always stored as floats */
1883      if (!tgsi_type_is_64bit(dtype))
1884         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885      else
1886         value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1887
1888      if (reg->Register.Indirect) {
1889         LLVMValueRef index_vec;  /* indexes into the temp registers */
1890         LLVMValueRef temps_array;
1891         LLVMTypeRef fptr_type;
1892
1893         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1894                                           indirect_index,
1895                                           chan_index,
1896                                           TRUE);
1897
1898         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1899         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1900
1901         /* Scatter store values into temp registers */
1902         emit_mask_scatter(bld, temps_array, index_vec, value,
1903                           &bld->exec_mask);
1904      }
1905      else {
1906         LLVMValueRef temp_ptr;
1907         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1908
1909         if (tgsi_type_is_64bit(dtype)) {
1910            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1911                                                         reg->Register.Index,
1912                                                         chan_index + 1);
1913            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1914                                  value);
1915         }
1916         else
1917            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1918      }
1919      break;
1920
1921   case TGSI_FILE_ADDRESS:
1922      assert(dtype == TGSI_TYPE_SIGNED);
1923      assert(LLVMTypeOf(value) == int_bld->vec_type);
1924      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1925      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1926                         bld->addr[reg->Register.Index][chan_index]);
1927      break;
1928
1929   default:
1930      assert( 0 );
1931   }
1932
1933   (void)dtype;
1934}
1935
1936/*
1937 * Called at the beginning of the translation of each TGSI instruction, to
1938 * emit some debug code.
1939 */
1940static void
1941emit_debug(
1942   struct lp_build_tgsi_context * bld_base,
1943   const struct tgsi_full_instruction * inst,
1944   const struct tgsi_opcode_info * info)
1945
1946{
1947   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1948
1949   if (DEBUG_EXECUTION) {
1950      /*
1951       * Dump the TGSI instruction.
1952       */
1953
1954      struct gallivm_state *gallivm = bld_base->base.gallivm;
1955      char buf[512];
1956      buf[0] = '$';
1957      buf[1] = ' ';
1958      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1959      lp_build_printf(gallivm, buf);
1960
1961      /* Dump the execution mask.
1962       */
1963      if (bld->exec_mask.has_mask) {
1964         lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1965      }
1966   }
1967}
1968
1969static void
1970emit_store(
1971   struct lp_build_tgsi_context * bld_base,
1972   const struct tgsi_full_instruction * inst,
1973   const struct tgsi_opcode_info * info,
1974   unsigned index,
1975   LLVMValueRef dst[4])
1976
1977{
1978   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1979
1980   unsigned writemask = inst->Dst[index].Register.WriteMask;
1981   while (writemask) {
1982      unsigned chan_index = u_bit_scan(&writemask);
1983      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1984          continue;
1985      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1986   }
1987}
1988
1989static unsigned
1990tgsi_to_pipe_tex_target(unsigned tgsi_target)
1991{
1992   switch (tgsi_target) {
1993   case TGSI_TEXTURE_BUFFER:
1994      return PIPE_BUFFER;
1995   case TGSI_TEXTURE_1D:
1996   case TGSI_TEXTURE_SHADOW1D:
1997      return PIPE_TEXTURE_1D;
1998   case TGSI_TEXTURE_2D:
1999   case TGSI_TEXTURE_SHADOW2D:
2000   case TGSI_TEXTURE_2D_MSAA:
2001      return PIPE_TEXTURE_2D;
2002   case TGSI_TEXTURE_3D:
2003      return PIPE_TEXTURE_3D;
2004   case TGSI_TEXTURE_CUBE:
2005   case TGSI_TEXTURE_SHADOWCUBE:
2006      return PIPE_TEXTURE_CUBE;
2007   case TGSI_TEXTURE_RECT:
2008   case TGSI_TEXTURE_SHADOWRECT:
2009      return PIPE_TEXTURE_RECT;
2010   case TGSI_TEXTURE_1D_ARRAY:
2011   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2012      return PIPE_TEXTURE_1D_ARRAY;
2013   case TGSI_TEXTURE_2D_ARRAY:
2014   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2015   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2016      return PIPE_TEXTURE_2D_ARRAY;
2017   case TGSI_TEXTURE_CUBE_ARRAY:
2018   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2019      return PIPE_TEXTURE_CUBE_ARRAY;
2020   default:
2021      assert(0);
2022      return PIPE_BUFFER;
2023   }
2024}
2025
2026
2027static enum lp_sampler_lod_property
2028lp_build_lod_property(
2029   struct lp_build_tgsi_context *bld_base,
2030   const struct tgsi_full_instruction *inst,
2031   unsigned src_op)
2032{
2033   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2034   enum lp_sampler_lod_property lod_property;
2035
2036   /*
2037    * Not much we can do here. We could try catching inputs declared
2038    * with constant interpolation but not sure it's worth it - since for
2039    * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2040    * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2041    * like the constant/immediate recognition below.
2042    * What seems to be of more value would be to recognize temps holding
2043    * broadcasted scalars but no way we can do it.
2044    * Tried asking llvm but without any success (using LLVMIsConstant
2045    * even though this isn't exactly what we'd need), even as simple as
2046    * IMM[0] UINT32 (0,-1,0,0)
2047    * MOV TEMP[0] IMM[0].yyyy
2048    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2049    * doesn't work.
2050    * This means there's ZERO chance this will ever catch a scalar lod
2051    * with traditional tex opcodes as well as texel fetches, since the lod
2052    * comes from the same reg as coords (except some test shaders using
2053    * constant coords maybe).
2054    * There's at least hope for sample opcodes as well as size queries.
2055    */
2056   if (reg->Register.File == TGSI_FILE_CONSTANT ||
2057       reg->Register.File == TGSI_FILE_IMMEDIATE) {
2058      lod_property = LP_SAMPLER_LOD_SCALAR;
2059   }
2060   else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2061      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2062         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2063      }
2064      else {
2065         lod_property = LP_SAMPLER_LOD_PER_QUAD;
2066      }
2067   }
2068   else {
2069      /* never use scalar (per-quad) lod the results are just too wrong. */
2070      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2071   }
2072   return lod_property;
2073}
2074
2075
2076/**
2077 * High-level instruction translators.
2078 */
2079
/**
 * Translate a traditional TGSI texture opcode (TEX and its bias, explicit
 * lod, explicit derivative and projected variants) into a call into the
 * sampler code generator.
 *
 * \param modifier     which variant of the opcode (bias/lod/deriv/projected)
 * \param texel        output: the four sampled result channels
 * \param sampler_reg  index of the source register holding the sampler unit
 * \param sampler_op   sampler operation type (e.g. ordinary texture vs lodq)
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;      /* 1/w for projected texturing */
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;    /* source channel holding the shadow ref, 0 = none */
   unsigned layer_coord = 0;     /* source channel holding the array layer, 0 = none */
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target coordinate/offset/derivative counts and special channel
    * assignments for layer and shadow reference.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod shares the coordinate register, in the w channel */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* reciprocal of w, used to divide all projected coordinates */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   /* Fetch the coordinates, dividing by w for the projected case. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in the next register */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   /* Explicit derivatives come from src1 (ddx) and src2 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Bundle everything up and invoke the sampler code generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2286
/**
 * Translate a SAMPLE-style opcode (separate texture/sampler source
 * operands) into a call into the sampler code generator, applying the
 * sampler view's swizzle to the result.
 *
 * \param modifier     bias/lod/lod-zero/deriv variant of the opcode
 * \param compare      whether this is a comparison (shadow) sample
 * \param sample_type  sampler operation type
 * \param texel        output: the four sampled result channels
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;  /* source channel holding the array layer, 0 = none */
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Lod always comes from src3.x for the SAMPLE opcodes. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   /* Fetch the coordinates from src0; unused slots get undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   /* Explicit derivatives come from src3 (ddx) and src4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Bundle everything up and invoke the sampler code generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler view's result swizzle, if it's not the identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2468
/**
 * Fetch texels without filtering (TXF / SAMPLE_I): a direct read of texel
 * values at integer coordinates, optionally with an explicit lod and
 * texel offsets.
 *
 * \param bld        the SoA shader builder context
 * \param inst       the TGSI instruction being translated
 * \param texel      output: the four fetched texel channels
 * \param is_samplei TRUE for SAMPLE_I (D3D10-style: target comes from the
 *                   declared sampler view and the resource operand may carry
 *                   a destination swizzle), FALSE for plain TGSI TXF
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we cannot emit anything useful;
    * return undef texels so codegen can continue. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   /* SAMPLE_I takes the target from the declared sampler view;
    * TXF encodes it directly in the instruction. */
   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Derive coordinate dimensionality and (for array targets) which
    * source channel carries the layer index. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* lod lives in the w channel of the first source operand */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   /* optional constant texel offsets (e.g. TXF with offset) */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* SAMPLE_I's resource operand may carry a swizzle to apply to the
    * fetched texel; skip the work when it is the identity swizzle. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2599
/**
 * Emit a texture size query (TXQ / SVIEWINFO), writing the texture
 * dimensions (plus array size / mip level count, as applicable for the
 * target) into sizes_out.
 *
 * \param bld           the SoA shader builder context
 * \param inst          the TGSI instruction being translated
 * \param sizes_out     output: the four query result channels
 * \param is_sviewinfo  TRUE for SVIEWINFO (target taken from the declared
 *                      sampler view), FALSE for TXQ (target taken from the
 *                      instruction)
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* buffers and rect textures have no mip levels, hence no lod operand */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   /* Without a sampler generator, return undef so codegen can continue. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* lod is in the x channel of the first source operand */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   /* NOTE(review): hardwired TRUE instead of the is_sviewinfo argument —
    * the argument only selects where the target comes from above; confirm
    * this is intentional rather than a typo. */
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2663
2664static boolean
2665near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2666                   int pc)
2667{
2668   unsigned i;
2669
2670   for (i = 0; i < 5; i++) {
2671      enum tgsi_opcode opcode;
2672
2673      if (pc + i >= bld->bld_base.info->num_instructions)
2674         return TRUE;
2675
2676      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2677
2678      if (opcode == TGSI_OPCODE_END)
2679         return TRUE;
2680
2681      if (opcode == TGSI_OPCODE_TEX ||
2682         opcode == TGSI_OPCODE_TXP ||
2683         opcode == TGSI_OPCODE_TXD ||
2684         opcode == TGSI_OPCODE_TXB ||
2685         opcode == TGSI_OPCODE_TXL ||
2686         opcode == TGSI_OPCODE_TXF ||
2687         opcode == TGSI_OPCODE_TXQ ||
2688         opcode == TGSI_OPCODE_TEX2 ||
2689         opcode == TGSI_OPCODE_TXB2 ||
2690         opcode == TGSI_OPCODE_TXL2 ||
2691         opcode == TGSI_OPCODE_SAMPLE ||
2692         opcode == TGSI_OPCODE_SAMPLE_B ||
2693         opcode == TGSI_OPCODE_SAMPLE_C ||
2694         opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2695         opcode == TGSI_OPCODE_SAMPLE_D ||
2696         opcode == TGSI_OPCODE_SAMPLE_I ||
2697         opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2698         opcode == TGSI_OPCODE_SAMPLE_L ||
2699         opcode == TGSI_OPCODE_SVIEWINFO ||
2700         opcode == TGSI_OPCODE_CAL ||
2701         opcode == TGSI_OPCODE_IF ||
2702         opcode == TGSI_OPCODE_UIF ||
2703         opcode == TGSI_OPCODE_BGNLOOP ||
2704         opcode == TGSI_OPCODE_SWITCH)
2705         return FALSE;
2706   }
2707
2708   return TRUE;
2709}
2710
2711
2712
2713/**
2714 * Kill fragment if any of the src register values are negative.
2715 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   /* Collect the distinct source components; the same component may be
    * referenced through several swizzles but only needs to be tested once. */
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   /* AND together the per-component "keep" masks: a lane survives only if
    * every tested component is >= 0. */
   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* Lanes outside the current exec mask (inactive control flow) must not
    * be killed, so OR in the inverted exec mask. */
   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   /* Skip the early-out mask check when the shader is about to end anyway. */
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
2770
2771
2772/**
2773 * Unconditional fragment kill.
2774 * The only predication is the execution mask which will apply if
2775 * we're inside a loop or conditional.
2776 */
2777static void
2778emit_kill(struct lp_build_tgsi_soa_context *bld,
2779          int pc)
2780{
2781   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2782   LLVMValueRef mask;
2783
2784   /* For those channels which are "alive", disable fragment shader
2785    * execution.
2786    */
2787   if (bld->exec_mask.has_mask) {
2788      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2789   }
2790   else {
2791      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2792      mask = zero;
2793   }
2794
2795   lp_build_mask_update(bld->mask, mask);
2796
2797   if (!near_end_of_shader(bld, pc))
2798      lp_build_mask_check(bld->mask);
2799}
2800
2801
2802/**
2803 * Emit code which will dump the value of all the temporary registers
2804 * to stdout.
2805 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only has one bit per register for the first 32 regs;
       * beyond that we cannot tell, so dump unconditionally. */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0)  {
         /* This was not declared.*/
         continue;
      }

      /* For inputs only dump the channels the shader actually reads. */
      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Constants go through the generic fetch callback with an
             * identity-swizzled synthetic source register. */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            /* temps and outputs live in allocas; load the current value */
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
2882
2883
2884
/**
 * Handle a TGSI declaration: allocate backing storage (allocas) for the
 * declared register range, or record side-band information such as
 * sampler-view targets and constant-buffer pointers.
 *
 * Indirectly-addressed files are skipped here; their storage is set up
 * elsewhere as a single array.
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* One alloca per channel per register, unless temps are accessed
       * indirectly (then they live in a single array instead). */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
2970
2971
/**
 * Record a TGSI immediate: build splatted constant vectors for up to four
 * 32-bit components and store them either in the inline immediates[]
 * array, in the imms_array alloca (when immediates are addressed
 * indirectly), or both.
 */
void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMValueRef imms[4];
   unsigned i;
   /* NrTokens includes the immediate header token itself */
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         imms[i] =
               lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);

      break;
   case TGSI_IMM_FLOAT64:
   case TGSI_IMM_UINT64:
   case TGSI_IMM_INT64:
   case TGSI_IMM_UINT32:
      /* 64-bit immediates are handled as pairs of raw 32-bit words here;
       * each word is splatted and bitcast to the float vector type. */
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   }
   /* pad missing components with undef */
   for( i = size; i < 4; ++i )
      imms[i] = bld_base->base.undef;

   if (bld->use_immediates_array) {
      /* Immediates live only in the array alloca; store each channel at
       * index*4+chan. */
      unsigned index = bld->num_immediates;
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = gallivm->builder;
      LLVMValueRef gep[2];
      gep[0] = lp_build_const_int32(gallivm, 0);

      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
      for (i = 0; i < 4; ++i ) {
         gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                             bld->imms_array, gep, 2, "");
         LLVMBuildStore(builder, imms[i], imm_ptr);
      }
   } else {
      /* simply copy the immediate values into the next immediates[] slot */
      unsigned i;
      assert(imm->Immediate.NrTokens - 1 <= 4);
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);

      for(i = 0; i < 4; ++i )
         bld->immediates[bld->num_immediates][i] = imms[i];

      /* Mirror into the array alloca as well when immediates may also be
       * addressed indirectly. */
      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
         unsigned index = bld->num_immediates;
         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
         LLVMBuilderRef builder = gallivm->builder;
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         for (i = 0; i < 4; ++i ) {
            gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                                bld->imms_array, gep, 2, "");
            LLVMBuildStore(builder,
                           bld->immediates[index][i],
                           imm_ptr);
         }
      }
   }

   bld->num_immediates++;
}
3052
3053static void
3054ddx_emit(
3055   const struct lp_build_tgsi_action * action,
3056   struct lp_build_tgsi_context * bld_base,
3057   struct lp_build_emit_data * emit_data)
3058{
3059   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3060
3061   emit_fetch_deriv(bld, emit_data->args[0], NULL,
3062                    &emit_data->output[emit_data->chan], NULL);
3063}
3064
3065static void
3066ddy_emit(
3067   const struct lp_build_tgsi_action * action,
3068   struct lp_build_tgsi_context * bld_base,
3069   struct lp_build_emit_data * emit_data)
3070{
3071   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3072
3073   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3074                    &emit_data->output[emit_data->chan]);
3075}
3076
3077static void
3078kill_emit(
3079   const struct lp_build_tgsi_action * action,
3080   struct lp_build_tgsi_context * bld_base,
3081   struct lp_build_emit_data * emit_data)
3082{
3083   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3084
3085   emit_kill(bld, bld_base->pc - 1);
3086}
3087
3088static void
3089kill_if_emit(
3090   const struct lp_build_tgsi_action * action,
3091   struct lp_build_tgsi_context * bld_base,
3092   struct lp_build_emit_data * emit_data)
3093{
3094   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3095
3096   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3097}
3098
3099static void
3100tex_emit(
3101   const struct lp_build_tgsi_action * action,
3102   struct lp_build_tgsi_context * bld_base,
3103   struct lp_build_emit_data * emit_data)
3104{
3105   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106
3107   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3108            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3109}
3110
3111static void
3112tex2_emit(
3113   const struct lp_build_tgsi_action * action,
3114   struct lp_build_tgsi_context * bld_base,
3115   struct lp_build_emit_data * emit_data)
3116{
3117   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118
3119   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3120            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3121}
3122
3123static void
3124txb_emit(
3125   const struct lp_build_tgsi_action * action,
3126   struct lp_build_tgsi_context * bld_base,
3127   struct lp_build_emit_data * emit_data)
3128{
3129   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3130
3131   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3132            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3133}
3134
3135static void
3136txb2_emit(
3137   const struct lp_build_tgsi_action * action,
3138   struct lp_build_tgsi_context * bld_base,
3139   struct lp_build_emit_data * emit_data)
3140{
3141   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3142
3143   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3144            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3145}
3146
3147static void
3148txd_emit(
3149   const struct lp_build_tgsi_action * action,
3150   struct lp_build_tgsi_context * bld_base,
3151   struct lp_build_emit_data * emit_data)
3152{
3153   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154
3155   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3156            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3157}
3158
3159static void
3160txl_emit(
3161   const struct lp_build_tgsi_action * action,
3162   struct lp_build_tgsi_context * bld_base,
3163   struct lp_build_emit_data * emit_data)
3164{
3165   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3166
3167   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3168            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3169}
3170
3171static void
3172txl2_emit(
3173   const struct lp_build_tgsi_action * action,
3174   struct lp_build_tgsi_context * bld_base,
3175   struct lp_build_emit_data * emit_data)
3176{
3177   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3178
3179   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3180            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3181}
3182
3183static void
3184txp_emit(
3185   const struct lp_build_tgsi_action * action,
3186   struct lp_build_tgsi_context * bld_base,
3187   struct lp_build_emit_data * emit_data)
3188{
3189   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3190
3191   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3192            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3193}
3194
3195static void
3196tg4_emit(
3197   const struct lp_build_tgsi_action * action,
3198   struct lp_build_tgsi_context * bld_base,
3199   struct lp_build_emit_data * emit_data)
3200{
3201   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3202
3203   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3204            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3205}
3206
3207static void
3208lodq_emit(
3209   const struct lp_build_tgsi_action * action,
3210   struct lp_build_tgsi_context * bld_base,
3211   struct lp_build_emit_data * emit_data)
3212{
3213   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3214
3215   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3216            emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3217}
3218
3219static void
3220txq_emit(
3221   const struct lp_build_tgsi_action * action,
3222   struct lp_build_tgsi_context * bld_base,
3223   struct lp_build_emit_data * emit_data)
3224{
3225   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3226
3227   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3228}
3229
3230static void
3231txf_emit(
3232   const struct lp_build_tgsi_action * action,
3233   struct lp_build_tgsi_context * bld_base,
3234   struct lp_build_emit_data * emit_data)
3235{
3236   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3237
3238   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3239}
3240
3241static void
3242sample_i_emit(
3243   const struct lp_build_tgsi_action * action,
3244   struct lp_build_tgsi_context * bld_base,
3245   struct lp_build_emit_data * emit_data)
3246{
3247   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3248
3249   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3250}
3251
3252static void
3253sample_emit(
3254   const struct lp_build_tgsi_action * action,
3255   struct lp_build_tgsi_context * bld_base,
3256   struct lp_build_emit_data * emit_data)
3257{
3258   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3259
3260   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3261               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3262}
3263
3264static void
3265sample_b_emit(
3266   const struct lp_build_tgsi_action * action,
3267   struct lp_build_tgsi_context * bld_base,
3268   struct lp_build_emit_data * emit_data)
3269{
3270   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271
3272   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3273               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3274}
3275
3276static void
3277sample_c_emit(
3278   const struct lp_build_tgsi_action * action,
3279   struct lp_build_tgsi_context * bld_base,
3280   struct lp_build_emit_data * emit_data)
3281{
3282   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3283
3284   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3285               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3286}
3287
3288static void
3289sample_c_lz_emit(
3290   const struct lp_build_tgsi_action * action,
3291   struct lp_build_tgsi_context * bld_base,
3292   struct lp_build_emit_data * emit_data)
3293{
3294   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3295
3296   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3297               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3298}
3299
3300static void
3301sample_d_emit(
3302   const struct lp_build_tgsi_action * action,
3303   struct lp_build_tgsi_context * bld_base,
3304   struct lp_build_emit_data * emit_data)
3305{
3306   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3307
3308   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3309               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3310}
3311
3312static void
3313sample_l_emit(
3314   const struct lp_build_tgsi_action * action,
3315   struct lp_build_tgsi_context * bld_base,
3316   struct lp_build_emit_data * emit_data)
3317{
3318   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3319
3320   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3321               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3322}
3323
3324static void
3325gather4_emit(
3326   const struct lp_build_tgsi_action * action,
3327   struct lp_build_tgsi_context * bld_base,
3328   struct lp_build_emit_data * emit_data)
3329{
3330   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3331
3332   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3333               FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3334}
3335
3336static void
3337sviewinfo_emit(
3338   const struct lp_build_tgsi_action * action,
3339   struct lp_build_tgsi_context * bld_base,
3340   struct lp_build_emit_data * emit_data)
3341{
3342   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3343
3344   emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3345}
3346
3347static void
3348lod_emit(
3349   const struct lp_build_tgsi_action * action,
3350   struct lp_build_tgsi_context * bld_base,
3351   struct lp_build_emit_data * emit_data)
3352{
3353   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3354
3355   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3356               FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3357}
3358
3359static LLVMValueRef
3360mask_vec(struct lp_build_tgsi_context *bld_base)
3361{
3362   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3363   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3364   struct lp_exec_mask *exec_mask = &bld->exec_mask;
3365
3366   if (!exec_mask->has_mask) {
3367      return lp_build_mask_value(bld->mask);
3368   }
3369   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3370                       exec_mask->exec_mask, "");
3371}
3372
3373static void
3374increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3375                          LLVMValueRef ptr,
3376                          LLVMValueRef mask)
3377{
3378   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3379   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3380
3381   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3382
3383   LLVMBuildStore(builder, current_vec, ptr);
3384}
3385
3386static void
3387clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3388                             LLVMValueRef ptr,
3389                             LLVMValueRef mask)
3390{
3391   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3392   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3393
3394   current_vec = lp_build_select(&bld_base->uint_bld,
3395                                 mask,
3396                                 bld_base->uint_bld.zero,
3397                                 current_vec);
3398
3399   LLVMBuildStore(builder, current_vec, ptr);
3400}
3401
3402static LLVMValueRef
3403clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3404                                  LLVMValueRef current_mask_vec,
3405                                  LLVMValueRef total_emitted_vertices_vec)
3406{
3407   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3408   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3409   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3410                                        total_emitted_vertices_vec,
3411                                        bld->max_output_vertices_vec);
3412
3413   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3414}
3415
/**
 * Geometry-shader EMIT: hand the gathered outputs to the GS interface and
 * bump the per-lane emitted-vertex counters for the active lanes.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* Lanes that already emitted max_output_vertices must not emit more. */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Bump both the per-primitive and total vertex counters in the
       * lanes that actually emitted. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
3449
3450
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   /*
    * Finish the primitive currently being assembled by the geometry
    * shader: notify the GS interface, bump the per-lane primitive count
    * and reset the per-lane vertex count — but only for lanes that are
    * both active in 'mask' and actually have unflushed vertices.
    * Called with an explicit mask so the epilogue can use it when the
    * exec_mask is no longer valid.
    */
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Lanes with a non-zero vertex count have an open primitive. */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* Count the finished primitive and clear the vertex counter in the
       * lanes that just ended one. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
3503
3504static void
3505end_primitive(
3506   const struct lp_build_tgsi_action * action,
3507   struct lp_build_tgsi_context * bld_base,
3508   struct lp_build_emit_data * emit_data)
3509{
3510   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3511
3512   if (bld->gs_iface->end_primitive) {
3513      LLVMValueRef mask = mask_vec(bld_base);
3514      end_primitive_masked(bld_base, mask);
3515   }
3516}
3517
3518static void
3519cal_emit(
3520   const struct lp_build_tgsi_action * action,
3521   struct lp_build_tgsi_context * bld_base,
3522   struct lp_build_emit_data * emit_data)
3523{
3524   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3525
3526   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3527                     &bld_base->pc);
3528}
3529
3530static void
3531ret_emit(
3532   const struct lp_build_tgsi_action * action,
3533   struct lp_build_tgsi_context * bld_base,
3534   struct lp_build_emit_data * emit_data)
3535{
3536   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3537
3538   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3539}
3540
3541static void
3542brk_emit(
3543   const struct lp_build_tgsi_action * action,
3544   struct lp_build_tgsi_context * bld_base,
3545   struct lp_build_emit_data * emit_data)
3546{
3547   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3548
3549   lp_exec_break(&bld->exec_mask, bld_base);
3550}
3551
3552static void
3553if_emit(
3554   const struct lp_build_tgsi_action * action,
3555   struct lp_build_tgsi_context * bld_base,
3556   struct lp_build_emit_data * emit_data)
3557{
3558   LLVMValueRef tmp;
3559   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3560
3561   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3562                      emit_data->args[0], bld->bld_base.base.zero);
3563   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3564}
3565
3566static void
3567uif_emit(
3568   const struct lp_build_tgsi_action * action,
3569   struct lp_build_tgsi_context * bld_base,
3570   struct lp_build_emit_data * emit_data)
3571{
3572   LLVMValueRef tmp;
3573   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3574   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3575
3576   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3577                      emit_data->args[0], uint_bld->zero);
3578   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3579}
3580
3581static void
3582case_emit(
3583   const struct lp_build_tgsi_action * action,
3584   struct lp_build_tgsi_context * bld_base,
3585   struct lp_build_emit_data * emit_data)
3586{
3587   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3588
3589   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3590}
3591
3592static void
3593default_emit(
3594   const struct lp_build_tgsi_action * action,
3595   struct lp_build_tgsi_context * bld_base,
3596   struct lp_build_emit_data * emit_data)
3597{
3598   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3599
3600   lp_exec_default(&bld->exec_mask, bld_base);
3601}
3602
3603static void
3604switch_emit(
3605   const struct lp_build_tgsi_action * action,
3606   struct lp_build_tgsi_context * bld_base,
3607   struct lp_build_emit_data * emit_data)
3608{
3609   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3610
3611   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3612}
3613
3614static void
3615endswitch_emit(
3616   const struct lp_build_tgsi_action * action,
3617   struct lp_build_tgsi_context * bld_base,
3618   struct lp_build_emit_data * emit_data)
3619{
3620   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3621
3622   lp_exec_endswitch(&bld->exec_mask, bld_base);
3623}
3624
3625static void
3626bgnloop_emit(
3627   const struct lp_build_tgsi_action * action,
3628   struct lp_build_tgsi_context * bld_base,
3629   struct lp_build_emit_data * emit_data)
3630{
3631   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3632
3633   lp_exec_bgnloop(&bld->exec_mask);
3634}
3635
3636static void
3637bgnsub_emit(
3638   const struct lp_build_tgsi_action * action,
3639   struct lp_build_tgsi_context * bld_base,
3640   struct lp_build_emit_data * emit_data)
3641{
3642   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3643
3644   lp_exec_mask_bgnsub(&bld->exec_mask);
3645}
3646
3647static void
3648else_emit(
3649   const struct lp_build_tgsi_action * action,
3650   struct lp_build_tgsi_context * bld_base,
3651   struct lp_build_emit_data * emit_data)
3652{
3653   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3654
3655   lp_exec_mask_cond_invert(&bld->exec_mask);
3656}
3657
3658static void
3659endif_emit(
3660   const struct lp_build_tgsi_action * action,
3661   struct lp_build_tgsi_context * bld_base,
3662   struct lp_build_emit_data * emit_data)
3663{
3664   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3665
3666   lp_exec_mask_cond_pop(&bld->exec_mask);
3667}
3668
3669static void
3670endloop_emit(
3671   const struct lp_build_tgsi_action * action,
3672   struct lp_build_tgsi_context * bld_base,
3673   struct lp_build_emit_data * emit_data)
3674{
3675   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3676
3677   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3678}
3679
3680static void
3681endsub_emit(
3682   const struct lp_build_tgsi_action * action,
3683   struct lp_build_tgsi_context * bld_base,
3684   struct lp_build_emit_data * emit_data)
3685{
3686   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3687
3688   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3689}
3690
3691static void
3692cont_emit(
3693   const struct lp_build_tgsi_action * action,
3694   struct lp_build_tgsi_context * bld_base,
3695   struct lp_build_emit_data * emit_data)
3696{
3697   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3698
3699   lp_exec_continue(&bld->exec_mask);
3700}
3701
/*
 * Shader prologue: allocate stack (alloca) arrays for every register
 * file that is accessed with indirect addressing, copy the inputs into
 * their array when needed, and zero-initialize the geometry shader emit
 * counters.
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* Indirectly addressed temporaries live in one flat array of vectors,
    * 4 channels per register (file_max is the highest index, hence +1
    * registers -> *4 + 4 channel slots). */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   /* Same layout for indirectly addressed outputs. */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* And for indirectly addressed immediates. */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   /* (GS inputs are fetched through the gs_iface instead, so skip then.) */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each provided input channel into its array slot; slots for
       * missing channels (NULL) are simply left uninitialized. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* Geometry shaders track per-lane emit state in allocas, all starting
    * at zero. */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   /* Optionally emit runtime dumps of the constant/input files. */
   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
3788
/*
 * Shader epilogue: for geometry shaders, flush any open primitive and
 * report final vertex/primitive counts to the GS interface; for other
 * shader stages, copy the (possibly indirect) output values back into
 * the caller-provided output slots.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Hand the final counts over to the GS interface. */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3826
/**
 * Translate a TGSI token stream into LLVM IR in SoA (structure of
 * arrays) layout at the current builder position.
 *
 * Sets up a lp_build_tgsi_soa_context (build contexts for every
 * arithmetic type, fetch/store callbacks, per-opcode actions), runs
 * lp_build_tgsi_llvm() over the tokens, and tears the exec mask down
 * again.  Results are written through the caller-provided 'outputs'
 * array (or through the gs_iface for geometry shaders).
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  const struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed type with the same width/length as 'type'; res_type is
    * otherwise zeroed. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   /* Double-width contexts for 64-bit float/int/uint opcodes. */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Install the SoA fetch/store/declaration hooks. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   /* Start from the CPU defaults, then override control flow, texture
    * sampling and (below) GS opcodes with this file's emitters. */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;


   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   /* Walk the tokens and emit the IR. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Dead debug code: dump the TGSI and generated function when enabled
    * by hand. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
4020