1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "util/u_prim.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_exec.h"
47#include "tgsi/tgsi_info.h"
48#include "tgsi/tgsi_parse.h"
49#include "tgsi/tgsi_util.h"
50#include "tgsi/tgsi_scan.h"
51#include "tgsi/tgsi_strings.h"
52#include "lp_bld_tgsi_action.h"
53#include "lp_bld_type.h"
54#include "lp_bld_const.h"
55#include "lp_bld_arit.h"
56#include "lp_bld_bitarit.h"
57#include "lp_bld_gather.h"
58#include "lp_bld_init.h"
59#include "lp_bld_logic.h"
60#include "lp_bld_misc.h"
61#include "lp_bld_swizzle.h"
62#include "lp_bld_flow.h"
63#include "lp_bld_coro.h"
64#include "lp_bld_quad.h"
65#include "lp_bld_tgsi.h"
66#include "lp_bld_limits.h"
67#include "lp_bld_debug.h"
68#include "lp_bld_printf.h"
69#include "lp_bld_sample.h"
70#include "lp_bld_struct.h"
71
72#define DUMP_GS_EMITS 0
73
74/*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks in consideration
80 * - debug control-flow instructions
81 */
82#define DEBUG_EXECUTION 0
83
84
85/*
86 * Emit code to print a register value.
87 */
88static void
89emit_dump_reg(struct gallivm_state *gallivm,
90              unsigned file,
91              unsigned index,
92              unsigned chan,
93              LLVMValueRef value)
94{
95   char buf[32];
96
97   snprintf(buf, sizeof buf, "    %s[%u].%c = ",
98            tgsi_file_name(file),
99            index, "xyzw"[chan]);
100
101   lp_build_print_value(gallivm, buf, value);
102}
103
104static inline struct function_ctx *
105func_ctx(struct lp_exec_mask *mask)
106{
107   assert(mask->function_stack_size > 0);
108   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109   return &mask->function_stack[mask->function_stack_size - 1];
110}
111
112/*
113 * combine the execution mask if there is one with the current mask.
114 */
115static LLVMValueRef
116mask_vec(struct lp_build_tgsi_context *bld_base)
117{
118   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120   struct lp_exec_mask *exec_mask = &bld->exec_mask;
121   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122   if (!exec_mask->has_mask) {
123      return bld_mask;
124   }
125   if (!bld_mask)
126      return exec_mask->exec_mask;
127   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128                       exec_mask->exec_mask, "");
129}
130
131static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132                          struct lp_build_tgsi_context * bld_base)
133{
134   enum tgsi_opcode opcode =
135      bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136   bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137                        opcode == TGSI_OPCODE_CASE);
138   lp_exec_break(mask, &bld_base->pc, break_always);
139}
140
141static void lp_exec_switch(struct lp_exec_mask *mask,
142                           LLVMValueRef switchval)
143{
144   struct function_ctx *ctx = func_ctx(mask);
145
146   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148      ctx->switch_stack_size++;
149      return;
150   }
151
152   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153      ctx->break_type;
154   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155
156   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161   ctx->switch_stack_size++;
162
163   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164   ctx->switch_val = switchval;
165   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166   ctx->switch_in_default = false;
167   ctx->switch_pc = 0;
168
169   lp_exec_mask_update(mask);
170}
171
172static void lp_exec_endswitch(struct lp_exec_mask *mask,
173                              struct lp_build_tgsi_context * bld_base)
174{
175   LLVMBuilderRef builder = mask->bld->gallivm->builder;
176   struct function_ctx *ctx = func_ctx(mask);
177
178   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179      ctx->switch_stack_size--;
180      return;
181   }
182
183   /* check if there's deferred default if so do it now */
184   if (ctx->switch_pc && !ctx->switch_in_default) {
185      LLVMValueRef prevmask, defaultmask;
186      unsigned tmp_pc;
187      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190      ctx->switch_in_default = true;
191
192      lp_exec_mask_update(mask);
193
194      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195             TGSI_OPCODE_DEFAULT);
196
197      tmp_pc = bld_base->pc;
198      bld_base->pc = ctx->switch_pc;
199      /*
200       * re-purpose switch_pc to point to here again, since we stop execution of
201       * the deferred default after next break.
202       */
203      ctx->switch_pc = tmp_pc - 1;
204
205      return;
206   }
207
208   else if (ctx->switch_pc && ctx->switch_in_default) {
209      assert(bld_base->pc == ctx->switch_pc + 1);
210   }
211
212   ctx->switch_stack_size--;
213   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218
219   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220
221   lp_exec_mask_update(mask);
222}
223
224static void lp_exec_case(struct lp_exec_mask *mask,
225                         LLVMValueRef caseval)
226{
227   LLVMBuilderRef builder = mask->bld->gallivm->builder;
228   struct function_ctx *ctx = func_ctx(mask);
229
230   LLVMValueRef casemask, prevmask;
231
232   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233      return;
234   }
235
236   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237   if (!ctx->switch_in_default) {
238      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241                                             ctx->switch_mask_default, "sw_default_mask");
242      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244
245      lp_exec_mask_update(mask);
246   }
247}
248
249/*
250 * Analyse default statement in a switch.
251 * \return true if default is last statement, false otherwise
252 * \param default_pc_start contains pc of instruction to jump to
253 *                         if default wasn't last but there's no
254 *                         fallthrough into default.
255 */
256static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257                                       struct lp_build_tgsi_context * bld_base,
258                                       int *default_pc_start)
259{
260   unsigned pc = bld_base->pc;
261   struct function_ctx *ctx = func_ctx(mask);
262   int curr_switch_stack = ctx->switch_stack_size;
263
264   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265      return false;
266   }
267
268   /* skip over case statements which are together with default */
269   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270      pc++;
271   }
272
273   while (pc != ~0u && pc < bld_base->num_instructions) {
274      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275      switch (opcode) {
276      case TGSI_OPCODE_CASE:
277         if (curr_switch_stack == ctx->switch_stack_size) {
278            *default_pc_start = pc - 1;
279            return false;
280         }
281         break;
282      case TGSI_OPCODE_SWITCH:
283         curr_switch_stack++;
284         break;
285      case TGSI_OPCODE_ENDSWITCH:
286         if (curr_switch_stack == ctx->switch_stack_size) {
287            *default_pc_start = pc - 1;
288            return true;
289         }
290         curr_switch_stack--;
291         break;
292      default:
293         ; /* nothing */
294      }
295      pc++;
296   }
297   /* should never arrive here */
298   assert(0);
299   return true;
300}
301
302static void lp_exec_default(struct lp_exec_mask *mask,
303                            struct lp_build_tgsi_context * bld_base)
304{
305   LLVMBuilderRef builder = mask->bld->gallivm->builder;
306   struct function_ctx *ctx = func_ctx(mask);
307
308   int default_exec_pc = 0;
309   boolean default_is_last;
310
311   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312      return;
313   }
314
315   /*
316    * This is a messy opcode, because it may not be always at the end and
317    * there can be fallthrough in and out of it.
318    */
319
320   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321   /*
322    * If it is last statement in switch (note that case statements appearing
323    * "at the same time" as default don't change that) everything is just fine,
324    * update switch mask and go on. This means we can handle default with
325    * fallthrough INTO it without overhead, if it is last.
326    */
327   if (default_is_last) {
328      LLVMValueRef prevmask, defaultmask;
329      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333      ctx->switch_in_default = true;
334
335      lp_exec_mask_update(mask);
336   }
337   else {
338      /*
339       * Technically, "case" immediately before default isn't really a
340       * fallthrough, however we still have to count them as such as we
341       * already have updated the masks.
342       * If that happens in practice could add a switch optimizer pass
343       * which just gets rid of all case statements appearing together with
344       * default (or could do switch analysis at switch start time instead).
345       */
346      enum tgsi_opcode opcode =
347         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349                         opcode != TGSI_OPCODE_SWITCH);
350      /*
351       * If it is not last statement and there was no fallthrough into it,
352       * we record the PC and continue execution at next case (again, those
353       * case encountered at the same time don't count). At endswitch
354       * time, we update switchmask, and go back executing the code we skipped
355       * until the next break (possibly re-executing some code with changed mask
356       * if there was a fallthrough out of default).
357       * Finally, if it is not last statement and there was a fallthrough into it,
358       * do the same as with the former case, except instead of skipping the code
359       * just execute it without updating the mask, then go back and re-execute.
360       */
361      ctx->switch_pc = bld_base->pc;
362      if (!ft_into) {
363         bld_base->pc = default_exec_pc;
364      }
365   }
366}
367
368
369static void lp_exec_mask_call(struct lp_exec_mask *mask,
370                              int func,
371                              int *pc)
372{
373   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374      return;
375   }
376
377   lp_exec_mask_function_init(mask, mask->function_stack_size);
378   mask->function_stack[mask->function_stack_size].pc = *pc;
379   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380   mask->function_stack_size++;
381   *pc = func;
382}
383
384static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385{
386   LLVMBuilderRef builder = mask->bld->gallivm->builder;
387   struct function_ctx *ctx = func_ctx(mask);
388   LLVMValueRef exec_mask;
389
390   if (ctx->cond_stack_size == 0 &&
391       ctx->loop_stack_size == 0 &&
392       ctx->switch_stack_size == 0 &&
393       mask->function_stack_size == 1) {
394      /* returning from main() */
395      *pc = -1;
396      return;
397   }
398
399   if (mask->function_stack_size == 1) {
400      /*
401       * This requires special handling since we need to ensure
402       * we don't drop the mask even if we have no call stack
403       * (e.g. after a ret in a if clause after the endif)
404       */
405      mask->ret_in_main = TRUE;
406   }
407
408   exec_mask = LLVMBuildNot(builder,
409                            mask->exec_mask,
410                            "ret");
411
412   mask->ret_mask = LLVMBuildAnd(builder,
413                                 mask->ret_mask,
414                                 exec_mask, "ret_full");
415
416   lp_exec_mask_update(mask);
417}
418
419static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420{
421}
422
423static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424{
425   struct function_ctx *ctx;
426
427   assert(mask->function_stack_size > 1);
428   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430   ctx = func_ctx(mask);
431   mask->function_stack_size--;
432
433   *pc = ctx->pc;
434   mask->ret_mask = ctx->ret_mask;
435
436   lp_exec_mask_update(mask);
437}
438
439
440static LLVMValueRef
441get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442             unsigned file,
443             int index,
444             unsigned chan)
445{
446   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448   LLVMValueRef var_of_array;
449
450   switch (file) {
451   case TGSI_FILE_TEMPORARY:
452      array_of_vars = bld->temps;
453      var_of_array = bld->temps_array;
454      break;
455   case TGSI_FILE_OUTPUT:
456      array_of_vars = bld->outputs;
457      var_of_array = bld->outputs_array;
458      break;
459   default:
460      assert(0);
461      return NULL;
462   }
463
464   assert(chan < 4);
465
466   if (bld->indirect_files & (1 << file)) {
467      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469         LLVMValueRef gep[2];
470         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471         gep[1] = lindex;
472         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473      } else {
474         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475      }
476   }
477   else {
478      assert(index <= bld->bld_base.info->file_max[file]);
479      return array_of_vars[index][chan];
480   }
481}
482
483
484/**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index  which temporary register
488 * \param chan  which channel of the temp register.
489 */
490LLVMValueRef
491lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492             unsigned index,
493             unsigned chan)
494{
495   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496}
497
498/**
499 * Return pointer to a output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index  which output register
502 * \param chan  which channel of the output register.
503 */
504LLVMValueRef
505lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506               unsigned index,
507               unsigned chan)
508{
509   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510}
511
512/*
513 * If we have indirect addressing in outputs copy our alloca array
514 * to the outputs slots specified by the caller to make sure
515 * our outputs are delivered consistently via the same interface.
516 */
517static void
518gather_outputs(struct lp_build_tgsi_soa_context * bld)
519{
520   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521      unsigned index, chan;
522      assert(bld->bld_base.info->num_outputs <=
523             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527         }
528      }
529   }
530}
531
532/**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
537static LLVMValueRef
538build_gather(struct lp_build_tgsi_context *bld_base,
539             LLVMValueRef base_ptr,
540             LLVMValueRef indexes,
541             LLVMValueRef overflow_mask,
542             LLVMValueRef indexes2)
543{
544   struct gallivm_state *gallivm = bld_base->base.gallivm;
545   LLVMBuilderRef builder = gallivm->builder;
546   struct lp_build_context *uint_bld = &bld_base->uint_bld;
547   struct lp_build_context *bld = &bld_base->base;
548   LLVMValueRef res;
549   unsigned i;
550
551   if (indexes2)
552      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553   else
554      res = bld->undef;
555   /*
556    * overflow_mask is a vector telling us which channels
557    * in the vector overflowed. We use the overflow behavior for
558    * constant buffers which is defined as:
559    * Out of bounds access to constant buffer returns 0 in all
560    * components. Out of bounds behavior is always with respect
561    * to the size of the buffer bound at that slot.
562    */
563
564   if (overflow_mask) {
565      /*
566       * We avoid per-element control flow here (also due to llvm going crazy,
567       * though I suspect it's better anyway since overflow is likely rare).
568       * Note that since we still fetch from buffers even if num_elements was
569       * zero (in this case we'll fetch from index zero) the jit func callers
570       * MUST provide valid fake constant buffers of size 4x32 (the values do
571       * not matter), otherwise we'd still need (not per element though)
572       * control flow.
573       */
574      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575      if (indexes2)
576         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577   }
578
579   /*
580    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581    */
582   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583      LLVMValueRef si, di;
584      LLVMValueRef index;
585      LLVMValueRef scalar_ptr, scalar;
586
587      di = lp_build_const_int32(bld->gallivm, i);
588      if (indexes2)
589         si = lp_build_const_int32(bld->gallivm, i >> 1);
590      else
591         si = di;
592
593      if (indexes2 && (i & 1)) {
594         index = LLVMBuildExtractElement(builder,
595                                         indexes2, si, "");
596      } else {
597         index = LLVMBuildExtractElement(builder,
598                                         indexes, si, "");
599      }
600      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601                                &index, 1, "gather_ptr");
602      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603
604      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605   }
606
607   if (overflow_mask) {
608      if (indexes2) {
609         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611                                       bld_base->dbl_bld.int_vec_type, "");
612         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613                               bld_base->dbl_bld.zero, res);
614      } else
615         res = lp_build_select(bld, overflow_mask, bld->zero, res);
616   }
617
618   return res;
619}
620
621
622/**
623 * Scatter/store vector.
624 */
625static void
626emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627                  LLVMValueRef base_ptr,
628                  LLVMValueRef indexes,
629                  LLVMValueRef values,
630                  struct lp_exec_mask *mask)
631{
632   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633   LLVMBuilderRef builder = gallivm->builder;
634   unsigned i;
635   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637   /*
638    * Loop over elements of index_vec, store scalar value.
639    */
640   for (i = 0; i < bld->bld_base.base.type.length; i++) {
641      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645      LLVMValueRef scalar_pred = pred ?
646         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648      if (0)
649         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650                         ii, val, index, scalar_ptr);
651
652      if (scalar_pred) {
653         LLVMValueRef real_val, dst_val;
654         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656         LLVMBuildStore(builder, real_val, scalar_ptr);
657      }
658      else {
659         LLVMBuildStore(builder, val, scalar_ptr);
660      }
661   }
662}
663
664
665/**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
671static LLVMValueRef
672get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673                   unsigned reg_file, unsigned reg_index,
674                   const struct tgsi_ind_register *indirect_reg,
675                   int index_limit)
676{
677   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679   /* always use X component of address register */
680   unsigned swizzle = indirect_reg->Swizzle;
681   LLVMValueRef base;
682   LLVMValueRef rel;
683   LLVMValueRef max_index;
684   LLVMValueRef index;
685
686   assert(bld->indirect_files & (1 << reg_file));
687
688   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689
690   assert(swizzle < 4);
691   switch (indirect_reg->File) {
692   case TGSI_FILE_ADDRESS:
693      rel = LLVMBuildLoad(builder,
694                          bld->addr[indirect_reg->Index][swizzle],
695                          "load addr reg");
696      /* ADDR LLVM values already have LLVM integer type. */
697      break;
698   case TGSI_FILE_TEMPORARY:
699      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700      rel = LLVMBuildLoad(builder, rel, "load temp reg");
701      /* TEMP LLVM values always have LLVM float type, but for indirection, the
702       * value actually stored is expected to be an integer */
703      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704      break;
705   default:
706      assert(0);
707      rel = uint_bld->zero;
708   }
709
710   index = lp_build_add(uint_bld, base, rel);
711
712   /*
713    * emit_fetch_constant handles constant buffer overflow so this code
714    * is pointless for them.
715    * Furthermore the D3D10 spec in section 6.5 says:
716    * If the constant buffer bound to a slot is larger than the size
717    * declared in the shader for that slot, implementations are allowed
718    * to return incorrect data (not necessarily 0) for indices that are
719    * larger than the declared size but smaller than the buffer size.
720    */
721   if (reg_file != TGSI_FILE_CONSTANT) {
722      assert(index_limit >= 0);
723      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724                                         uint_bld->type, index_limit);
725
726      assert(!uint_bld->type.sign);
727      index = lp_build_min(uint_bld, index, max_index);
728   }
729
730   return index;
731}
732
733static struct lp_build_context *
734stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735	       enum tgsi_opcode_type stype)
736{
737   struct lp_build_context *bld_fetch;
738
739   switch (stype) {
740   case TGSI_TYPE_FLOAT:
741   case TGSI_TYPE_UNTYPED:
742      bld_fetch = &bld_base->base;
743      break;
744   case TGSI_TYPE_UNSIGNED:
745      bld_fetch = &bld_base->uint_bld;
746      break;
747   case TGSI_TYPE_SIGNED:
748      bld_fetch = &bld_base->int_bld;
749      break;
750   case TGSI_TYPE_DOUBLE:
751      bld_fetch = &bld_base->dbl_bld;
752      break;
753   case TGSI_TYPE_UNSIGNED64:
754      bld_fetch = &bld_base->uint64_bld;
755      break;
756   case TGSI_TYPE_SIGNED64:
757      bld_fetch = &bld_base->int64_bld;
758      break;
759   case TGSI_TYPE_VOID:
760   default:
761      assert(0);
762      bld_fetch = NULL;
763      break;
764   }
765   return bld_fetch;
766}
767
768static LLVMValueRef
769get_soa_array_offsets(struct lp_build_context *uint_bld,
770                      LLVMValueRef indirect_index,
771                      unsigned chan_index,
772                      boolean need_perelement_offset)
773{
774   struct gallivm_state *gallivm = uint_bld->gallivm;
775   LLVMValueRef chan_vec =
776      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777   LLVMValueRef length_vec =
778      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779   LLVMValueRef index_vec;
780
781   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785
786   if (need_perelement_offset) {
787      LLVMValueRef pixel_offsets;
788      unsigned i;
789     /* build pixel offset vector: {0, 1, 2, 3, ...} */
790      pixel_offsets = uint_bld->undef;
791      for (i = 0; i < uint_bld->type.length; i++) {
792         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794                                                ii, ii, "");
795      }
796      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797   }
798   return index_vec;
799}
800
801static LLVMValueRef
802emit_fetch_constant(
803   struct lp_build_tgsi_context * bld_base,
804   const struct tgsi_full_src_register * reg,
805   enum tgsi_opcode_type stype,
806   unsigned swizzle_in)
807{
808   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809   struct gallivm_state *gallivm = bld_base->base.gallivm;
810   LLVMBuilderRef builder = gallivm->builder;
811   struct lp_build_context *uint_bld = &bld_base->uint_bld;
812   unsigned dimension = 0;
813   LLVMValueRef consts_ptr;
814   LLVMValueRef num_consts;
815   LLVMValueRef res;
816   unsigned swizzle = swizzle_in & 0xffff;
817
818   /* XXX: Handle fetching xyzw components as a vector */
819   assert(swizzle != ~0u);
820
821   if (reg->Register.Dimension) {
822      assert(!reg->Dimension.Indirect);
823      dimension = reg->Dimension.Index;
824      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825   }
826
827   consts_ptr = bld->consts[dimension];
828   num_consts = bld->consts_sizes[dimension];
829
830   if (reg->Register.Indirect) {
831      LLVMValueRef indirect_index;
832      LLVMValueRef swizzle_vec =
833         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834      LLVMValueRef index_vec;  /* index into the const buffer */
835      LLVMValueRef overflow_mask;
836      LLVMValueRef index_vec2 = NULL;
837
838      indirect_index = get_indirect_index(bld,
839                                          reg->Register.File,
840                                          reg->Register.Index,
841                                          &reg->Indirect,
842                                          bld->bld_base.info->file_max[reg->Register.File]);
843
844      /* All fetches are from the same constant buffer, so
845       * we need to propagate the size to a vector to do a
846       * vector comparison */
847      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848      /* Construct a boolean vector telling us which channels
849       * overflow the bound constant buffer */
850      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851                                       indirect_index, num_consts);
852
853      /* index_vec = indirect_index * 4 + swizzle */
854      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856
857      if (tgsi_type_is_64bit(stype)) {
858         LLVMValueRef swizzle_vec2;
859         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862      }
863      /* Gather values from the constant buffer */
864      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865   }
866   else {
867      LLVMValueRef index;  /* index into the const buffer */
868      LLVMValueRef scalar, scalar_ptr;
869      struct lp_build_context *bld_broad = &bld_base->base;
870      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871
872      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873                                &index, 1, "");
874
875      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876
877         LLVMValueRef scalar2, scalar2_ptr;
878         LLVMValueRef shuffles[2];
879         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880
881         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882                                    &index, 1, "");
883
884         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886         shuffles[0] = lp_build_const_int32(gallivm, 0);
887         shuffles[1] = lp_build_const_int32(gallivm, 1);
888
889         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892      } else {
893        if (stype == TGSI_TYPE_DOUBLE) {
894           LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896           bld_broad = &bld_base->dbl_bld;
897        } else if (stype == TGSI_TYPE_UNSIGNED64) {
898           LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900           bld_broad = &bld_base->uint64_bld;
901        } else if (stype == TGSI_TYPE_SIGNED64) {
902           LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903           scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904           bld_broad = &bld_base->int64_bld;
905        }
906        scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907        res = lp_build_broadcast_scalar(bld_broad, scalar);
908      }
909
910   }
911
912   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915   }
916
917   return res;
918}
919
920/**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then casts to vec_length 64-bits.
926 */
927static LLVMValueRef
928emit_fetch_64bit(
929   struct lp_build_tgsi_context * bld_base,
930   enum tgsi_opcode_type stype,
931   LLVMValueRef input,
932   LLVMValueRef input2)
933{
934   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936   LLVMBuilderRef builder = gallivm->builder;
937   LLVMValueRef res;
938   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939   int i;
940   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941   int len = bld_base->base.type.length * 2;
942   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947   }
948   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951}
952
953static LLVMValueRef
954emit_fetch_immediate(
955   struct lp_build_tgsi_context * bld_base,
956   const struct tgsi_full_src_register * reg,
957   enum tgsi_opcode_type stype,
958   unsigned swizzle_in)
959{
960   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962   LLVMBuilderRef builder = gallivm->builder;
963   LLVMValueRef res = NULL;
964   unsigned swizzle = swizzle_in & 0xffff;
965
966   if (bld->use_immediates_array || reg->Register.Indirect) {
967      LLVMValueRef imms_array;
968      LLVMTypeRef fptr_type;
969
970      /* cast imms_array pointer to float* */
971      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973
974      if (reg->Register.Indirect) {
975         LLVMValueRef indirect_index;
976         LLVMValueRef index_vec;  /* index into the immediate register array */
977         LLVMValueRef index_vec2 = NULL;
978         indirect_index = get_indirect_index(bld,
979                                             reg->Register.File,
980                                             reg->Register.Index,
981                                             &reg->Indirect,
982                                             bld->bld_base.info->file_max[reg->Register.File]);
983         /*
984          * Unlike for other reg classes, adding pixel offsets is unnecessary -
985          * immediates are stored as full vectors (FIXME??? - might be better
986          * to store them the same as constants) but all elements are the same
987          * in any case.
988          */
989         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990                                           indirect_index,
991                                           swizzle,
992                                           FALSE);
993         if (tgsi_type_is_64bit(stype))
994            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995                                              indirect_index,
996                                              swizzle_in >> 16,
997                                              FALSE);
998         /* Gather values from the immediate register array */
999         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000      } else {
1001         LLVMValueRef gep[2];
1002         gep[0] = lp_build_const_int32(gallivm, 0);
1003         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005                                              bld->imms_array, gep, 2, "");
1006         res = LLVMBuildLoad(builder, imms_ptr, "");
1007
1008         if (tgsi_type_is_64bit(stype)) {
1009            LLVMValueRef imms_ptr2;
1010            LLVMValueRef res2;
1011            gep[1] = lp_build_const_int32(gallivm,
1012                                          reg->Register.Index * 4 + (swizzle_in >> 16));
1013            imms_ptr2 = LLVMBuildGEP(builder,
1014                                     bld->imms_array, gep, 2, "");
1015            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016            res = emit_fetch_64bit(bld_base, stype, res, res2);
1017         }
1018      }
1019   }
1020   else {
1021      res = bld->immediates[reg->Register.Index][swizzle];
1022      if (tgsi_type_is_64bit(stype))
1023         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024   }
1025
1026   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029   }
1030   return res;
1031}
1032
1033static LLVMValueRef
1034emit_fetch_input(
1035   struct lp_build_tgsi_context * bld_base,
1036   const struct tgsi_full_src_register * reg,
1037   enum tgsi_opcode_type stype,
1038   unsigned swizzle_in)
1039{
1040   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042   LLVMBuilderRef builder = gallivm->builder;
1043   LLVMValueRef res;
1044   unsigned swizzle = swizzle_in & 0xffff;
1045
1046   if (reg->Register.Indirect) {
1047      LLVMValueRef indirect_index;
1048      LLVMValueRef index_vec;  /* index into the input reg array */
1049      LLVMValueRef index_vec2 = NULL;
1050      LLVMValueRef inputs_array;
1051      LLVMTypeRef fptr_type;
1052
1053      indirect_index = get_indirect_index(bld,
1054                                          reg->Register.File,
1055                                          reg->Register.Index,
1056                                          &reg->Indirect,
1057                                          bld->bld_base.info->file_max[reg->Register.File]);
1058
1059      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060                                        indirect_index,
1061                                        swizzle,
1062                                        TRUE);
1063      if (tgsi_type_is_64bit(stype)) {
1064         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065                                           indirect_index,
1066                                           swizzle_in >> 16,
1067                                           TRUE);
1068      }
1069      /* cast inputs_array pointer to float* */
1070      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072
1073      /* Gather values from the input register array */
1074      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075   } else {
1076      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077         LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078                                        reg->Register.Index * 4 + swizzle);
1079         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080                                               bld->inputs_array, &lindex, 1, "");
1081
1082         res = LLVMBuildLoad(builder, input_ptr, "");
1083         if (tgsi_type_is_64bit(stype)) {
1084            LLVMValueRef lindex1;
1085            LLVMValueRef input_ptr2;
1086            LLVMValueRef res2;
1087
1088            lindex1 = lp_build_const_int32(gallivm,
1089                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1090            input_ptr2 = LLVMBuildGEP(builder,
1091                                      bld->inputs_array, &lindex1, 1, "");
1092            res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093            res = emit_fetch_64bit(bld_base, stype, res, res2);
1094         }
1095      }
1096      else {
1097         res = bld->inputs[reg->Register.Index][swizzle];
1098         if (tgsi_type_is_64bit(stype))
1099            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100      }
1101   }
1102
1103   assert(res);
1104
1105   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108   }
1109
1110   return res;
1111}
1112
1113
1114static LLVMValueRef
1115emit_fetch_gs_input(
1116   struct lp_build_tgsi_context * bld_base,
1117   const struct tgsi_full_src_register * reg,
1118   enum tgsi_opcode_type stype,
1119   unsigned swizzle_in)
1120{
1121   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123   const struct tgsi_shader_info *info = bld->bld_base.info;
1124   LLVMBuilderRef builder = gallivm->builder;
1125   LLVMValueRef attrib_index = NULL;
1126   LLVMValueRef vertex_index = NULL;
1127   unsigned swizzle = swizzle_in & 0xffff;
1128   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129   LLVMValueRef res;
1130
1131   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132      /* This is really a system value not a regular input */
1133      assert(!reg->Register.Indirect);
1134      assert(!reg->Dimension.Indirect);
1135      res = bld->system_values.prim_id;
1136      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138      }
1139      return res;
1140   }
1141
1142   if (reg->Register.Indirect) {
1143      /*
1144       * XXX: this is possibly not quite the right value, since file_max may be
1145       * larger than the max attrib index, due to it being the max of declared
1146       * inputs AND the max vertices per prim (which is 6 for tri adj).
1147       * It should however be safe to use (since we always allocate
1148       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149       */
1150      int index_limit = info->file_max[reg->Register.File];
1151      attrib_index = get_indirect_index(bld,
1152                                        reg->Register.File,
1153                                        reg->Register.Index,
1154                                        &reg->Indirect,
1155                                        index_limit);
1156   } else {
1157      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158   }
1159
1160   if (reg->Dimension.Indirect) {
1161      /*
1162       * A fixed 6 should do as well (which is what we allocate).
1163       */
1164      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165      vertex_index = get_indirect_index(bld,
1166                                        reg->Register.File,
1167                                        reg->Dimension.Index,
1168                                        &reg->DimIndirect,
1169                                        index_limit);
1170   } else {
1171      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172   }
1173
1174   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175                                    reg->Dimension.Indirect,
1176                                    vertex_index,
1177                                    reg->Register.Indirect,
1178                                    attrib_index,
1179                                    swizzle_index);
1180
1181   assert(res);
1182   if (tgsi_type_is_64bit(stype)) {
1183      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184      LLVMValueRef res2;
1185      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186                                        reg->Dimension.Indirect,
1187                                        vertex_index,
1188                                        reg->Register.Indirect,
1189                                        attrib_index,
1190                                        swizzle_index);
1191      assert(res2);
1192      res = emit_fetch_64bit(bld_base, stype, res, res2);
1193   } else if (stype == TGSI_TYPE_UNSIGNED) {
1194      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195   } else if (stype == TGSI_TYPE_SIGNED) {
1196      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197   }
1198
1199   return res;
1200}
1201
1202static LLVMValueRef
1203emit_fetch_tcs_input(
1204   struct lp_build_tgsi_context * bld_base,
1205   const struct tgsi_full_src_register * reg,
1206   enum tgsi_opcode_type stype,
1207   unsigned swizzle_in)
1208{
1209   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211   const struct tgsi_shader_info *info = bld->bld_base.info;
1212   LLVMBuilderRef builder = gallivm->builder;
1213   LLVMValueRef attrib_index = NULL;
1214   LLVMValueRef vertex_index = NULL;
1215   unsigned swizzle = swizzle_in & 0xffff;
1216   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217   LLVMValueRef res;
1218
1219   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1220      /* This is really a system value not a regular input */
1221      assert(!reg->Register.Indirect);
1222      assert(!reg->Dimension.Indirect);
1223      res = bld->system_values.prim_id;
1224      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226      }
1227      return res;
1228   }
1229
1230   if (reg->Register.Indirect) {
1231      int index_limit = info->file_max[reg->Register.File];
1232      attrib_index = get_indirect_index(bld,
1233                                        reg->Register.File,
1234                                        reg->Register.Index,
1235                                        &reg->Indirect,
1236                                        index_limit);
1237   } else {
1238      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239   }
1240
1241   if (reg->Dimension.Indirect) {
1242      vertex_index = get_indirect_index(bld,
1243                                        reg->Register.File,
1244                                        reg->Dimension.Index,
1245                                        &reg->DimIndirect,
1246                                        PIPE_MAX_SHADER_INPUTS);
1247   } else {
1248      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249   }
1250
1251   // TCS can read from its own outputs
1252   if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254                                              reg->Dimension.Indirect,
1255                                              vertex_index,
1256                                              reg->Register.Indirect,
1257                                              attrib_index,
1258                                              FALSE,
1259                                              swizzle_index,
1260                                              bld_base->info->output_semantic_name[reg->Register.Index]);
1261   } else {
1262      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1263                                             reg->Dimension.Indirect,
1264                                             vertex_index,
1265                                             reg->Register.Indirect,
1266                                             attrib_index,
1267                                             FALSE,
1268                                             swizzle_index);
1269   }
1270
1271
1272   assert(res);
1273   if (tgsi_type_is_64bit(stype)) {
1274      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1275      LLVMValueRef res2;
1276      if (reg->Register.File == TGSI_FILE_OUTPUT) {
1277         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1278                                                  reg->Dimension.Indirect,
1279                                                  vertex_index,
1280                                                  reg->Register.Indirect,
1281                                                  attrib_index,
1282                                                  FALSE,
1283                                                  swizzle_index,
1284                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
1285      } else {
1286         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1287                                                 reg->Dimension.Indirect,
1288                                                 vertex_index,
1289                                                 reg->Register.Indirect,
1290                                                 attrib_index,
1291                                                 FALSE,
1292                                                 swizzle_index);
1293      }
1294      assert(res2);
1295      res = emit_fetch_64bit(bld_base, stype, res, res2);
1296   } else if (stype == TGSI_TYPE_UNSIGNED) {
1297      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1298   } else if (stype == TGSI_TYPE_SIGNED) {
1299      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1300   }
1301
1302   return res;
1303}
1304
1305static LLVMValueRef
1306emit_fetch_tes_input(
1307   struct lp_build_tgsi_context * bld_base,
1308   const struct tgsi_full_src_register * reg,
1309   enum tgsi_opcode_type stype,
1310   unsigned swizzle_in)
1311{
1312   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1313   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1314   const struct tgsi_shader_info *info = bld->bld_base.info;
1315   LLVMBuilderRef builder = gallivm->builder;
1316   LLVMValueRef attrib_index = NULL;
1317   LLVMValueRef vertex_index = NULL;
1318   unsigned swizzle = swizzle_in & 0xffff;
1319   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1320   LLVMValueRef res;
1321
1322   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1323      /* This is really a system value not a regular input */
1324      assert(!reg->Register.Indirect);
1325      assert(!reg->Dimension.Indirect);
1326      res = bld->system_values.prim_id;
1327      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1328         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1329      }
1330      return res;
1331   }
1332
1333   if (reg->Register.Indirect) {
1334      int index_limit = info->file_max[reg->Register.File];
1335      attrib_index = get_indirect_index(bld,
1336                                        reg->Register.File,
1337                                        reg->Register.Index,
1338                                        &reg->Indirect,
1339                                        index_limit);
1340   } else {
1341      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1342   }
1343
1344   if (reg->Dimension.Indirect) {
1345      vertex_index = get_indirect_index(bld,
1346                                        reg->Register.File,
1347                                        reg->Dimension.Index,
1348                                        &reg->DimIndirect,
1349                                        PIPE_MAX_SHADER_INPUTS);
1350   } else {
1351      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1352   }
1353
1354   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1355      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1356                                     reg->Register.Indirect,
1357                                     attrib_index,
1358                                     swizzle_index);
1359   } else {
1360      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1361                                       reg->Dimension.Indirect,
1362                                       vertex_index,
1363                                       reg->Register.Indirect,
1364                                       attrib_index,
1365                                       FALSE,
1366                                       swizzle_index);
1367   }
1368
1369   assert(res);
1370   if (tgsi_type_is_64bit(stype)) {
1371      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1372      LLVMValueRef res2;
1373      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1374         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1375                                    reg->Register.Indirect,
1376                                    attrib_index,
1377                                    swizzle_index);
1378      }
1379      else {
1380         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1381                                             reg->Dimension.Indirect,
1382                                             vertex_index,
1383                                             reg->Register.Indirect,
1384                                             attrib_index,
1385                                             FALSE,
1386                                             swizzle_index);
1387      }
1388      assert(res2);
1389      res = emit_fetch_64bit(bld_base, stype, res, res2);
1390   } else if (stype == TGSI_TYPE_UNSIGNED) {
1391      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1392   } else if (stype == TGSI_TYPE_SIGNED) {
1393      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1394   }
1395
1396   return res;
1397}
1398
1399
1400
1401static LLVMValueRef
1402emit_fetch_temporary(
1403   struct lp_build_tgsi_context * bld_base,
1404   const struct tgsi_full_src_register * reg,
1405   enum tgsi_opcode_type stype,
1406   unsigned swizzle_in)
1407{
1408   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1409   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1410   LLVMBuilderRef builder = gallivm->builder;
1411   LLVMValueRef res;
1412   unsigned swizzle = swizzle_in & 0xffff;
1413
1414   if (reg->Register.Indirect) {
1415      LLVMValueRef indirect_index;
1416      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1417      LLVMValueRef temps_array;
1418      LLVMTypeRef fptr_type;
1419
1420      indirect_index = get_indirect_index(bld,
1421                                          reg->Register.File,
1422                                          reg->Register.Index,
1423                                          &reg->Indirect,
1424                                          bld->bld_base.info->file_max[reg->Register.File]);
1425
1426      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1427                                        indirect_index,
1428                                        swizzle,
1429                                        TRUE);
1430      if (tgsi_type_is_64bit(stype)) {
1431               index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1432                                                  indirect_index,
1433                                                  swizzle_in >> 16,
1434                                                  TRUE);
1435      }
1436
1437      /* cast temps_array pointer to float* */
1438      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1439      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1440
1441      /* Gather values from the temporary register array */
1442      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1443   }
1444   else {
1445      LLVMValueRef temp_ptr;
1446      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1447      res = LLVMBuildLoad(builder, temp_ptr, "");
1448
1449      if (tgsi_type_is_64bit(stype)) {
1450         LLVMValueRef temp_ptr2, res2;
1451
1452         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1453         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1454         res = emit_fetch_64bit(bld_base, stype, res, res2);
1455      }
1456   }
1457
1458   if (stype == TGSI_TYPE_SIGNED ||
1459       stype == TGSI_TYPE_UNSIGNED ||
1460       stype == TGSI_TYPE_DOUBLE ||
1461       stype == TGSI_TYPE_SIGNED64 ||
1462       stype == TGSI_TYPE_UNSIGNED64) {
1463      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1464      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1465   }
1466
1467   return res;
1468}
1469
1470static LLVMValueRef
1471emit_fetch_system_value(
1472   struct lp_build_tgsi_context * bld_base,
1473   const struct tgsi_full_src_register * reg,
1474   enum tgsi_opcode_type stype,
1475   unsigned swizzle_in)
1476{
1477   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1478   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1479   const struct tgsi_shader_info *info = bld->bld_base.info;
1480   LLVMBuilderRef builder = gallivm->builder;
1481   LLVMValueRef res;
1482   enum tgsi_opcode_type atype; // Actual type of the value
1483   unsigned swizzle = swizzle_in & 0xffff;
1484
1485   assert(!reg->Register.Indirect);
1486
1487   switch (info->system_value_semantic_name[reg->Register.Index]) {
1488   case TGSI_SEMANTIC_INSTANCEID:
1489      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1490      atype = TGSI_TYPE_UNSIGNED;
1491      break;
1492
1493   case TGSI_SEMANTIC_VERTEXID:
1494      res = bld->system_values.vertex_id;
1495      atype = TGSI_TYPE_UNSIGNED;
1496      break;
1497
1498   case TGSI_SEMANTIC_VERTEXID_NOBASE:
1499      res = bld->system_values.vertex_id_nobase;
1500      atype = TGSI_TYPE_UNSIGNED;
1501      break;
1502
1503   case TGSI_SEMANTIC_BASEVERTEX:
1504      res = bld->system_values.basevertex;
1505      atype = TGSI_TYPE_UNSIGNED;
1506      break;
1507
1508   case TGSI_SEMANTIC_BASEINSTANCE:
1509      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1510      atype = TGSI_TYPE_UNSIGNED;
1511      break;
1512
1513   case TGSI_SEMANTIC_PRIMID:
1514      res = bld->system_values.prim_id;
1515      atype = TGSI_TYPE_UNSIGNED;
1516      break;
1517
1518   case TGSI_SEMANTIC_INVOCATIONID:
1519      if (info->processor == PIPE_SHADER_TESS_CTRL)
1520         res = bld->system_values.invocation_id;
1521      else
1522         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1523      atype = TGSI_TYPE_UNSIGNED;
1524      break;
1525
1526   case TGSI_SEMANTIC_HELPER_INVOCATION:
1527      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1528      atype = TGSI_TYPE_UNSIGNED;
1529      break;
1530
1531   case TGSI_SEMANTIC_THREAD_ID:
1532      res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1533      atype = TGSI_TYPE_UNSIGNED;
1534      break;
1535
1536   case TGSI_SEMANTIC_BLOCK_ID:
1537      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1538      atype = TGSI_TYPE_UNSIGNED;
1539      break;
1540
1541   case TGSI_SEMANTIC_GRID_SIZE:
1542      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1543      atype = TGSI_TYPE_UNSIGNED;
1544      break;
1545
1546   case TGSI_SEMANTIC_TESSCOORD:
1547      {
1548         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1549         LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1550         res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1551      }
1552      atype = TGSI_TYPE_FLOAT;
1553      break;
1554
1555   case TGSI_SEMANTIC_FACE:
1556      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1557      atype = TGSI_TYPE_UNSIGNED;
1558      break;
1559
1560  case TGSI_SEMANTIC_DRAWID:
1561      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1562      atype = TGSI_TYPE_UNSIGNED;
1563      break;
1564
1565   case TGSI_SEMANTIC_TESSOUTER:
1566      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1567                                       bld->system_values.tess_outer,
1568                                       lp_build_const_int32(gallivm, swizzle_in));
1569      atype = TGSI_TYPE_FLOAT;
1570      break;
1571
1572   case TGSI_SEMANTIC_TESSINNER:
1573      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1574                                       bld->system_values.tess_inner,
1575                                       lp_build_const_int32(gallivm, swizzle_in));
1576      atype = TGSI_TYPE_FLOAT;
1577      break;
1578
1579   case TGSI_SEMANTIC_VERTICESIN:
1580      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1581      atype = TGSI_TYPE_UNSIGNED;
1582      break;
1583
1584   default:
1585      assert(!"unexpected semantic in emit_fetch_system_value");
1586      res = bld_base->base.zero;
1587      atype = TGSI_TYPE_FLOAT;
1588      break;
1589   }
1590
1591   if (atype != stype) {
1592      if (stype == TGSI_TYPE_FLOAT) {
1593         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1594      } else if (stype == TGSI_TYPE_UNSIGNED) {
1595         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1596      } else if (stype == TGSI_TYPE_SIGNED) {
1597         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1598      }
1599   }
1600
1601   return res;
1602}
1603
1604/**
1605 * Register fetch with derivatives.
1606 */
1607static void
1608emit_fetch_deriv(
1609   struct lp_build_tgsi_soa_context *bld,
1610   LLVMValueRef src,
1611   LLVMValueRef *res,
1612   LLVMValueRef *ddx,
1613   LLVMValueRef *ddy)
1614{
1615   if (res)
1616      *res = src;
1617
1618   /* TODO: use interpolation coeffs for inputs */
1619
1620   if (ddx)
1621      *ddx = lp_build_ddx(&bld->bld_base.base, src);
1622
1623   if (ddy)
1624      *ddy = lp_build_ddy(&bld->bld_base.base, src);
1625}
1626
1627/**
1628 * store an array of vec-length 64-bit into two arrays of vec_length floats
1629 * i.e.
1630 * value is d0, d1, d2, d3 etc.
1631 * each 64-bit has high and low pieces x, y
1632 * so gets stored into the separate channels as:
1633 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1634 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1635 */
1636static void
1637emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1638                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1639                      LLVMValueRef value)
1640{
1641   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1642   struct gallivm_state *gallivm = bld_base->base.gallivm;
1643   LLVMBuilderRef builder = gallivm->builder;
1644   struct lp_build_context *float_bld = &bld_base->base;
1645   unsigned i;
1646   LLVMValueRef temp, temp2;
1647   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1648   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1649
1650   for (i = 0; i < bld_base->base.type.length; i++) {
1651      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1652      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1653   }
1654
1655   temp = LLVMBuildShuffleVector(builder, value,
1656                                 LLVMGetUndef(LLVMTypeOf(value)),
1657                                 LLVMConstVector(shuffles,
1658                                                 bld_base->base.type.length),
1659                                 "");
1660   temp2 = LLVMBuildShuffleVector(builder, value,
1661                                  LLVMGetUndef(LLVMTypeOf(value)),
1662                                  LLVMConstVector(shuffles2,
1663                                                  bld_base->base.type.length),
1664                                  "");
1665
1666   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1667   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1668}
1669
1670static void
1671emit_store_output(struct lp_build_tgsi_context *bld_base,
1672                  enum tgsi_opcode_type dtype,
1673                  const struct tgsi_full_dst_register *reg,
1674                  unsigned index,
1675                  unsigned chan_index,
1676                  LLVMValueRef indirect_index,
1677                  LLVMValueRef value)
1678{
1679   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1680   struct gallivm_state *gallivm = bld_base->base.gallivm;
1681   LLVMBuilderRef builder = gallivm->builder;
1682   struct lp_build_context *float_bld = &bld_base->base;
1683
1684   /* Outputs are always stored as floats */
1685   value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1686
1687   if (reg->Register.Indirect) {
1688      LLVMValueRef index_vec;  /* indexes into the output registers */
1689      LLVMValueRef outputs_array;
1690      LLVMTypeRef fptr_type;
1691
1692      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1693                                          indirect_index,
1694                                          chan_index,
1695                                          TRUE);
1696
1697      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1698      outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1699
1700      /* Scatter store values into output registers */
1701      emit_mask_scatter(bld, outputs_array, index_vec, value,
1702                        &bld->exec_mask);
1703   }
1704   else {
1705      assert(LLVMTypeOf(value) == float_bld->vec_type);
1706      LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1707                                                chan_index);
1708
1709      if (tgsi_type_is_64bit(dtype)) {
1710         LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1711                                                   chan_index + 1);
1712         emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1713                                 value);
1714      } else
1715         lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1716   }
1717}
1718
1719static void
1720emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1721                      enum tgsi_opcode_type dtype,
1722                      const struct tgsi_full_dst_register *reg,
1723                      unsigned index,
1724                      unsigned chan_index,
1725                      LLVMValueRef indirect_index,
1726                      LLVMValueRef value)
1727{
1728   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1729   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1730   const struct tgsi_shader_info *info = bld->bld_base.info;
1731   LLVMValueRef attrib_index = NULL;
1732   LLVMValueRef vertex_index = NULL;
1733   LLVMValueRef channel_index = NULL;
1734
1735   if (reg->Register.Indirect) {
1736      /*
1737       * XXX: this is possibly not quite the right value, since file_max may be
1738       * larger than the max attrib index, due to it being the max of declared
1739       * inputs AND the max vertices per prim (which is 6 for tri adj).
1740       * It should however be safe to use (since we always allocate
1741       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1742       */
1743      int index_limit = info->file_max[reg->Register.File];
1744      attrib_index = get_indirect_index(bld,
1745                                        reg->Register.File,
1746                                        reg->Register.Index,
1747                                        &reg->Indirect,
1748                                        index_limit);
1749   } else {
1750      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1751   }
1752
1753   if (reg->Dimension.Indirect) {
1754      vertex_index = get_indirect_index(bld,
1755                                        reg->Register.File,
1756                                        reg->Dimension.Index,
1757                                        &reg->DimIndirect,
1758                                        PIPE_MAX_SHADER_OUTPUTS);
1759   } else {
1760      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1761   }
1762
1763   channel_index = lp_build_const_int32(gallivm, chan_index);
1764
1765   assert(bld->tcs_iface->emit_store_output);
1766   bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1767                                          bld_base->info->output_semantic_name[reg->Register.Index],
1768                                          reg->Dimension.Indirect,
1769                                          vertex_index,
1770                                          reg->Register.Indirect,
1771                                          attrib_index,
1772                                          false,
1773                                          channel_index,
1774                                          value,
1775                                          mask_vec(bld_base));
1776}
1777
1778static void
1779emit_store_temp(struct lp_build_tgsi_context *bld_base,
1780                  enum tgsi_opcode_type dtype,
1781                  const struct tgsi_full_dst_register *reg,
1782                  unsigned index,
1783                  unsigned chan_index,
1784                  LLVMValueRef indirect_index,
1785                  LLVMValueRef value)
1786{
1787   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1788   struct gallivm_state *gallivm = bld_base->base.gallivm;
1789   LLVMBuilderRef builder = gallivm->builder;
1790   struct lp_build_context *float_bld = &bld_base->base;
1791
1792   /* Temporaries are always stored as floats */
1793   if (!tgsi_type_is_64bit(dtype))
1794      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1795   else
1796      value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1797
1798   if (reg->Register.Indirect) {
1799      LLVMValueRef index_vec;  /* indexes into the temp registers */
1800      LLVMValueRef temps_array;
1801      LLVMTypeRef fptr_type;
1802
1803      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1804                                          indirect_index,
1805                                          chan_index,
1806                                          TRUE);
1807
1808      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1809      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1810
1811      /* Scatter store values into temp registers */
1812      emit_mask_scatter(bld, temps_array, index_vec, value,
1813                        &bld->exec_mask);
1814   }
1815   else {
1816      LLVMValueRef temp_ptr;
1817      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1818
1819      if (tgsi_type_is_64bit(dtype)) {
1820         LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1821                                                      reg->Register.Index,
1822                                                      chan_index + 1);
1823         emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1824                                 value);
1825      }
1826      else
1827         lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1828   }
1829}
1830
1831static void
1832emit_store_address(struct lp_build_tgsi_context *bld_base,
1833                   enum tgsi_opcode_type dtype,
1834                   const struct tgsi_full_dst_register *reg,
1835                   unsigned index,
1836                   unsigned chan_index,
1837                   LLVMValueRef indirect_index,
1838                   LLVMValueRef value)
1839{
1840   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1841   struct gallivm_state *gallivm = bld_base->base.gallivm;
1842   LLVMBuilderRef builder = gallivm->builder;
1843   struct lp_build_context *int_bld = &bld_base->int_bld;
1844
1845   assert(dtype == TGSI_TYPE_SIGNED);
1846   assert(LLVMTypeOf(value) == int_bld->vec_type);
1847   value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1848   lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1849                        bld->addr[reg->Register.Index][chan_index]);
1850}
1851
1852/**
1853 * Register store.
1854 */
1855static void
1856emit_store_chan(
1857   struct lp_build_tgsi_context *bld_base,
1858   const struct tgsi_full_instruction *inst,
1859   unsigned index,
1860   unsigned chan_index,
1861   LLVMValueRef value)
1862{
1863   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1864   struct gallivm_state *gallivm = bld_base->base.gallivm;
1865   LLVMBuilderRef builder = gallivm->builder;
1866   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1867   struct lp_build_context *float_bld = &bld_base->base;
1868   LLVMValueRef indirect_index = NULL;
1869   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1870
1871   /*
1872    * Apply saturation.
1873    *
1874    * It is always assumed to be float.
1875    */
1876   if (inst->Instruction.Saturate) {
1877      assert(dtype == TGSI_TYPE_FLOAT ||
1878             dtype == TGSI_TYPE_UNTYPED);
1879      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1880      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1881   }
1882
1883   if (reg->Register.Indirect) {
1884      /*
1885       * Currently the mesa/st doesn't generate indirect stores
1886       * to 64-bit values, it normally uses MOV to do indirect stores.
1887       */
1888      assert(!tgsi_type_is_64bit(dtype));
1889      indirect_index = get_indirect_index(bld,
1890                                          reg->Register.File,
1891                                          reg->Register.Index,
1892                                          &reg->Indirect,
1893                                          bld->bld_base.info->file_max[reg->Register.File]);
1894   } else {
1895      assert(reg->Register.Index <=
1896                             bld_base->info->file_max[reg->Register.File]);
1897   }
1898
1899   if (DEBUG_EXECUTION) {
1900      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1901   }
1902
1903   assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1904   bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1905                                                      dtype,
1906                                                      reg,
1907                                                      index,
1908                                                      chan_index,
1909                                                      indirect_index,
1910                                                      value);
1911
1912   (void)dtype;
1913}
1914
1915/*
1916 * Called at the beginning of the translation of each TGSI instruction, to
1917 * emit some debug code.
1918 */
1919static void
1920emit_debug(
1921   struct lp_build_tgsi_context * bld_base,
1922   const struct tgsi_full_instruction * inst,
1923   const struct tgsi_opcode_info * info)
1924
1925{
1926   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1927
1928   if (DEBUG_EXECUTION) {
1929      /*
1930       * Dump the TGSI instruction.
1931       */
1932
1933      struct gallivm_state *gallivm = bld_base->base.gallivm;
1934      char buf[512];
1935      buf[0] = '$';
1936      buf[1] = ' ';
1937      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1938      lp_build_printf(gallivm, buf);
1939
1940      /* Dump the execution mask.
1941       */
1942      if (bld->exec_mask.has_mask) {
1943         lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1944      }
1945   }
1946}
1947
1948static void
1949emit_store(
1950   struct lp_build_tgsi_context * bld_base,
1951   const struct tgsi_full_instruction * inst,
1952   const struct tgsi_opcode_info * info,
1953   unsigned index,
1954   LLVMValueRef dst[4])
1955
1956{
1957   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1958
1959   unsigned writemask = inst->Dst[index].Register.WriteMask;
1960   while (writemask) {
1961      unsigned chan_index = u_bit_scan(&writemask);
1962      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1963          continue;
1964      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1965   }
1966}
1967
1968static unsigned
1969tgsi_to_pipe_tex_target(unsigned tgsi_target)
1970{
1971   switch (tgsi_target) {
1972   case TGSI_TEXTURE_BUFFER:
1973      return PIPE_BUFFER;
1974   case TGSI_TEXTURE_1D:
1975   case TGSI_TEXTURE_SHADOW1D:
1976      return PIPE_TEXTURE_1D;
1977   case TGSI_TEXTURE_2D:
1978   case TGSI_TEXTURE_SHADOW2D:
1979   case TGSI_TEXTURE_2D_MSAA:
1980      return PIPE_TEXTURE_2D;
1981   case TGSI_TEXTURE_3D:
1982      return PIPE_TEXTURE_3D;
1983   case TGSI_TEXTURE_CUBE:
1984   case TGSI_TEXTURE_SHADOWCUBE:
1985      return PIPE_TEXTURE_CUBE;
1986   case TGSI_TEXTURE_RECT:
1987   case TGSI_TEXTURE_SHADOWRECT:
1988      return PIPE_TEXTURE_RECT;
1989   case TGSI_TEXTURE_1D_ARRAY:
1990   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1991      return PIPE_TEXTURE_1D_ARRAY;
1992   case TGSI_TEXTURE_2D_ARRAY:
1993   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1994   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1995      return PIPE_TEXTURE_2D_ARRAY;
1996   case TGSI_TEXTURE_CUBE_ARRAY:
1997   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1998      return PIPE_TEXTURE_CUBE_ARRAY;
1999   default:
2000      assert(0);
2001      return PIPE_BUFFER;
2002   }
2003}
2004
2005
2006static enum lp_sampler_lod_property
2007lp_build_lod_property(
2008   struct lp_build_tgsi_context *bld_base,
2009   const struct tgsi_full_instruction *inst,
2010   unsigned src_op)
2011{
2012   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2013   enum lp_sampler_lod_property lod_property;
2014
2015   /*
2016    * Not much we can do here. We could try catching inputs declared
2017    * with constant interpolation but not sure it's worth it - since for
2018    * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2019    * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2020    * like the constant/immediate recognition below.
2021    * What seems to be of more value would be to recognize temps holding
2022    * broadcasted scalars but no way we can do it.
2023    * Tried asking llvm but without any success (using LLVMIsConstant
2024    * even though this isn't exactly what we'd need), even as simple as
2025    * IMM[0] UINT32 (0,-1,0,0)
2026    * MOV TEMP[0] IMM[0].yyyy
2027    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2028    * doesn't work.
2029    * This means there's ZERO chance this will ever catch a scalar lod
2030    * with traditional tex opcodes as well as texel fetches, since the lod
2031    * comes from the same reg as coords (except some test shaders using
2032    * constant coords maybe).
2033    * There's at least hope for sample opcodes as well as size queries.
2034    */
2035   if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2036       reg->Register.File == TGSI_FILE_CONSTANT ||
2037       reg->Register.File == TGSI_FILE_IMMEDIATE) {
2038      lod_property = LP_SAMPLER_LOD_SCALAR;
2039   }
2040   else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2041      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2042         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2043      }
2044      else {
2045         lod_property = LP_SAMPLER_LOD_PER_QUAD;
2046      }
2047   }
2048   else {
2049      /* never use scalar (per-quad) lod the results are just too wrong. */
2050      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2051   }
2052   return lod_property;
2053}
2054
2055
2056/**
2057 * High-level instruction translators.
2058 */
2059
2060static void
2061emit_tex( struct lp_build_tgsi_soa_context *bld,
2062          const struct tgsi_full_instruction *inst,
2063          enum lp_build_tex_modifier modifier,
2064          LLVMValueRef *texel,
2065          unsigned sampler_reg,
2066          enum lp_sampler_op_type sampler_op)
2067{
2068   unsigned unit = inst->Src[sampler_reg].Register.Index;
2069   LLVMValueRef oow = NULL;
2070   LLVMValueRef lod = NULL;
2071   LLVMValueRef coords[5];
2072   LLVMValueRef offsets[3] = { NULL };
2073   struct lp_derivatives derivs;
2074   struct lp_sampler_params params;
2075   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2076   unsigned num_derivs, num_offsets, i;
2077   unsigned shadow_coord = 0;
2078   unsigned layer_coord = 0;
2079   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2080
2081   memset(&params, 0, sizeof(params));
2082
2083   if (!bld->sampler) {
2084      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2085      for (i = 0; i < 4; i++) {
2086         texel[i] = bld->bld_base.base.undef;
2087      }
2088      return;
2089   }
2090
2091   switch (inst->Texture.Texture) {
2092   case TGSI_TEXTURE_1D_ARRAY:
2093      layer_coord = 1;
2094      FALLTHROUGH;
2095   case TGSI_TEXTURE_1D:
2096      num_offsets = 1;
2097      num_derivs = 1;
2098      break;
2099   case TGSI_TEXTURE_2D_ARRAY:
2100      layer_coord = 2;
2101      FALLTHROUGH;
2102   case TGSI_TEXTURE_2D:
2103   case TGSI_TEXTURE_RECT:
2104      num_offsets = 2;
2105      num_derivs = 2;
2106      break;
2107   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2108      layer_coord = 1;
2109      FALLTHROUGH;
2110   case TGSI_TEXTURE_SHADOW1D:
2111      shadow_coord = 2;
2112      num_offsets = 1;
2113      num_derivs = 1;
2114      break;
2115   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2116      layer_coord = 2;
2117      shadow_coord = 3;
2118      num_offsets = 2;
2119      num_derivs = 2;
2120      break;
2121   case TGSI_TEXTURE_SHADOW2D:
2122   case TGSI_TEXTURE_SHADOWRECT:
2123      shadow_coord = 2;
2124      num_offsets = 2;
2125      num_derivs = 2;
2126      break;
2127   case TGSI_TEXTURE_CUBE:
2128      num_offsets = 2;
2129      num_derivs = 3;
2130      break;
2131   case TGSI_TEXTURE_3D:
2132      num_offsets = 3;
2133      num_derivs = 3;
2134      break;
2135   case TGSI_TEXTURE_SHADOWCUBE:
2136      shadow_coord = 3;
2137      num_offsets = 2;
2138      num_derivs = 3;
2139      break;
2140   case TGSI_TEXTURE_CUBE_ARRAY:
2141      num_offsets = 2;
2142      num_derivs = 3;
2143      layer_coord = 3;
2144      break;
2145   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2146      num_offsets = 2;
2147      num_derivs = 3;
2148      layer_coord = 3;
2149      shadow_coord = 4; /* shadow coord special different reg */
2150      break;
2151   case TGSI_TEXTURE_2D_MSAA:
2152   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2153   default:
2154      assert(0);
2155      return;
2156   }
2157
2158   /* Note lod and especially projected are illegal in a LOT of cases */
2159   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2160       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2161      if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2162         lod = bld->bld_base.base.zero;
2163      } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2164                 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2165         /* note that shadow cube array with bias/explicit lod does not exist */
2166         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2167      }
2168      else {
2169         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2170      }
2171      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2172         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2173      }
2174      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2175         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2176      }
2177      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2178   }
2179
2180   if (sampler_op == LP_SAMPLER_OP_GATHER) {
2181      uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2182      sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2183   }
2184   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2185      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2186      oow = lp_build_rcp(&bld->bld_base.base, oow);
2187   }
2188
2189   for (i = 0; i < num_derivs; i++) {
2190      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2191      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2192         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2193   }
2194   for (i = num_derivs; i < 5; i++) {
2195      coords[i] = bld->bld_base.base.undef;
2196   }
2197
2198   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2199   if (layer_coord) {
2200      if (layer_coord == 3) {
2201         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2202      }
2203      else {
2204         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2205      }
2206      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2207         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2208   }
2209   /* Shadow coord occupies always 5th slot. */
2210   if (shadow_coord) {
2211      sample_key |= LP_SAMPLER_SHADOW;
2212      if (shadow_coord == 4) {
2213         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2214      }
2215      else {
2216         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2217      }
2218      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2219         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2220   }
2221
2222   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2223      unsigned dim;
2224      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2225      for (dim = 0; dim < num_derivs; ++dim) {
2226         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2227         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2228      }
2229      params.derivs = &derivs;
2230      /*
2231       * could also check all src regs if constant but I doubt such
2232       * cases exist in practice.
2233       */
2234      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2235         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2236            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237         }
2238         else {
2239            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2240         }
2241      }
2242      else {
2243         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2244      }
2245   }
2246   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2247
2248   /* we don't handle the 4 offset version of tg4 */
2249   if (inst->Texture.NumOffsets == 1) {
2250      unsigned dim;
2251      sample_key |= LP_SAMPLER_OFFSETS;
2252      for (dim = 0; dim < num_offsets; dim++) {
2253         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2254      }
2255   }
2256
2257   params.type = bld->bld_base.base.type;
2258   params.sample_key = sample_key;
2259   params.texture_index = unit;
2260   params.sampler_index = unit;
2261   params.context_ptr = bld->context_ptr;
2262   params.thread_data_ptr = bld->thread_data_ptr;
2263   params.coords = coords;
2264   params.offsets = offsets;
2265   params.lod = lod;
2266   params.texel = texel;
2267
2268   bld->sampler->emit_tex_sample(bld->sampler,
2269                                 bld->bld_base.base.gallivm,
2270                                 &params);
2271}
2272
2273static void
2274emit_sample(struct lp_build_tgsi_soa_context *bld,
2275            const struct tgsi_full_instruction *inst,
2276            enum lp_build_tex_modifier modifier,
2277            boolean compare,
2278            enum lp_sampler_op_type sample_type,
2279            LLVMValueRef *texel)
2280{
2281   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2282   unsigned texture_unit, sampler_unit;
2283   LLVMValueRef lod = NULL;
2284   LLVMValueRef coords[5];
2285   LLVMValueRef offsets[3] = { NULL };
2286   struct lp_derivatives derivs;
2287   struct lp_sampler_params params;
2288   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2289
2290   unsigned num_offsets, num_derivs, i;
2291   unsigned layer_coord = 0;
2292   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2293
2294   memset(&params, 0, sizeof(params));
2295
2296   if (!bld->sampler) {
2297      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2298      for (i = 0; i < 4; i++) {
2299         texel[i] = bld->bld_base.base.undef;
2300      }
2301      return;
2302   }
2303
2304   /*
2305    * unlike old-style tex opcodes the texture/sampler indices
2306    * always come from src1 and src2 respectively.
2307    */
2308   texture_unit = inst->Src[1].Register.Index;
2309   sampler_unit = inst->Src[2].Register.Index;
2310
2311   /*
2312    * Note inst->Texture.Texture will contain the number of offsets,
2313    * however the target information is NOT there and comes from the
2314    * declared sampler views instead.
2315    */
2316   switch (bld->sv[texture_unit].Resource) {
2317   case TGSI_TEXTURE_1D:
2318      num_offsets = 1;
2319      num_derivs = 1;
2320      break;
2321   case TGSI_TEXTURE_1D_ARRAY:
2322      layer_coord = 1;
2323      num_offsets = 1;
2324      num_derivs = 1;
2325      break;
2326   case TGSI_TEXTURE_2D:
2327   case TGSI_TEXTURE_RECT:
2328      num_offsets = 2;
2329      num_derivs = 2;
2330      break;
2331   case TGSI_TEXTURE_2D_ARRAY:
2332      layer_coord = 2;
2333      num_offsets = 2;
2334      num_derivs = 2;
2335      break;
2336   case TGSI_TEXTURE_CUBE:
2337      num_offsets = 2;
2338      num_derivs = 3;
2339      break;
2340   case TGSI_TEXTURE_3D:
2341      num_offsets = 3;
2342      num_derivs = 3;
2343      break;
2344   case TGSI_TEXTURE_CUBE_ARRAY:
2345      layer_coord = 3;
2346      num_offsets = 2;
2347      num_derivs = 3;
2348      break;
2349   default:
2350      assert(0);
2351      return;
2352   }
2353
2354   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2355       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2356      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2357      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2358         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2359      }
2360      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2362      }
2363      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2364   }
2365   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2366      /* XXX might be better to explicitly pass the level zero information */
2367      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2368      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2369   }
2370
2371   for (i = 0; i < num_derivs; i++) {
2372      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2373   }
2374   for (i = num_derivs; i < 5; i++) {
2375      coords[i] = bld->bld_base.base.undef;
2376   }
2377
2378   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2379   if (layer_coord) {
2380      if (layer_coord == 3)
2381         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2382      else
2383         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2384   }
2385   /* Shadow coord occupies always 5th slot. */
2386   if (compare) {
2387      sample_key |= LP_SAMPLER_SHADOW;
2388      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2389   }
2390
2391   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2392      unsigned dim;
2393      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2394      for (dim = 0; dim < num_derivs; ++dim) {
2395         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2396         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2397      }
2398      params.derivs = &derivs;
2399      /*
2400       * could also check all src regs if constant but I doubt such
2401       * cases exist in practice.
2402       */
2403      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2404         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2405            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406         }
2407         else {
2408            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2409         }
2410      }
2411      else {
2412         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2413      }
2414   }
2415
2416   /* some advanced gather instructions (txgo) would require 4 offsets */
2417   if (inst->Texture.NumOffsets == 1) {
2418      unsigned dim;
2419      sample_key |= LP_SAMPLER_OFFSETS;
2420      for (dim = 0; dim < num_offsets; dim++) {
2421         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2422      }
2423   }
2424   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2425
2426   params.type = bld->bld_base.base.type;
2427   params.sample_key = sample_key;
2428   params.texture_index = texture_unit;
2429   params.sampler_index = sampler_unit;
2430   params.context_ptr = bld->context_ptr;
2431   params.thread_data_ptr = bld->thread_data_ptr;
2432   params.coords = coords;
2433   params.offsets = offsets;
2434   params.lod = lod;
2435   params.texel = texel;
2436
2437   bld->sampler->emit_tex_sample(bld->sampler,
2438                                 bld->bld_base.base.gallivm,
2439                                 &params);
2440
2441   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2442       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2443       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2444       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2445      unsigned char swizzles[4];
2446      swizzles[0] = inst->Src[1].Register.SwizzleX;
2447      swizzles[1] = inst->Src[1].Register.SwizzleY;
2448      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449      swizzles[3] = inst->Src[1].Register.SwizzleW;
2450
2451      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2452   }
2453}
2454
2455static void
2456emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2457                   const struct tgsi_full_instruction *inst,
2458                   LLVMValueRef *texel,
2459                   boolean is_samplei)
2460{
2461   unsigned unit, target;
2462   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2463   LLVMValueRef explicit_lod = NULL;
2464   LLVMValueRef coords[5];
2465   LLVMValueRef offsets[3] = { NULL };
2466   LLVMValueRef ms_index = NULL;
2467   struct lp_sampler_params params;
2468   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2469   unsigned dims, i;
2470   unsigned layer_coord = 0;
2471   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2472
2473   memset(&params, 0, sizeof(params));
2474
2475   if (!bld->sampler) {
2476      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2477      for (i = 0; i < 4; i++) {
2478         texel[i] = coord_undef;
2479      }
2480      return;
2481   }
2482
2483   unit = inst->Src[1].Register.Index;
2484
2485   if (is_samplei) {
2486      target = bld->sv[unit].Resource;
2487   }
2488   else {
2489      target = inst->Texture.Texture;
2490   }
2491
2492   switch (target) {
2493   case TGSI_TEXTURE_1D:
2494   case TGSI_TEXTURE_BUFFER:
2495      dims = 1;
2496      break;
2497   case TGSI_TEXTURE_1D_ARRAY:
2498      layer_coord = 1;
2499      dims = 1;
2500      break;
2501   case TGSI_TEXTURE_2D:
2502   case TGSI_TEXTURE_RECT:
2503   case TGSI_TEXTURE_2D_MSAA:
2504      dims = 2;
2505      break;
2506   case TGSI_TEXTURE_2D_ARRAY:
2507   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2508      layer_coord = 2;
2509      dims = 2;
2510      break;
2511   case TGSI_TEXTURE_3D:
2512      dims = 3;
2513      break;
2514   default:
2515      assert(0);
2516      return;
2517   }
2518
2519   /* always have lod except for buffers and msaa targets ? */
2520   if (target != TGSI_TEXTURE_BUFFER &&
2521       target != TGSI_TEXTURE_2D_MSAA &&
2522       target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2523       inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2524      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2525      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2526      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2527   }
2528
2529   if (target == TGSI_TEXTURE_2D_MSAA ||
2530       target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2531      sample_key |= LP_SAMPLER_FETCH_MS;
2532      ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2533   }
2534
2535   /*
2536    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2537    * would be the sample index.
2538    */
2539
2540   for (i = 0; i < dims; i++) {
2541      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2542   }
2543   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2544   for (i = dims; i < 5; i++) {
2545      coords[i] = coord_undef;
2546   }
2547   if (layer_coord)
2548      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2549
2550   if (inst->Texture.NumOffsets == 1) {
2551      unsigned dim;
2552      sample_key |= LP_SAMPLER_OFFSETS;
2553      for (dim = 0; dim < dims; dim++) {
2554         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2555      }
2556   }
2557   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2558
2559   params.type = bld->bld_base.base.type;
2560   params.sample_key = sample_key;
2561   params.texture_index = unit;
2562   /*
2563    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2564    * and trigger some assertions with d3d10 where the sampler view number
2565    * can exceed this.
2566    */
2567   params.sampler_index = 0;
2568   params.context_ptr = bld->context_ptr;
2569   params.thread_data_ptr = bld->thread_data_ptr;
2570   params.coords = coords;
2571   params.offsets = offsets;
2572   params.derivs = NULL;
2573   params.lod = explicit_lod;
2574   params.texel = texel;
2575   params.ms_index = ms_index;
2576
2577   bld->sampler->emit_tex_sample(bld->sampler,
2578                                 bld->bld_base.base.gallivm,
2579                                 &params);
2580
2581   if (is_samplei &&
2582       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2583        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2584        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2585        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2586      unsigned char swizzles[4];
2587      swizzles[0] = inst->Src[1].Register.SwizzleX;
2588      swizzles[1] = inst->Src[1].Register.SwizzleY;
2589      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2590      swizzles[3] = inst->Src[1].Register.SwizzleW;
2591
2592      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2593   }
2594}
2595
2596static void
2597emit_size_query( struct lp_build_tgsi_soa_context *bld,
2598                 const struct tgsi_full_instruction *inst,
2599                 LLVMValueRef *sizes_out,
2600                 boolean is_sviewinfo)
2601{
2602   LLVMValueRef explicit_lod;
2603   enum lp_sampler_lod_property lod_property;
2604   unsigned has_lod;
2605   unsigned i;
2606   unsigned unit = inst->Src[1].Register.Index;
2607   unsigned target, pipe_target;
2608   struct lp_sampler_size_query_params params;
2609
2610   if (is_sviewinfo) {
2611      target = bld->sv[unit].Resource;
2612   }
2613   else {
2614      target = inst->Texture.Texture;
2615   }
2616   switch (target) {
2617   case TGSI_TEXTURE_BUFFER:
2618   case TGSI_TEXTURE_RECT:
2619   case TGSI_TEXTURE_SHADOWRECT:
2620      has_lod = 0;
2621      break;
2622   default:
2623      has_lod = 1;
2624      break;
2625   }
2626
2627   if (!bld->sampler) {
2628      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2629      for (i = 0; i < 4; i++)
2630         sizes_out[i] = bld->bld_base.int_bld.undef;
2631      return;
2632   }
2633
2634   if (has_lod) {
2635      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2636      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2637   }
2638   else {
2639      explicit_lod = NULL;
2640      lod_property = LP_SAMPLER_LOD_SCALAR;
2641   }
2642
2643
2644   pipe_target = tgsi_to_pipe_tex_target(target);
2645
2646   params.int_type = bld->bld_base.int_bld.type;
2647   params.texture_unit = unit;
2648   params.target = pipe_target;
2649   params.context_ptr = bld->context_ptr;
2650   params.is_sviewinfo = TRUE;
2651   params.lod_property = lod_property;
2652   params.explicit_lod = explicit_lod;
2653   params.sizes_out = sizes_out;
2654   params.samples_only = false;
2655
2656   bld->sampler->emit_size_query(bld->sampler,
2657                                 bld->bld_base.base.gallivm,
2658                                 &params);
2659}
2660
2661static boolean
2662near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2663                   int pc)
2664{
2665   unsigned i;
2666
2667   for (i = 0; i < 5; i++) {
2668      enum tgsi_opcode opcode;
2669
2670      if (pc + i >= bld->bld_base.info->num_instructions)
2671         return TRUE;
2672
2673      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2674
2675      if (opcode == TGSI_OPCODE_END)
2676         return TRUE;
2677
2678      if (opcode == TGSI_OPCODE_TEX ||
2679         opcode == TGSI_OPCODE_TXP ||
2680         opcode == TGSI_OPCODE_TXD ||
2681         opcode == TGSI_OPCODE_TXB ||
2682         opcode == TGSI_OPCODE_TXL ||
2683         opcode == TGSI_OPCODE_TXF ||
2684         opcode == TGSI_OPCODE_TXQ ||
2685         opcode == TGSI_OPCODE_TEX2 ||
2686         opcode == TGSI_OPCODE_TXB2 ||
2687         opcode == TGSI_OPCODE_TXL2 ||
2688         opcode == TGSI_OPCODE_SAMPLE ||
2689         opcode == TGSI_OPCODE_SAMPLE_B ||
2690         opcode == TGSI_OPCODE_SAMPLE_C ||
2691         opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2692         opcode == TGSI_OPCODE_SAMPLE_D ||
2693         opcode == TGSI_OPCODE_SAMPLE_I ||
2694         opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2695         opcode == TGSI_OPCODE_SAMPLE_L ||
2696         opcode == TGSI_OPCODE_SVIEWINFO ||
2697         opcode == TGSI_OPCODE_CAL ||
2698         opcode == TGSI_OPCODE_IF ||
2699         opcode == TGSI_OPCODE_UIF ||
2700         opcode == TGSI_OPCODE_BGNLOOP ||
2701         opcode == TGSI_OPCODE_SWITCH)
2702         return FALSE;
2703   }
2704
2705   return TRUE;
2706}
2707
2708
2709
2710/**
2711 * Kill fragment if any of the src register values are negative.
2712 */
2713static void
2714emit_kill_if(
2715   struct lp_build_tgsi_soa_context *bld,
2716   const struct tgsi_full_instruction *inst,
2717   int pc)
2718{
2719   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2720   const struct tgsi_full_src_register *reg = &inst->Src[0];
2721   LLVMValueRef terms[TGSI_NUM_CHANNELS];
2722   LLVMValueRef mask;
2723   unsigned chan_index;
2724
2725   memset(&terms, 0, sizeof terms);
2726
2727   TGSI_FOR_EACH_CHANNEL( chan_index ) {
2728      unsigned swizzle;
2729
2730      /* Unswizzle channel */
2731      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2732
2733      /* Check if the component has not been already tested. */
2734      assert(swizzle < TGSI_NUM_CHANNELS);
2735      if( !terms[swizzle] )
2736         /* TODO: change the comparison operator instead of setting the sign */
2737         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2738   }
2739
2740   mask = NULL;
2741   TGSI_FOR_EACH_CHANNEL( chan_index ) {
2742      if(terms[chan_index]) {
2743         LLVMValueRef chan_mask;
2744
2745         /*
2746          * If term < 0 then mask = 0 else mask = ~0.
2747          */
2748         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2749
2750         if(mask)
2751            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2752         else
2753            mask = chan_mask;
2754      }
2755   }
2756
2757   if (bld->exec_mask.has_mask) {
2758      LLVMValueRef invmask;
2759      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2760      mask = LLVMBuildOr(builder, mask, invmask, "");
2761   }
2762
2763   lp_build_mask_update(bld->mask, mask);
2764   if (!near_end_of_shader(bld, pc))
2765      lp_build_mask_check(bld->mask);
2766}
2767
2768
2769/**
2770 * Unconditional fragment kill.
2771 * The only predication is the execution mask which will apply if
2772 * we're inside a loop or conditional.
2773 */
2774static void
2775emit_kill(struct lp_build_tgsi_soa_context *bld,
2776          int pc)
2777{
2778   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2779   LLVMValueRef mask;
2780
2781   /* For those channels which are "alive", disable fragment shader
2782    * execution.
2783    */
2784   if (bld->exec_mask.has_mask) {
2785      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2786   }
2787   else {
2788      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2789      mask = zero;
2790   }
2791
2792   lp_build_mask_update(bld->mask, mask);
2793
2794   if (!near_end_of_shader(bld, pc))
2795      lp_build_mask_check(bld->mask);
2796}
2797
2798
2799/**
2800 * Emit code which will dump the value of all the temporary registers
2801 * to stdout.
2802 */
2803static void
2804emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2805               unsigned file)
2806{
2807   const struct tgsi_shader_info *info = bld->bld_base.info;
2808   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2809   LLVMBuilderRef builder = gallivm->builder;
2810   LLVMValueRef reg_ptr;
2811   int index;
2812   int max_index = info->file_max[file];
2813
2814   /*
2815    * Some register files, particularly constants, can be very large,
2816    * and dumping everything could make this unusably slow.
2817    */
2818   max_index = MIN2(max_index, 32);
2819
2820   for (index = 0; index <= max_index; index++) {
2821      LLVMValueRef res;
2822      unsigned mask;
2823      int chan;
2824
2825      if (index < 8 * sizeof(unsigned) &&
2826          (info->file_mask[file] & (1u << index)) == 0)  {
2827         /* This was not declared.*/
2828         continue;
2829      }
2830
2831      if (file == TGSI_FILE_INPUT) {
2832         mask = info->input_usage_mask[index];
2833      } else {
2834         mask = TGSI_WRITEMASK_XYZW;
2835      }
2836
2837      for (chan = 0; chan < 4; chan++) {
2838         if ((mask & (1 << chan)) == 0) {
2839            /* This channel is not used.*/
2840            continue;
2841         }
2842
2843         if (file == TGSI_FILE_CONSTANT) {
2844            struct tgsi_full_src_register reg;
2845            memset(&reg, 0, sizeof reg);
2846            reg.Register.File = file;
2847            reg.Register.Index = index;
2848            reg.Register.SwizzleX = 0;
2849            reg.Register.SwizzleY = 1;
2850            reg.Register.SwizzleZ = 2;
2851            reg.Register.SwizzleW = 3;
2852
2853            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2854            if (!res) {
2855               continue;
2856            }
2857         } else if (file == TGSI_FILE_INPUT) {
2858            res = bld->inputs[index][chan];
2859            if (!res) {
2860               continue;
2861            }
2862         } else if (file == TGSI_FILE_TEMPORARY) {
2863            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2864            assert(reg_ptr);
2865            res = LLVMBuildLoad(builder, reg_ptr, "");
2866         } else if (file == TGSI_FILE_OUTPUT) {
2867            reg_ptr = lp_get_output_ptr(bld, index, chan);
2868            assert(reg_ptr);
2869            res = LLVMBuildLoad(builder, reg_ptr, "");
2870         } else {
2871            assert(0);
2872            continue;
2873         }
2874
2875         emit_dump_reg(gallivm, file, index, chan, res);
2876      }
2877   }
2878}
2879
2880
2881
2882void
2883lp_emit_declaration_soa(
2884   struct lp_build_tgsi_context *bld_base,
2885   const struct tgsi_full_declaration *decl)
2886{
2887   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2888   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2889   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2890   const unsigned first = decl->Range.First;
2891   const unsigned last = decl->Range.Last;
2892   unsigned idx, i;
2893
2894   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2895
2896   switch (decl->Declaration.File) {
2897   case TGSI_FILE_TEMPORARY:
2898      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2899         assert(last < LP_MAX_INLINED_TEMPS);
2900         for (idx = first; idx <= last; ++idx) {
2901            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2902               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2903         }
2904      }
2905      break;
2906
2907   case TGSI_FILE_OUTPUT:
2908      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2909         for (idx = first; idx <= last; ++idx) {
2910            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2911               bld->outputs[idx][i] = lp_build_alloca(gallivm,
2912                                                      vec_type, "output");
2913         }
2914      }
2915      break;
2916
2917   case TGSI_FILE_ADDRESS:
2918      /* ADDR registers are only allocated with an integer LLVM IR type,
2919       * as they are guaranteed to always have integers.
2920       * XXX: Not sure if this exception is worthwhile (or the whole idea of
2921       * an ADDR register for that matter).
2922       */
2923      assert(last < LP_MAX_TGSI_ADDRS);
2924      for (idx = first; idx <= last; ++idx) {
2925         assert(idx < LP_MAX_TGSI_ADDRS);
2926         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2927            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2928      }
2929      break;
2930
2931   case TGSI_FILE_SAMPLER_VIEW:
2932      /*
2933       * The target stored here MUST match whatever there actually
2934       * is in the set sampler views (what about return type?).
2935       */
2936      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2937      for (idx = first; idx <= last; ++idx) {
2938         bld->sv[idx] = decl->SamplerView;
2939      }
2940      break;
2941
2942   case TGSI_FILE_CONSTANT:
2943   {
2944      /*
2945       * We could trivially fetch the per-buffer pointer when fetching the
2946       * constant, relying on llvm to figure out it's always the same pointer
2947       * anyway. However, doing so results in a huge (more than factor of 10)
2948       * slowdown in llvm compilation times for some (but not all) shaders
2949       * (more specifically, the IR optimization spends way more time in
2950       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2951       */
2952      unsigned idx2D = decl->Dim.Index2D;
2953      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2954      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2955      bld->consts[idx2D] =
2956         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2957      bld->consts_sizes[idx2D] =
2958         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2959   }
2960   break;
2961   case TGSI_FILE_BUFFER:
2962   {
2963      unsigned idx = decl->Range.First;
2964      LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2965      assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2966      bld->ssbos[idx] =
2967         lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2968      bld->ssbo_sizes[idx] =
2969         lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2970
2971   }
2972   break;
2973   case TGSI_FILE_MEMORY:
2974      break;
2975   default:
2976      /* don't need to declare other vars */
2977      break;
2978   }
2979}
2980
2981
2982void lp_emit_immediate_soa(
2983   struct lp_build_tgsi_context *bld_base,
2984   const struct tgsi_full_immediate *imm)
2985{
2986   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2987   struct gallivm_state * gallivm = bld_base->base.gallivm;
2988   LLVMValueRef imms[4];
2989   unsigned i;
2990   const uint size = imm->Immediate.NrTokens - 1;
2991   assert(size <= 4);
2992   switch (imm->Immediate.DataType) {
2993   case TGSI_IMM_FLOAT32:
2994      for( i = 0; i < size; ++i )
2995         imms[i] =
2996               lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2997
2998      break;
2999   case TGSI_IMM_FLOAT64:
3000   case TGSI_IMM_UINT64:
3001   case TGSI_IMM_INT64:
3002   case TGSI_IMM_UINT32:
3003      for( i = 0; i < size; ++i ) {
3004         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3005         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3006      }
3007
3008      break;
3009   case TGSI_IMM_INT32:
3010      for( i = 0; i < size; ++i ) {
3011         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3012         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3013      }
3014
3015      break;
3016   }
3017   for( i = size; i < 4; ++i )
3018      imms[i] = bld_base->base.undef;
3019
3020   if (bld->use_immediates_array) {
3021      unsigned index = bld->num_immediates;
3022      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3023      LLVMBuilderRef builder = gallivm->builder;
3024      LLVMValueRef gep[2];
3025      gep[0] = lp_build_const_int32(gallivm, 0);
3026
3027      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3028      for (i = 0; i < 4; ++i ) {
3029         gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3030         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3031                                             bld->imms_array, gep, 2, "");
3032         LLVMBuildStore(builder, imms[i], imm_ptr);
3033      }
3034   } else {
3035      /* simply copy the immediate values into the next immediates[] slot */
3036      unsigned i;
3037      assert(imm->Immediate.NrTokens - 1 <= 4);
3038      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3039
3040      for(i = 0; i < 4; ++i )
3041         bld->immediates[bld->num_immediates][i] = imms[i];
3042
3043      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3044         unsigned index = bld->num_immediates;
3045         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3046         LLVMBuilderRef builder = gallivm->builder;
3047         LLVMValueRef gep[2];
3048         gep[0] = lp_build_const_int32(gallivm, 0);
3049         for (i = 0; i < 4; ++i ) {
3050            gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3051            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3052                                                bld->imms_array, gep, 2, "");
3053            LLVMBuildStore(builder,
3054                           bld->immediates[index][i],
3055                           imm_ptr);
3056         }
3057      }
3058   }
3059
3060   bld->num_immediates++;
3061}
3062
3063static void
3064ddx_emit(
3065   const struct lp_build_tgsi_action * action,
3066   struct lp_build_tgsi_context * bld_base,
3067   struct lp_build_emit_data * emit_data)
3068{
3069   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3070
3071   emit_fetch_deriv(bld, emit_data->args[0], NULL,
3072                    &emit_data->output[emit_data->chan], NULL);
3073}
3074
3075static void
3076ddy_emit(
3077   const struct lp_build_tgsi_action * action,
3078   struct lp_build_tgsi_context * bld_base,
3079   struct lp_build_emit_data * emit_data)
3080{
3081   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3082
3083   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3084                    &emit_data->output[emit_data->chan]);
3085}
3086
3087static void
3088kill_emit(
3089   const struct lp_build_tgsi_action * action,
3090   struct lp_build_tgsi_context * bld_base,
3091   struct lp_build_emit_data * emit_data)
3092{
3093   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094
3095   emit_kill(bld, bld_base->pc - 1);
3096}
3097
3098static void
3099kill_if_emit(
3100   const struct lp_build_tgsi_action * action,
3101   struct lp_build_tgsi_context * bld_base,
3102   struct lp_build_emit_data * emit_data)
3103{
3104   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3105
3106   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3107}
3108
3109static void
3110tex_emit(
3111   const struct lp_build_tgsi_action * action,
3112   struct lp_build_tgsi_context * bld_base,
3113   struct lp_build_emit_data * emit_data)
3114{
3115   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3116
3117   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3118            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3119}
3120
3121static void
3122tex2_emit(
3123   const struct lp_build_tgsi_action * action,
3124   struct lp_build_tgsi_context * bld_base,
3125   struct lp_build_emit_data * emit_data)
3126{
3127   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3128
3129   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3130            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3131}
3132
3133static void
3134txb_emit(
3135   const struct lp_build_tgsi_action * action,
3136   struct lp_build_tgsi_context * bld_base,
3137   struct lp_build_emit_data * emit_data)
3138{
3139   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140
3141   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3142            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3143}
3144
3145static void
3146txb2_emit(
3147   const struct lp_build_tgsi_action * action,
3148   struct lp_build_tgsi_context * bld_base,
3149   struct lp_build_emit_data * emit_data)
3150{
3151   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152
3153   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3154            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3155}
3156
3157static void
3158txd_emit(
3159   const struct lp_build_tgsi_action * action,
3160   struct lp_build_tgsi_context * bld_base,
3161   struct lp_build_emit_data * emit_data)
3162{
3163   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3164
3165   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3166            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3167}
3168
3169static void
3170txl_emit(
3171   const struct lp_build_tgsi_action * action,
3172   struct lp_build_tgsi_context * bld_base,
3173   struct lp_build_emit_data * emit_data)
3174{
3175   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3176
3177   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3178            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3179}
3180
3181static void
3182txl2_emit(
3183   const struct lp_build_tgsi_action * action,
3184   struct lp_build_tgsi_context * bld_base,
3185   struct lp_build_emit_data * emit_data)
3186{
3187   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188
3189   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3190            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3191}
3192
3193static void
3194txp_emit(
3195   const struct lp_build_tgsi_action * action,
3196   struct lp_build_tgsi_context * bld_base,
3197   struct lp_build_emit_data * emit_data)
3198{
3199   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3200
3201   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3202            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3203}
3204
3205static void
3206tg4_emit(
3207   const struct lp_build_tgsi_action * action,
3208   struct lp_build_tgsi_context * bld_base,
3209   struct lp_build_emit_data * emit_data)
3210{
3211   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212
3213   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3214            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3215}
3216
3217static void
3218lodq_emit(
3219   const struct lp_build_tgsi_action * action,
3220   struct lp_build_tgsi_context * bld_base,
3221   struct lp_build_emit_data * emit_data)
3222{
3223   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224
3225   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3226            emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3227}
3228
3229static void
3230txq_emit(
3231   const struct lp_build_tgsi_action * action,
3232   struct lp_build_tgsi_context * bld_base,
3233   struct lp_build_emit_data * emit_data)
3234{
3235   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236
3237   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3238}
3239
3240static void
3241txf_emit(
3242   const struct lp_build_tgsi_action * action,
3243   struct lp_build_tgsi_context * bld_base,
3244   struct lp_build_emit_data * emit_data)
3245{
3246   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247
3248   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3249}
3250
3251static void
3252sample_i_emit(
3253   const struct lp_build_tgsi_action * action,
3254   struct lp_build_tgsi_context * bld_base,
3255   struct lp_build_emit_data * emit_data)
3256{
3257   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3258
3259   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3260}
3261
3262static void
3263sample_emit(
3264   const struct lp_build_tgsi_action * action,
3265   struct lp_build_tgsi_context * bld_base,
3266   struct lp_build_emit_data * emit_data)
3267{
3268   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3269
3270   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3271               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3272}
3273
3274static void
3275sample_b_emit(
3276   const struct lp_build_tgsi_action * action,
3277   struct lp_build_tgsi_context * bld_base,
3278   struct lp_build_emit_data * emit_data)
3279{
3280   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3281
3282   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3283               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3284}
3285
3286static void
3287sample_c_emit(
3288   const struct lp_build_tgsi_action * action,
3289   struct lp_build_tgsi_context * bld_base,
3290   struct lp_build_emit_data * emit_data)
3291{
3292   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3293
3294   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3295               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3296}
3297
3298static void
3299sample_c_lz_emit(
3300   const struct lp_build_tgsi_action * action,
3301   struct lp_build_tgsi_context * bld_base,
3302   struct lp_build_emit_data * emit_data)
3303{
3304   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3305
3306   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3307               TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3308}
3309
3310static void
3311sample_d_emit(
3312   const struct lp_build_tgsi_action * action,
3313   struct lp_build_tgsi_context * bld_base,
3314   struct lp_build_emit_data * emit_data)
3315{
3316   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3317
3318   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3319               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3320}
3321
3322static void
3323sample_l_emit(
3324   const struct lp_build_tgsi_action * action,
3325   struct lp_build_tgsi_context * bld_base,
3326   struct lp_build_emit_data * emit_data)
3327{
3328   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3329
3330   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3331               FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3332}
3333
3334static void
3335gather4_emit(
3336   const struct lp_build_tgsi_action * action,
3337   struct lp_build_tgsi_context * bld_base,
3338   struct lp_build_emit_data * emit_data)
3339{
3340   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3341
3342   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3343               FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3344}
3345
3346static void
3347sviewinfo_emit(
3348   const struct lp_build_tgsi_action * action,
3349   struct lp_build_tgsi_context * bld_base,
3350   struct lp_build_emit_data * emit_data)
3351{
3352   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3353
3354   emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3355}
3356
3357static void
3358lod_emit(
3359   const struct lp_build_tgsi_action * action,
3360   struct lp_build_tgsi_context * bld_base,
3361   struct lp_build_emit_data * emit_data)
3362{
3363   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3364
3365   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3366               FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3367}
3368
3369static void target_to_dims_layer(unsigned target,
3370                                 unsigned *dims,
3371                                 unsigned *layer_coord)
3372{
3373   *layer_coord = 0;
3374   switch (target) {
3375   case TGSI_TEXTURE_1D:
3376   case TGSI_TEXTURE_BUFFER:
3377      *dims = 1;
3378      break;
3379   case TGSI_TEXTURE_1D_ARRAY:
3380      *layer_coord = 1;
3381      *dims = 1;
3382      break;
3383   case TGSI_TEXTURE_2D:
3384   case TGSI_TEXTURE_RECT:
3385      *dims = 2;
3386      break;
3387   case TGSI_TEXTURE_2D_ARRAY:
3388      *layer_coord = 2;
3389      *dims = 2;
3390      break;
3391   case TGSI_TEXTURE_3D:
3392   case TGSI_TEXTURE_CUBE:
3393   case TGSI_TEXTURE_CUBE_ARRAY:
3394      *dims = 3;
3395      break;
3396   default:
3397      assert(0);
3398      *dims = 0;
3399      return;
3400   }
3401}
3402
3403static void
3404img_load_emit(
3405   const struct lp_build_tgsi_action * action,
3406   struct lp_build_tgsi_context * bld_base,
3407   struct lp_build_emit_data * emit_data)
3408{
3409   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3410   struct lp_img_params params;
3411   LLVMValueRef coords[5];
3412   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3413   unsigned dims;
3414   unsigned target = emit_data->inst->Memory.Texture;
3415   unsigned layer_coord;
3416
3417   target_to_dims_layer(target, &dims, &layer_coord);
3418
3419   for (unsigned i = 0; i < dims; i++) {
3420      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3421   }
3422   for (unsigned i = dims; i < 5; i++) {
3423      coords[i] = coord_undef;
3424   }
3425   if (layer_coord)
3426      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3427
3428   memset(&params, 0, sizeof(params));
3429
3430   params.type = bld->bld_base.base.type;
3431   params.context_ptr = bld->context_ptr;
3432   params.thread_data_ptr = bld->thread_data_ptr;
3433   params.coords = coords;
3434   params.outdata = emit_data->output;
3435   params.target = tgsi_to_pipe_tex_target(target);
3436   params.image_index = emit_data->inst->Src[0].Register.Index;
3437   params.img_op = LP_IMG_LOAD;
3438   bld->image->emit_op(bld->image,
3439                         bld->bld_base.base.gallivm,
3440                         &params);
3441}
3442
3443static void
3444load_emit(
3445   const struct lp_build_tgsi_action * action,
3446   struct lp_build_tgsi_context * bld_base,
3447   struct lp_build_emit_data * emit_data)
3448{
3449   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3450   struct gallivm_state * gallivm = bld_base->base.gallivm;
3451   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3452   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3453   unsigned buf = bufreg->Register.Index;
3454   assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3455          bufreg->Register.File == TGSI_FILE_IMAGE ||
3456          bufreg->Register.File == TGSI_FILE_MEMORY ||
3457          bufreg->Register.File == TGSI_FILE_CONSTBUF);
3458   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3459   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3460
3461   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3462      img_load_emit(action, bld_base, emit_data);
3463   } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3464      LLVMValueRef consts_ptr = bld->consts[buf];
3465      LLVMValueRef num_consts = bld->consts_sizes[buf];
3466
3467      LLVMValueRef indirect_index;
3468      LLVMValueRef overflow_mask;
3469
3470      indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3471      indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3472
3473      /* All fetches are from the same constant buffer, so
3474       * we need to propagate the size to a vector to do a
3475       * vector comparison */
3476      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3477
3478      /* Gather values from the constant buffer */
3479      unsigned chan_index;
3480      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3481         /* Construct a boolean vector telling us which channels
3482          * overflow the bound constant buffer */
3483         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3484                                          indirect_index, num_consts);
3485
3486         /* index_vec = indirect_index * 4 */
3487         LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3488         index_vec = lp_build_add(uint_bld, index_vec,
3489                                  lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3490
3491         emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3492      }
3493   } else if (0) {
3494      /* for indirect support with ARB_gpu_shader5 */
3495   } else {
3496      LLVMValueRef index;
3497      LLVMValueRef scalar, scalar_ptr;
3498      unsigned chan_index;
3499
3500      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3501      index = lp_build_shr_imm(uint_bld, index, 2);
3502
3503      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3504
3505      LLVMValueRef ssbo_limit = NULL;
3506
3507      if (!is_shared) {
3508         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3509         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3510      }
3511
3512      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3513         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3514
3515         LLVMValueRef exec_mask = mask_vec(bld_base);
3516         if (!is_shared) {
3517            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3518            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3519         }
3520
3521         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3522         struct lp_build_loop_state loop_state;
3523         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3524
3525         struct lp_build_if_state ifthen;
3526         LLVMValueRef cond, temp_res;
3527
3528         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3529                                              loop_state.counter, "");
3530
3531         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3532         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3533
3534         lp_build_if(&ifthen, gallivm, cond);
3535         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3536
3537         temp_res = LLVMBuildLoad(builder, result, "");
3538         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3539         LLVMBuildStore(builder, temp_res, result);
3540         lp_build_else(&ifthen);
3541         temp_res = LLVMBuildLoad(builder, result, "");
3542         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3543         LLVMBuildStore(builder, temp_res, result);
3544         lp_build_endif(&ifthen);
3545         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3546                                NULL, LLVMIntUGE);
3547         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3548      }
3549   }
3550}
3551
3552static void
3553img_store_emit(
3554   const struct lp_build_tgsi_action * action,
3555   struct lp_build_tgsi_context * bld_base,
3556   struct lp_build_emit_data * emit_data)
3557{
3558   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3559   struct lp_img_params params;
3560   LLVMValueRef coords[5];
3561   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3562   unsigned dims;
3563   unsigned target = emit_data->inst->Memory.Texture;
3564   unsigned layer_coord;
3565
3566   target_to_dims_layer(target, &dims, &layer_coord);
3567   for (unsigned i = 0; i < dims; i++) {
3568      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3569   }
3570   for (unsigned i = dims; i < 5; i++) {
3571      coords[i] = coord_undef;
3572   }
3573   if (layer_coord)
3574      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3575   memset(&params, 0, sizeof(params));
3576
3577   params.type = bld->bld_base.base.type;
3578   params.context_ptr = bld->context_ptr;
3579   params.thread_data_ptr = bld->thread_data_ptr;
3580   params.coords = coords;
3581   params.outdata = NULL;
3582   params.exec_mask = mask_vec(bld_base);
3583   params.target = tgsi_to_pipe_tex_target(target);
3584   params.image_index = emit_data->inst->Dst[0].Register.Index;
3585   params.img_op = LP_IMG_STORE;
3586   for (unsigned i = 0; i < 4; i++)
3587      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3588
3589   bld->image->emit_op(bld->image,
3590                       bld->bld_base.base.gallivm,
3591                       &params);
3592}
3593
3594static void
3595store_emit(
3596   const struct lp_build_tgsi_action * action,
3597   struct lp_build_tgsi_context * bld_base,
3598   struct lp_build_emit_data * emit_data)
3599{
3600   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3601   struct gallivm_state * gallivm = bld_base->base.gallivm;
3602   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3603   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3604   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3605   unsigned buf = bufreg->Register.Index;
3606   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3607   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3608
3609   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3610      img_store_emit(action, bld_base, emit_data);
3611   } else if (0) {
3612
3613   } else {
3614      LLVMValueRef index;  /* index into the const buffer */
3615      LLVMValueRef scalar_ptr;
3616      LLVMValueRef value;
3617      unsigned chan_index;
3618
3619      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3620      index = lp_build_shr_imm(uint_bld, index, 2);
3621
3622      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3623
3624      LLVMValueRef ssbo_limit = NULL;
3625
3626      if (!is_shared) {
3627         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3628         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3629      }
3630
3631      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3632         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3633
3634         value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3635
3636         LLVMValueRef exec_mask = mask_vec(bld_base);
3637         if (!is_shared) {
3638            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3639            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3640         }
3641
3642         struct lp_build_loop_state loop_state;
3643         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3644
3645         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3646                                                          loop_state.counter, "");
3647         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3648
3649         struct lp_build_if_state ifthen;
3650         LLVMValueRef cond;
3651
3652         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3653                                              loop_state.counter, "");
3654
3655         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3656         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3657         lp_build_if(&ifthen, gallivm, cond);
3658
3659         lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3660
3661         lp_build_endif(&ifthen);
3662         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3663                                NULL, LLVMIntUGE);
3664      }
3665   }
3666}
3667
3668static void
3669resq_emit(
3670   const struct lp_build_tgsi_action * action,
3671   struct lp_build_tgsi_context * bld_base,
3672   struct lp_build_emit_data * emit_data)
3673{
3674   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3675   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3676   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3677
3678   unsigned buf = bufreg->Register.Index;
3679   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3680
3681   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3682      unsigned target = emit_data->inst->Memory.Texture;
3683      struct lp_sampler_size_query_params params = { 0 };
3684      params.int_type = bld->bld_base.int_bld.type;
3685      params.texture_unit = buf;
3686      params.target = tgsi_to_pipe_tex_target(target);
3687      params.context_ptr = bld->context_ptr;
3688      params.sizes_out = emit_data->output;
3689
3690      bld->image->emit_size_query(bld->image,
3691                                  bld->bld_base.base.gallivm,
3692                                  &params);
3693   } else {
3694      LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3695
3696      emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3697   }
3698}
3699
3700static void
3701img_atomic_emit(
3702   const struct lp_build_tgsi_action * action,
3703   struct lp_build_tgsi_context * bld_base,
3704   struct lp_build_emit_data * emit_data,
3705   LLVMAtomicRMWBinOp op)
3706{
3707   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3708   struct lp_img_params params;
3709   LLVMValueRef coords[5];
3710   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3711   unsigned dims;
3712   unsigned layer_coord;
3713   unsigned target = emit_data->inst->Memory.Texture;
3714
3715   target_to_dims_layer(target, &dims, &layer_coord);
3716
3717   for (unsigned i = 0; i < dims; i++) {
3718      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3719   }
3720   for (unsigned i = dims; i < 5; i++) {
3721      coords[i] = coord_undef;
3722   }
3723   if (layer_coord)
3724      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3725   memset(&params, 0, sizeof(params));
3726
3727   params.type = bld->bld_base.base.type;
3728   params.context_ptr = bld->context_ptr;
3729   params.thread_data_ptr = bld->thread_data_ptr;
3730   params.exec_mask = mask_vec(bld_base);
3731   params.image_index = emit_data->inst->Src[0].Register.Index;
3732   params.coords = coords;
3733   params.target = tgsi_to_pipe_tex_target(target);
3734   params.op = op;
3735   params.outdata = emit_data->output;
3736   params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3737
3738   for (unsigned i = 0; i < 4; i++)
3739      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3740   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3741      for (unsigned i = 0; i < 4; i++)
3742         params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3743   }
3744   bld->image->emit_op(bld->image,
3745                       bld->bld_base.base.gallivm,
3746                       &params);
3747}
3748
3749static void
3750atomic_emit(
3751   const struct lp_build_tgsi_action * action,
3752   struct lp_build_tgsi_context * bld_base,
3753   struct lp_build_emit_data * emit_data)
3754{
3755   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3756   struct gallivm_state * gallivm = bld_base->base.gallivm;
3757   LLVMBuilderRef builder = gallivm->builder;
3758   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3759   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3760
3761   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3762   unsigned buf = bufreg->Register.Index;
3763   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3764
3765   LLVMAtomicRMWBinOp op = -1;
3766   switch (emit_data->inst->Instruction.Opcode) {
3767   case TGSI_OPCODE_ATOMUADD:
3768      op = LLVMAtomicRMWBinOpAdd;
3769      break;
3770   case TGSI_OPCODE_ATOMXCHG:
3771      op = LLVMAtomicRMWBinOpXchg;
3772      break;
3773   case TGSI_OPCODE_ATOMAND:
3774      op = LLVMAtomicRMWBinOpAnd;
3775      break;
3776   case TGSI_OPCODE_ATOMOR:
3777      op = LLVMAtomicRMWBinOpOr;
3778      break;
3779   case TGSI_OPCODE_ATOMXOR:
3780      op = LLVMAtomicRMWBinOpXor;
3781      break;
3782   case TGSI_OPCODE_ATOMUMIN:
3783      op = LLVMAtomicRMWBinOpUMin;
3784      break;
3785   case TGSI_OPCODE_ATOMUMAX:
3786      op = LLVMAtomicRMWBinOpUMax;
3787      break;
3788   case TGSI_OPCODE_ATOMIMIN:
3789      op = LLVMAtomicRMWBinOpMin;
3790      break;
3791   case TGSI_OPCODE_ATOMIMAX:
3792      op = LLVMAtomicRMWBinOpMax;
3793      break;
3794   case TGSI_OPCODE_ATOMCAS:
3795      break;
3796   default:
3797      assert(0);
3798      return;
3799   }
3800
3801   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3802      img_atomic_emit(action, bld_base, emit_data, op);
3803   } else if (0) {
3804   } else {
3805      LLVMValueRef index;  /* index into the const buffer */
3806      LLVMValueRef scalar, scalar_ptr;
3807      LLVMValueRef value;
3808
3809      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3810      value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3811
3812      index = lp_build_shr_imm(uint_bld, index, 2);
3813
3814      if (!is_shared) {
3815         index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3816         scalar_ptr = bld->ssbos[buf];
3817      } else
3818         scalar_ptr = bld->shared_ptr;
3819
3820      LLVMValueRef atom_res = lp_build_alloca(gallivm,
3821                                              uint_bld->vec_type, "");
3822
3823      LLVMValueRef ssbo_limit;
3824      if (!is_shared) {
3825         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3826         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3827      }
3828
3829      LLVMValueRef exec_mask = mask_vec(bld_base);
3830
3831      if (!is_shared) {
3832         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3833         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3834      }
3835
3836      struct lp_build_loop_state loop_state;
3837      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3838
3839      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3840                                                       loop_state.counter, "");
3841      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3842
3843      index = LLVMBuildExtractElement(gallivm->builder, index,
3844                                      loop_state.counter, "");
3845
3846      scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3847                                &index, 1, "");
3848
3849      struct lp_build_if_state ifthen;
3850      LLVMValueRef cond, temp_res;
3851
3852      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3853      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3854      lp_build_if(&ifthen, gallivm, cond);
3855
3856      if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3857         LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3858         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3859                                                            loop_state.counter, "");
3860         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3861         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3862                                         cas_src_ptr,
3863                                         LLVMAtomicOrderingSequentiallyConsistent,
3864                                         LLVMAtomicOrderingSequentiallyConsistent,
3865                                         false);
3866         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3867      } else {
3868         scalar = LLVMBuildAtomicRMW(builder, op,
3869                                     scalar_ptr, value_ptr,
3870                                     LLVMAtomicOrderingSequentiallyConsistent,
3871                                     false);
3872      }
3873      temp_res = LLVMBuildLoad(builder, atom_res, "");
3874      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3875      LLVMBuildStore(builder, temp_res, atom_res);
3876      lp_build_else(&ifthen);
3877      temp_res = LLVMBuildLoad(builder, atom_res, "");
3878      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3879      LLVMBuildStore(builder, temp_res, atom_res);
3880      lp_build_endif(&ifthen);
3881
3882      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3883                             NULL, LLVMIntUGE);
3884      emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3885   }
3886}
3887
3888static void
3889barrier_emit(
3890   const struct lp_build_tgsi_action * action,
3891   struct lp_build_tgsi_context * bld_base,
3892   struct lp_build_emit_data * emit_data)
3893{
3894   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3895   struct gallivm_state * gallivm = bld_base->base.gallivm;
3896
3897   LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3898
3899   lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3900   LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3901}
3902
3903static void
3904membar_emit(
3905   const struct lp_build_tgsi_action * action,
3906   struct lp_build_tgsi_context * bld_base,
3907   struct lp_build_emit_data * emit_data)
3908{
3909   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3910   LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3911}
3912
3913static void
3914increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3915                          LLVMValueRef ptr,
3916                          LLVMValueRef mask)
3917{
3918   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3919   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3920
3921   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3922
3923   LLVMBuildStore(builder, current_vec, ptr);
3924}
3925
3926static void
3927clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3928                             LLVMValueRef ptr,
3929                             LLVMValueRef mask)
3930{
3931   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3932   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3933
3934   current_vec = lp_build_select(&bld_base->uint_bld,
3935                                 mask,
3936                                 bld_base->uint_bld.zero,
3937                                 current_vec);
3938
3939   LLVMBuildStore(builder, current_vec, ptr);
3940}
3941
3942static LLVMValueRef
3943clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3944                                  LLVMValueRef current_mask_vec,
3945                                  LLVMValueRef total_emitted_vertices_vec)
3946{
3947   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3948   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3949   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3950                                        total_emitted_vertices_vec,
3951                                        bld->max_output_vertices_vec);
3952
3953   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3954}
3955
3956static void
3957emit_vertex(
3958   const struct lp_build_tgsi_action * action,
3959   struct lp_build_tgsi_context * bld_base,
3960   struct lp_build_emit_data * emit_data)
3961{
3962   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3963   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3964
3965   if (bld->gs_iface->emit_vertex) {
3966      LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3967                                                    TGSI_TYPE_UNSIGNED,
3968                                                    emit_data->inst->Src[0].Register.SwizzleX);
3969      LLVMValueRef mask = mask_vec(bld_base);
3970      LLVMValueRef total_emitted_vertices_vec =
3971         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3972
3973      mask = clamp_mask_to_max_output_vertices(bld, mask,
3974                                               total_emitted_vertices_vec);
3975      gather_outputs(bld);
3976      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3977                                 bld->outputs,
3978                                 total_emitted_vertices_vec,
3979                                 mask,
3980                                 stream_id);
3981      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3982                                mask);
3983      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3984                                mask);
3985#if DUMP_GS_EMITS
3986      lp_build_print_value(bld->bld_base.base.gallivm,
3987                           " +++ emit vertex masked ones = ",
3988                           mask);
3989      lp_build_print_value(bld->bld_base.base.gallivm,
3990                           " +++ emit vertex emitted = ",
3991                           total_emitted_vertices_vec);
3992#endif
3993   }
3994}
3995
3996
3997static void
3998end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3999                     LLVMValueRef mask)
4000{
4001   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4002   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4003
4004   if (bld->gs_iface->end_primitive) {
4005      struct lp_build_context *uint_bld = &bld_base->uint_bld;
4006      LLVMValueRef emitted_vertices_vec =
4007         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4008      LLVMValueRef emitted_prims_vec =
4009         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4010      LLVMValueRef total_emitted_vertices_vec =
4011         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4012      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4013                                               emitted_vertices_vec,
4014                                               uint_bld->zero);
4015      /* We need to combine the current execution mask with the mask
4016         telling us which, if any, execution slots actually have
4017         unemitted primitives, this way we make sure that end_primitives
4018         executes only on the paths that have unflushed vertices */
4019      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4020
4021      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4022                                   total_emitted_vertices_vec,
4023                                   emitted_vertices_vec,
4024                                   emitted_prims_vec,
4025                                   mask_vec(bld_base), 0);
4026
4027#if DUMP_GS_EMITS
4028      lp_build_print_value(bld->bld_base.base.gallivm,
4029                           " +++ end prim masked ones = ",
4030                           mask);
4031      lp_build_print_value(bld->bld_base.base.gallivm,
4032                           " +++ end prim emitted verts1 = ",
4033                           emitted_vertices_vec);
4034      lp_build_print_value(bld->bld_base.base.gallivm,
4035                           " +++ end prim emitted prims1 = ",
4036                           LLVMBuildLoad(builder,
4037                                         bld->emitted_prims_vec_ptr, ""));
4038#endif
4039      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4040                                mask);
4041      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4042                                   mask);
4043#if DUMP_GS_EMITS
4044      lp_build_print_value(bld->bld_base.base.gallivm,
4045                           " +++ end prim emitted verts2 = ",
4046                           LLVMBuildLoad(builder,
4047                                         bld->emitted_vertices_vec_ptr, ""));
4048#endif
4049   }
4050
4051}
4052
4053static void
4054end_primitive(
4055   const struct lp_build_tgsi_action * action,
4056   struct lp_build_tgsi_context * bld_base,
4057   struct lp_build_emit_data * emit_data)
4058{
4059   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4060
4061   if (bld->gs_iface->end_primitive) {
4062      LLVMValueRef mask = mask_vec(bld_base);
4063      end_primitive_masked(bld_base, mask);
4064   }
4065}
4066
4067static void
4068barrier_emit_tcs(
4069   const struct lp_build_tgsi_action * action,
4070   struct lp_build_tgsi_context * bld_base,
4071   struct lp_build_emit_data * emit_data)
4072{
4073   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4074
4075   if (bld->tcs_iface->emit_barrier) {
4076      bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4077   }
4078}
4079
4080
4081static void
4082cal_emit(
4083   const struct lp_build_tgsi_action * action,
4084   struct lp_build_tgsi_context * bld_base,
4085   struct lp_build_emit_data * emit_data)
4086{
4087   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4088
4089   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4090                     &bld_base->pc);
4091}
4092
4093static void
4094ret_emit(
4095   const struct lp_build_tgsi_action * action,
4096   struct lp_build_tgsi_context * bld_base,
4097   struct lp_build_emit_data * emit_data)
4098{
4099   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4100
4101   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4102}
4103
4104static void
4105brk_emit(
4106   const struct lp_build_tgsi_action * action,
4107   struct lp_build_tgsi_context * bld_base,
4108   struct lp_build_emit_data * emit_data)
4109{
4110   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4111
4112   lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4113}
4114
4115static void
4116if_emit(
4117   const struct lp_build_tgsi_action * action,
4118   struct lp_build_tgsi_context * bld_base,
4119   struct lp_build_emit_data * emit_data)
4120{
4121   LLVMValueRef tmp;
4122   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4123
4124   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4125                      emit_data->args[0], bld->bld_base.base.zero);
4126   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4127}
4128
4129static void
4130uif_emit(
4131   const struct lp_build_tgsi_action * action,
4132   struct lp_build_tgsi_context * bld_base,
4133   struct lp_build_emit_data * emit_data)
4134{
4135   LLVMValueRef tmp;
4136   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4137   struct lp_build_context *uint_bld = &bld_base->uint_bld;
4138
4139   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4140                      emit_data->args[0], uint_bld->zero);
4141   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4142}
4143
4144static void
4145case_emit(
4146   const struct lp_build_tgsi_action * action,
4147   struct lp_build_tgsi_context * bld_base,
4148   struct lp_build_emit_data * emit_data)
4149{
4150   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4151
4152   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4153}
4154
4155static void
4156default_emit(
4157   const struct lp_build_tgsi_action * action,
4158   struct lp_build_tgsi_context * bld_base,
4159   struct lp_build_emit_data * emit_data)
4160{
4161   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4162
4163   lp_exec_default(&bld->exec_mask, bld_base);
4164}
4165
4166static void
4167switch_emit(
4168   const struct lp_build_tgsi_action * action,
4169   struct lp_build_tgsi_context * bld_base,
4170   struct lp_build_emit_data * emit_data)
4171{
4172   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4173
4174   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4175}
4176
4177static void
4178endswitch_emit(
4179   const struct lp_build_tgsi_action * action,
4180   struct lp_build_tgsi_context * bld_base,
4181   struct lp_build_emit_data * emit_data)
4182{
4183   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4184
4185   lp_exec_endswitch(&bld->exec_mask, bld_base);
4186}
4187
4188static void
4189bgnloop_emit(
4190   const struct lp_build_tgsi_action * action,
4191   struct lp_build_tgsi_context * bld_base,
4192   struct lp_build_emit_data * emit_data)
4193{
4194   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4195
4196   lp_exec_bgnloop(&bld->exec_mask, true);
4197}
4198
4199static void
4200bgnsub_emit(
4201   const struct lp_build_tgsi_action * action,
4202   struct lp_build_tgsi_context * bld_base,
4203   struct lp_build_emit_data * emit_data)
4204{
4205   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4206
4207   lp_exec_mask_bgnsub(&bld->exec_mask);
4208}
4209
4210static void
4211else_emit(
4212   const struct lp_build_tgsi_action * action,
4213   struct lp_build_tgsi_context * bld_base,
4214   struct lp_build_emit_data * emit_data)
4215{
4216   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4217
4218   lp_exec_mask_cond_invert(&bld->exec_mask);
4219}
4220
4221static void
4222endif_emit(
4223   const struct lp_build_tgsi_action * action,
4224   struct lp_build_tgsi_context * bld_base,
4225   struct lp_build_emit_data * emit_data)
4226{
4227   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4228
4229   lp_exec_mask_cond_pop(&bld->exec_mask);
4230}
4231
4232static void
4233endloop_emit(
4234   const struct lp_build_tgsi_action * action,
4235   struct lp_build_tgsi_context * bld_base,
4236   struct lp_build_emit_data * emit_data)
4237{
4238   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4239
4240   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4241}
4242
4243static void
4244endsub_emit(
4245   const struct lp_build_tgsi_action * action,
4246   struct lp_build_tgsi_context * bld_base,
4247   struct lp_build_emit_data * emit_data)
4248{
4249   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4250
4251   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4252}
4253
4254static void
4255cont_emit(
4256   const struct lp_build_tgsi_action * action,
4257   struct lp_build_tgsi_context * bld_base,
4258   struct lp_build_emit_data * emit_data)
4259{
4260   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4261
4262   lp_exec_continue(&bld->exec_mask);
4263}
4264
4265static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4266{
4267   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4268   struct gallivm_state * gallivm = bld_base->base.gallivm;
4269
4270   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4271      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4272      bld->temps_array = lp_build_alloca_undef(gallivm,
4273                                               LLVMArrayType(bld_base->base.vec_type, array_size),
4274                                               "temp_array");
4275   }
4276
4277   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4278      LLVMValueRef array_size =
4279         lp_build_const_int32(gallivm,
4280                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4281      bld->outputs_array = lp_build_array_alloca(gallivm,
4282                                                bld_base->base.vec_type, array_size,
4283                                                "output_array");
4284   }
4285
4286   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4287      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4288      bld->imms_array = lp_build_alloca_undef(gallivm,
4289                                              LLVMArrayType(bld_base->base.vec_type, array_size),
4290                                              "imms_array");
4291   }
4292
4293   /* If we have indirect addressing in inputs we need to copy them into
4294    * our alloca array to be able to iterate over them */
4295   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4296       !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4297      unsigned index, chan;
4298      LLVMTypeRef vec_type = bld_base->base.vec_type;
4299      LLVMValueRef array_size = lp_build_const_int32(gallivm,
4300            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4301      bld->inputs_array = lp_build_array_alloca(gallivm,
4302                                               vec_type, array_size,
4303                                               "input_array");
4304
4305      assert(bld_base->info->num_inputs
4306                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4307
4308      for (index = 0; index < bld_base->info->num_inputs; ++index) {
4309         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4310            LLVMValueRef lindex =
4311               lp_build_const_int32(gallivm, index * 4 + chan);
4312            LLVMValueRef input_ptr =
4313               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4314                            &lindex, 1, "");
4315            LLVMValueRef value = bld->inputs[index][chan];
4316            if (value)
4317               LLVMBuildStore(gallivm->builder, value, input_ptr);
4318         }
4319      }
4320   }
4321
4322   if (bld->gs_iface) {
4323      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4324      bld->emitted_prims_vec_ptr =
4325         lp_build_alloca(gallivm,
4326                         uint_bld->vec_type,
4327                         "emitted_prims_ptr");
4328      bld->emitted_vertices_vec_ptr =
4329         lp_build_alloca(gallivm,
4330                         uint_bld->vec_type,
4331                         "emitted_vertices_ptr");
4332      bld->total_emitted_vertices_vec_ptr =
4333         lp_build_alloca(gallivm,
4334                         uint_bld->vec_type,
4335                         "total_emitted_vertices_ptr");
4336
4337      LLVMBuildStore(gallivm->builder, uint_bld->zero,
4338                     bld->emitted_prims_vec_ptr);
4339      LLVMBuildStore(gallivm->builder, uint_bld->zero,
4340                     bld->emitted_vertices_vec_ptr);
4341      LLVMBuildStore(gallivm->builder, uint_bld->zero,
4342                     bld->total_emitted_vertices_vec_ptr);
4343   }
4344
4345   if (DEBUG_EXECUTION) {
4346      lp_build_printf(gallivm, "\n");
4347      emit_dump_file(bld, TGSI_FILE_CONSTANT);
4348      if (!bld->gs_iface)
4349         emit_dump_file(bld, TGSI_FILE_INPUT);
4350   }
4351}
4352
4353static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4354{
4355   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4356
4357   if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4358      bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4359   }
4360}
4361
4362static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4363{
4364   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4365   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4366
4367   if (DEBUG_EXECUTION) {
4368      /* for debugging */
4369      if (0) {
4370         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4371      }
4372      emit_dump_file(bld, TGSI_FILE_OUTPUT);
4373      lp_build_printf(bld_base->base.gallivm, "\n");
4374   }
4375
4376   if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4377      bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4378   }
4379
4380   /* If we have indirect addressing in outputs we need to copy our alloca array
4381    * to the outputs slots specified by the caller */
4382   if (bld->gs_iface) {
4383      LLVMValueRef total_emitted_vertices_vec;
4384      LLVMValueRef emitted_prims_vec;
4385      /* implicit end_primitives, needed in case there are any unflushed
4386         vertices in the cache. Note must not call end_primitive here
4387         since the exec_mask is not valid at this point. */
4388      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4389
4390      total_emitted_vertices_vec =
4391         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4392      emitted_prims_vec =
4393         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4394
4395      bld->gs_iface->gs_epilogue(bld->gs_iface,
4396                                 total_emitted_vertices_vec,
4397                                 emitted_prims_vec, 0);
4398   } else {
4399      gather_outputs(bld);
4400   }
4401}
4402
4403void
4404lp_build_tgsi_soa(struct gallivm_state *gallivm,
4405                  const struct tgsi_token *tokens,
4406                  const struct lp_build_tgsi_params *params,
4407                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4408{
4409   struct lp_build_tgsi_soa_context bld;
4410   struct lp_type type = params->type;
4411   struct lp_type res_type;
4412
4413   assert(type.length <= LP_MAX_VECTOR_LENGTH);
4414   memset(&res_type, 0, sizeof res_type);
4415   res_type.width = type.width;
4416   res_type.length = type.length;
4417   res_type.sign = 1;
4418
4419   /* Setup build context */
4420   memset(&bld, 0, sizeof bld);
4421   lp_build_context_init(&bld.bld_base.base, gallivm, type);
4422   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4423   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4424   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4425   {
4426      struct lp_type dbl_type;
4427      dbl_type = type;
4428      dbl_type.width *= 2;
4429      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4430   }
4431   {
4432      struct lp_type uint64_type;
4433      uint64_type = lp_uint_type(type);
4434      uint64_type.width *= 2;
4435      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4436   }
4437   {
4438      struct lp_type int64_type;
4439      int64_type = lp_int_type(type);
4440      int64_type.width *= 2;
4441      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4442   }
4443   bld.mask = params->mask;
4444   bld.inputs = params->inputs;
4445   bld.outputs = outputs;
4446   bld.consts_ptr = params->consts_ptr;
4447   bld.const_sizes_ptr = params->const_sizes_ptr;
4448   bld.ssbo_ptr = params->ssbo_ptr;
4449   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4450   bld.sampler = params->sampler;
4451   bld.bld_base.info = params->info;
4452   bld.indirect_files = params->info->indirect_files;
4453   bld.context_ptr = params->context_ptr;
4454   bld.thread_data_ptr = params->thread_data_ptr;
4455   bld.image = params->image;
4456   bld.shared_ptr = params->shared_ptr;
4457   bld.coro = params->coro;
4458
4459   /*
4460    * If the number of temporaries is rather large then we just
4461    * allocate them as an array right from the start and treat
4462    * like indirect temporaries.
4463    */
4464   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4465      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4466   }
4467   /*
4468    * For performance reason immediates are always backed in a static
4469    * array, but if their number is too great, we have to use just
4470    * a dynamically allocated array.
4471    */
4472   bld.use_immediates_array =
4473         (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4474   if (bld.use_immediates_array) {
4475      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4476   }
4477
4478
4479   bld.bld_base.soa = TRUE;
4480   bld.bld_base.emit_debug = emit_debug;
4481   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4482   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4483   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4484   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4485   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4486
4487   bld.bld_base.emit_store = emit_store;
4488   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4489   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4490   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4491
4492   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4493   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4494
4495   bld.bld_base.emit_prologue = emit_prologue;
4496   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4497   bld.bld_base.emit_epilogue = emit_epilogue;
4498
4499   /* Set opcode actions */
4500   lp_set_default_actions_cpu(&bld.bld_base);
4501
4502   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4503   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4504   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4505   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4506   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4507   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4508   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4509   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4510   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4511   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4512   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4513   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4514   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4515   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4516   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4517   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4518   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4519   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4520   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4521   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4522   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4523   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4524   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4525   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4526   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4527   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4528   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4529   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4530   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4531   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4532   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4533   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4534   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4535   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4536   /* DX10 sampling ops */
4537   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4538   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4539   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4540   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4541   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4542   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4543   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4544   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4545   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4546   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4547   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4548
4549   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4550   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4551   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4552
4553   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4554   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4555   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4556   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4557   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4558   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4559   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4560   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4561   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4562   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4563
4564   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4565   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4566
4567   if (params->gs_iface) {
4568      /* There's no specific value for this because it should always
4569       * be set, but apps using ext_geometry_shader4 quite often
4570       * were forgetting so we're using MAX_VERTEX_VARYING from
4571       * that spec even though we could debug_assert if it's not
4572       * set, but that's a lot uglier. */
4573      uint max_output_vertices;
4574
4575      /* inputs are always indirect with gs */
4576      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4577      bld.gs_iface = params->gs_iface;
4578      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4579      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4580      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4581
4582      max_output_vertices =
4583         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4584      if (!max_output_vertices)
4585         max_output_vertices = 32;
4586
4587      bld.max_output_vertices_vec =
4588         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4589                                max_output_vertices);
4590   }
4591
4592   if (params->tes_iface) {
4593      /* inputs are always indirect with tes */
4594      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4595      bld.tes_iface = params->tes_iface;
4596      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4597   }
4598
4599   if (params->tcs_iface) {
4600      bld.tcs_iface = params->tcs_iface;
4601      /* outputs and inputs are always indirect with tcs */
4602      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4603      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4604      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4605      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4606      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4607      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4608   }
4609
4610   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4611
4612   bld.system_values = *params->system_values;
4613
4614   lp_build_tgsi_llvm(&bld.bld_base, tokens);
4615
4616   if (0) {
4617      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4618      LLVMValueRef function = LLVMGetBasicBlockParent(block);
4619      debug_printf("11111111111111111111111111111 \n");
4620      tgsi_dump(tokens, 0);
4621      lp_debug_dump_value(function);
4622      debug_printf("2222222222222222222222222222 \n");
4623   }
4624
4625   if (0) {
4626      LLVMModuleRef module = LLVMGetGlobalParent(
4627         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4628      LLVMDumpModule(module);
4629
4630   }
4631   lp_exec_mask_fini(&bld.exec_mask);
4632}
4633