1/**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26#include "lp_bld_nir.h"
27#include "lp_bld_arit.h"
28#include "lp_bld_bitarit.h"
29#include "lp_bld_const.h"
30#include "lp_bld_conv.h"
31#include "lp_bld_gather.h"
32#include "lp_bld_logic.h"
33#include "lp_bld_quad.h"
34#include "lp_bld_flow.h"
35#include "lp_bld_intr.h"
36#include "lp_bld_struct.h"
37#include "lp_bld_debug.h"
38#include "lp_bld_printf.h"
39#include "nir_deref.h"
40#include "nir_search_helpers.h"
41
42static void visit_cf_list(struct lp_build_nir_context *bld_base,
43                          struct exec_list *list);
44
45static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
46                              nir_alu_type alu_type, unsigned bit_size)
47{
48   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
49   switch (alu_type) {
50   case nir_type_float:
51      switch (bit_size) {
52      case 16:
53         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
54      case 32:
55         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
56      case 64:
57         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
58      default:
59         assert(0);
60         break;
61      }
62      break;
63   case nir_type_int:
64      switch (bit_size) {
65      case 8:
66         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
67      case 16:
68         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
69      case 32:
70         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
71      case 64:
72         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
73      default:
74         assert(0);
75         break;
76      }
77      break;
78   case nir_type_uint:
79      switch (bit_size) {
80      case 8:
81         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
82      case 16:
83         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
84      case 1:
85      case 32:
86         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
87      case 64:
88         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
89      default:
90         assert(0);
91         break;
92      }
93      break;
94   case nir_type_uint32:
95      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
96   default:
97      return val;
98   }
99   return NULL;
100}
101
102
103static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
104{
105   unsigned pipe_target = PIPE_BUFFER;
106   switch (sampler_dim) {
107   case GLSL_SAMPLER_DIM_1D:
108      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
109      break;
110   case GLSL_SAMPLER_DIM_2D:
111      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
112      break;
113   case GLSL_SAMPLER_DIM_SUBPASS:
114   case GLSL_SAMPLER_DIM_SUBPASS_MS:
115      pipe_target = PIPE_TEXTURE_2D_ARRAY;
116      break;
117   case GLSL_SAMPLER_DIM_3D:
118      pipe_target = PIPE_TEXTURE_3D;
119      break;
120   case GLSL_SAMPLER_DIM_MS:
121      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
122      break;
123   case GLSL_SAMPLER_DIM_CUBE:
124      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
125      break;
126   case GLSL_SAMPLER_DIM_RECT:
127      pipe_target = PIPE_TEXTURE_RECT;
128      break;
129   case GLSL_SAMPLER_DIM_BUF:
130      pipe_target = PIPE_BUFFER;
131      break;
132   default:
133      break;
134   }
135   return pipe_target;
136}
137
/* Return the LLVM value recorded for an SSA def; defs are filled into
 * bld_base->ssa_defs (indexed by ssa->index) as their defining
 * instructions are visited, so this assumes the def was already emitted. */
static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}
142
143static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);
144
145static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
146{
147   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
148   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
149   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
150   LLVMValueRef indir_src = NULL;
151   if (src.indirect)
152      indir_src = get_src(bld_base, *src.indirect);
153   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
154}
155
156static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
157{
158   if (src.is_ssa)
159      return get_ssa_src(bld_base, src.ssa);
160   else
161      return get_reg_src(bld_base, src.reg);
162}
163
/* Record the LLVM value produced for SSA def @idx so later instructions
 * can retrieve it via get_ssa_src(). */
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}
168
169static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
170                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
171{
172   assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
173}
174
175static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
176                       unsigned write_mask,
177                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
178{
179   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
180   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
181   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
182   LLVMValueRef indir_src = NULL;
183   if (reg->indirect)
184      indir_src = get_src(bld_base, *reg->indirect);
185   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
186}
187
188static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
189{
190   if (dest->is_ssa)
191      assign_ssa_dest(bld_base, &dest->ssa, vals);
192   else
193      assign_reg(bld_base, &dest->reg, 0, vals);
194}
195
196static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
197{
198   if (dest->dest.is_ssa)
199      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
200   else
201      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
202}
203
204static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
205                                uint32_t src_bit_size,
206                                bool is_unsigned,
207                                LLVMValueRef val)
208{
209   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
210   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
211   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero);
212   if (src_bit_size == 16)
213      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
214   else if (src_bit_size == 64)
215      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
216   return result;
217}
218
219static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
220                                  uint32_t src_bit_size,
221                                  LLVMValueRef val)
222{
223   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
224   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
225   LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
226   if (src_bit_size == 64)
227      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
228   if (src_bit_size == 16)
229      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
230   return result;
231}
232
233static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
234                           enum pipe_compare_func compare,
235                           uint32_t src_bit_size,
236                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
237{
238   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
239   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
240   LLVMValueRef result;
241
242   if (compare != PIPE_FUNC_NOTEQUAL)
243      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
244   else
245      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
246   if (src_bit_size == 64)
247      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
248   else if (src_bit_size == 16)
249      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
250   return result;
251}
252
253static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
254                           enum pipe_compare_func compare,
255                           bool is_unsigned,
256                           uint32_t src_bit_size,
257                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
258{
259   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
260   struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
261   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
262   if (src_bit_size < 32)
263      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
264   else if (src_bit_size == 64)
265      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
266   return result;
267}
268
269static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
270                                nir_alu_src src,
271                                unsigned num_components)
272{
273   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
274   struct gallivm_state *gallivm = bld_base->base.gallivm;
275   LLVMValueRef value = get_src(bld_base, src.src);
276   bool need_swizzle = false;
277
278   assert(value);
279   unsigned src_components = nir_src_num_components(src.src);
280   for (unsigned i = 0; i < num_components; ++i) {
281      assert(src.swizzle[i] < src_components);
282      if (src.swizzle[i] != i)
283         need_swizzle = true;
284   }
285
286   if (need_swizzle || num_components != src_components) {
287      if (src_components > 1 && num_components == 1) {
288         value = LLVMBuildExtractValue(gallivm->builder, value,
289                                       src.swizzle[0], "");
290      } else if (src_components == 1 && num_components > 1) {
291         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
292         value = lp_nir_array_build_gather_values(builder, values, num_components);
293      } else {
294         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
295         for (unsigned i = 0; i < num_components; i++)
296            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
297         value = arr;
298      }
299   }
300   assert(!src.negate);
301   assert(!src.abs);
302   return value;
303}
304
305static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
306                             LLVMValueRef src0,
307                             unsigned bitsize)
308{
309   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
310   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
311                                      LLVMBuildBitCast(builder, lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type,
312                                                                                   1.0), bld_base->int_bld.vec_type, ""),
313                                      "");
314   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
315   switch (bitsize) {
316   case 16:
317      result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
318      break;
319   case 32:
320      break;
321   case 64:
322      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
323      break;
324   default:
325      unreachable("unsupported bit size.");
326   }
327   return result;
328}
329
330static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
331                             LLVMValueRef src0,
332                             unsigned bitsize)
333{
334   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
335   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
336                                      lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, 1), "");
337   switch (bitsize) {
338   case 8:
339      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
340   case 16:
341      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
342   case 32:
343      return result;
344   case 64:
345      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
346   default:
347      unreachable("unsupported bit size.");
348   }
349}
350
351static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
352                               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
353                               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
354{
355   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
356   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
357   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
358   return lp_build_select(bld, v, src[1], src[2]);
359}
360
/* Extract the low (hi == false) or high (hi == true) 32-bit halves of a
 * vector of 64-bit values.  The source is reinterpreted as a vector with
 * twice as many i32 lanes, then the even or odd lanes are gathered with a
 * shuffle; which parity holds the low word depends on endianness.  Returns
 * a 32-bit vector with one lane per original 64-bit lane. */
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: the low word of each 64-bit lane comes first. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
386
/* Inverse of split_64bit(): interleave two 32-bit vectors (@input holding
 * one word of each 64-bit value, @input2 the other) into a single vector
 * with twice as many 32-bit lanes, laying each 64-bit value out in memory
 * order for the target endianness.  The caller bitcasts the result to a
 * 64-bit vector type. */
static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: word from @input first, then the lane from @input2
       * (shuffle indices >= length address the second operand). */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
410
/* 16-bit analogue of split_64bit(): extract the low (hi == false) or high
 * (hi == true) 16-bit halves of a vector of 32-bit values.  The source is
 * reinterpreted as twice as many i16 lanes and the even or odd lanes are
 * gathered depending on endianness. */
static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: the low half of each 32-bit lane comes first. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
/* Inverse of split_16bit(): interleave two 16-bit vectors into one vector
 * with twice as many 16-bit lanes, in memory order for the target
 * endianness.  NOTE(review): the result length comes from int16_bld while
 * the loop bound uses int_bld and the lane offsets use base — presumably
 * all three contexts have the same lane count here; confirm. */
static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: lane from @input first, then @input2 (indices >=
       * length address the second shuffle operand). */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
459
460static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
461                                       struct lp_build_context *int_bld,
462                                       struct lp_build_context *mask_bld,
463                                       int src_bit_size,
464                                       LLVMValueRef src, LLVMValueRef divisor)
465{
466   LLVMBuilderRef builder = gallivm->builder;
467   /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
468      and divisor is -1. */
469   /* set mask if numerator == INT_MIN */
470   long long min_val;
471   switch (src_bit_size) {
472   case 8:
473      min_val = INT8_MIN;
474      break;
475   case 16:
476      min_val = INT16_MIN;
477      break;
478   default:
479   case 32:
480      min_val = INT_MIN;
481      break;
482   case 64:
483      min_val = INT64_MIN;
484      break;
485   }
486   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
487                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
488   /* set another mask if divisor is - 1 */
489   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
490                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
491   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");
492
493   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
494   return divisor;
495}
496
/* Integer division with defined results for divide-by-zero lanes (and, for
 * signed division, protection against the INT_MIN / -1 trap via
 * get_signed_divisor()).  Unsigned x/0 yields ~0; signed x/0 yields 0. */
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   /* Mask comparisons always use the unsigned context of matching width. */
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by 0. Converted divisor from 0 to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   /* ORing the all-ones mask into the divisor turns zero lanes into ~0. */
   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      /* Signed divide-by-zero lanes are forced to 0. */
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10;
       * OR the mask back in so those lanes become all-ones. */
      return LLVMBuildOr(builder, div_mask, result, "");
}
526
527static LLVMValueRef
528do_int_mod(struct lp_build_nir_context *bld_base,
529           bool is_unsigned, unsigned src_bit_size,
530           LLVMValueRef src, LLVMValueRef src2)
531{
532   struct gallivm_state *gallivm = bld_base->base.gallivm;
533   LLVMBuilderRef builder = gallivm->builder;
534   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
535   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
536   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
537                                        mask_bld->zero);
538   LLVMValueRef divisor = LLVMBuildOr(builder,
539                                      div_mask,
540                                      src2, "");
541   if (!is_unsigned) {
542      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
543                                   src_bit_size, src, divisor);
544   }
545   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
546   return LLVMBuildOr(builder, div_mask, result, "");
547}
548
/* Quantize a 32-bit float vector to half precision and back (applying
 * half-precision rounding), flushing values that would be half-precision
 * denormals to zero. */
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   /* Round-trip through f16 to apply half-precision rounding. */
   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   /* 0x38800000 is the f32 bit pattern of 2^-14, the smallest normal
    * half-precision magnitude; flush lanes that are below it yet not
    * exactly zero (LLVMRealONE = ordered not-equal, so NaNs are kept).
    * NOTE(review): the flush selects base.zero, so the sign of negative
    * denormals is not preserved — confirm that is acceptable here. */
   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}
569
570static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
571                                  const nir_alu_instr *instr,
572                                  unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
573                                  LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
574{
575   struct gallivm_state *gallivm = bld_base->base.gallivm;
576   LLVMBuilderRef builder = gallivm->builder;
577   LLVMValueRef result;
578
579   switch (instr->op) {
580   case nir_op_b2f16:
581      result = emit_b2f(bld_base, src[0], 16);
582      break;
583   case nir_op_b2f32:
584      result = emit_b2f(bld_base, src[0], 32);
585      break;
586   case nir_op_b2f64:
587      result = emit_b2f(bld_base, src[0], 64);
588      break;
589   case nir_op_b2i8:
590      result = emit_b2i(bld_base, src[0], 8);
591      break;
592   case nir_op_b2i16:
593      result = emit_b2i(bld_base, src[0], 16);
594      break;
595   case nir_op_b2i32:
596      result = emit_b2i(bld_base, src[0], 32);
597      break;
598   case nir_op_b2i64:
599      result = emit_b2i(bld_base, src[0], 64);
600      break;
601   case nir_op_b32csel:
602      result = emit_b32csel(bld_base, src_bit_size, src);
603      break;
604   case nir_op_bit_count:
605      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
606      if (src_bit_size[0] < 32)
607         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
608      else if (src_bit_size[0] > 32)
609         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
610      break;
611   case nir_op_bitfield_select:
612      result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
613      break;
614   case nir_op_bitfield_reverse:
615      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
616      break;
617   case nir_op_f2b32:
618      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
619      break;
620   case nir_op_f2f16:
621      if (src_bit_size[0] == 64)
622         src[0] = LLVMBuildFPTrunc(builder, src[0],
623                                   bld_base->base.vec_type, "");
624      result = LLVMBuildFPTrunc(builder, src[0],
625                                bld_base->half_bld.vec_type, "");
626      break;
627   case nir_op_f2f32:
628      if (src_bit_size[0] < 32)
629         result = LLVMBuildFPExt(builder, src[0],
630                                 bld_base->base.vec_type, "");
631      else
632         result = LLVMBuildFPTrunc(builder, src[0],
633                                   bld_base->base.vec_type, "");
634      break;
635   case nir_op_f2f64:
636      result = LLVMBuildFPExt(builder, src[0],
637                              bld_base->dbl_bld.vec_type, "");
638      break;
639   case nir_op_f2i8:
640      result = LLVMBuildFPToSI(builder,
641                               src[0],
642                               bld_base->uint8_bld.vec_type, "");
643      break;
644   case nir_op_f2i16:
645      result = LLVMBuildFPToSI(builder,
646                               src[0],
647                               bld_base->uint16_bld.vec_type, "");
648      break;
649   case nir_op_f2i32:
650      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
651      break;
652   case nir_op_f2u8:
653      result = LLVMBuildFPToUI(builder,
654                               src[0],
655                               bld_base->uint8_bld.vec_type, "");
656      break;
657   case nir_op_f2u16:
658      result = LLVMBuildFPToUI(builder,
659                               src[0],
660                               bld_base->uint16_bld.vec_type, "");
661      break;
662   case nir_op_f2u32:
663      result = LLVMBuildFPToUI(builder,
664                               src[0],
665                               bld_base->base.int_vec_type, "");
666      break;
667   case nir_op_f2i64:
668      result = LLVMBuildFPToSI(builder,
669                               src[0],
670                               bld_base->int64_bld.vec_type, "");
671      break;
672   case nir_op_f2u64:
673      result = LLVMBuildFPToUI(builder,
674                               src[0],
675                               bld_base->uint64_bld.vec_type, "");
676      break;
677   case nir_op_fabs:
678      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
679      break;
680   case nir_op_fadd:
681      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
682                            src[0], src[1]);
683      break;
684   case nir_op_fceil:
685      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
686      break;
687   case nir_op_fcos:
688      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
689      break;
690   case nir_op_fddx:
691   case nir_op_fddx_coarse:
692   case nir_op_fddx_fine:
693      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
694      break;
695   case nir_op_fddy:
696   case nir_op_fddy_coarse:
697   case nir_op_fddy_fine:
698      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
699      break;
700   case nir_op_fdiv:
701      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
702                            src[0], src[1]);
703      break;
704   case nir_op_feq32:
705      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
706      break;
707   case nir_op_fexp2:
708      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
709      break;
710   case nir_op_ffloor:
711      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
712      break;
713   case nir_op_ffma:
714      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
715      break;
716   case nir_op_ffract: {
717      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
718      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
719      result = lp_build_sub(flt_bld, src[0], tmp);
720      break;
721   }
722   case nir_op_fge32:
723      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
724      break;
725   case nir_op_find_lsb: {
726      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
727      result = lp_build_cttz(int_bld, src[0]);
728      if (src_bit_size[0] < 32)
729         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
730      else if (src_bit_size[0] > 32)
731         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
732      break;
733   }
734   case nir_op_fisfinite32:
735      unreachable("Should have been lowered in nir_opt_algebraic_late.");
736   case nir_op_flog2:
737      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
738      break;
739   case nir_op_flt:
740   case nir_op_flt32:
741      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
742      break;
743   case nir_op_fmax:
744   case nir_op_fmin: {
745      enum gallivm_nan_behavior minmax_nan;
746      int first = 0;
747
748      /* If one of the sources is known to be a number (i.e., not NaN), then
749       * better code can be generated by passing that information along.
750       */
751      if (is_a_number(bld_base->range_ht, instr, 1,
752                      0 /* unused num_components */,
753                      NULL /* unused swizzle */)) {
754         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
755      } else if (is_a_number(bld_base->range_ht, instr, 0,
756                             0 /* unused num_components */,
757                             NULL /* unused swizzle */)) {
758         first = 1;
759         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
760      } else {
761         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
762      }
763
764      if (instr->op == nir_op_fmin) {
765         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
766                                   src[first], src[1 - first], minmax_nan);
767      } else {
768         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
769                                   src[first], src[1 - first], minmax_nan);
770      }
771      break;
772   }
773   case nir_op_fmod: {
774      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
775      result = lp_build_div(flt_bld, src[0], src[1]);
776      result = lp_build_floor(flt_bld, result);
777      result = lp_build_mul(flt_bld, src[1], result);
778      result = lp_build_sub(flt_bld, src[0], result);
779      break;
780   }
781   case nir_op_fmul:
782      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
783                            src[0], src[1]);
784      break;
785   case nir_op_fneu32:
786      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
787      break;
788   case nir_op_fneg:
789      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
790      break;
791   case nir_op_fpow:
792      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
793      break;
794   case nir_op_fquantize2f16:
795      result = do_quantize_to_f16(bld_base, src[0]);
796      break;
797   case nir_op_frcp:
798      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
799      break;
800   case nir_op_fround_even:
801      if (src_bit_size[0] == 16) {
802	 struct lp_build_context *bld = get_flt_bld(bld_base, 16);
803	 char intrinsic[64];
804	 lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
805	 result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
806      } else
807	 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
808      break;
809   case nir_op_frsq:
810      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
811      break;
812   case nir_op_fsat:
813      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
814      break;
815   case nir_op_fsign:
816      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
817      break;
818   case nir_op_fsin:
819      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
820      break;
821   case nir_op_fsqrt:
822      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
823      break;
824   case nir_op_ftrunc:
825      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
826      break;
827   case nir_op_i2b32:
828      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
829      break;
830   case nir_op_i2f16:
831      result = LLVMBuildSIToFP(builder, src[0],
832                               bld_base->half_bld.vec_type, "");
833      break;
834   case nir_op_i2f32:
835      result = lp_build_int_to_float(&bld_base->base, src[0]);
836      break;
837   case nir_op_i2f64:
838      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
839      break;
840   case nir_op_i2i8:
841      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
842      break;
843   case nir_op_i2i16:
844      if (src_bit_size[0] < 16)
845         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
846      else
847         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
848      break;
849   case nir_op_i2i32:
850      if (src_bit_size[0] < 32)
851         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
852      else
853         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
854      break;
855   case nir_op_i2i64:
856      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
857      break;
858   case nir_op_iabs:
859      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
860      break;
861   case nir_op_iadd:
862      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
863                            src[0], src[1]);
864      break;
865   case nir_op_iand:
866      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
867                            src[0], src[1]);
868      break;
869   case nir_op_idiv:
870      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
871      break;
872   case nir_op_ieq32:
873      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
874      break;
875   case nir_op_ige32:
876      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
877      break;
878   case nir_op_ilt32:
879      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
880      break;
881   case nir_op_imax:
882      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
883      break;
884   case nir_op_imin:
885      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
886      break;
887   case nir_op_imul:
888   case nir_op_imul24:
889      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
890                            src[0], src[1]);
891      break;
892   case nir_op_imul_high: {
893      LLVMValueRef hi_bits;
894      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
895      result = hi_bits;
896      break;
897   }
898   case nir_op_ine32:
899      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
900      break;
901   case nir_op_ineg:
902      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
903      break;
904   case nir_op_inot:
905      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
906      break;
907   case nir_op_ior:
908      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
909                           src[0], src[1]);
910      break;
911   case nir_op_imod:
912   case nir_op_irem:
913      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
914      break;
915   case nir_op_ishl: {
916      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
917      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
918      if (src_bit_size[0] == 64)
919         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
920      if (src_bit_size[0] < 32)
921         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
922      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
923      result = lp_build_shl(int_bld, src[0], src[1]);
924      break;
925   }
926   case nir_op_ishr: {
927      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
928      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
929      if (src_bit_size[0] == 64)
930         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
931      if (src_bit_size[0] < 32)
932         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
933      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
934      result = lp_build_shr(int_bld, src[0], src[1]);
935      break;
936   }
937   case nir_op_isign:
938      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
939      break;
940   case nir_op_isub:
941      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
942                            src[0], src[1]);
943      break;
944   case nir_op_ixor:
945      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
946                            src[0], src[1]);
947      break;
948   case nir_op_mov:
949      result = src[0];
950      break;
951   case nir_op_unpack_64_2x32_split_x:
952      result = split_64bit(bld_base, src[0], false);
953      break;
954   case nir_op_unpack_64_2x32_split_y:
955      result = split_64bit(bld_base, src[0], true);
956      break;
957
958   case nir_op_pack_32_2x16_split: {
959      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
960      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
961      break;
962   }
963   case nir_op_unpack_32_2x16_split_x:
964      result = split_16bit(bld_base, src[0], false);
965      break;
966   case nir_op_unpack_32_2x16_split_y:
967      result = split_16bit(bld_base, src[0], true);
968      break;
969   case nir_op_pack_64_2x32_split: {
970      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
971      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
972      break;
973   }
974   case nir_op_u2f16:
975      result = LLVMBuildUIToFP(builder, src[0],
976                               bld_base->half_bld.vec_type, "");
977      break;
978   case nir_op_u2f32:
979      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
980      break;
981   case nir_op_u2f64:
982      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
983      break;
984   case nir_op_u2u8:
985      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
986      break;
987   case nir_op_u2u16:
988      if (src_bit_size[0] < 16)
989         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
990      else
991         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
992      break;
993   case nir_op_u2u32:
994      if (src_bit_size[0] < 32)
995         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
996      else
997         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
998      break;
999   case nir_op_u2u64:
1000      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
1001      break;
1002   case nir_op_udiv:
1003      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
1004      break;
1005   case nir_op_ufind_msb: {
1006      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1007      result = lp_build_ctlz(uint_bld, src[0]);
1008      result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
1009      if (src_bit_size[0] < 32)
1010         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
1011      else
1012         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
1013      break;
1014   }
1015   case nir_op_uge32:
1016      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
1017      break;
1018   case nir_op_ult32:
1019      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
1020      break;
1021   case nir_op_umax:
1022      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1023      break;
1024   case nir_op_umin:
1025      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1026      break;
1027   case nir_op_umod:
1028      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
1029      break;
1030   case nir_op_umul_high: {
1031      LLVMValueRef hi_bits;
1032      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
1033      result = hi_bits;
1034      break;
1035   }
1036   case nir_op_ushr: {
1037      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1038      if (src_bit_size[0] == 64)
1039         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1040      if (src_bit_size[0] < 32)
1041         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1042      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1043      result = lp_build_shr(uint_bld, src[0], src[1]);
1044      break;
1045   }
1046   default:
1047      assert(0);
1048      break;
1049   }
1050   return result;
1051}
1052
static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
{
   /* Translate one NIR ALU instruction into LLVM IR.
    *
    * Sources are fetched whole, then consumed along one of three paths:
    * vecN construction (sources kept as scalars), fsumN horizontal
    * reduction, or the general per-channel path through do_alu_action().
    */
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;
   /* Decide how many components to fetch per source for this opcode;
    * most opcodes are componentwise (source width == dest width). */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }
   /* Fetch each source value and remember its bit size for later casts. */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
      /* vecN: each input becomes one destination channel; just cast. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
      /* fsumN: horizontal add of the single source's channels into result[0]. */
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                          src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
      }
    } else {
      /* General path: split each source per channel, cast to the opcode's
       * expected input type, run the scalar (per-channel) action, then cast
       * the result to the destination type/bit size. */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            /* Multi-component sources are aggregates; extract channel c. */
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                     src[i], c, "");
            } else
               src_chan[i] = src[i];
            src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
 }
1122
1123static void visit_load_const(struct lp_build_nir_context *bld_base,
1124                             const nir_load_const_instr *instr)
1125{
1126   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1127   struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1128   for (unsigned i = 0; i < instr->def.num_components; i++)
1129      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1130   memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
1131   assign_ssa_dest(bld_base, &instr->def, result);
1132}
1133
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   /* Walk a deref chain and split the addressing into a constant slot
    * offset (*const_out) and an optional indirect vector offset
    * (*indir_out, NULL when the access is fully constant).
    *
    * When the caller asks for a vertex index (GS/TCS/TES per-vertex I/O),
    * the first array level of the chain is consumed as that index — either
    * as an LLVM value (*vertex_index_ref) or as a constant
    * (*vertex_index_out).
    */
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;  /* path.path[0] is the deref_var; start past it */

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   /* Compact variables (e.g. clip distances) index scalars directly; a
    * constant index is the whole offset, no slot walk needed. */
   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   /* Accumulate attribute-slot offsets level by level. */
   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Sum the slot counts of all fields preceding the accessed one. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
           const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
           /* Dynamic index: scale by element slot size and fold into the
            * running indirect offset vector. */
           LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
           idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
           LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                               idx_src);
           if (offset)
             offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
           else
             offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

out:
   nir_deref_path_finish(&path);

   /* NOTE(review): when both a constant and an indirect part exist, the
    * constant is folded into the indirect offset here AND still returned
    * via *const_out — callers appear to use *const_out only when
    * *indir_out is NULL; confirm before relying on both. */
   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}
1204
1205static void
1206visit_load_input(struct lp_build_nir_context *bld_base,
1207                 nir_intrinsic_instr *instr,
1208                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1209{
1210   nir_variable var = {0};
1211   var.data.location = nir_intrinsic_io_semantics(instr).location;
1212   var.data.driver_location = nir_intrinsic_base(instr);
1213   var.data.location_frac = nir_intrinsic_component(instr);
1214
1215   unsigned nc = nir_dest_num_components(instr->dest);
1216   unsigned bit_size = nir_dest_bit_size(instr->dest);
1217
1218   nir_src offset = *nir_get_io_offset_src(instr);
1219   bool indirect = !nir_src_is_const(offset);
1220   if (!indirect)
1221      assert(nir_src_as_uint(offset) == 0);
1222   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1223
1224   bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1225}
1226
1227static void
1228visit_store_output(struct lp_build_nir_context *bld_base,
1229                   nir_intrinsic_instr *instr)
1230{
1231   nir_variable var = {0};
1232   var.data.location = nir_intrinsic_io_semantics(instr).location;
1233   var.data.driver_location = nir_intrinsic_base(instr);
1234   var.data.location_frac = nir_intrinsic_component(instr);
1235
1236   unsigned mask = nir_intrinsic_write_mask(instr);
1237
1238   unsigned bit_size = nir_src_bit_size(instr->src[0]);
1239   LLVMValueRef src = get_src(bld_base, instr->src[0]);
1240
1241   nir_src offset = *nir_get_io_offset_src(instr);
1242   bool indirect = !nir_src_is_const(offset);
1243   if (!indirect)
1244      assert(nir_src_as_uint(offset) == 0);
1245   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1246
1247   if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1248      src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1249                                  src, 0, "");
1250   }
1251
1252   bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1253                       bit_size, &var, mask, NULL, 0, indir_index, src);
1254}
1255
1256static void visit_load_var(struct lp_build_nir_context *bld_base,
1257                           nir_intrinsic_instr *instr,
1258                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1259{
1260   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1261   nir_variable *var = nir_deref_instr_get_variable(deref);
1262   assert(util_bitcount(deref->modes) == 1);
1263   nir_variable_mode mode = deref->modes;
1264   unsigned const_index;
1265   LLVMValueRef indir_index;
1266   LLVMValueRef indir_vertex_index = NULL;
1267   unsigned vertex_index = 0;
1268   unsigned nc = nir_dest_num_components(instr->dest);
1269   unsigned bit_size = nir_dest_bit_size(instr->dest);
1270   if (var) {
1271      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
1272         var->data.mode == nir_var_shader_in;
1273      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
1274         var->data.mode == nir_var_shader_in;
1275      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1276         var->data.mode == nir_var_shader_in;
1277      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1278         var->data.mode == nir_var_shader_out && !var->data.patch;
1279      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
1280         var->data.mode == nir_var_shader_in && !var->data.patch;
1281
1282      mode = var->data.mode;
1283
1284      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
1285                       &const_index, &indir_index);
1286   }
1287   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, indir_vertex_index, const_index, indir_index, result);
1288}
1289
1290static void
1291visit_store_var(struct lp_build_nir_context *bld_base,
1292                nir_intrinsic_instr *instr)
1293{
1294   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1295   nir_variable *var = nir_deref_instr_get_variable(deref);
1296   assert(util_bitcount(deref->modes) == 1);
1297   nir_variable_mode mode = deref->modes;
1298   int writemask = instr->const_index[0];
1299   unsigned bit_size = nir_src_bit_size(instr->src[1]);
1300   LLVMValueRef src = get_src(bld_base, instr->src[1]);
1301   unsigned const_index = 0;
1302   LLVMValueRef indir_index, indir_vertex_index = NULL;
1303   if (var) {
1304      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1305         var->data.mode == nir_var_shader_out && !var->data.patch;
1306      get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
1307                       &const_index, &indir_index);
1308   }
1309   bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
1310}
1311
1312static void visit_load_ubo(struct lp_build_nir_context *bld_base,
1313                           nir_intrinsic_instr *instr,
1314                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1315{
1316   struct gallivm_state *gallivm = bld_base->base.gallivm;
1317   LLVMBuilderRef builder = gallivm->builder;
1318   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1319   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1320
1321   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[1]);
1322   idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1323   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1324                      offset_is_uniform, idx, offset, result);
1325}
1326
1327static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
1328                                     nir_intrinsic_instr *instr,
1329                                     LLVMValueRef result[4])
1330{
1331   struct gallivm_state *gallivm = bld_base->base.gallivm;
1332   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1333   LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
1334   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);
1335
1336   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1337                      offset_is_uniform, idx, offset, result);
1338}
1339
1340
1341static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
1342                           nir_intrinsic_instr *instr,
1343                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1344{
1345   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1346   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1347   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1348                       idx, offset, result);
1349}
1350
1351static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
1352                             nir_intrinsic_instr *instr)
1353{
1354   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1355   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
1356   LLVMValueRef offset = get_src(bld_base, instr->src[2]);
1357   int writemask = instr->const_index[0];
1358   int nc = nir_src_num_components(instr->src[0]);
1359   int bitsize = nir_src_bit_size(instr->src[0]);
1360   bld_base->store_mem(bld_base, writemask, nc, bitsize, idx, offset, val);
1361}
1362
1363static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1364                                nir_intrinsic_instr *instr,
1365                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1366{
1367   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1368   result[0] = bld_base->get_ssbo_size(bld_base, idx);
1369}
1370
1371static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
1372                              nir_intrinsic_instr *instr,
1373                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1374{
1375   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
1376   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1377   LLVMValueRef val = get_src(bld_base, instr->src[2]);
1378   LLVMValueRef val2 = NULL;
1379   int bitsize = nir_src_bit_size(instr->src[2]);
1380   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
1381      val2 = get_src(bld_base, instr->src[3]);
1382
1383   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx, offset, val, val2, &result[0]);
1384
1385}
1386
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   /* Lower image_deref_load: src[0] = image deref, src[1] = coordinates,
    * src[2] = sample index (multisampled targets only).
    */
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Resolve array-of-images indexing into constant/indirect parts. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays: NIR puts the layer in coords[1]; the image code expects
    * it in coords[2]. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   /* Constant indexing is folded into image_index; indirect indexing is
    * passed separately and image_index stays at the base binding. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}
1420
1421static void visit_store_image(struct lp_build_nir_context *bld_base,
1422                              nir_intrinsic_instr *instr)
1423{
1424   struct gallivm_state *gallivm = bld_base->base.gallivm;
1425   LLVMBuilderRef builder = gallivm->builder;
1426   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1427   nir_variable *var = nir_deref_instr_get_variable(deref);
1428   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
1429   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
1430   LLVMValueRef coords[5];
1431   struct lp_img_params params;
1432   const struct glsl_type *type = glsl_without_array(var->type);
1433   unsigned const_index;
1434   LLVMValueRef indir_index;
1435   get_deref_offset(bld_base, deref, false, NULL, NULL,
1436                    &const_index, &indir_index);
1437
1438   memset(&params, 0, sizeof(params));
1439   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1440   for (unsigned i = 0; i < 4; i++)
1441      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
1442   if (params.target == PIPE_TEXTURE_1D_ARRAY)
1443      coords[2] = coords[1];
1444   params.coords = coords;
1445
1446   for (unsigned i = 0; i < 4; i++) {
1447      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
1448      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
1449   }
1450   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
1451      params.ms_index = get_src(bld_base, instr->src[2]);
1452   params.img_op = LP_IMG_STORE;
1453   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
1454   params.image_index_offset = indir_index;
1455
1456   if (params.target == PIPE_TEXTURE_1D_ARRAY)
1457      coords[2] = coords[1];
1458   bld_base->image_op(bld_base, &params);
1459}
1460
/* Lower the nir_intrinsic_image_deref_atomic_* family to the backend
 * image op.  src[0] = image deref, src[1] = coords, src[2] = sample
 * index, src[3] = operand, src[4] = comparison value (comp_swap only). */
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   /* Map the NIR atomic intrinsic to the LLVM atomicrmw opcode.
    * comp_swap intentionally lands in the default case: it is emitted
    * as a cmpxchg (LP_IMG_ATOMIC_CAS) below, not an atomicrmw. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in coords[1]; move it to coords[2]. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   /* NOTE(review): unlike visit_load_image, the sample index is passed
    * through without a cast_type to uint32 — confirm callers always feed
    * a 32-bit value here. */
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   /* Indirect index: use the base binding and pass the offset separately. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1535
1536
1537static void visit_image_size(struct lp_build_nir_context *bld_base,
1538                             nir_intrinsic_instr *instr,
1539                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1540{
1541   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1542   nir_variable *var = nir_deref_instr_get_variable(deref);
1543   struct lp_sampler_size_query_params params = { 0 };
1544   unsigned const_index;
1545   LLVMValueRef indir_index;
1546   const struct glsl_type *type = glsl_without_array(var->type);
1547   get_deref_offset(bld_base, deref, false, NULL, NULL,
1548                    &const_index, &indir_index);
1549   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1550   params.texture_unit_offset = indir_index;
1551   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1552   params.sizes_out = result;
1553
1554   bld_base->image_size(bld_base, &params);
1555}
1556
1557static void visit_image_samples(struct lp_build_nir_context *bld_base,
1558                                nir_intrinsic_instr *instr,
1559                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1560{
1561   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1562   nir_variable *var = nir_deref_instr_get_variable(deref);
1563   struct lp_sampler_size_query_params params = { 0 };
1564   unsigned const_index;
1565   LLVMValueRef indir_index;
1566   const struct glsl_type *type = glsl_without_array(var->type);
1567   get_deref_offset(bld_base, deref, false, NULL, NULL,
1568                    &const_index, &indir_index);
1569
1570   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
1571   params.texture_unit_offset = indir_index;
1572   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
1573   params.sizes_out = result;
1574   params.samples_only = true;
1575
1576   bld_base->image_size(bld_base, &params);
1577}
1578
1579static void visit_shared_load(struct lp_build_nir_context *bld_base,
1580                                nir_intrinsic_instr *instr,
1581                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1582{
1583   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1584   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1585                      NULL, offset, result);
1586}
1587
1588static void visit_shared_store(struct lp_build_nir_context *bld_base,
1589                               nir_intrinsic_instr *instr)
1590{
1591   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1592   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1593   int writemask = instr->const_index[1];
1594   int nc = nir_src_num_components(instr->src[0]);
1595   int bitsize = nir_src_bit_size(instr->src[0]);
1596   bld_base->store_mem(bld_base, writemask, nc, bitsize, NULL, offset, val);
1597}
1598
1599static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
1600                                nir_intrinsic_instr *instr,
1601                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1602{
1603   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1604   LLVMValueRef val = get_src(bld_base, instr->src[1]);
1605   LLVMValueRef val2 = NULL;
1606   int bitsize = nir_src_bit_size(instr->src[1]);
1607   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
1608      val2 = get_src(bld_base, instr->src[2]);
1609
1610   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
1611
1612}
1613
/* Emit a workgroup execution barrier (nir_intrinsic_control_barrier). */
static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1618
1619static void visit_discard(struct lp_build_nir_context *bld_base,
1620                          nir_intrinsic_instr *instr)
1621{
1622   LLVMValueRef cond = NULL;
1623   if (instr->intrinsic == nir_intrinsic_discard_if) {
1624      cond = get_src(bld_base, instr->src[0]);
1625      cond = cast_type(bld_base, cond, nir_type_int, 32);
1626   }
1627   bld_base->discard(bld_base, cond);
1628}
1629
1630static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
1631                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1632{
1633   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1634
1635   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);
1636   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1637                             nir_src_bit_size(instr->src[0]),
1638                             offset_is_uniform, offset, result);
1639}
1640
1641static void visit_load_global(struct lp_build_nir_context *bld_base,
1642                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1643{
1644   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1645   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1646                         nir_src_bit_size(instr->src[0]),
1647                         addr, result);
1648}
1649
1650static void visit_store_global(struct lp_build_nir_context *bld_base,
1651                               nir_intrinsic_instr *instr)
1652{
1653   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1654   int nc = nir_src_num_components(instr->src[0]);
1655   int bitsize = nir_src_bit_size(instr->src[0]);
1656   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
1657   int addr_bitsize = nir_src_bit_size(instr->src[1]);
1658   int writemask = instr->const_index[0];
1659   bld_base->store_global(bld_base, writemask, nc, bitsize, addr_bitsize, addr, val);
1660}
1661
1662static void visit_global_atomic(struct lp_build_nir_context *bld_base,
1663                                nir_intrinsic_instr *instr,
1664                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1665{
1666   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
1667   LLVMValueRef val = get_src(bld_base, instr->src[1]);
1668   LLVMValueRef val2 = NULL;
1669   int addr_bitsize = nir_src_bit_size(instr->src[0]);
1670   int val_bitsize = nir_src_bit_size(instr->src[1]);
1671   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
1672      val2 = get_src(bld_base, instr->src[2]);
1673
1674   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
1675                           val_bitsize, addr, val, val2, &result[0]);
1676}
1677
/* Lower the interpolateAt* family (nir_intrinsic_interp_deref_at_*).
 * at_offset passes a float (x, y) pair in offsets[0..1]; at_sample
 * passes the sample index as an int in offsets[0]; at_centroid passes
 * neither and is signalled via the 'centroid' flag. */
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}
1706
1707static void visit_load_scratch(struct lp_build_nir_context *bld_base,
1708                               nir_intrinsic_instr *instr,
1709                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1710{
1711   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1712
1713   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
1714                          nir_dest_bit_size(instr->dest), offset, result);
1715}
1716
1717static void visit_store_scratch(struct lp_build_nir_context *bld_base,
1718                                nir_intrinsic_instr *instr)
1719{
1720   LLVMValueRef val = get_src(bld_base, instr->src[0]);
1721   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1722   int writemask = instr->const_index[2];
1723   int nc = nir_src_num_components(instr->src[0]);
1724   int bitsize = nir_src_bit_size(instr->src[0]);
1725   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
1726}
1727
1728
/* Central dispatcher: lower one NIR intrinsic instruction.  Helpers that
 * produce a value write into result[]; the common tail assigns it to the
 * instruction's dest.  Intrinsics that produce nothing leave result[0]
 * NULL. */
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* All system-value loads funnel through one backend hook. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
      visit_barrier(bld_base);
      break;
   /* Memory barriers are treated as no-ops here: the cases fall through
    * with no code emitted. */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
     break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   /* Subgroup operations: sources are cast to int before dispatch. */
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;

      /* read_invocation takes the lane index in src[1];
       * read_first_invocation takes no index (src1 stays NULL). */
      if (instr->intrinsic == nir_intrinsic_read_invocation)
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
      bld_base->read_invocation(bld_base, get_src(bld_base, instr->src[0]), nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   /* Only intrinsics that produced a value populate result[0]. */
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
1936
1937static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
1938{
1939   struct lp_sampler_size_query_params params = { 0 };
1940   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
1941   LLVMValueRef explicit_lod = NULL;
1942   LLVMValueRef texture_unit_offset = NULL;
1943   for (unsigned i = 0; i < instr->num_srcs; i++) {
1944      switch (instr->src[i].src_type) {
1945      case nir_tex_src_lod:
1946         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
1947         break;
1948      case nir_tex_src_texture_offset:
1949         texture_unit_offset = get_src(bld_base, instr->src[i].src);
1950         break;
1951      default:
1952         break;
1953      }
1954   }
1955
1956   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
1957   params.texture_unit = instr->texture_index;
1958   params.explicit_lod = explicit_lod;
1959   params.is_sviewinfo = TRUE;
1960   params.sizes_out = sizes_out;
1961   params.samples_only = (instr->op == nir_texop_texture_samples);
1962   params.texture_unit_offset = texture_unit_offset;
1963
1964   if (instr->op == nir_texop_query_levels)
1965      params.explicit_lod = bld_base->uint_bld.zero;
1966   bld_base->tex_size(bld_base, &params);
1967   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
1968}
1969
1970static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
1971                                                              nir_src lod_src)
1972{
1973   enum lp_sampler_lod_property lod_property;
1974
1975   if (nir_src_is_dynamically_uniform(lod_src))
1976      lod_property = LP_SAMPLER_LOD_SCALAR;
1977   else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1978      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
1979         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1980      else
1981         lod_property = LP_SAMPLER_LOD_PER_QUAD;
1982   }
1983   else
1984      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1985   return lod_property;
1986}
1987
/* Lower a NIR texture instruction to the backend sampler hook.  Builds
 * the lp_sampler_params sample_key from the op and its sources, gathers
 * coords/derivatives/offsets, and assigns the fetched texel(s) to the
 * instruction's dest.  Size-style queries are redirected to visit_txs. */
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);

   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   /* Size/levels/samples queries are a different code path entirely. */
   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   /* Encode the operation type (fetch/gather/lodq) into the sample key. */
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
   /* Walk the sources once, filling in coords, derivs, offsets, etc. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (instr->coord_components == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         /* Pad unused coordinate channels with undef. */
         for (unsigned chan = instr->coord_components; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_comparator:
         /* Shadow comparison value always rides in coords[4]. */
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         /* txf takes an integer LOD; sampling ops take a float LOD. */
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         /* The array layer does not take a derivative. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         /* Texel offsets, one int per non-array coordinate. */
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   /* Without an explicit sampler deref, the texture deref doubles as one. */
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   /* Sampling ops want float coords; fetch ops want int coords. */
   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   /* Without a deref, fall back to the instruction's static indices
    * (unless a handle source supplies them dynamically). */
   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   /* txd supplies explicit derivatives; the LOD is then never scalar. */
   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   params.aniso_filter_table = bld_base->aniso_filter_table;
   bld_base->tex(bld_base, &params);

   /* The sampler always returns 32-bit values; narrow to 16 bits when
    * the dest asks for it (half-float convert or integer truncate). */
   if (nir_dest_bit_size(instr->dest) != 32) {
      assert(nir_dest_bit_size(instr->dest) == 16);
      LLVMTypeRef vec_type = NULL;
      bool is_float = false;
      switch (nir_alu_type_get_base_type(instr->dest_type)) {
      case nir_type_float:
         is_float = true;
	 break;
      case nir_type_int:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case nir_type_uint:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      default:
         unreachable("unexpected alu type");
      }
      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         if (is_float) {
            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
         } else {
            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
         }
      }
   }

   assign_dest(bld_base, &instr->dest, texel);

}
2210
2211static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
2212                            const nir_ssa_undef_instr *instr)
2213{
2214   unsigned num_components = instr->def.num_components;
2215   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2216   struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
2217   for (unsigned i = 0; i < num_components; i++)
2218      undef[i] = LLVMGetUndef(undef_bld->vec_type);
2219   memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
2220   assign_ssa_dest(bld_base, &instr->def, undef);
2221}
2222
2223static void visit_jump(struct lp_build_nir_context *bld_base,
2224                       const nir_jump_instr *instr)
2225{
2226   switch (instr->type) {
2227   case nir_jump_break:
2228      bld_base->break_stmt(bld_base);
2229      break;
2230   case nir_jump_continue:
2231      bld_base->continue_stmt(bld_base);
2232      break;
2233   default:
2234      unreachable("Unknown jump instr\n");
2235   }
2236}
2237
2238static void visit_deref(struct lp_build_nir_context *bld_base,
2239                        nir_deref_instr *instr)
2240{
2241   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
2242                                        nir_var_mem_global))
2243      return;
2244   LLVMValueRef result = NULL;
2245   switch(instr->deref_type) {
2246   case nir_deref_type_var: {
2247      struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
2248      result = entry->data;
2249      break;
2250   }
2251   default:
2252      unreachable("Unhandled deref_instr deref type");
2253   }
2254
2255   assign_ssa(bld_base, instr->dest.ssa.index, result);
2256}
2257
2258static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
2259{
2260   nir_foreach_instr(instr, block)
2261   {
2262      switch (instr->type) {
2263      case nir_instr_type_alu:
2264         visit_alu(bld_base, nir_instr_as_alu(instr));
2265         break;
2266      case nir_instr_type_load_const:
2267         visit_load_const(bld_base, nir_instr_as_load_const(instr));
2268         break;
2269      case nir_instr_type_intrinsic:
2270         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
2271         break;
2272      case nir_instr_type_tex:
2273         visit_tex(bld_base, nir_instr_as_tex(instr));
2274         break;
2275      case nir_instr_type_phi:
2276         assert(0);
2277         break;
2278      case nir_instr_type_ssa_undef:
2279         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
2280         break;
2281      case nir_instr_type_jump:
2282         visit_jump(bld_base, nir_instr_as_jump(instr));
2283         break;
2284      case nir_instr_type_deref:
2285         visit_deref(bld_base, nir_instr_as_deref(instr));
2286         break;
2287      default:
2288         fprintf(stderr, "Unknown NIR instr type: ");
2289         nir_print_instr(instr, stderr);
2290         fprintf(stderr, "\n");
2291         abort();
2292      }
2293   }
2294}
2295
2296static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
2297{
2298   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
2299
2300   bld_base->if_cond(bld_base, cond);
2301   visit_cf_list(bld_base, &if_stmt->then_list);
2302
2303   if (!exec_list_is_empty(&if_stmt->else_list)) {
2304      bld_base->else_stmt(bld_base);
2305      visit_cf_list(bld_base, &if_stmt->else_list);
2306   }
2307   bld_base->endif_stmt(bld_base);
2308}
2309
2310static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
2311{
2312   bld_base->bgnloop(bld_base);
2313   visit_cf_list(bld_base, &loop->body);
2314   bld_base->endloop(bld_base);
2315}
2316
2317static void visit_cf_list(struct lp_build_nir_context *bld_base,
2318                          struct exec_list *list)
2319{
2320   foreach_list_typed(nir_cf_node, node, node, list)
2321   {
2322      switch (node->type) {
2323      case nir_cf_node_block:
2324         visit_block(bld_base, nir_cf_node_as_block(node));
2325         break;
2326
2327      case nir_cf_node_if:
2328         visit_if(bld_base, nir_cf_node_as_if(node));
2329         break;
2330
2331      case nir_cf_node_loop:
2332         visit_loop(bld_base, nir_cf_node_as_loop(node));
2333         break;
2334
2335      default:
2336         assert(0);
2337      }
2338   }
2339}
2340
2341static void
2342handle_shader_output_decl(struct lp_build_nir_context *bld_base,
2343                          struct nir_shader *nir,
2344                          struct nir_variable *variable)
2345{
2346   bld_base->emit_var_decl(bld_base, variable);
2347}
2348
/* Vector registers are stored as arrays on the LLVM side so that we can use
   GEP on them; exec-mask stores require operating on one component at a
   time.  The array layout is:
   0.x, 1.x, 2.x, 3.x
   0.y, 1.y, 2.y, 3.y
   ....
*/
2357static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
2358                                     nir_register *reg)
2359{
2360   struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2361
2362   LLVMTypeRef type = int_bld->vec_type;
2363   if (reg->num_array_elems)
2364      type = LLVMArrayType(type, reg->num_array_elems);
2365   if (reg->num_components > 1)
2366      type = LLVMArrayType(type, reg->num_components);
2367
2368   return type;
2369}
2370
2371
/* Main entry point: lower a NIR shader to LLVM IR through the bld_base
 * callbacks.  Runs the final out-of-SSA lowering, declares outputs and
 * registers, then walks the (single) function's control-flow tree.
 * Always returns true.  NOTE(review): the pass order below is
 * significant — phis must be gone (nir_convert_from_ssa) before
 * visit_block asserts on them, and locals must become registers before
 * dead-variable removal. */
bool lp_build_nir_llvm(
   struct lp_build_nir_context *bld_base,
   struct nir_shader *nir)
{
   struct nir_function *func;

   /* Leave SSA form; visit_block asserts that no phi nodes remain. */
   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   /* With lowered I/O there are no output variables to iterate; synthesize
    * a vec4 variable per written location so storage still gets declared. */
   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         /* driver_location = number of written slots below this one. */
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   /* Per-translation lookup tables; freed at the bottom of this function. */
   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   /* Only one function is expected after linking/inlining. */
   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   /* Allocate stack storage for every NIR register up front. */
   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}
2426
2427/* do some basic opts to remove some things we don't want to see. */
2428void lp_build_opt_nir(struct nir_shader *nir)
2429{
2430   bool progress;
2431
2432   static const struct nir_lower_tex_options lower_tex_options = {
2433      .lower_tg4_offsets = true,
2434      .lower_txp = ~0u,
2435   };
2436   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2437   NIR_PASS_V(nir, nir_lower_frexp);
2438
2439   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2440   NIR_PASS_V(nir, nir_lower_fp16_casts);
2441   do {
2442      progress = false;
2443      NIR_PASS(progress, nir, nir_opt_constant_folding);
2444      NIR_PASS(progress, nir, nir_opt_algebraic);
2445      NIR_PASS(progress, nir, nir_lower_pack);
2446
2447      nir_lower_tex_options options = { 0, };
2448      NIR_PASS_V(nir, nir_lower_tex, &options);
2449
2450      const nir_lower_subgroups_options subgroups_options = {
2451	.subgroup_size = lp_native_vector_width / 32,
2452	.ballot_bit_size = 32,
2453        .ballot_components = 1,
2454	.lower_to_scalar = true,
2455	.lower_subgroup_masks = true,
2456      };
2457      NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
2458
2459   } while (progress);
2460
2461   do {
2462      progress = false;
2463      NIR_PASS(progress, nir, nir_opt_algebraic_late);
2464      if (progress) {
2465         NIR_PASS_V(nir, nir_copy_prop);
2466         NIR_PASS_V(nir, nir_opt_dce);
2467         NIR_PASS_V(nir, nir_opt_cse);
2468      }
2469   } while (progress);
2470
2471   if (nir_lower_bool_to_int32(nir)) {
2472      NIR_PASS_V(nir, nir_copy_prop);
2473      NIR_PASS_V(nir, nir_opt_dce);
2474   }
2475}
2476