1/*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef NIR_BUILDER_H
25#define NIR_BUILDER_H
26
27#include "nir_control_flow.h"
28#include "util/bitscan.h"
29#include "util/half_float.h"
30
31struct exec_list;
32
33typedef struct nir_builder {
34   nir_cursor cursor;
35
36   /* Whether new ALU instructions will be marked "exact" */
37   bool exact;
38
39   /* Whether to run divergence analysis on inserted instructions (loop merge
40    * and header phis are not updated). */
41   bool update_divergence;
42
43   nir_shader *shader;
44   nir_function_impl *impl;
45} nir_builder;
46
47static inline void
48nir_builder_init(nir_builder *build, nir_function_impl *impl)
49{
50   memset(build, 0, sizeof(*build));
51   build->exact = false;
52   build->impl = impl;
53   build->shader = impl->function->shader;
54}
55
56static inline nir_builder MUST_CHECK PRINTFLIKE(3, 4)
57nir_builder_init_simple_shader(gl_shader_stage stage,
58                               const nir_shader_compiler_options *options,
59                               const char *name, ...)
60{
61   nir_builder b;
62
63   memset(&b, 0, sizeof(b));
64   b.shader = nir_shader_create(NULL, stage, options, NULL);
65
66   if (name) {
67      va_list args;
68      va_start(args, name);
69      b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
70      va_end(args);
71   }
72
73   nir_function *func = nir_function_create(b.shader, "main");
74   func->is_entrypoint = true;
75   b.exact = false;
76   b.impl = nir_function_impl_create(func);
77   b.cursor = nir_after_cf_list(&b.impl->body);
78
79   return b;
80}
81
82typedef bool (*nir_instr_pass_cb)(struct nir_builder *, nir_instr *, void *);
83
84/**
85 * Iterates over all the instructions in a NIR shader and calls the given pass
86 * on them.
87 *
88 * The pass should return true if it modified the shader.  In that case, only
89 * the preserved metadata flags will be preserved in the function impl.
90 *
91 * The builder will be initialized to point at the function impl, but its
92 * cursor is unset.
93 */
94static inline bool
95nir_shader_instructions_pass(nir_shader *shader,
96                             nir_instr_pass_cb pass,
97                             nir_metadata preserved,
98                             void *cb_data)
99{
100   bool progress = false;
101
102   nir_foreach_function(function, shader) {
103      if (!function->impl)
104         continue;
105
106      bool func_progress = false;
107      nir_builder b;
108      nir_builder_init(&b, function->impl);
109
110      nir_foreach_block_safe(block, function->impl) {
111         nir_foreach_instr_safe(instr, block) {
112            func_progress |= pass(&b, instr, cb_data);
113         }
114      }
115
116      if (func_progress) {
117         nir_metadata_preserve(function->impl, preserved);
118         progress = true;
119      } else {
120         nir_metadata_preserve(function->impl, nir_metadata_all);
121      }
122   }
123
124   return progress;
125}
126
127static inline void
128nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
129{
130   nir_instr_insert(build->cursor, instr);
131
132   if (build->update_divergence)
133      nir_update_instr_divergence(build->shader, instr);
134
135   /* Move the cursor forward. */
136   build->cursor = nir_after_instr(instr);
137}
138
139static inline nir_instr *
140nir_builder_last_instr(nir_builder *build)
141{
142   assert(build->cursor.option == nir_cursor_after_instr);
143   return build->cursor.instr;
144}
145
146static inline void
147nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
148{
149   nir_cf_node_insert(build->cursor, cf);
150}
151
152static inline bool
153nir_builder_is_inside_cf(nir_builder *build, nir_cf_node *cf_node)
154{
155   nir_block *block = nir_cursor_current_block(build->cursor);
156   for (nir_cf_node *n = &block->cf_node; n; n = n->parent) {
157      if (n == cf_node)
158         return true;
159   }
160   return false;
161}
162
163static inline nir_if *
164nir_push_if_src(nir_builder *build, nir_src condition)
165{
166   nir_if *nif = nir_if_create(build->shader);
167   nif->condition = condition;
168   nir_builder_cf_insert(build, &nif->cf_node);
169   build->cursor = nir_before_cf_list(&nif->then_list);
170   return nif;
171}
172
173static inline nir_if *
174nir_push_if(nir_builder *build, nir_ssa_def *condition)
175{
176   return nir_push_if_src(build, nir_src_for_ssa(condition));
177}
178
179static inline nir_if *
180nir_push_else(nir_builder *build, nir_if *nif)
181{
182   if (nif) {
183      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
184   } else {
185      nir_block *block = nir_cursor_current_block(build->cursor);
186      nif = nir_cf_node_as_if(block->cf_node.parent);
187   }
188   build->cursor = nir_before_cf_list(&nif->else_list);
189   return nif;
190}
191
192static inline void
193nir_pop_if(nir_builder *build, nir_if *nif)
194{
195   if (nif) {
196      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
197   } else {
198      nir_block *block = nir_cursor_current_block(build->cursor);
199      nif = nir_cf_node_as_if(block->cf_node.parent);
200   }
201   build->cursor = nir_after_cf_node(&nif->cf_node);
202}
203
204static inline nir_ssa_def *
205nir_if_phi(nir_builder *build, nir_ssa_def *then_def, nir_ssa_def *else_def)
206{
207   nir_block *block = nir_cursor_current_block(build->cursor);
208   nir_if *nif = nir_cf_node_as_if(nir_cf_node_prev(&block->cf_node));
209
210   nir_phi_instr *phi = nir_phi_instr_create(build->shader);
211   nir_phi_instr_add_src(phi, nir_if_last_then_block(nif), nir_src_for_ssa(then_def));
212   nir_phi_instr_add_src(phi, nir_if_last_else_block(nif), nir_src_for_ssa(else_def));
213
214   assert(then_def->num_components == else_def->num_components);
215   assert(then_def->bit_size == else_def->bit_size);
216   nir_ssa_dest_init(&phi->instr, &phi->dest,
217                     then_def->num_components, then_def->bit_size, NULL);
218
219   nir_builder_instr_insert(build, &phi->instr);
220
221   return &phi->dest.ssa;
222}
223
224static inline nir_loop *
225nir_push_loop(nir_builder *build)
226{
227   nir_loop *loop = nir_loop_create(build->shader);
228   nir_builder_cf_insert(build, &loop->cf_node);
229   build->cursor = nir_before_cf_list(&loop->body);
230   return loop;
231}
232
233static inline void
234nir_pop_loop(nir_builder *build, nir_loop *loop)
235{
236   if (loop) {
237      assert(nir_builder_is_inside_cf(build, &loop->cf_node));
238   } else {
239      nir_block *block = nir_cursor_current_block(build->cursor);
240      loop = nir_cf_node_as_loop(block->cf_node.parent);
241   }
242   build->cursor = nir_after_cf_node(&loop->cf_node);
243}
244
245static inline nir_ssa_def *
246nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
247{
248   nir_ssa_undef_instr *undef =
249      nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
250   if (!undef)
251      return NULL;
252
253   nir_instr_insert(nir_before_cf_list(&build->impl->body), &undef->instr);
254   if (build->update_divergence)
255      nir_update_instr_divergence(build->shader, &undef->instr);
256
257   return &undef->def;
258}
259
260static inline nir_ssa_def *
261nir_build_imm(nir_builder *build, unsigned num_components,
262              unsigned bit_size, const nir_const_value *value)
263{
264   nir_load_const_instr *load_const =
265      nir_load_const_instr_create(build->shader, num_components, bit_size);
266   if (!load_const)
267      return NULL;
268
269   memcpy(load_const->value, value, sizeof(nir_const_value) * num_components);
270
271   nir_builder_instr_insert(build, &load_const->instr);
272
273   return &load_const->def;
274}
275
276static inline nir_ssa_def *
277nir_imm_zero(nir_builder *build, unsigned num_components, unsigned bit_size)
278{
279   nir_load_const_instr *load_const =
280      nir_load_const_instr_create(build->shader, num_components, bit_size);
281
282   /* nir_load_const_instr_create uses rzalloc so it's already zero */
283
284   nir_builder_instr_insert(build, &load_const->instr);
285
286   return &load_const->def;
287}
288
289static inline nir_ssa_def *
290nir_imm_boolN_t(nir_builder *build, bool x, unsigned bit_size)
291{
292   nir_const_value v = nir_const_value_for_bool(x, bit_size);
293   return nir_build_imm(build, 1, bit_size, &v);
294}
295
296static inline nir_ssa_def *
297nir_imm_bool(nir_builder *build, bool x)
298{
299   return nir_imm_boolN_t(build, x, 1);
300}
301
302static inline nir_ssa_def *
303nir_imm_true(nir_builder *build)
304{
305   return nir_imm_bool(build, true);
306}
307
308static inline nir_ssa_def *
309nir_imm_false(nir_builder *build)
310{
311   return nir_imm_bool(build, false);
312}
313
314static inline nir_ssa_def *
315nir_imm_floatN_t(nir_builder *build, double x, unsigned bit_size)
316{
317   nir_const_value v = nir_const_value_for_float(x, bit_size);
318   return nir_build_imm(build, 1, bit_size, &v);
319}
320
321static inline nir_ssa_def *
322nir_imm_float16(nir_builder *build, float x)
323{
324   return nir_imm_floatN_t(build, x, 16);
325}
326
327static inline nir_ssa_def *
328nir_imm_float(nir_builder *build, float x)
329{
330   return nir_imm_floatN_t(build, x, 32);
331}
332
333static inline nir_ssa_def *
334nir_imm_double(nir_builder *build, double x)
335{
336   return nir_imm_floatN_t(build, x, 64);
337}
338
339static inline nir_ssa_def *
340nir_imm_vec2(nir_builder *build, float x, float y)
341{
342   nir_const_value v[2] = {
343      nir_const_value_for_float(x, 32),
344      nir_const_value_for_float(y, 32),
345   };
346   return nir_build_imm(build, 2, 32, v);
347}
348
349static inline nir_ssa_def *
350nir_imm_vec3(nir_builder *build, float x, float y, float z)
351{
352   nir_const_value v[3] = {
353      nir_const_value_for_float(x, 32),
354      nir_const_value_for_float(y, 32),
355      nir_const_value_for_float(z, 32),
356   };
357   return nir_build_imm(build, 3, 32, v);
358}
359
360static inline nir_ssa_def *
361nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
362{
363   nir_const_value v[4] = {
364      nir_const_value_for_float(x, 32),
365      nir_const_value_for_float(y, 32),
366      nir_const_value_for_float(z, 32),
367      nir_const_value_for_float(w, 32),
368   };
369
370   return nir_build_imm(build, 4, 32, v);
371}
372
373static inline nir_ssa_def *
374nir_imm_vec4_16(nir_builder *build, float x, float y, float z, float w)
375{
376   nir_const_value v[4] = {
377      nir_const_value_for_float(x, 16),
378      nir_const_value_for_float(y, 16),
379      nir_const_value_for_float(z, 16),
380      nir_const_value_for_float(w, 16),
381   };
382
383   return nir_build_imm(build, 4, 16, v);
384}
385
386static inline nir_ssa_def *
387nir_imm_intN_t(nir_builder *build, uint64_t x, unsigned bit_size)
388{
389   nir_const_value v = nir_const_value_for_raw_uint(x, bit_size);
390   return nir_build_imm(build, 1, bit_size, &v);
391}
392
393static inline nir_ssa_def *
394nir_imm_int(nir_builder *build, int x)
395{
396   return nir_imm_intN_t(build, x, 32);
397}
398
399static inline nir_ssa_def *
400nir_imm_int64(nir_builder *build, int64_t x)
401{
402   return nir_imm_intN_t(build, x, 64);
403}
404
405static inline nir_ssa_def *
406nir_imm_ivec2(nir_builder *build, int x, int y)
407{
408   nir_const_value v[2] = {
409      nir_const_value_for_int(x, 32),
410      nir_const_value_for_int(y, 32),
411   };
412
413   return nir_build_imm(build, 2, 32, v);
414}
415
416static inline nir_ssa_def *
417nir_imm_ivec3(nir_builder *build, int x, int y, int z)
418{
419   nir_const_value v[3] = {
420      nir_const_value_for_int(x, 32),
421      nir_const_value_for_int(y, 32),
422      nir_const_value_for_int(z, 32),
423   };
424
425   return nir_build_imm(build, 3, 32, v);
426}
427
428static inline nir_ssa_def *
429nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
430{
431   nir_const_value v[4] = {
432      nir_const_value_for_int(x, 32),
433      nir_const_value_for_int(y, 32),
434      nir_const_value_for_int(z, 32),
435      nir_const_value_for_int(w, 32),
436   };
437
438   return nir_build_imm(build, 4, 32, v);
439}
440
441static inline nir_ssa_def *
442nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
443{
444   const nir_op_info *op_info = &nir_op_infos[instr->op];
445
446   instr->exact = build->exact;
447
448   /* Guess the number of components the destination temporary should have
449    * based on our input sizes, if it's not fixed for the op.
450    */
451   unsigned num_components = op_info->output_size;
452   if (num_components == 0) {
453      for (unsigned i = 0; i < op_info->num_inputs; i++) {
454         if (op_info->input_sizes[i] == 0)
455            num_components = MAX2(num_components,
456                                  instr->src[i].src.ssa->num_components);
457      }
458   }
459   assert(num_components != 0);
460
461   /* Figure out the bitwidth based on the source bitwidth if the instruction
462    * is variable-width.
463    */
464   unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
465   if (bit_size == 0) {
466      for (unsigned i = 0; i < op_info->num_inputs; i++) {
467         unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
468         if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
469            if (bit_size)
470               assert(src_bit_size == bit_size);
471            else
472               bit_size = src_bit_size;
473         } else {
474            assert(src_bit_size ==
475               nir_alu_type_get_type_size(op_info->input_types[i]));
476         }
477      }
478   }
479
480   /* When in doubt, assume 32. */
481   if (bit_size == 0)
482      bit_size = 32;
483
484   /* Make sure we don't swizzle from outside of our source vector (like if a
485    * scalar value was passed into a multiply with a vector).
486    */
487   for (unsigned i = 0; i < op_info->num_inputs; i++) {
488      for (unsigned j = instr->src[i].src.ssa->num_components;
489           j < NIR_MAX_VEC_COMPONENTS; j++) {
490         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
491      }
492   }
493
494   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
495                     bit_size, NULL);
496   instr->dest.write_mask = (1 << num_components) - 1;
497
498   nir_builder_instr_insert(build, &instr->instr);
499
500   return &instr->dest.dest.ssa;
501}
502
503static inline nir_ssa_def *
504nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
505              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
506{
507   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
508   if (!instr)
509      return NULL;
510
511   instr->src[0].src = nir_src_for_ssa(src0);
512   if (src1)
513      instr->src[1].src = nir_src_for_ssa(src1);
514   if (src2)
515      instr->src[2].src = nir_src_for_ssa(src2);
516   if (src3)
517      instr->src[3].src = nir_src_for_ssa(src3);
518
519   return nir_builder_alu_instr_finish_and_insert(build, instr);
520}
521
522/* for the couple special cases with more than 4 src args: */
523static inline nir_ssa_def *
524nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)
525{
526   const nir_op_info *op_info = &nir_op_infos[op];
527   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
528   if (!instr)
529      return NULL;
530
531   for (unsigned i = 0; i < op_info->num_inputs; i++)
532      instr->src[i].src = nir_src_for_ssa(srcs[i]);
533
534   return nir_builder_alu_instr_finish_and_insert(build, instr);
535}
536
537/* Generic builder for system values. */
538static inline nir_ssa_def *
539nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
540                      unsigned num_components, unsigned bit_size)
541{
542   nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
543   if (nir_intrinsic_infos[op].dest_components > 0)
544      assert(num_components == nir_intrinsic_infos[op].dest_components);
545   else
546      load->num_components = num_components;
547   load->const_index[0] = index;
548
549   nir_ssa_dest_init(&load->instr, &load->dest,
550                     num_components, bit_size, NULL);
551   nir_builder_instr_insert(build, &load->instr);
552   return &load->dest.ssa;
553}
554
555#include "nir_builder_opcodes.h"
556#undef nir_deref_mode_is
557
558static inline nir_ssa_def *
559nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
560{
561   return nir_build_alu_src_arr(build, nir_op_vec(num_components), comp);
562}
563
564static inline nir_ssa_def *
565nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
566{
567   assert(!src.abs && !src.negate);
568   if (src.src.is_ssa && src.src.ssa->num_components == num_components) {
569      bool any_swizzles = false;
570      for (unsigned i = 0; i < num_components; i++) {
571         if (src.swizzle[i] != i)
572            any_swizzles = true;
573      }
574      if (!any_swizzles)
575         return src.src.ssa;
576   }
577
578   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
579   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
580                     nir_src_bit_size(src.src), NULL);
581   mov->exact = build->exact;
582   mov->dest.write_mask = (1 << num_components) - 1;
583   mov->src[0] = src;
584   nir_builder_instr_insert(build, &mov->instr);
585
586   return &mov->dest.dest.ssa;
587}
588
589/**
590 * Construct a mov that reswizzles the source's components.
591 */
592static inline nir_ssa_def *
593nir_swizzle(nir_builder *build, nir_ssa_def *src, const unsigned *swiz,
594            unsigned num_components)
595{
596   assert(num_components <= NIR_MAX_VEC_COMPONENTS);
597   nir_alu_src alu_src = { NIR_SRC_INIT };
598   alu_src.src = nir_src_for_ssa(src);
599
600   bool is_identity_swizzle = true;
601   for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; i++) {
602      if (swiz[i] != i)
603         is_identity_swizzle = false;
604      alu_src.swizzle[i] = swiz[i];
605   }
606
607   if (num_components == src->num_components && is_identity_swizzle)
608      return src;
609
610   return nir_mov_alu(build, alu_src, num_components);
611}
612
613/* Selects the right fdot given the number of components in each source. */
614static inline nir_ssa_def *
615nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
616{
617   assert(src0->num_components == src1->num_components);
618   switch (src0->num_components) {
619   case 1: return nir_fmul(build, src0, src1);
620   case 2: return nir_fdot2(build, src0, src1);
621   case 3: return nir_fdot3(build, src0, src1);
622   case 4: return nir_fdot4(build, src0, src1);
623   case 5: return nir_fdot5(build, src0, src1);
624   case 8: return nir_fdot8(build, src0, src1);
625   case 16: return nir_fdot16(build, src0, src1);
626   default:
627      unreachable("bad component size");
628   }
629
630   return NULL;
631}
632
633static inline nir_ssa_def *
634nir_ball_iequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
635{
636   switch (src0->num_components) {
637   case 1: return nir_ieq(b, src0, src1);
638   case 2: return nir_ball_iequal2(b, src0, src1);
639   case 3: return nir_ball_iequal3(b, src0, src1);
640   case 4: return nir_ball_iequal4(b, src0, src1);
641   case 5: return nir_ball_iequal5(b, src0, src1);
642   case 8: return nir_ball_iequal8(b, src0, src1);
643   case 16: return nir_ball_iequal16(b, src0, src1);
644   default:
645      unreachable("bad component size");
646   }
647}
648
649static inline nir_ssa_def *
650nir_ball(nir_builder *b, nir_ssa_def *src)
651{
652   return nir_ball_iequal(b, src, nir_imm_true(b));
653}
654
655static inline nir_ssa_def *
656nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
657{
658   switch (src0->num_components) {
659   case 1: return nir_ine(b, src0, src1);
660   case 2: return nir_bany_inequal2(b, src0, src1);
661   case 3: return nir_bany_inequal3(b, src0, src1);
662   case 4: return nir_bany_inequal4(b, src0, src1);
663   case 5: return nir_bany_inequal5(b, src0, src1);
664   case 8: return nir_bany_inequal8(b, src0, src1);
665   case 16: return nir_bany_inequal16(b, src0, src1);
666   default:
667      unreachable("bad component size");
668   }
669}
670
671static inline nir_ssa_def *
672nir_bany(nir_builder *b, nir_ssa_def *src)
673{
674   return nir_bany_inequal(b, src, nir_imm_false(b));
675}
676
677static inline nir_ssa_def *
678nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
679{
680   return nir_swizzle(b, def, &c, 1);
681}
682
683static inline nir_ssa_def *
684nir_channels(nir_builder *b, nir_ssa_def *def, nir_component_mask_t mask)
685{
686   unsigned num_channels = 0, swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
687
688   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
689      if ((mask & (1 << i)) == 0)
690         continue;
691      swizzle[num_channels++] = i;
692   }
693
694   return nir_swizzle(b, def, swizzle, num_channels);
695}
696
697static inline nir_ssa_def *
698_nir_select_from_array_helper(nir_builder *b, nir_ssa_def **arr,
699                              nir_ssa_def *idx,
700                              unsigned start, unsigned end)
701{
702   if (start == end - 1) {
703      return arr[start];
704   } else {
705      unsigned mid = start + (end - start) / 2;
706      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_intN_t(b, mid, idx->bit_size)),
707                       _nir_select_from_array_helper(b, arr, idx, start, mid),
708                       _nir_select_from_array_helper(b, arr, idx, mid, end));
709   }
710}
711
712static inline nir_ssa_def *
713nir_select_from_ssa_def_array(nir_builder *b, nir_ssa_def **arr,
714                              unsigned arr_len, nir_ssa_def *idx)
715{
716   return _nir_select_from_array_helper(b, arr, idx, 0, arr_len);
717}
718
719static inline nir_ssa_def *
720nir_vector_extract(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c)
721{
722   nir_src c_src = nir_src_for_ssa(c);
723   if (nir_src_is_const(c_src)) {
724      uint64_t c_const = nir_src_as_uint(c_src);
725      if (c_const < vec->num_components)
726         return nir_channel(b, vec, c_const);
727      else
728         return nir_ssa_undef(b, 1, vec->bit_size);
729   } else {
730      nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
731      for (unsigned i = 0; i < vec->num_components; i++)
732         comps[i] = nir_channel(b, vec, i);
733      return nir_select_from_ssa_def_array(b, comps, vec->num_components, c);
734   }
735}
736
737/** Replaces the component of `vec` specified by `c` with `scalar` */
738static inline nir_ssa_def *
739nir_vector_insert_imm(nir_builder *b, nir_ssa_def *vec,
740                      nir_ssa_def *scalar, unsigned c)
741{
742   assert(scalar->num_components == 1);
743   assert(c < vec->num_components);
744
745   nir_op vec_op = nir_op_vec(vec->num_components);
746   nir_alu_instr *vec_instr = nir_alu_instr_create(b->shader, vec_op);
747
748   for (unsigned i = 0; i < vec->num_components; i++) {
749      if (i == c) {
750         vec_instr->src[i].src = nir_src_for_ssa(scalar);
751         vec_instr->src[i].swizzle[0] = 0;
752      } else {
753         vec_instr->src[i].src = nir_src_for_ssa(vec);
754         vec_instr->src[i].swizzle[0] = i;
755      }
756   }
757
758   return nir_builder_alu_instr_finish_and_insert(b, vec_instr);
759}
760
761/** Replaces the component of `vec` specified by `c` with `scalar` */
762static inline nir_ssa_def *
763nir_vector_insert(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *scalar,
764                  nir_ssa_def *c)
765{
766   assert(scalar->num_components == 1);
767   assert(c->num_components == 1);
768
769   nir_src c_src = nir_src_for_ssa(c);
770   if (nir_src_is_const(c_src)) {
771      uint64_t c_const = nir_src_as_uint(c_src);
772      if (c_const < vec->num_components)
773         return nir_vector_insert_imm(b, vec, scalar, c_const);
774      else
775         return vec;
776   } else {
777      nir_const_value per_comp_idx_const[NIR_MAX_VEC_COMPONENTS];
778      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
779         per_comp_idx_const[i] = nir_const_value_for_int(i, c->bit_size);
780      nir_ssa_def *per_comp_idx =
781         nir_build_imm(b, vec->num_components,
782                       c->bit_size, per_comp_idx_const);
783
784      /* nir_builder will automatically splat out scalars to vectors so an
785       * insert is as simple as "if I'm the channel, replace me with the
786       * scalar."
787       */
788      return nir_bcsel(b, nir_ieq(b, c, per_comp_idx), scalar, vec);
789   }
790}
791
792static inline nir_ssa_def *
793nir_i2i(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
794{
795   if (x->bit_size == dest_bit_size)
796      return x;
797
798   switch (dest_bit_size) {
799   case 64: return nir_i2i64(build, x);
800   case 32: return nir_i2i32(build, x);
801   case 16: return nir_i2i16(build, x);
802   case 8:  return nir_i2i8(build, x);
803   default: unreachable("Invalid bit size");
804   }
805}
806
807static inline nir_ssa_def *
808nir_u2u(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
809{
810   if (x->bit_size == dest_bit_size)
811      return x;
812
813   switch (dest_bit_size) {
814   case 64: return nir_u2u64(build, x);
815   case 32: return nir_u2u32(build, x);
816   case 16: return nir_u2u16(build, x);
817   case 8:  return nir_u2u8(build, x);
818   default: unreachable("Invalid bit size");
819   }
820}
821
822static inline nir_ssa_def *
823nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
824{
825   assert(x->bit_size <= 64);
826   y &= BITFIELD64_MASK(x->bit_size);
827
828   if (y == 0) {
829      return x;
830   } else {
831      return nir_iadd(build, x, nir_imm_intN_t(build, y, x->bit_size));
832   }
833}
834
835static inline nir_ssa_def *
836nir_iadd_imm_nuw(nir_builder *b, nir_ssa_def *x, uint64_t y)
837{
838   nir_ssa_def *d = nir_iadd_imm(b, x, y);
839   if (d != x && d->parent_instr->type == nir_instr_type_alu)
840      nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
841   return d;
842}
843
844static inline nir_ssa_def *
845nir_iadd_nuw(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
846{
847   nir_ssa_def *d = nir_iadd(b, x, y);
848   nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
849   return d;
850}
851
852static inline nir_ssa_def *
853nir_ieq_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
854{
855   return nir_ieq(build, x, nir_imm_intN_t(build, y, x->bit_size));
856}
857
858/* Use nir_iadd(x, -y) for reversing parameter ordering */
859static inline nir_ssa_def *
860nir_isub_imm(nir_builder *build, uint64_t y, nir_ssa_def *x)
861{
862   return nir_isub(build, nir_imm_intN_t(build, y, x->bit_size), x);
863}
864
865static inline nir_ssa_def *
866_nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
867{
868   assert(x->bit_size <= 64);
869   y &= BITFIELD64_MASK(x->bit_size);
870
871   if (y == 0) {
872      return nir_imm_intN_t(build, 0, x->bit_size);
873   } else if (y == 1) {
874      return x;
875   } else if (!build->shader->options->lower_bitops &&
876              util_is_power_of_two_or_zero64(y)) {
877      return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
878   } else if (amul) {
879      return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
880   } else {
881      return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
882   }
883}
884
885static inline nir_ssa_def *
886nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
887{
888   return _nir_mul_imm(build, x, y, false);
889}
890
891static inline nir_ssa_def *
892nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
893{
894   return _nir_mul_imm(build, x, y, true);
895}
896
897static inline nir_ssa_def *
898nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
899{
900   return nir_fadd(build, x, nir_imm_floatN_t(build, y, x->bit_size));
901}
902
903static inline nir_ssa_def *
904nir_fmul_imm(nir_builder *build, nir_ssa_def *x, double y)
905{
906   return nir_fmul(build, x, nir_imm_floatN_t(build, y, x->bit_size));
907}
908
909static inline nir_ssa_def *
910nir_iand_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
911{
912   assert(x->bit_size <= 64);
913   y &= BITFIELD64_MASK(x->bit_size);
914
915   if (y == 0) {
916      return nir_imm_intN_t(build, 0, x->bit_size);
917   } else if (y == BITFIELD64_MASK(x->bit_size)) {
918      return x;
919   } else {
920      return nir_iand(build, x, nir_imm_intN_t(build, y, x->bit_size));
921   }
922}
923
924static inline nir_ssa_def *
925nir_ishr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
926{
927   if (y == 0) {
928      return x;
929   } else {
930      return nir_ishr(build, x, nir_imm_int(build, y));
931   }
932}
933
934static inline nir_ssa_def *
935nir_ushr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
936{
937   if (y == 0) {
938      return x;
939   } else {
940      return nir_ushr(build, x, nir_imm_int(build, y));
941   }
942}
943
944static inline nir_ssa_def *
945nir_udiv_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
946{
947   assert(x->bit_size <= 64);
948   y &= BITFIELD64_MASK(x->bit_size);
949
950   if (y == 1) {
951      return x;
952   } else if (util_is_power_of_two_nonzero(y)) {
953      return nir_ushr_imm(build, x, ffsll(y) - 1);
954   } else {
955      return nir_udiv(build, x, nir_imm_intN_t(build, y, x->bit_size));
956   }
957}
958
959static inline nir_ssa_def *
960nir_fclamp(nir_builder *b,
961           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
962{
963   return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
964}
965
966static inline nir_ssa_def *
967nir_iclamp(nir_builder *b,
968           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
969{
970   return nir_imin(b, nir_imax(b, x, min_val), max_val);
971}
972
973static inline nir_ssa_def *
974nir_uclamp(nir_builder *b,
975           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
976{
977   return nir_umin(b, nir_umax(b, x, min_val), max_val);
978}
979
980static inline nir_ssa_def *
981nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
982{
983   if (build->shader->options->avoid_ternary_with_two_constants)
984      return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
985   else
986      return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
987                             nir_imm_floatN_t(build, src2, src0->bit_size));
988}
989
990static inline nir_ssa_def *
991nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
992{
993   return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
994}
995
996static inline nir_ssa_def *
997nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
998{
999   return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
1000}
1001
1002static inline nir_ssa_def *
1003nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
1004               nir_ssa_def *src2)
1005{
1006   return nir_ffma(build, nir_fneg(build, src1), src2, src0);
1007}
1008
1009static inline nir_ssa_def *
1010nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1011{
1012   assert(src->num_components * src->bit_size == dest_bit_size);
1013
1014   switch (dest_bit_size) {
1015   case 64:
1016      switch (src->bit_size) {
1017      case 32: return nir_pack_64_2x32(b, src);
1018      case 16: return nir_pack_64_4x16(b, src);
1019      default: break;
1020      }
1021      break;
1022
1023   case 32:
1024      if (src->bit_size == 16)
1025         return nir_pack_32_2x16(b, src);
1026      break;
1027
1028   default:
1029      break;
1030   }
1031
1032   /* If we got here, we have no dedicated unpack opcode. */
1033   nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
1034   for (unsigned i = 0; i < src->num_components; i++) {
1035      nir_ssa_def *val = nir_u2u(b, nir_channel(b, src, i), dest_bit_size);
1036      val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
1037      dest = nir_ior(b, dest, val);
1038   }
1039   return dest;
1040}
1041
1042static inline nir_ssa_def *
1043nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1044{
1045   assert(src->num_components == 1);
1046   assert(src->bit_size > dest_bit_size);
1047   const unsigned dest_num_components = src->bit_size / dest_bit_size;
1048   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
1049
1050   switch (src->bit_size) {
1051   case 64:
1052      switch (dest_bit_size) {
1053      case 32: return nir_unpack_64_2x32(b, src);
1054      case 16: return nir_unpack_64_4x16(b, src);
1055      default: break;
1056      }
1057      break;
1058
1059   case 32:
1060      if (dest_bit_size == 16)
1061         return nir_unpack_32_2x16(b, src);
1062      break;
1063
1064   default:
1065      break;
1066   }
1067
1068   /* If we got here, we have no dedicated unpack opcode. */
1069   nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
1070   for (unsigned i = 0; i < dest_num_components; i++) {
1071      nir_ssa_def *val = nir_ushr_imm(b, src, i * dest_bit_size);
1072      dest_comps[i] = nir_u2u(b, val, dest_bit_size);
1073   }
1074   return nir_vec(b, dest_comps, dest_num_components);
1075}
1076
/**
 * Treats srcs as if it's one big blob of bits and extracts the range of bits
 * given by
 *
 *       [first_bit, first_bit + dest_num_components * dest_bit_size)
 *
 * The range can have any alignment or size as long as it's an integer number
 * of destination components and fits inside the concatenated sources.
 *
 * The sources are concatenated in array order: bit 0 of the blob is the start
 * of component 0 of srcs[0], and each following component/source continues
 * where the previous one ended.
 *
 * The work is done at a "common" bit size, the smallest of the destination
 * bit size, every source's bit size and the alignment of first_bit.  Sources
 * wider than that are unpacked, and the selected pieces are re-packed if the
 * destination is wider.  The common bit size must be at least 8 (asserted).
 *
 * \param b                    builder used to emit instructions
 * \param srcs                 array of num_srcs source values
 * \param num_srcs             number of entries in srcs
 * \param first_bit            bit offset of the range within the blob
 * \param dest_num_components  number of components in the result
 * \param dest_bit_size        bit size of each result component
 * \return a vector of dest_num_components components of dest_bit_size bits
 *
 * TODO: The one caveat here is that we can't handle byte alignment if 64-bit
 * values are involved because that would require pack/unpack to/from a vec8
 * which NIR currently does not support.
 */
static inline nir_ssa_def *
nir_extract_bits(nir_builder *b, nir_ssa_def **srcs, unsigned num_srcs,
                 unsigned first_bit,
                 unsigned dest_num_components, unsigned dest_bit_size)
{
   const unsigned num_bits = dest_num_components * dest_bit_size;

   /* Figure out the common bit size */
   unsigned common_bit_size = dest_bit_size;
   for (unsigned i = 0; i < num_srcs; i++)
      common_bit_size = MIN2(common_bit_size, srcs[i]->bit_size);
   /* The lowest set bit of first_bit is the largest power of two that
    * first_bit is aligned to; pieces can be no bigger than that.
    */
   if (first_bit > 0)
      common_bit_size = MIN2(common_bit_size, (1u << (ffs(first_bit) - 1)));

   /* We don't want to have to deal with 1-bit values */
   assert(common_bit_size >= 8);

   /* One slot per common-size piece of the result; sized as eight slots per
    * vector component (sizeof(uint64_t) == 8).
    */
   nir_ssa_def *common_comps[NIR_MAX_VEC_COMPONENTS * sizeof(uint64_t)];
   assert(num_bits / common_bit_size <= ARRAY_SIZE(common_comps));

   /* First, unpack to the common bit size and select the components from the
    * source.
    */
   int src_idx = -1;
   /* [src_start_bit, src_end_bit) is the blob range covered by srcs[src_idx] */
   unsigned src_start_bit = 0;
   unsigned src_end_bit = 0;
   for (unsigned i = 0; i < num_bits / common_bit_size; i++) {
      const unsigned bit = first_bit + (i * common_bit_size);
      /* Advance to the source that contains this bit */
      while (bit >= src_end_bit) {
         src_idx++;
         assert(src_idx < (int) num_srcs);
         src_start_bit = src_end_bit;
         src_end_bit += srcs[src_idx]->bit_size *
                        srcs[src_idx]->num_components;
      }
      /* The piece must lie entirely within the current source */
      assert(bit >= src_start_bit);
      assert(bit + common_bit_size <= src_end_bit);
      const unsigned rel_bit = bit - src_start_bit;
      const unsigned src_bit_size = srcs[src_idx]->bit_size;

      /* Pick the source component holding the piece ... */
      nir_ssa_def *comp = nir_channel(b, srcs[src_idx],
                                      rel_bit / src_bit_size);
      /* ... and, if that component is wider than the piece, split it and
       * take the sub-component at the right offset.
       */
      if (srcs[src_idx]->bit_size > common_bit_size) {
         nir_ssa_def *unpacked = nir_unpack_bits(b, comp, common_bit_size);
         comp = nir_channel(b, unpacked, (rel_bit % src_bit_size) /
                                         common_bit_size);
      }
      common_comps[i] = comp;
   }

   /* Now, re-pack the destination if we have to */
   if (dest_bit_size > common_bit_size) {
      /* Each destination component is built from this many consecutive
       * common-size pieces.
       */
      unsigned common_per_dest = dest_bit_size / common_bit_size;
      nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 0; i < dest_num_components; i++) {
         nir_ssa_def *unpacked = nir_vec(b, common_comps + i * common_per_dest,
                                         common_per_dest);
         dest_comps[i] = nir_pack_bits(b, unpacked, dest_bit_size);
      }
      return nir_vec(b, dest_comps, dest_num_components);
   } else {
      /* The pieces already are the destination components */
      assert(dest_bit_size == common_bit_size);
      return nir_vec(b, common_comps, dest_num_components);
   }
}
1155
1156static inline nir_ssa_def *
1157nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1158{
1159   assert((src->bit_size * src->num_components) % dest_bit_size == 0);
1160   const unsigned dest_num_components =
1161      (src->bit_size * src->num_components) / dest_bit_size;
1162   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
1163
1164   return nir_extract_bits(b, &src, 1, 0, dest_num_components, dest_bit_size);
1165}
1166
1167/**
1168 * Pad a value to N components with undefs of matching bit size.
1169 * If the value already contains >= num_components, it is returned without change.
1170 */
1171static inline nir_ssa_def *
1172nir_pad_vector(nir_builder *b, nir_ssa_def *src, unsigned num_components)
1173{
1174   assert(src->num_components <= num_components);
1175   if (src->num_components == num_components)
1176      return src;
1177
1178   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1179   nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);
1180   unsigned i = 0;
1181   for (; i < src->num_components; i++)
1182      components[i] = nir_channel(b, src, i);
1183   for (; i < num_components; i++)
1184      components[i] = undef;
1185
1186   return nir_vec(b, components, num_components);
1187}
1188
1189/**
1190 * Pad a value to N components with copies of the given immediate of matching
1191 * bit size. If the value already contains >= num_components, it is returned
1192 * without change.
1193 */
1194static inline nir_ssa_def *
1195nir_pad_vector_imm_int(nir_builder *b, nir_ssa_def *src, uint64_t imm_val,
1196                       unsigned num_components)
1197{
1198   assert(src->num_components <= num_components);
1199   if (src->num_components == num_components)
1200      return src;
1201
1202   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1203   nir_ssa_def *imm = nir_imm_intN_t(b, imm_val, src->bit_size);
1204   unsigned i = 0;
1205   for (; i < src->num_components; i++)
1206      components[i] = nir_channel(b, src, i);
1207   for (; i < num_components; i++)
1208      components[i] = imm;
1209
1210   return nir_vec(b, components, num_components);
1211}
1212
1213/**
1214 * Pad a value to 4 components with undefs of matching bit size.
1215 * If the value already contains >= 4 components, it is returned without change.
1216 */
1217static inline nir_ssa_def *
1218nir_pad_vec4(nir_builder *b, nir_ssa_def *src)
1219{
1220   return nir_pad_vector(b, src, 4);
1221}
1222
1223/**
1224 * Turns a nir_src into a nir_ssa_def * so it can be passed to
1225 * nir_build_alu()-based builder calls.
1226 *
1227 * See nir_ssa_for_alu_src() for alu instructions.
1228 */
1229static inline nir_ssa_def *
1230nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
1231{
1232   if (src.is_ssa && src.ssa->num_components == num_components)
1233      return src.ssa;
1234
1235   assert((unsigned)num_components <= nir_src_num_components(src));
1236
1237   nir_alu_src alu = { NIR_SRC_INIT };
1238   alu.src = src;
1239   for (int j = 0; j < NIR_MAX_VEC_COMPONENTS; j++)
1240      alu.swizzle[j] = j;
1241
1242   return nir_mov_alu(build, alu, num_components);
1243}
1244
1245/**
1246 * Similar to nir_ssa_for_src(), but for alu srcs, respecting the
1247 * nir_alu_src's swizzle.
1248 */
1249static inline nir_ssa_def *
1250nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
1251{
1252   if (nir_alu_src_is_trivial_ssa(instr, srcn))
1253      return instr->src[srcn].src.ssa;
1254
1255   nir_alu_src *src = &instr->src[srcn];
1256   unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
1257   return nir_mov_alu(build, *src, num_components);
1258}
1259
1260static inline unsigned
1261nir_get_ptr_bitsize(nir_shader *shader)
1262{
1263   if (shader->info.stage == MESA_SHADER_KERNEL)
1264      return shader->info.cs.ptr_size;
1265   return 32;
1266}
1267
1268static inline nir_deref_instr *
1269nir_build_deref_var(nir_builder *build, nir_variable *var)
1270{
1271   nir_deref_instr *deref =
1272      nir_deref_instr_create(build->shader, nir_deref_type_var);
1273
1274   deref->modes = (nir_variable_mode)var->data.mode;
1275   deref->type = var->type;
1276   deref->var = var;
1277
1278   nir_ssa_dest_init(&deref->instr, &deref->dest, 1,
1279                     nir_get_ptr_bitsize(build->shader), NULL);
1280
1281   nir_builder_instr_insert(build, &deref->instr);
1282
1283   return deref;
1284}
1285
1286static inline nir_deref_instr *
1287nir_build_deref_array(nir_builder *build, nir_deref_instr *parent,
1288                      nir_ssa_def *index)
1289{
1290   assert(glsl_type_is_array(parent->type) ||
1291          glsl_type_is_matrix(parent->type) ||
1292          glsl_type_is_vector(parent->type));
1293
1294   assert(index->bit_size == parent->dest.ssa.bit_size);
1295
1296   nir_deref_instr *deref =
1297      nir_deref_instr_create(build->shader, nir_deref_type_array);
1298
1299   deref->modes = parent->modes;
1300   deref->type = glsl_get_array_element(parent->type);
1301   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1302   deref->arr.index = nir_src_for_ssa(index);
1303
1304   nir_ssa_dest_init(&deref->instr, &deref->dest,
1305                     parent->dest.ssa.num_components,
1306                     parent->dest.ssa.bit_size, NULL);
1307
1308   nir_builder_instr_insert(build, &deref->instr);
1309
1310   return deref;
1311}
1312
1313static inline nir_deref_instr *
1314nir_build_deref_array_imm(nir_builder *build, nir_deref_instr *parent,
1315                          int64_t index)
1316{
1317   assert(parent->dest.is_ssa);
1318   nir_ssa_def *idx_ssa = nir_imm_intN_t(build, index,
1319                                         parent->dest.ssa.bit_size);
1320
1321   return nir_build_deref_array(build, parent, idx_ssa);
1322}
1323
1324static inline nir_deref_instr *
1325nir_build_deref_ptr_as_array(nir_builder *build, nir_deref_instr *parent,
1326                             nir_ssa_def *index)
1327{
1328   assert(parent->deref_type == nir_deref_type_array ||
1329          parent->deref_type == nir_deref_type_ptr_as_array ||
1330          parent->deref_type == nir_deref_type_cast);
1331
1332   assert(index->bit_size == parent->dest.ssa.bit_size);
1333
1334   nir_deref_instr *deref =
1335      nir_deref_instr_create(build->shader, nir_deref_type_ptr_as_array);
1336
1337   deref->modes = parent->modes;
1338   deref->type = parent->type;
1339   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1340   deref->arr.index = nir_src_for_ssa(index);
1341
1342   nir_ssa_dest_init(&deref->instr, &deref->dest,
1343                     parent->dest.ssa.num_components,
1344                     parent->dest.ssa.bit_size, NULL);
1345
1346   nir_builder_instr_insert(build, &deref->instr);
1347
1348   return deref;
1349}
1350
1351static inline nir_deref_instr *
1352nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent)
1353{
1354   assert(glsl_type_is_array(parent->type) ||
1355          glsl_type_is_matrix(parent->type));
1356
1357   nir_deref_instr *deref =
1358      nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard);
1359
1360   deref->modes = parent->modes;
1361   deref->type = glsl_get_array_element(parent->type);
1362   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1363
1364   nir_ssa_dest_init(&deref->instr, &deref->dest,
1365                     parent->dest.ssa.num_components,
1366                     parent->dest.ssa.bit_size, NULL);
1367
1368   nir_builder_instr_insert(build, &deref->instr);
1369
1370   return deref;
1371}
1372
1373static inline nir_deref_instr *
1374nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent,
1375                       unsigned index)
1376{
1377   assert(glsl_type_is_struct_or_ifc(parent->type));
1378
1379   nir_deref_instr *deref =
1380      nir_deref_instr_create(build->shader, nir_deref_type_struct);
1381
1382   deref->modes = parent->modes;
1383   deref->type = glsl_get_struct_field(parent->type, index);
1384   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1385   deref->strct.index = index;
1386
1387   nir_ssa_dest_init(&deref->instr, &deref->dest,
1388                     parent->dest.ssa.num_components,
1389                     parent->dest.ssa.bit_size, NULL);
1390
1391   nir_builder_instr_insert(build, &deref->instr);
1392
1393   return deref;
1394}
1395
1396static inline nir_deref_instr *
1397nir_build_deref_cast(nir_builder *build, nir_ssa_def *parent,
1398                     nir_variable_mode modes, const struct glsl_type *type,
1399                     unsigned ptr_stride)
1400{
1401   nir_deref_instr *deref =
1402      nir_deref_instr_create(build->shader, nir_deref_type_cast);
1403
1404   deref->modes = modes;
1405   deref->type = type;
1406   deref->parent = nir_src_for_ssa(parent);
1407   deref->cast.ptr_stride = ptr_stride;
1408
1409   nir_ssa_dest_init(&deref->instr, &deref->dest,
1410                     parent->num_components, parent->bit_size, NULL);
1411
1412   nir_builder_instr_insert(build, &deref->instr);
1413
1414   return deref;
1415}
1416
1417static inline nir_deref_instr *
1418nir_alignment_deref_cast(nir_builder *build, nir_deref_instr *parent,
1419                         uint32_t align_mul, uint32_t align_offset)
1420{
1421   nir_deref_instr *deref =
1422      nir_deref_instr_create(build->shader, nir_deref_type_cast);
1423
1424   deref->modes = parent->modes;
1425   deref->type = parent->type;
1426   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1427   deref->cast.ptr_stride = nir_deref_instr_array_stride(deref);
1428   deref->cast.align_mul = align_mul;
1429   deref->cast.align_offset = align_offset;
1430
1431   nir_ssa_dest_init(&deref->instr, &deref->dest,
1432                     parent->dest.ssa.num_components,
1433                     parent->dest.ssa.bit_size, NULL);
1434
1435   nir_builder_instr_insert(build, &deref->instr);
1436
1437   return deref;
1438}
1439
1440/** Returns a deref that follows another but starting from the given parent
1441 *
1442 * The new deref will be the same type and take the same array or struct index
1443 * as the leader deref but it may have a different parent.  This is very
1444 * useful for walking deref paths.
1445 */
1446static inline nir_deref_instr *
1447nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent,
1448                         nir_deref_instr *leader)
1449{
1450   /* If the derefs would have the same parent, don't make a new one */
1451   assert(leader->parent.is_ssa);
1452   if (leader->parent.ssa == &parent->dest.ssa)
1453      return leader;
1454
1455   UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent);
1456
1457   switch (leader->deref_type) {
1458   case nir_deref_type_var:
1459      unreachable("A var dereference cannot have a parent");
1460      break;
1461
1462   case nir_deref_type_array:
1463   case nir_deref_type_array_wildcard:
1464      assert(glsl_type_is_matrix(parent->type) ||
1465             glsl_type_is_array(parent->type) ||
1466             (leader->deref_type == nir_deref_type_array &&
1467              glsl_type_is_vector(parent->type)));
1468      assert(glsl_get_length(parent->type) ==
1469             glsl_get_length(leader_parent->type));
1470
1471      if (leader->deref_type == nir_deref_type_array) {
1472         assert(leader->arr.index.is_ssa);
1473         nir_ssa_def *index = nir_i2i(b, leader->arr.index.ssa,
1474                                         parent->dest.ssa.bit_size);
1475         return nir_build_deref_array(b, parent, index);
1476      } else {
1477         return nir_build_deref_array_wildcard(b, parent);
1478      }
1479
1480   case nir_deref_type_struct:
1481      assert(glsl_type_is_struct_or_ifc(parent->type));
1482      assert(glsl_get_length(parent->type) ==
1483             glsl_get_length(leader_parent->type));
1484
1485      return nir_build_deref_struct(b, parent, leader->strct.index);
1486
1487   default:
1488      unreachable("Invalid deref instruction type");
1489   }
1490}
1491
1492static inline nir_ssa_def *
1493nir_load_reg(nir_builder *build, nir_register *reg)
1494{
1495   return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components);
1496}
1497
1498static inline void
1499nir_store_reg(nir_builder *build, nir_register *reg,
1500              nir_ssa_def *def, nir_component_mask_t write_mask)
1501{
1502   assert(reg->num_components == def->num_components);
1503   assert(reg->bit_size == def->bit_size);
1504
1505   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
1506   mov->src[0].src = nir_src_for_ssa(def);
1507   mov->dest.dest = nir_dest_for_reg(reg);
1508   mov->dest.write_mask = write_mask & BITFIELD_MASK(reg->num_components);
1509   nir_builder_instr_insert(build, &mov->instr);
1510}
1511
1512static inline nir_ssa_def *
1513nir_load_deref_with_access(nir_builder *build, nir_deref_instr *deref,
1514                           enum gl_access_qualifier access)
1515{
1516   return nir_build_load_deref(build, glsl_get_vector_elements(deref->type),
1517                               glsl_get_bit_size(deref->type), &deref->dest.ssa,
1518                               access);
1519}
1520
1521#undef nir_load_deref
1522static inline nir_ssa_def *
1523nir_load_deref(nir_builder *build, nir_deref_instr *deref)
1524{
1525   return nir_load_deref_with_access(build, deref, (enum gl_access_qualifier)0);
1526}
1527
1528static inline void
1529nir_store_deref_with_access(nir_builder *build, nir_deref_instr *deref,
1530                            nir_ssa_def *value, unsigned writemask,
1531                            enum gl_access_qualifier access)
1532{
1533   writemask &= (1u << value->num_components) - 1u;
1534   nir_build_store_deref(build, &deref->dest.ssa, value, writemask, access);
1535}
1536
1537#undef nir_store_deref
1538static inline void
1539nir_store_deref(nir_builder *build, nir_deref_instr *deref,
1540                nir_ssa_def *value, unsigned writemask)
1541{
1542   nir_store_deref_with_access(build, deref, value, writemask,
1543                               (enum gl_access_qualifier)0);
1544}
1545
1546static inline void
1547nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest,
1548                           nir_deref_instr *src,
1549                           enum gl_access_qualifier dest_access,
1550                           enum gl_access_qualifier src_access)
1551{
1552   nir_build_copy_deref(build, &dest->dest.ssa, &src->dest.ssa, dest_access, src_access);
1553}
1554
1555#undef nir_copy_deref
1556static inline void
1557nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src)
1558{
1559   nir_copy_deref_with_access(build, dest, src,
1560                              (enum gl_access_qualifier) 0,
1561                              (enum gl_access_qualifier) 0);
1562}
1563
1564static inline void
1565nir_memcpy_deref_with_access(nir_builder *build, nir_deref_instr *dest,
1566                             nir_deref_instr *src, nir_ssa_def *size,
1567                             enum gl_access_qualifier dest_access,
1568                             enum gl_access_qualifier src_access)
1569{
1570   nir_build_memcpy_deref(build, &dest->dest.ssa, &src->dest.ssa,
1571                          size, dest_access, src_access);
1572}
1573
1574#undef nir_memcpy_deref
1575static inline void
1576nir_memcpy_deref(nir_builder *build, nir_deref_instr *dest,
1577                 nir_deref_instr *src, nir_ssa_def *size)
1578{
1579   nir_memcpy_deref_with_access(build, dest, src, size,
1580                                (enum gl_access_qualifier)0,
1581                                (enum gl_access_qualifier)0);
1582}
1583
1584static inline nir_ssa_def *
1585nir_load_var(nir_builder *build, nir_variable *var)
1586{
1587   return nir_load_deref(build, nir_build_deref_var(build, var));
1588}
1589
1590static inline void
1591nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
1592              unsigned writemask)
1593{
1594   nir_store_deref(build, nir_build_deref_var(build, var), value, writemask);
1595}
1596
/**
 * Copies the contents of variable src into variable dest.
 *
 * A fresh variable deref is built for each variable and the pair is handed
 * to nir_copy_deref().
 */
static inline void
nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
{
   /* Both derefs are built as call arguments, so the order in which the two
    * deref instructions are emitted is whatever the compiler picks.
    */
   nir_copy_deref(build, nir_build_deref_var(build, dest),
                         nir_build_deref_var(build, src));
}
1603
1604#undef nir_load_global
1605static inline nir_ssa_def *
1606nir_load_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
1607                unsigned num_components, unsigned bit_size)
1608{
1609   nir_intrinsic_instr *load =
1610      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global);
1611   load->num_components = num_components;
1612   load->src[0] = nir_src_for_ssa(addr);
1613   nir_intrinsic_set_align(load, align, 0);
1614   nir_ssa_dest_init(&load->instr, &load->dest,
1615                     num_components, bit_size, NULL);
1616   nir_builder_instr_insert(build, &load->instr);
1617   return &load->dest.ssa;
1618}
1619
1620#undef nir_store_global
1621static inline void
1622nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
1623                 nir_ssa_def *value, nir_component_mask_t write_mask)
1624{
1625   nir_intrinsic_instr *store =
1626      nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_global);
1627   store->num_components = value->num_components;
1628   store->src[0] = nir_src_for_ssa(value);
1629   store->src[1] = nir_src_for_ssa(addr);
1630   nir_intrinsic_set_write_mask(store,
1631      write_mask & BITFIELD_MASK(value->num_components));
1632   nir_intrinsic_set_align(store, align, 0);
1633   nir_builder_instr_insert(build, &store->instr);
1634}
1635
1636#undef nir_load_global_constant
1637static inline nir_ssa_def *
1638nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
1639                         unsigned num_components, unsigned bit_size)
1640{
1641   nir_intrinsic_instr *load =
1642      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
1643   load->num_components = num_components;
1644   load->src[0] = nir_src_for_ssa(addr);
1645   nir_intrinsic_set_align(load, align, 0);
1646   nir_ssa_dest_init(&load->instr, &load->dest,
1647                     num_components, bit_size, NULL);
1648   nir_builder_instr_insert(build, &load->instr);
1649   return &load->dest.ssa;
1650}
1651
1652#undef nir_load_param
1653static inline nir_ssa_def *
1654nir_load_param(nir_builder *build, uint32_t param_idx)
1655{
1656   assert(param_idx < build->impl->function->num_params);
1657   nir_parameter *param = &build->impl->function->params[param_idx];
1658   return nir_build_load_param(build, param->num_components, param->bit_size, param_idx);
1659}
1660
1661/**
1662 * This function takes an I/O intrinsic like load/store_input,
1663 * and emits a sequence that calculates the full offset of that instruction,
1664 * including a stride to the base and component offsets.
1665 */
1666static inline nir_ssa_def *
1667nir_build_calc_io_offset(nir_builder *b,
1668                         nir_intrinsic_instr *intrin,
1669                         nir_ssa_def *base_stride,
1670                         unsigned component_stride)
1671{
1672   /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
1673   nir_ssa_def *base_op = nir_imul_imm(b, base_stride, nir_intrinsic_base(intrin));
1674
1675   /* offset should be interpreted in relation to the base,
1676    * so the instruction effectively reads/writes another input/output
1677    * when it has an offset
1678    */
1679   nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
1680
1681   /* component is in bytes */
1682   unsigned const_op = nir_intrinsic_component(intrin) * component_stride;
1683
1684   return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op);
1685}
1686
/**
 * Calculate `(1 << bits) - 1` in SSA without overflows.
 *
 * The mask is produced by shifting an all-ones constant of dst_bit_size bits
 * right by (dst_bit_size - bits); bits is converted to 32 bits first.
 *
 * NOTE(review): for bits == 0 the shift amount equals dst_bit_size; confirm
 * that nir_ushr yields the intended value in that case.
 */
static inline nir_ssa_def *
nir_mask(nir_builder *b, nir_ssa_def *bits, unsigned dst_bit_size)
{
   return nir_ushr(b, nir_imm_intN_t(b, -1, dst_bit_size),
                      nir_isub_imm(b, dst_bit_size, nir_u2u32(b, bits)));
}
1694
1695static inline nir_ssa_def *
1696nir_f2b(nir_builder *build, nir_ssa_def *f)
1697{
1698   return nir_f2b1(build, f);
1699}
1700
1701static inline nir_ssa_def *
1702nir_i2b(nir_builder *build, nir_ssa_def *i)
1703{
1704   return nir_i2b1(build, i);
1705}
1706
1707static inline nir_ssa_def *
1708nir_b2f(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)
1709{
1710   switch (bit_size) {
1711   case 64: return nir_b2f64(build, b);
1712   case 32: return nir_b2f32(build, b);
1713   case 16: return nir_b2f16(build, b);
1714   default:
1715      unreachable("Invalid bit-size");
1716   };
1717}
1718
1719static inline nir_ssa_def *
1720nir_b2i(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)
1721{
1722   switch (bit_size) {
1723   case 64: return nir_b2i64(build, b);
1724   case 32: return nir_b2i32(build, b);
1725   case 16: return nir_b2i16(build, b);
1726   case 8:  return nir_b2i8(build, b);
1727   default:
1728      unreachable("Invalid bit-size");
1729   };
1730}
1731static inline nir_ssa_def *
1732nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,
1733                     unsigned interp_mode)
1734{
1735   unsigned num_components = op == nir_intrinsic_load_barycentric_model ? 3 : 2;
1736   nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op);
1737   nir_ssa_dest_init(&bary->instr, &bary->dest, num_components, 32, NULL);
1738   nir_intrinsic_set_interp_mode(bary, interp_mode);
1739   nir_builder_instr_insert(build, &bary->instr);
1740   return &bary->dest.ssa;
1741}
1742
1743static inline void
1744nir_jump(nir_builder *build, nir_jump_type jump_type)
1745{
1746   assert(jump_type != nir_jump_goto && jump_type != nir_jump_goto_if);
1747   nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type);
1748   nir_builder_instr_insert(build, &jump->instr);
1749}
1750
1751static inline void
1752nir_goto(nir_builder *build, struct nir_block *target)
1753{
1754   assert(!build->impl->structured);
1755   nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto);
1756   jump->target = target;
1757   nir_builder_instr_insert(build, &jump->instr);
1758}
1759
1760static inline void
1761nir_goto_if(nir_builder *build, struct nir_block *target, nir_src cond,
1762            struct nir_block *else_target)
1763{
1764   assert(!build->impl->structured);
1765   nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto_if);
1766   jump->condition = cond;
1767   jump->target = target;
1768   jump->else_target = else_target;
1769   nir_builder_instr_insert(build, &jump->instr);
1770}
1771
1772static inline nir_ssa_def *
1773nir_compare_func(nir_builder *b, enum compare_func func,
1774                 nir_ssa_def *src0, nir_ssa_def *src1)
1775{
1776   switch (func) {
1777   case COMPARE_FUNC_NEVER:
1778      return nir_imm_int(b, 0);
1779   case COMPARE_FUNC_ALWAYS:
1780      return nir_imm_int(b, ~0);
1781   case COMPARE_FUNC_EQUAL:
1782      return nir_feq(b, src0, src1);
1783   case COMPARE_FUNC_NOTEQUAL:
1784      return nir_fneu(b, src0, src1);
1785   case COMPARE_FUNC_GREATER:
1786      return nir_flt(b, src1, src0);
1787   case COMPARE_FUNC_GEQUAL:
1788      return nir_fge(b, src0, src1);
1789   case COMPARE_FUNC_LESS:
1790      return nir_flt(b, src0, src1);
1791   case COMPARE_FUNC_LEQUAL:
1792      return nir_fge(b, src1, src0);
1793   }
1794   unreachable("bad compare func");
1795}
1796
1797static inline void
1798nir_scoped_memory_barrier(nir_builder *b,
1799                          nir_scope scope,
1800                          nir_memory_semantics semantics,
1801                          nir_variable_mode modes)
1802{
1803   nir_scoped_barrier(b, NIR_SCOPE_NONE, scope, semantics, modes);
1804}
1805
1806static inline nir_ssa_def *
1807nir_type_convert(nir_builder *b,
1808                    nir_ssa_def *src,
1809                    nir_alu_type src_type,
1810                    nir_alu_type dest_type)
1811{
1812   assert(nir_alu_type_get_type_size(src_type) == 0 ||
1813          nir_alu_type_get_type_size(src_type) == src->bit_size);
1814
1815   src_type = (nir_alu_type) (src_type | src->bit_size);
1816
1817   nir_op opcode =
1818      nir_type_conversion_op(src_type, dest_type, nir_rounding_mode_undef);
1819
1820   return nir_build_alu(b, opcode, src, NULL, NULL, NULL);
1821}
1822
1823static inline nir_ssa_def *
1824nir_convert_to_bit_size(nir_builder *b,
1825                    nir_ssa_def *src,
1826                    nir_alu_type type,
1827                    unsigned bit_size)
1828{
1829   return nir_type_convert(b, src, type, (nir_alu_type) (type | bit_size));
1830}
1831
1832static inline nir_ssa_def *
1833nir_i2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1834{
1835   return nir_convert_to_bit_size(b, src, nir_type_int, bit_size);
1836}
1837
1838static inline nir_ssa_def *
1839nir_u2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1840{
1841   return nir_convert_to_bit_size(b, src, nir_type_uint, bit_size);
1842}
1843
1844static inline nir_ssa_def *
1845nir_b2bN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1846{
1847   return nir_convert_to_bit_size(b, src, nir_type_bool, bit_size);
1848}
1849
1850static inline nir_ssa_def *
1851nir_f2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1852{
1853   return nir_convert_to_bit_size(b, src, nir_type_float, bit_size);
1854}
1855
1856static inline nir_ssa_def *
1857nir_i2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1858{
1859   return nir_type_convert(b, src, nir_type_int,
1860         (nir_alu_type) (nir_type_float | bit_size));
1861}
1862
1863static inline nir_ssa_def *
1864nir_u2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1865{
1866   return nir_type_convert(b, src, nir_type_uint,
1867         (nir_alu_type) (nir_type_float | bit_size));
1868}
1869
1870static inline nir_ssa_def *
1871nir_f2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1872{
1873   return nir_type_convert(b, src, nir_type_float,
1874         (nir_alu_type) (nir_type_uint | bit_size));
1875}
1876
1877static inline nir_ssa_def *
1878nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1879{
1880   return nir_type_convert(b, src, nir_type_float,
1881         (nir_alu_type) (nir_type_int | bit_size));
1882}
1883
1884#endif /* NIR_BUILDER_H */
1885