/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_shader_temp:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_mem_shared:
      assert(gl_shader_stage_is_compute(shader->info.stage));
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_mem_global:
      assert(!"nir_shader_add_variable cannot be used for global memory");
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}
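
/* A minimal sketch of the distinction above (hypothetical `shader`, `instr`,
 * `some_src` and `def`; not part of this file).  Plain copies are only safe
 * while the consuming instruction has not been inserted yet:
 *
 *    nir_alu_instr *neg = nir_alu_instr_create(shader, nir_op_fneg);
 *    nir_src_copy(&neg->src[0].src, &some_src, neg);   // not in the IR yet: OK
 *    nir_instr_insert(nir_before_instr(instr), &neg->instr);
 *    // From here on, use the rewrite helper so the use lists stay valid:
 *    nir_instr_rewrite_src(&neg->instr, &neg->src[0].src, nir_src_for_ssa(def));
 */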

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state-specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] =
{
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}
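
/* A minimal sketch of growing and shrinking a texture instruction after the
 * fact (hypothetical `tex` and `lod_def`; not part of this file):
 *
 *    // Force an explicit LOD onto an existing nir_tex_instr:
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod_def));
 *
 *    // ... and later drop it again by index:
 *    int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
 *    if (idx >= 0)
 *       nir_tex_instr_remove_src(tex, idx);
 */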

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 1:  v.b   = i & 1;  break;
   case 8:  v.i8  = i;  break;
   case 16: v.i16 = i;  break;
   case 32: v.i32 = i;  break;
   case 64: v.i64 = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
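
/* A minimal sketch of consuming the identity, e.g. to seed a reduction
 * lowering with a neutral constant (hypothetical `shader`, `op`, `bit_size`
 * and `reduce`; not part of this file):
 *
 *    nir_load_const_instr *ident =
 *       nir_load_const_instr_create(shader, 1, bit_size);
 *    ident->value[0] = nir_alu_binop_identity(op, bit_size);
 *    nir_instr_insert(nir_before_instr(&reduce->instr), &ident->instr);
 */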

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}
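
/* A minimal sketch of why reduction matters (hypothetical `instr`, assumed
 * to be the last instruction of `block`; not part of this file).  Two
 * syntactically different cursors can denote the same insertion point; both
 * of these reduce to nir_cursor_after_block, so they compare equal:
 *
 *    assert(nir_cursors_equal(nir_after_instr(instr),
 *                             nir_after_block(block)));
 */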

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}
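
/* A minimal sketch of inserting a freshly created instruction through a
 * cursor (hypothetical `shader` and `instr`; not part of this file):
 *
 *    nir_ssa_undef_instr *undef =
 *       nir_ssa_undef_instr_create(shader, 1, 32);
 *    nir_instr_insert(nir_before_instr(instr), &undef->instr);
 *
 * The nir_instr_insert_before()/_after() convenience wrappers in nir.h
 * reduce to exactly this call.
 */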

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array ||
       instr->deref_type == nir_deref_type_ptr_as_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 16:
      v.u16 = _mesa_float_to_half(f);
      break;
   case 32:
      v.f32 = f;
      break;
   case 64:
      v.f64 = f;
      break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)load->value[comp].b;
   case 8:  return load->value[comp].i8;
   case 16: return load->value[comp].i16;
   case 32: return load->value[comp].i32;
   case 64: return load->value[comp].i64;
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 1:  return load->value[comp].b;
   case 8:  return load->value[comp].u8;
   case 16: return load->value[comp].u16;
   case 32: return load->value[comp].u32;
   case 64: return load->value[comp].u64;
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   int64_t i = nir_src_comp_as_int(src, comp);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value[comp].u16);
   case 32: return load->value[comp].f32;
   case 64: return load->value[comp].f64;
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise
 * it returns false, which means the source may or may not be dynamically
 * uniform but that cannot be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}
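
/* A minimal sketch of the intended use (hypothetical lowering pass and
 * `tex`/`idx`; not part of this file): take the cheap single-descriptor path
 * only when the index is provably uniform across the invocation group.
 *
 *    if (nir_src_is_dynamically_uniform(tex->src[idx].src)) {
 *       // one descriptor load serves every invocation
 *    } else {
 *       // fall back to a per-invocation loop ("waterfall")
 *    }
 */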

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                 unsigned num_components, unsigned bit_size,
                 const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source, but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code, then call rewrite_uses_after with the last
 * fixup instruction as after_me, and it will replace all of the uses you
 * want without touching the fixup code itself (see the sketch after this
 * function).
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   if (new_src.is_ssa && def == new_src.ssa)
      return;

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
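
/* A minimal sketch of the fixup pattern described above (hypothetical `def`
 * and `fixed`, where `fixed` was computed from `def` by one or more fixup
 * instructions; not part of this file):
 *
 *    nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(fixed),
 *                                   fixed->parent_instr);
 *    // Every pre-existing use of `def` now reads `fixed`, while the fixup
 *    // code itself keeps reading the original `def`.
 */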

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
         read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
      return nir_intrinsic_load_global_invocation_index;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps attribute locations when the attributes
 * are doubles. Not needed for Vulkan because of the difference in input
 * location counts for doubles between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}
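
/* A worked example of the formula above (hypothetical layout; not part of
 * this file): with a dvec3 at location 0, a float at 1 and a double at 2,
 * dual_slot = 0b101.  Location 0 stays put, location 1 moves to
 * 1 + bitcount(0b101 & 0b1) = 2, and location 2 moves to
 * 2 + bitcount(0b101 & 0b11) = 3, which frees slot 1 for the second half of
 * the dvec3.
 */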

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}
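
/* A worked example (hypothetical masks; not part of this file): with
 * dual_slot = 0b001 and a remapped attribs mask of 0b1101 (locations 0, 2
 * and 3), location 0 was a double occupying two slots, so its second slot
 * is folded away: (0b1101 & 0b1) | ((0b1101 & ~0b1) >> 1) = 0b0111, i.e.
 * the original single-slot locations 0, 1 and 2.
 */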

void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
                            bool bindless)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   switch (intrin->intrinsic) {
#define CASE(op) \
   case nir_intrinsic_image_deref_##op: \
      intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
                                   : nir_intrinsic_image_##op; \
      break;
   CASE(load)
   CASE(store)
   CASE(atomic_add)
   CASE(atomic_min)
   CASE(atomic_max)
   CASE(atomic_and)
   CASE(atomic_or)
   CASE(atomic_xor)
   CASE(atomic_exchange)
   CASE(atomic_comp_swap)
   CASE(atomic_fadd)
   CASE(size)
   CASE(samples)
   CASE(load_raw_intel)
   CASE(store_raw_intel)
#undef CASE
   default:
      unreachable("Unhandled image intrinsic");
   }

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
   nir_intrinsic_set_access(intrin, access | var->data.image.access);
   nir_intrinsic_set_format(intrin, var->data.image.format);

   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(src));
}