/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_local:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_global:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_shader_storage:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_shared:
      assert(shader->info.stage == MESA_SHADER_COMPUTE);
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_local;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}
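
/* A minimal sketch of the alternative named in the NOTE above: for an
 * instruction that is already in the IR, rewrite the source instead of
 * copying into it so the use lists stay consistent (alu and new_def are
 * hypothetical):
 *
 *    nir_instr_rewrite_src(&alu->instr, &alu->src[0].src,
 *                          nir_src_for_ssa(new_def));
 */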

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state-specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}
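
/* A minimal sketch of reworking a texture instruction's sources with the
 * two helpers above (tex and lod are hypothetical; nir_tex_instr_src_index()
 * is the standard lookup helper):
 *
 *    int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
 *    if (idx >= 0)
 *       nir_tex_instr_remove_src(tex, idx);
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
 */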

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d);  break;
   case 32: v.f32[0] = d;                       break;
   case 64: v.f64[0] = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 8:  v.i8[0]  = i;  break;
   case 16: v.i16[0] = i;  break;
   case 32: v.i32[0] = i;  break;
   case 64: v.i64[0] = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
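
/* A worked example of the identities above (illustrative only): for a
 * 32-bit fmin reduction the identity is +INFINITY, since
 * fmin(x, +INFINITY) == x for every x, so inactive channels can be filled
 * with it without affecting the result:
 *
 *    nir_const_value ident = nir_alu_binop_identity(nir_op_fmin, 32);
 *    assert(ident.f32[0] == INFINITY);
 */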

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to "after" form and by
 * going up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}
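
/* A minimal sketch of what cursor reduction buys us: "after the last
 * instruction of a block" and "after the block" reduce to the same cursor
 * (block is hypothetical and assumed non-empty):
 *
 *    nir_cursor a = nir_after_instr(nir_block_last_instr(block));
 *    nir_cursor b = nir_after_block(block);
 *    assert(nir_cursors_equal(a, b));
 */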

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}
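
/* A minimal usage sketch for the cursor-based insert above (shader and
 * some_instr are hypothetical): create a constant and place it right
 * before an existing instruction:
 *
 *    nir_load_const_instr *zero =
 *       nir_load_const_instr_create(shader, 1, 32);
 *    nir_instr_insert(nir_before_instr(some_instr), &zero->instr);
 */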

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.i8[comp];
   case 16: return load->value.i16[comp];
   case 32: return load->value.i32[comp];
   case 64: return load->value.i64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.u8[comp];
   case 16: return load->value.u16[comp];
   case 32: return load->value.u32[comp];
   case 64: return load->value.u64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   assert(load->def.bit_size == 32);
   assert(load->value.u32[comp] == NIR_TRUE ||
          load->value.u32[comp] == NIR_FALSE);

   return load->value.u32[comp];
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value.u16[comp]);
   case 32: return load->value.f32[comp];
   case 64: return load->value.f64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise it
 * returns false, which means the source may or may not be dynamically
 * uniform but this cannot be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}
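
/* A minimal usage sketch (illustrative only): a backend might check a
 * texture instruction's source this way before choosing a code path that
 * is only valid for uniform indices (tex, idx, and the lowering helper
 * are hypothetical):
 *
 *    if (!nir_src_is_dynamically_uniform(tex->src[idx].src))
 *       lower_non_uniform_access(tex);   // hypothetical helper
 */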

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                 unsigned num_components, unsigned bit_size,
                 const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
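
/* The canonical replace-and-remove pattern built on the helper above
 * (old_def and new_def are hypothetical; nir_instr_remove() is the
 * standard removal helper):
 *
 *    nir_ssa_def_rewrite_uses(old_def, nir_src_for_ssa(new_def));
 *    nir_instr_remove(old_def->parent_instr);
 */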

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source, but only
 * uses that come after the after_me instruction.  This is useful when
 * emitting code to fix up the result of some instruction: the fixup code can
 * freely use the result, and a final call to rewrite_uses_after with the
 * last fixup instruction as after_me replaces all of the intended uses
 * without touching the fixup code itself.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
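
/* A minimal sketch of the fixup pattern described above, using nir_builder
 * (b and alu are hypothetical; the builder is assumed to be positioned
 * after alu):
 *
 *    nir_ssa_def *res = &alu->dest.dest.ssa;
 *    nir_ssa_def *fixed = nir_fsat(b, res);            // the fixup code
 *    nir_ssa_def_rewrite_uses_after(res, nir_src_for_ssa(fixed),
 *                                   fixed->parent_instr);
 */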

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);

         for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
            if (!nir_alu_instr_channel_used(alu, src_idx, c))
               continue;

            read_mask |= (1 << alu_src->swizzle[c]);
         }
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}
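
/* A worked example for the mask above (illustrative only): if a vec4 def's
 * only use is an ALU source swizzled .yzy, then only components 1 and 2 are
 * read and the function returns 0x6 (binary 0110), letting passes shrink or
 * repack the unused .x and .w channels.
 */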

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}
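
/* A minimal usage sketch (assuming util/bitset.h is available): dense SSA
 * indices make per-def bitsets cheap, e.g. for a liveness-style pass
 * (mem_ctx and def are hypothetical):
 *
 *    nir_index_ssa_defs(impl);
 *    BITSET_WORD *live = rzalloc_array(mem_ctx, BITSET_WORD,
 *                                      BITSET_WORDS(impl->ssa_alloc));
 *    BITSET_SET(live, def->index);
 */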

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps attribute locations for double-precision
 * inputs. Not needed for Vulkan because of the differences in how input
 * locations are counted for doubles in Vulkan vs. OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}
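
/* A worked example of the remap formula (illustrative only): with a dvec4
 * at location 0 and a vec4 at location 1, dual_slot == 0x1.  The dvec4
 * stays at new_loc = 0 + util_bitcount(0x1 & BITFIELD64_MASK(0)) = 0, and
 * the vec4 moves to new_loc = 1 + util_bitcount(0x1 & BITFIELD64_MASK(1)) = 2,
 * leaving location 1 free for the dvec4's second slot.
 */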

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}

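/* A worked example (illustrative only): with dual_slot == 0x1 (a double at
 * old location 0) and an expanded mask attribs == 0x7 (slots 0, 1, and 2
 * set), the loop collapses the double's two slots into one:
 *
 *    mask = BITFIELD64_MASK(1) = 0x1
 *    attribs = (0x7 & 0x1) | ((0x7 & ~0x1) >> 1) = 0x1 | 0x3 = 0x3
 */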