1/*
2 * Copyright © 2014 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28#ifndef NIR_H
29#define NIR_H
30
31#include "util/hash_table.h"
32#include "compiler/glsl/list.h"
33#include "GL/gl.h" /* GLenum */
34#include "util/list.h"
35#include "util/log.h"
36#include "util/ralloc.h"
37#include "util/set.h"
38#include "util/bitscan.h"
39#include "util/bitset.h"
40#include "util/compiler.h"
41#include "util/enum_operators.h"
42#include "util/macros.h"
43#include "util/format/u_format.h"
44#include "compiler/nir_types.h"
45#include "compiler/shader_enums.h"
46#include "compiler/shader_info.h"
47#define XXH_INLINE_ALL
48#include "util/xxhash.h"
49#include <stdio.h>
50
51#ifndef NDEBUG
52#include "util/debug.h"
53#endif /* NDEBUG */
54
55#include "nir_opcodes.h"
56
/* Older MSVC runtimes lack a conforming snprintf; fall back to _snprintf.
 * NOTE(review): _snprintf does not always NUL-terminate on truncation —
 * callers must not rely on termination for exactly-full buffers.
 */
#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif
60
61#ifdef __cplusplus
62extern "C" {
63#endif
64
/* NIR booleans use the 0 / ~0 (all-ones) convention. */
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
/* Widest vector NIR supports; must match the width of nir_component_mask_t. */
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
/* Flag bit in nir_variable_data::stream marking a packed per-component stream. */
#define NIR_STREAM_PACKED (1 << 8)
/* One bit per vector component; 16 bits covers NIR_MAX_VEC_COMPONENTS. */
typedef uint16_t nir_component_mask_t;
71
/** Returns true for the vector sizes NIR supports: 1-5, 8, or 16. */
static inline bool
nir_num_components_valid(unsigned num_components)
{
   switch (num_components) {
   case 1:
   case 2:
   case 3:
   case 4:
   case 5:
   case 8:
   case 16:
      return true;
   default:
      return false;
   }
}
80
81bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
82                                        unsigned old_bit_size,
83                                        unsigned new_bit_size);
84nir_component_mask_t
85nir_component_mask_reinterpret(nir_component_mask_t mask,
86                               unsigned old_bit_size,
87                               unsigned new_bit_size);
88
/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field \c field of type
 * in_type.  The cast asserts that \c type_field of the input equals
 * \c type_value before converting.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}
105
106struct nir_function;
107struct nir_shader;
108struct nir_instr;
109struct nir_builder;
110
111
112/**
113 * Description of built-in state associated with a uniform
114 *
115 * \sa nir_variable::state_slots
116 */
typedef struct {
   /* State tokens identifying the built-in state backing the uniform. */
   gl_state_index16 tokens[STATE_LENGTH];
   /* Component selection applied to the state value — presumably a packed
    * 4x4-bit swizzle; TODO confirm encoding against the state upload code. */
   uint16_t swizzle;
} nir_state_slot;
121
/**
 * Storage classes for variables.  A bitfield: passes frequently take a mask
 * of modes (see nir_foreach_variable_with_modes).
 */
typedef enum {
   nir_var_shader_in       = (1 << 0),
   nir_var_shader_out      = (1 << 1),
   nir_var_shader_temp     = (1 << 2),
   nir_var_function_temp   = (1 << 3),
   nir_var_uniform         = (1 << 4),
   nir_var_mem_ubo         = (1 << 5),
   nir_var_system_value    = (1 << 6),
   nir_var_mem_ssbo        = (1 << 7),
   nir_var_mem_shared      = (1 << 8),
   nir_var_mem_global      = (1 << 9),
   /** Modes reachable through an OpenCL-style generic pointer */
   nir_var_mem_generic     = (nir_var_shader_temp |
                              nir_var_function_temp |
                              nir_var_mem_shared |
                              nir_var_mem_global),
   nir_var_mem_push_const  = (1 << 10), /* not actually used for variables */
   nir_var_mem_constant    = (1 << 11),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data = (1 << 12),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib  = (1 << 13),
   /** Modes whose contents a shader can never write */
   nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
                             nir_var_system_value | nir_var_mem_constant |
                             nir_var_mem_ubo,
   /** Modes where vector derefs can be indexed as arrays */
   nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo |
                                 nir_var_mem_shared | nir_var_mem_global |
                                 nir_var_mem_push_const,
   nir_num_variable_modes  = 14,
   nir_var_all             = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
154
155/**
156 * Rounding modes.
157 */
/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0, /* unspecified / don't care */
   nir_rounding_mode_rtne  = 1, /* round to nearest even */
   nir_rounding_mode_ru    = 2, /* round up */
   nir_rounding_mode_rd    = 3, /* round down */
   nir_rounding_mode_rtz   = 4, /* round towards zero */
} nir_rounding_mode;
165
/**
 * A single scalar constant value.  Which member is meaningful is determined
 * by the bit size / base type of the value; see the nir_const_value_for_* and
 * nir_const_value_as_* helpers below.
 */
typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;
179
/**
 * Copies the first \p components channels of the nir_const_value array \p c
 * into \p arr, reading union member \p m (e.g. u32, f32) from each element.
 *
 * Wrapped in do { } while (false) so the macro expands to a single statement
 * and composes safely with unbraced if/else (the original bare-block form
 * broke "if (x) nir_const_value_to_array(...); else ...").
 */
#define nir_const_value_to_array(arr, c, components, m) \
do { \
   for (unsigned i = 0; i < components; ++i) \
      arr[i] = c[i].m; \
} while (false)
185
186static inline nir_const_value
187nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
188{
189   nir_const_value v;
190   memset(&v, 0, sizeof(v));
191
192   switch (bit_size) {
193   case 1:  v.b   = x;  break;
194   case 8:  v.u8  = x;  break;
195   case 16: v.u16 = x;  break;
196   case 32: v.u32 = x;  break;
197   case 64: v.u64 = x;  break;
198   default:
199      unreachable("Invalid bit size");
200   }
201
202   return v;
203}
204
205static inline nir_const_value
206nir_const_value_for_int(int64_t i, unsigned bit_size)
207{
208   nir_const_value v;
209   memset(&v, 0, sizeof(v));
210
211   assert(bit_size <= 64);
212   if (bit_size < 64) {
213      assert(i >= (-(1ll << (bit_size - 1))));
214      assert(i < (1ll << (bit_size - 1)));
215   }
216
217   return nir_const_value_for_raw_uint(i, bit_size);
218}
219
220static inline nir_const_value
221nir_const_value_for_uint(uint64_t u, unsigned bit_size)
222{
223   nir_const_value v;
224   memset(&v, 0, sizeof(v));
225
226   assert(bit_size <= 64);
227   if (bit_size < 64)
228      assert(u < (1ull << bit_size));
229
230   return nir_const_value_for_raw_uint(u, bit_size);
231}
232
233static inline nir_const_value
234nir_const_value_for_bool(bool b, unsigned bit_size)
235{
236   /* Booleans use a 0/-1 convention */
237   return nir_const_value_for_int(-(int)b, bit_size);
238}
239
240/* This one isn't inline because it requires half-float conversion */
241nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
242
243static inline int64_t
244nir_const_value_as_int(nir_const_value value, unsigned bit_size)
245{
246   switch (bit_size) {
247   /* int1_t uses 0/-1 convention */
248   case 1:  return -(int)value.b;
249   case 8:  return value.i8;
250   case 16: return value.i16;
251   case 32: return value.i32;
252   case 64: return value.i64;
253   default:
254      unreachable("Invalid bit size");
255   }
256}
257
258static inline uint64_t
259nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
260{
261   switch (bit_size) {
262   case 1:  return value.b;
263   case 8:  return value.u8;
264   case 16: return value.u16;
265   case 32: return value.u32;
266   case 64: return value.u64;
267   default:
268      unreachable("Invalid bit size");
269   }
270}
271
272static inline bool
273nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
274{
275   int64_t i = nir_const_value_as_int(value, bit_size);
276
277   /* Booleans of any size use 0/-1 convention */
278   assert(i == 0 || i == -1);
279
280   return i;
281}
282
283/* This one isn't inline because it requires half-float conversion */
284double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
285
/** A (possibly aggregate) constant, used for variable initializers. */
typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the \c nir_variable.  Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* we could get this from the var->type but makes clone *much* easier to
    * not have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;
304
305/**
306 * \brief Layout qualifiers for gl_FragDepth.
307 *
308 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
309 * with a layout qualifier.
310 */
typedef enum {
    nir_depth_layout_none, /**< No depth layout is specified. */
    nir_depth_layout_any,
    nir_depth_layout_greater,
    nir_depth_layout_less,
    nir_depth_layout_unchanged
} nir_depth_layout;
318
319/**
320 * Enum keeping track of how a variable was declared.
321 */
/**
 * Enum keeping track of how a variable was declared.  See
 * nir_variable_data::how_declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;
334
335/**
336 * Either a uniform, global variable, shader input, or shader output. Based on
337 * ir_variable - it should be easy to translate between the two.
338 */
339
typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * \sa nir_variable_mode
       */
      unsigned mode:14;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as \c const, shader inputs,
       * and uniforms.
       */
      unsigned read_only:1;
      unsigned centroid:1;
      unsigned sample:1;
      unsigned patch:1;
      unsigned invariant:1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all, in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision:2;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce:1;

      /**
       * When separate shader programs are enabled, only input/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other input/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io:1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * \sa glsl_interp_mode
       */
      unsigned interpolation:3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components.  For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by \c location.
       */
      unsigned location_frac:2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed.  In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact:1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output:1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless:1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding:1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it \b cannot be changed
       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
       * no effect).
       */
      unsigned explicit_location:1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer:1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride:1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset:1;

      /**
       * Layout of the matrix.  Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout:2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block:1;

      /**
       * How the variable was declared.  See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so should
       * not be visible via the API.
       */
      unsigned how_declared:2;

      /**
       * Is this variable per-view?  If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view:1;

      /**
       * Whether the variable is per-primitive.
       * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
       */
      unsigned per_primitive:1;

      /**
       * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to \c ir_depth_layout_none if and only if this
       * variable is \c gl_FragDepth and a layout qualifier is specified.
       */
      unsigned depth_layout:3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream:9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access:8;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set:5;

      /**
       * Output index for dual source blending.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
       *   - Vertex shader output: one of the values from \c gl_varying_slot.
       *   - Geometry shader input: one of the values from \c gl_varying_slot.
       *   - Geometry shader output: one of the values from \c gl_varying_slot.
       *   - Fragment shader input: one of the values from \c gl_varying_slot.
       *   - Fragment shader output: one of the values from \c gl_frag_result.
       *   - Task shader output: one of the values from \c gl_varying_slot.
       *   - Mesh shader input: one of the values from \c gl_varying_slot.
       *   - Mesh shader output: one of the values from \c gl_varying_slot.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, uniforms (including samplers and images), and for UBO and SSBO
       * variables in GLSL.
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer:2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, \c state_slots must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree.  In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, \c num_state_slots will be zero and
    * \c state_slots will be \c NULL.
    */
   /*@{*/
   uint16_t num_state_slots;    /**< Number of state slots used */
   nir_state_slot *state_slots;  /**< State descriptors. */
   /*@}*/

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
    *
    * \sa ir_variable::location
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier.  This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;
670
671static inline bool
672_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
673{
674   /* This isn't a shader variable */
675   assert(!(modes & nir_var_function_temp));
676   return var->data.mode & modes;
677}
678
/*
 * Iteration helpers over variable lists.  The *_safe variants tolerate
 * removal of the current variable while iterating; the mode-filtered forms
 * visit only variables whose data.mode intersects the given mask.
 */
#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)
716
717static inline bool
718nir_variable_is_global(const nir_variable *var)
719{
720   return var->data.mode != nir_var_function_temp;
721}
722
/** A non-SSA virtual register, optionally an array of values. */
typedef struct nir_register {
   struct exec_node node;

   unsigned num_components; /**< number of vector components */
   unsigned num_array_elems; /**< size of array (0 for no array) */

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this register may have different values in different SIMD
    * invocations of the shader.
    */
   bool divergent;

   /** generic register index. */
   unsigned index;

   /** set of nir_srcs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_dests where this register is defined (written to) */
   struct list_head defs;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;
} nir_register;
750
/* Iterate a register list; the _safe form allows removal while iterating. */
#define nir_foreach_register(reg, reg_list) \
   foreach_list_typed(nir_register, reg, node, reg_list)
#define nir_foreach_register_safe(reg, reg_list) \
   foreach_list_typed_safe(nir_register, reg, node, reg_list)
755
/** Discriminator stored in nir_instr::type; see NIR_DEFINE_CAST users. */
typedef enum PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_ssa_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
} nir_instr_type;
768
/** Common header embedded in every NIR instruction type. */
typedef struct nir_instr {
   struct exec_node node;
   struct list_head gc_node;
   /** Basic block containing this instruction. */
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags.  For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   uint32_t index;
} nir_instr;
783
784static inline nir_instr *
785nir_instr_next(nir_instr *instr)
786{
787   struct exec_node *next = exec_node_get_next(&instr->node);
788   if (exec_node_is_tail_sentinel(next))
789      return NULL;
790   else
791      return exec_node_data(nir_instr, next, node);
792}
793
794static inline nir_instr *
795nir_instr_prev(nir_instr *instr)
796{
797   struct exec_node *prev = exec_node_get_prev(&instr->node);
798   if (exec_node_is_head_sentinel(prev))
799      return NULL;
800   else
801      return exec_node_data(nir_instr, prev, node);
802}
803
804static inline bool
805nir_instr_is_first(const nir_instr *instr)
806{
807   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
808}
809
810static inline bool
811nir_instr_is_last(const nir_instr *instr)
812{
813   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
814}
815
/** A single static-single-assignment value produced by an instruction. */
typedef struct nir_ssa_def {
   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_srcs where this SSA value is used (read from) */
   struct list_head uses;

   /** set of nir_ifs where this SSA value is used as a condition */
   struct list_head if_uses;

   /** generic SSA definition index. */
   unsigned index;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader.  This is set by nir_divergence_analysis.
    */
   bool divergent;
} nir_ssa_def;
840
841struct nir_src;
842
/** A source that reads from a nir_register. */
typedef struct {
   nir_register *reg;
   struct nir_src *indirect; /**< NULL for no indirect offset */
   unsigned base_offset;

   /* TODO use-def chain goes here */
} nir_reg_src;
850
/** A destination that writes to a nir_register. */
typedef struct {
   /** Instruction performing the write. */
   nir_instr *parent_instr;
   /** Link in the register's defs list. */
   struct list_head def_link;

   nir_register *reg;
   struct nir_src *indirect; /**< NULL for no indirect offset */
   unsigned base_offset;

   /* TODO def-use chain goes here */
} nir_reg_dest;
861
862struct nir_if;
863
/** A value read by an instruction or if-condition: either SSA or register. */
typedef struct nir_src {
   union {
      /** Instruction that consumes this value as a source. */
      nir_instr *parent_instr;
      /** If-statement that consumes this value as its condition. */
      struct nir_if *parent_if;
   };

   /** Link in the uses (or if_uses) list of the value being read. */
   struct list_head use_link;

   union {
      nir_reg_src reg;
      nir_ssa_def *ssa;
   };

   /** Selects which union member is active: ssa if true, reg otherwise. */
   bool is_ssa;
} nir_src;
880
/** Returns a zeroed source with no SSA def or register attached. */
static inline nir_src
nir_src_init(void)
{
   /* Aggregate initializer (not a compound literal) so this header stays
    * valid when included from C++.
    */
   nir_src src = { { NULL } };
   return src;
}
887
/** Initializer expression for a nir_src. */
#define NIR_SRC_INIT nir_src_init()

/* Iterate the nir_srcs reading an SSA def or register; _safe allows
 * removing the current use during iteration.
 */
#define nir_foreach_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_if_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
901
/** A value written by an instruction: either an SSA def or a register. */
typedef struct {
   union {
      nir_reg_dest reg;
      nir_ssa_def ssa;
   };

   /** Selects which union member is active: ssa if true, reg otherwise. */
   bool is_ssa;
} nir_dest;
910
/** Returns a zeroed destination with no SSA def or register attached. */
static inline nir_dest
nir_dest_init(void)
{
   /* Aggregate initializer (not a compound literal) so this header stays
    * valid when included from C++.
    */
   nir_dest dest = { { { NULL } } };
   return dest;
}
917
/** Initializer expression for a nir_dest. */
#define NIR_DEST_INIT nir_dest_init()

/* Iterate the nir_dests writing a register; _safe allows removal. */
#define nir_foreach_def(dest, reg) \
   list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)

#define nir_foreach_def_safe(dest, reg) \
   list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
925
926static inline nir_src
927nir_src_for_ssa(nir_ssa_def *def)
928{
929   nir_src src = NIR_SRC_INIT;
930
931   src.is_ssa = true;
932   src.ssa = def;
933
934   return src;
935}
936
937static inline nir_src
938nir_src_for_reg(nir_register *reg)
939{
940   nir_src src = NIR_SRC_INIT;
941
942   src.is_ssa = false;
943   src.reg.reg = reg;
944   src.reg.indirect = NULL;
945   src.reg.base_offset = 0;
946
947   return src;
948}
949
950static inline nir_dest
951nir_dest_for_reg(nir_register *reg)
952{
953   nir_dest dest = NIR_DEST_INIT;
954
955   dest.reg.reg = reg;
956
957   return dest;
958}
959
960static inline unsigned
961nir_src_bit_size(nir_src src)
962{
963   return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
964}
965
966static inline unsigned
967nir_src_num_components(nir_src src)
968{
969   return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
970}
971
972static inline bool
973nir_src_is_const(nir_src src)
974{
975   return src.is_ssa &&
976          src.ssa->parent_instr->type == nir_instr_type_load_const;
977}
978
979static inline bool
980nir_src_is_undef(nir_src src)
981{
982   return src.is_ssa &&
983          src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
984}
985
986static inline bool
987nir_src_is_divergent(nir_src src)
988{
989   return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent;
990}
991
992static inline unsigned
993nir_dest_bit_size(nir_dest dest)
994{
995   return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
996}
997
998static inline unsigned
999nir_dest_num_components(nir_dest dest)
1000{
1001   return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
1002}
1003
1004static inline bool
1005nir_dest_is_divergent(nir_dest dest)
1006{
1007   return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent;
1008}
1009
/**
 * Are all components the same, ie. .xxxx?
 *
 * \param swiz    swizzle array; only the first \p nr_comp entries are read.
 *                Const-qualified: the function never writes through it.
 * \param nr_comp number of components to check; 0 and 1 are trivially true.
 */
static inline bool
nir_is_same_comp_swizzle(const uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++) {
      if (swiz[i] != swiz[0])
         return false;
   }
   return true;
}
1019
/* Are all components sequential, ie. .yzw
 *
 * Vacuously true for nr_comp <= 1.  The swizzle is only read, so the
 * pointer is const-qualified.
 */
static inline bool
nir_is_sequential_comp_swizzle(const uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != (swiz[0] + i))
         return false;
   return true;
}
1029
1030void nir_src_copy(nir_src *dest, const nir_src *src);
1031void nir_dest_copy(nir_dest *dest, const nir_dest *src);
1032
/** An ALU instruction source: a base nir_src plus input modifiers and a
 * per-component swizzle.
 */
typedef struct {
   /** Base source */
   nir_src src;

   /**
    * \name input modifiers
    */
   /*@{*/
   /**
    * For inputs interpreted as floating point, flips the sign bit. For
    * inputs interpreted as integers, performs the two's complement negation.
    */
   bool negate;

   /**
    * Clears the sign bit for floating point values, and computes the integer
    * absolute value for integers. Note that the negate modifier acts after
    * the absolute value modifier, therefore if both are set then all inputs
    * will become negative.
    */
   bool abs;
   /*@}*/

   /**
    * For each input component, says which component of the register it is
    * chosen from.
    *
    * Note that which elements of the swizzle are used and which are ignored
    * are based on the write mask for most opcodes - for example, a statement
    * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
    * of {2, x, 1, 0} where x means "don't care."
    */
   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
} nir_alu_src;
1067
/** An ALU instruction destination: a base nir_dest plus the saturate output
 * modifier and a write mask.
 */
typedef struct {
   /** Base destination */
   nir_dest dest;

   /**
    * Saturate output modifier
    *
    * Only valid for opcodes that output floating-point numbers. Clamps the
    * output to between 0.0 and 1.0 inclusive.
    */
   bool saturate;

   /**
    * Write-mask
    *
    * Ignored if dest.is_ssa is true
    */
   unsigned write_mask : NIR_MAX_VEC_COMPONENTS;
} nir_alu_dest;
1087
/** NIR sized and unsized types
 *
 * The values in this enum are carefully chosen so that the sized type is
 * just the unsized type OR the number of bits: the base types occupy bits
 * 1, 2, and 7 while the size values (1, 8, 16, 32, 64) occupy the rest.
 */
typedef enum PACKED {
   nir_type_invalid = 0, /* Not a valid type */
   nir_type_int =       2,
   nir_type_uint =      4,
   nir_type_bool =      6,   /* bool = int | uint bits combined */
   nir_type_float =     128,
   nir_type_bool1 =     1  | nir_type_bool,
   nir_type_bool8 =     8  | nir_type_bool,
   nir_type_bool16 =    16 | nir_type_bool,
   nir_type_bool32 =    32 | nir_type_bool,
   nir_type_int1 =      1  | nir_type_int,
   nir_type_int8 =      8  | nir_type_int,
   nir_type_int16 =     16 | nir_type_int,
   nir_type_int32 =     32 | nir_type_int,
   nir_type_int64 =     64 | nir_type_int,
   nir_type_uint1 =     1  | nir_type_uint,
   nir_type_uint8 =     8  | nir_type_uint,
   nir_type_uint16 =    16 | nir_type_uint,
   nir_type_uint32 =    32 | nir_type_uint,
   nir_type_uint64 =    64 | nir_type_uint,
   nir_type_float16 =   16 | nir_type_float,
   nir_type_float32 =   32 | nir_type_float,
   nir_type_float64 =   64 | nir_type_float,
} nir_alu_type;
1117
/* Bits of a nir_alu_type holding the bit size: 1|8|16|32|64 = 0x79 */
#define NIR_ALU_TYPE_SIZE_MASK 0x79
/* Bits holding the base type: 2|4|128 = 0x86 (bool = 6 uses two bits) */
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86

/** Extract the bit size of a sized type; 0 for unsized base types. */
static inline unsigned
nir_alu_type_get_type_size(nir_alu_type type)
{
   return type & NIR_ALU_TYPE_SIZE_MASK;
}

/** Strip the size bits, leaving just the unsized base type. */
static inline nir_alu_type
nir_alu_type_get_base_type(nir_alu_type type)
{
   return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
}
1132
1133static inline nir_alu_type
1134nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
1135{
1136   switch (base_type) {
1137   case GLSL_TYPE_BOOL:
1138      return nir_type_bool1;
1139      break;
1140   case GLSL_TYPE_UINT:
1141      return nir_type_uint32;
1142      break;
1143   case GLSL_TYPE_INT:
1144      return nir_type_int32;
1145      break;
1146   case GLSL_TYPE_UINT16:
1147      return nir_type_uint16;
1148      break;
1149   case GLSL_TYPE_INT16:
1150      return nir_type_int16;
1151      break;
1152   case GLSL_TYPE_UINT8:
1153      return nir_type_uint8;
1154   case GLSL_TYPE_INT8:
1155      return nir_type_int8;
1156   case GLSL_TYPE_UINT64:
1157      return nir_type_uint64;
1158      break;
1159   case GLSL_TYPE_INT64:
1160      return nir_type_int64;
1161      break;
1162   case GLSL_TYPE_FLOAT:
1163      return nir_type_float32;
1164      break;
1165   case GLSL_TYPE_FLOAT16:
1166      return nir_type_float16;
1167      break;
1168   case GLSL_TYPE_DOUBLE:
1169      return nir_type_float64;
1170      break;
1171
1172   case GLSL_TYPE_SAMPLER:
1173   case GLSL_TYPE_IMAGE:
1174   case GLSL_TYPE_ATOMIC_UINT:
1175   case GLSL_TYPE_STRUCT:
1176   case GLSL_TYPE_INTERFACE:
1177   case GLSL_TYPE_ARRAY:
1178   case GLSL_TYPE_VOID:
1179   case GLSL_TYPE_SUBROUTINE:
1180   case GLSL_TYPE_FUNCTION:
1181   case GLSL_TYPE_ERROR:
1182      return nir_type_invalid;
1183   }
1184
1185   unreachable("unknown type");
1186}
1187
1188static inline nir_alu_type
1189nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
1190{
1191   return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
1192}
1193
/** Inverse of nir_get_nir_type_for_glsl_base_type.
 *
 * Only sized ALU types that correspond to a GLSL base type are accepted;
 * anything else hits the unreachable.
 */
static inline enum glsl_base_type
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
{
   switch (base_type) {
   case nir_type_bool1:
      return GLSL_TYPE_BOOL;
   case nir_type_uint32:
      return GLSL_TYPE_UINT;
   case nir_type_int32:
      return GLSL_TYPE_INT;
   case nir_type_uint16:
      return GLSL_TYPE_UINT16;
   case nir_type_int16:
      return GLSL_TYPE_INT16;
   case nir_type_uint8:
      return GLSL_TYPE_UINT8;
   case nir_type_int8:
      return GLSL_TYPE_INT8;
   case nir_type_uint64:
      return GLSL_TYPE_UINT64;
   case nir_type_int64:
      return GLSL_TYPE_INT64;
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;
   case nir_type_float16:
      return GLSL_TYPE_FLOAT16;
   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   default: unreachable("Not a sized nir_alu_type");
   }
}
1226
1227nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
1228                              nir_rounding_mode rnd);
1229
1230static inline nir_op
1231nir_op_vec(unsigned components)
1232{
1233   switch (components) {
1234   case  1: return nir_op_mov;
1235   case  2: return nir_op_vec2;
1236   case  3: return nir_op_vec3;
1237   case  4: return nir_op_vec4;
1238   case  5: return nir_op_vec5;
1239   case  8: return nir_op_vec8;
1240   case 16: return nir_op_vec16;
1241   default: unreachable("bad component count");
1242   }
1243}
1244
1245static inline bool
1246nir_op_is_vec(nir_op op)
1247{
1248   switch (op) {
1249   case nir_op_mov:
1250   case nir_op_vec2:
1251   case nir_op_vec3:
1252   case nir_op_vec4:
1253   case nir_op_vec5:
1254   case nir_op_vec8:
1255   case nir_op_vec16:
1256      return true;
1257   default:
1258      return false;
1259   }
1260}
1261
1262static inline bool
1263nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
1264{
1265    return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
1266        (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
1267        (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
1268}
1269
1270static inline bool
1271nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
1272{
1273    return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
1274        (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
1275        (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
1276}
1277
1278static inline bool
1279nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
1280{
1281    return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
1282        (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
1283        (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
1284}
1285
1286static inline bool
1287nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
1288{
1289    return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1290        (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1291        (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1292}
1293
1294static inline bool
1295nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
1296{
1297    return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1298        (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1299        (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1300}
1301
1302static inline bool
1303nir_has_any_rounding_mode_rtz(unsigned execution_mode)
1304{
1305    return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1306        (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1307        (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1308}
1309
1310static inline bool
1311nir_has_any_rounding_mode_rtne(unsigned execution_mode)
1312{
1313    return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1314        (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1315        (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1316}
1317
1318static inline nir_rounding_mode
1319nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
1320                                          nir_alu_type type)
1321{
1322   if (nir_alu_type_get_base_type(type) != nir_type_float)
1323      return nir_rounding_mode_undef;
1324
1325   unsigned bit_size = nir_alu_type_get_type_size(type);
1326
1327   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
1328      return nir_rounding_mode_rtz;
1329   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
1330      return nir_rounding_mode_rtne;
1331   return nir_rounding_mode_undef;
1332}
1333
/** Is any rounding mode (RTNE or RTZ, any bit size) requested? */
static inline bool
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
{
   return nir_has_any_rounding_mode_rtne(execution_mode) ||
          nir_has_any_rounding_mode_rtz(execution_mode);
}
1342
/** Algebraic properties an ALU opcode may have (bitmask). */
typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this is just mathematical commutativity.  Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),

   /**
    * Operation is associative
    */
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
} nir_op_algebraic_property;
1358
/* vec16 is the widest ALU op in NIR, making the max number of inputs of ALU
 * instructions to be the same as NIR_MAX_VEC_COMPONENTS.
 */
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS

typedef struct nir_op_info {
   /** Name of the NIR ALU opcode */
   const char *name;

   /** Number of inputs (sources) */
   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs. Note that the
    * saturate modifier is only allowed on outputs with the float type.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    *
    * See nir_op_infos::output_size for more detail about the relationship
    * between input and output sizes.
    */
   uint8_t input_sizes[NIR_ALU_MAX_INPUTS];

   /**
    * The type of vector that each input takes. Note that negate and
    * absolute value are only allowed on inputs with int or float type and
    * behave differently on the two.
    */
   nir_alu_type input_types[NIR_ALU_MAX_INPUTS];

   /** Algebraic properties of this opcode */
   nir_op_algebraic_property algebraic_properties;

   /** Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;
1417
1418/** Metadata for each nir_op, indexed by opcode */
1419extern const nir_op_info nir_op_infos[nir_num_opcodes];
1420
typedef struct nir_alu_instr {
   /** Base instruction */
   nir_instr instr;

   /** Opcode */
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either.  This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact:1;

   /**
    * Indicates that this instruction does not cause signed integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_signed_wrap:1;

   /**
    * Indicates that this instruction does not cause unsigned integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_unsigned_wrap:1;

   /** Destination */
   nir_alu_dest dest;

   /** Sources
    *
    * The size of the array is given by nir_op_info::num_inputs.
    */
   nir_alu_src src[];
} nir_alu_instr;
1459
1460void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
1461void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src);
1462
1463bool nir_alu_instr_is_copy(nir_alu_instr *instr);
1464
1465/* is this source channel used? */
1466static inline bool
1467nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
1468                           unsigned channel)
1469{
1470   if (nir_op_infos[instr->op].input_sizes[src] > 0)
1471      return channel < nir_op_infos[instr->op].input_sizes[src];
1472
1473   return (instr->dest.write_mask >> channel) & 1;
1474}
1475
1476static inline nir_component_mask_t
1477nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
1478{
1479   nir_component_mask_t read_mask = 0;
1480   for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
1481      if (!nir_alu_instr_channel_used(instr, src, c))
1482         continue;
1483
1484      read_mask |= (1 << instr->src[src].swizzle[c]);
1485   }
1486   return read_mask;
1487}
1488
1489/**
1490 * Get the number of channels used for a source
1491 */
1492static inline unsigned
1493nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
1494{
1495   if (nir_op_infos[instr->op].input_sizes[src] > 0)
1496      return nir_op_infos[instr->op].input_sizes[src];
1497
1498   return nir_dest_num_components(instr->dest.dest);
1499}
1500
/** True for opcodes that produce a boolean test of their sources.
 *
 * Note this includes the int/float-to-bool conversions (i2b1/f2b1) and
 * inot, not only the binary comparison opcodes.
 */
static inline bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
{
   switch (instr->op) {
   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fneu:
   case nir_op_ilt:
   case nir_op_ult:
   case nir_op_ige:
   case nir_op_uge:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_i2b1:
   case nir_op_f2b1:
   case nir_op_inot:
      return true;
   default:
      return false;
   }
}
1523
1524bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
1525                                    nir_alu_type full_type);
1526
1527bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
1528                        unsigned src1, unsigned src2);
1529
1530bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
1531                                 const nir_alu_instr *alu2,
1532                                 unsigned src1, unsigned src2);
1533
1534bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
1535
/** Kinds of dereference instruction (see nir_deref_instr::deref_type). */
typedef enum {
   nir_deref_type_var,            /* root deref of a nir_variable */
   nir_deref_type_array,          /* array element of the parent deref */
   nir_deref_type_array_wildcard, /* unspecified element of the parent array */
   nir_deref_type_ptr_as_array,   /* array-style offset applied to a pointer */
   nir_deref_type_struct,         /* struct member of the parent deref */
   nir_deref_type_cast,           /* cast of the parent pointer */
} nir_deref_type;
1544
typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** Bitmask what modes the underlying variable might be
    *
    * For OpenCL-style generic pointers, we may not know exactly what mode it
    * is at any given point in time in the compile process.  This bitfield
    * contains the set of modes which it MAY be.
    *
    * Generally, this field should not be accessed directly.  Use one of the
    * nir_deref_mode_ helpers instead.
    */
   nir_variable_mode modes;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      /* For array and ptr_as_array derefs */
      struct {
         /* Element index */
         nir_src index;
      } arr;

      /* For struct derefs */
      struct {
         /* Index of the member within the parent struct type */
         unsigned index;
      } strct;

      /* For cast derefs */
      struct {
         /* Stride used by ptr_as_array children of this cast
          * (NOTE(review): presumably bytes — confirm against users)
          */
         unsigned ptr_stride;
         /* Alignment guarantee of the cast pointer, mul/offset form */
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_dest dest;
} nir_deref_instr;
1593
1594/** Returns true if deref might have one of the given modes
1595 *
1596 * For multi-mode derefs, this returns true if any of the possible modes for
1597 * the deref to have any of the specified modes.  This function returning true
1598 * does NOT mean that the deref definitely has one of those modes.  It simply
1599 * means that, with the best information we have at the time, it might.
1600 */
1601static inline bool
1602nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
1603{
1604   assert(!(modes & ~nir_var_all));
1605   assert(deref->modes != 0);
1606   return deref->modes & modes;
1607}
1608
1609/** Returns true if deref must have one of the given modes
1610 *
1611 * For multi-mode derefs, this returns true if NIR can prove that the given
1612 * deref has one of the specified modes.  This function returning false does
1613 * NOT mean that deref doesn't have one of the given mode.  It very well may
1614 * have one of those modes, we just don't have enough information to prove
1615 * that it does for sure.
1616 */
1617static inline bool
1618nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
1619{
1620   assert(!(modes & ~nir_var_all));
1621   assert(deref->modes != 0);
1622   return !(deref->modes & ~modes);
1623}
1624
/** Returns true if deref has the given mode
 *
 * This returns true if the deref has exactly the mode specified.  If the
 * deref may have that mode but may also have a different mode (i.e. modes has
 * multiple bits set), this will assert-fail.
 *
 * If you're confused about which nir_deref_mode_ helper to use, use this one
 * or nir_deref_mode_is_one_of below.
 */
static inline bool
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
{
   /* `mode` must be a single, valid variable mode. */
   assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
   assert(deref->modes != 0);

   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, mode)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(deref->modes == mode);
   }

   /* Exact match; overlapping multi-mode derefs were caught (in debug
    * builds) by the asserts above.
    */
   return deref->modes == mode;
}
1650
/** Returns true if deref has one of the given modes
 *
 * This returns true if the deref has exactly one possible mode and that mode
 * is one of the modes specified.  If the deref may have one of those modes
 * but may also have a different mode (i.e. modes has multiple bits set), this
 * will assert-fail.
 */
static inline bool
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, modes)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(nir_deref_mode_must_be(deref, modes));
   }

   /* Given the asserts above, may_be implies the deref's single mode is in
    * the set.
    */
   return nir_deref_mode_may_be(deref, modes);
}
1671
/** Returns true if deref's possible modes lie in the given set of modes
 *
 * This returns true if the deref's modes lie in the given set of modes.  If
 * the deref's modes overlap with the specified modes but aren't entirely
 * contained in the specified set of modes, this will assert-fail.  In
 * particular, if this is used in a generic pointers scenario, the specified
 * modes has to contain all or none of the possible generic pointer modes.
 *
 * This is intended mostly for mass-lowering of derefs which might have
 * generic pointers.
 */
static inline bool
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* Any overlap must be full containment (all-or-nothing). */
   if (nir_deref_mode_may_be(deref, modes))
      assert(nir_deref_mode_must_be(deref, modes));

   return nir_deref_mode_may_be(deref, modes);
}
1691
1692static inline nir_deref_instr *nir_src_as_deref(nir_src src);
1693
1694static inline nir_deref_instr *
1695nir_deref_instr_parent(const nir_deref_instr *instr)
1696{
1697   if (instr->deref_type == nir_deref_type_var)
1698      return NULL;
1699   else
1700      return nir_src_as_deref(instr->parent);
1701}
1702
1703static inline nir_variable *
1704nir_deref_instr_get_variable(const nir_deref_instr *instr)
1705{
1706   while (instr->deref_type != nir_deref_type_var) {
1707      if (instr->deref_type == nir_deref_type_cast)
1708         return NULL;
1709
1710      instr = nir_deref_instr_parent(instr);
1711   }
1712
1713   return instr->var;
1714}
1715
1716bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
1717bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
1718bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);
1719
1720bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
1721
1722unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
1723
/** A function-call instruction. */
typedef struct {
   nir_instr instr;

   /** Function being called */
   struct nir_function *callee;

   /** Number of entries in params[] */
   unsigned num_params;
   nir_src params[];
} nir_call_instr;
1732
1733#include "nir_intrinsics.h"
1734
1735#define NIR_INTRINSIC_MAX_CONST_INDEX 5
1736
/** Represents an intrinsic
 *
 * An intrinsic is an instruction type for handling things that are
 * more-or-less regular operations but don't just consume and produce SSA
 * values like ALU operations do.  Intrinsics are not for things that have
 * special semantic meaning such as phi nodes and parallel copies.
 * Examples of intrinsics include variable load/store operations, system
 * value loads, and the like.  Even though texturing more-or-less falls
 * under this category, texturing is its own instruction type because
 * trying to represent texturing with intrinsics would lead to a
 * combinatorial explosion of intrinsic opcodes.
 *
 * By having a single instruction type for handling a lot of different
 * cases, optimization passes can look for intrinsics and, for the most
 * part, completely ignore them.  Each intrinsic type also has a few
 * possible flags that govern whether or not they can be reordered or
 * eliminated.  That way passes like dead code elimination can still work
 * on intrinsics without understanding the meaning of each.
 *
 * Each intrinsic has some number of constant indices, some number of
 * variables, and some number of sources.  What these sources, variables,
 * and indices mean depends on the intrinsic and is documented with the
 * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
 * instructions are the only types of instruction that can operate on
 * variables.
 */
typedef struct {
   nir_instr instr;

   /** Which intrinsic this is */
   nir_intrinsic_op intrinsic;

   /** Destination, if nir_intrinsic_infos[intrinsic].has_dest */
   nir_dest dest;

   /** number of components if this is a vectorized intrinsic
    *
    * Similarly to ALU operations, some intrinsics are vectorized.
    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
    * For vectorized intrinsics, the num_components field specifies the
    * number of destination components and the number of source components
    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
    */
   uint8_t num_components;

   /** Constant indices; meaning per slot is given by the intrinsic's
    * index_map in nir_intrinsic_infos.
    */
   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];

   /** Sources; the count is nir_intrinsic_infos[intrinsic].num_srcs */
   nir_src src[];
} nir_intrinsic_instr;
1784
1785static inline nir_variable *
1786nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
1787{
1788   return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
1789}
1790
/** Memory ordering/visibility semantics for barriers and atomics (bitmask). */
typedef enum {
   /* Memory ordering. */
   NIR_MEMORY_ACQUIRE        = 1 << 0,
   NIR_MEMORY_RELEASE        = 1 << 1,
   NIR_MEMORY_ACQ_REL        = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,

   /* Memory visibility operations. */
   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
   NIR_MEMORY_MAKE_VISIBLE   = 1 << 3,
} nir_memory_semantics;
1801
/** Execution/memory scope for synchronization operations. */
typedef enum {
   NIR_SCOPE_NONE,
   NIR_SCOPE_INVOCATION,
   NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_SHADER_CALL,
   NIR_SCOPE_WORKGROUP,
   NIR_SCOPE_QUEUE_FAMILY,
   NIR_SCOPE_DEVICE,
} nir_scope;
1811
1812/**
1813 * \name NIR intrinsics semantic flags
1814 *
1815 * information about what the compiler can do with the intrinsics.
1816 *
1817 * \sa nir_intrinsic_info::flags
1818 */
1819typedef enum {
1820   /**
1821    * whether the intrinsic can be safely eliminated if none of its output
1822    * value is not being used.
1823    */
1824   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
1825
1826   /**
1827    * Whether the intrinsic can be reordered with respect to any other
1828    * intrinsic, i.e. whether the only reordering dependencies of the
1829    * intrinsic are due to the register reads/writes.
1830    */
1831   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
1832} nir_intrinsic_semantic_flag;
1833
1834/**
1835 * Maximum valid value for a nir align_mul value (in intrinsics or derefs).
1836 *
1837 * Offsets can be signed, so this is the largest power of two in int32_t.
1838 */
1839#define NIR_ALIGN_MUL_MAX 0x40000000
1840
/** Semantic information describing a shader input/output slot. */
typedef struct nir_io_semantics {
   unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
   unsigned num_slots:6;  /* max 32, may be pessimistic with const indexing */
   unsigned dual_source_blend_index:1;
   unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
   unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
   unsigned medium_precision:1; /* GLSL mediump qualifier */
   unsigned per_view:1;
   unsigned high_16bits:1; /* whether accessing low or high half of the slot */
   unsigned _pad:6;
} nir_io_semantics;
1852
#define NIR_INTRINSIC_MAX_INPUTS 11

/** Static metadata describing one intrinsic opcode. */
typedef struct {
   /** Name of the intrinsic */
   const char *name;

   uint8_t num_srcs; /** < number of register/SSA inputs */

   /** number of components of each input register
    *
    * If this value is 0, the number of components is given by the
    * num_components field of nir_intrinsic_instr.  If this value is -1, the
    * intrinsic consumes however many components are provided and it is not
    * validated at all.
    */
   int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];

   /** Whether the intrinsic writes a destination */
   bool has_dest;

   /** number of components of the output register
    *
    * If this value is 0, the number of components is given by the
    * num_components field of nir_intrinsic_instr.
    */
   uint8_t dest_components;

   /** bitfield of legal bit sizes */
   uint8_t dest_bit_sizes;

   /** source which the destination bit size must match
    *
    * Some intrinsics, such as subgroup intrinsics, are data manipulation
    * intrinsics and they have similar bit-size rules to ALU ops. This enables
    * validation to validate a bit more and enables auto-generated builder code
    * to properly determine destination bit sizes automatically.
    */
   int8_t bit_size_src;

   /** the number of constant indices used by the intrinsic */
   uint8_t num_indices;

   /** list of indices */
   uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];

   /** indicates the usage of intr->const_index[n] */
   uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];

   /** semantic flags for calls to this intrinsic */
   nir_intrinsic_semantic_flag flags;
} nir_intrinsic_info;
1902
1903extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
1904
1905static inline unsigned
1906nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
1907{
1908   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1909   assert(srcn < info->num_srcs);
1910   if (info->src_components[srcn] > 0)
1911      return info->src_components[srcn];
1912   else if (info->src_components[srcn] == 0)
1913      return intr->num_components;
1914   else
1915      return nir_src_num_components(intr->src[srcn]);
1916}
1917
1918static inline unsigned
1919nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
1920{
1921   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1922   if (!info->has_dest)
1923      return 0;
1924   else if (info->dest_components)
1925      return info->dest_components;
1926   else
1927      return intr->num_components;
1928}
1929
1930/**
1931 * Helper to copy const_index[] from src to dst, without assuming they
1932 * match in order.
1933 */
1934static inline void
1935nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
1936{
1937   if (src->intrinsic == dst->intrinsic) {
1938      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
1939      return;
1940   }
1941
1942   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
1943   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
1944
1945   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
1946      if (src_info->index_map[i] == 0)
1947         continue;
1948
1949      /* require that dst instruction also uses the same const_index[]: */
1950      assert(dst_info->index_map[i] > 0);
1951
1952      dst->const_index[dst_info->index_map[i] - 1] =
1953            src->const_index[src_info->index_map[i] - 1];
1954   }
1955}
1956
1957#include "nir_intrinsics_indices.h"
1958
1959static inline void
1960nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
1961                        unsigned align_mul, unsigned align_offset)
1962{
1963   assert(util_is_power_of_two_nonzero(align_mul));
1964   assert(align_offset < align_mul);
1965   nir_intrinsic_set_align_mul(intrin, align_mul);
1966   nir_intrinsic_set_align_offset(intrin, align_offset);
1967}
1968
1969/** Returns a simple alignment for a load/store intrinsic offset
1970 *
1971 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
1972 * and ALIGN_OFFSET parameters, this helper takes both into account and
1973 * provides a single simple alignment parameter.  The offset X is guaranteed
1974 * to satisfy X % align == 0.
1975 */
1976static inline unsigned
1977nir_intrinsic_align(const nir_intrinsic_instr *intrin)
1978{
1979   const unsigned align_mul = nir_intrinsic_align_mul(intrin);
1980   const unsigned align_offset = nir_intrinsic_align_offset(intrin);
1981   assert(align_offset < align_mul);
1982   return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
1983}
1984
1985static inline bool
1986nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
1987{
1988   return nir_intrinsic_has_align_mul(intrin) &&
1989          nir_intrinsic_has_align_offset(intrin);
1990}
1991
1992unsigned
1993nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
1994
1995/* Converts a image_deref_* intrinsic into a image_* one */
1996void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
1997                                 nir_ssa_def *handle, bool bindless);
1998
1999/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
2000static inline bool
2001nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2002{
2003   if (instr->intrinsic == nir_intrinsic_load_deref) {
2004      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2005      return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) ||
2006             (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER);
2007   } else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
2008              instr->intrinsic == nir_intrinsic_bindless_image_load ||
2009              instr->intrinsic == nir_intrinsic_image_deref_load ||
2010              instr->intrinsic == nir_intrinsic_image_load) {
2011      return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
2012   } else {
2013      const nir_intrinsic_info *info =
2014         &nir_intrinsic_infos[instr->intrinsic];
2015      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2016             (info->flags & NIR_INTRINSIC_CAN_REORDER);
2017   }
2018}
2019
2020bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);
2021
2022/** Texture instruction source type */
typedef enum {
   /** Texture coordinate
    *
    * Must have nir_tex_instr::coord_components components.
    */
   nir_tex_src_coord,

   /** Projector
    *
    * The texture coordinate (except for the array component, if any) is
    * divided by this value before LOD computation and sampling.
    *
    * Must be a float scalar.
    */
   nir_tex_src_projector,

   /** Shadow comparator
    *
    * For shadow sampling, the fetched texel values are compared against the
    * shadow comparator using the compare op specified by the sampler object
    * and converted to 1.0 if the comparison succeeds and 0.0 if it fails.
    * Interpolation happens after this conversion so the actual result may be
    * anywhere in the range [0.0, 1.0].
    *
    * Only valid if nir_tex_instr::is_shadow and must be a float scalar.
    */
   nir_tex_src_comparator,

   /** Coordinate offset
    *
    * An integer value that is added to the texel address before sampling.
    * This is only allowed with operations that take an explicit LOD as it is
    * applied in integer texel space after LOD selection and not normalized
    * coordinate space.
    */
   nir_tex_src_offset,

   /** LOD bias
    *
    * This value is added to the computed LOD before mip-mapping.
    */
   nir_tex_src_bias,

   /** Explicit LOD */
   nir_tex_src_lod,

   /** Min LOD
    *
    * The computed LOD is clamped to be at least as large as min_lod before
    * mip-mapping.
    */
   nir_tex_src_min_lod,

   /** MSAA sample index */
   nir_tex_src_ms_index,

   /** Intel-specific MSAA compression data */
   nir_tex_src_ms_mcs_intel,

   /** Explicit horizontal (X-major) coordinate derivative */
   nir_tex_src_ddx,

   /** Explicit vertical (Y-major) coordinate derivative */
   nir_tex_src_ddy,

   /** Texture variable dereference */
   nir_tex_src_texture_deref,

   /** Sampler variable dereference */
   nir_tex_src_sampler_deref,

   /** Dynamically uniform texture index offset
    *
    * This is added to nir_tex_instr::texture_index.  Unless
    * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be
    * dynamically uniform.
    */
   nir_tex_src_texture_offset,

   /** Dynamically uniform sampler index offset
    *
    * This is added to nir_tex_instr::sampler_index.  Unless
    * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be
    * dynamically uniform.
    */
   nir_tex_src_sampler_offset,

   /** Bindless texture handle
    *
    * This is, unfortunately, a bit overloaded at the moment.  There are
    * generally two types of bindless handles:
    *
    *  1. For GL_ARB_bindless bindless handles. These are part of the
    *     GL/Gallium-level API and are always a 64-bit integer.
    *
    *  2. HW-specific handles.  GL_ARB_bindless handles may be lowered to
    *     these.  Also, these are used by many Vulkan drivers to implement
    *     descriptor sets, especially for UPDATE_AFTER_BIND descriptors.
    *     The details of hardware handles (bit size, format, etc.) are
    *     HW-specific.
    *
    * Because of this overloading and the resulting ambiguity, we currently
    * don't validate anything for these.
    */
   nir_tex_src_texture_handle,

   /** Bindless sampler handle
    *
    * See nir_tex_src_texture_handle.
    */
   nir_tex_src_sampler_handle,

   /** Plane index for multi-plane YCbCr textures */
   nir_tex_src_plane,

   /**
    * Backend-specific vec4 tex src argument.
    *
    * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs)
    * apply to the packing of the tex srcs.  This lowering must only happen
    * after nir_lower_tex().
    *
    * The nir_tex_instr_src_type() of this argument is float, so no lowering
    * will happen if nir_lower_int_to_float is used.
    */
   nir_tex_src_backend1,

   /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
   nir_tex_src_backend2,

   /** Number of source types; not a valid source type itself */
   nir_num_tex_src_types
} nir_tex_src_type;
2155
2156/** A texture instruction source */
typedef struct {
   /** Base source */
   nir_src src;

   /** Type of this source (see nir_tex_src_type) */
   nir_tex_src_type src_type;
} nir_tex_src;
2164
2165/** Texture instruction opcode */
typedef enum {
   nir_texop_tex,                /**< Regular texture look-up */
   nir_texop_txb,                /**< Texture look-up with LOD bias */
   nir_texop_txl,                /**< Texture look-up with explicit LOD */
   nir_texop_txd,                /**< Texture look-up with partial derivatives */
   nir_texop_txf,                /**< Texel fetch with explicit LOD */
   nir_texop_txf_ms,             /**< Multisample texture fetch */
   nir_texop_txf_ms_fb,          /**< Multisample texture fetch from framebuffer */
   nir_texop_txf_ms_mcs_intel,   /**< Multisample compression value fetch (Intel-specific) */
   nir_texop_txs,                /**< Texture size */
   nir_texop_lod,                /**< Texture lod query */
   nir_texop_tg4,                /**< Texture gather */
   nir_texop_query_levels,       /**< Texture levels query */
   nir_texop_texture_samples,    /**< Texture samples query */
   nir_texop_samples_identical,  /**< Query whether all samples are definitely
                                  * identical.
                                  */
   nir_texop_tex_prefetch,       /**< Regular texture look-up, eligible for pre-dispatch */
   nir_texop_fragment_fetch_amd,      /**< Multisample fragment color texture fetch */
   nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */
} nir_texop;
2187
2188/** Represents a texture instruction */
typedef struct {
   /** Base instruction */
   nir_instr instr;

   /** Dimensionality of the texture operation
    *
    * This will typically match the dimensionality of the texture deref type
    * if a nir_tex_src_texture_deref is present.  However, it may not if
    * texture lowering has occurred.
    */
   enum glsl_sampler_dim sampler_dim;

   /** ALU type of the destination
    *
    * This is the canonical sampled type for this texture operation and may
    * not exactly match the sampled type of the deref type when a
    * nir_tex_src_texture_deref is present.  For OpenCL, the sampled type of
    * the texture deref will be GLSL_TYPE_VOID and this is allowed to be
    * anything.  With SPIR-V, the signedness of integer types is allowed to
    * differ.  For all APIs, the bit size may differ if the driver has done
    * any sort of mediump or similar lowering since texture types always have
    * 32-bit sampled types.
    */
   nir_alu_type dest_type;

   /** Texture opcode */
   nir_texop op;

   /** Destination */
   nir_dest dest;

   /** Array of sources
    *
    * This array has nir_tex_instr::num_srcs elements
    */
   nir_tex_src *src;

   /** Number of sources */
   unsigned num_srcs;

   /** Number of components in the coordinate, if any */
   unsigned coord_components;

   /** True if the texture instruction acts on an array texture */
   bool is_array;

   /** True if the texture instruction performs a shadow comparison
    *
    * If this is true, the texture instruction must have a
    * nir_tex_src_comparator.
    */
   bool is_shadow;

   /**
    * If is_shadow is true, whether this is the old-style shadow that outputs
    * 4 components or the new-style shadow that outputs 1 component.
    */
   bool is_new_style_shadow;

   /**
    * True if this texture instruction should return a sparse residency code.
    * The code is in the last component of the result.
    */
   bool is_sparse;

   /** nir_texop_tg4 component selector
    *
    * This determines which RGBA component is gathered.
    */
   unsigned component : 2;

   /** Validation needs to know this for gradient component count */
   unsigned array_is_lowered_cube : 1;

   /** Gather offsets: four (x, y) texel offset pairs for nir_texop_tg4 */
   int8_t tg4_offsets[4][2];

   /** True if the texture index or handle is not dynamically uniform */
   bool texture_non_uniform;

   /** True if the sampler index or handle is not dynamically uniform */
   bool sampler_non_uniform;

   /** The texture index
    *
    * If this texture instruction has a nir_tex_src_texture_offset source,
    * then the texture index is given by texture_index + texture_offset.
    */
   unsigned texture_index;

   /** The sampler index
    *
    * The following operations do not require a sampler and, as such, this
    * field should be ignored:
    *    - nir_texop_txf
    *    - nir_texop_txf_ms
    *    - nir_texop_txs
    *    - nir_texop_query_levels
    *    - nir_texop_texture_samples
    *    - nir_texop_samples_identical
    *
    * If this texture instruction has a nir_tex_src_sampler_offset source,
    * then the sampler index is given by sampler_index + sampler_offset.
    */
   unsigned sampler_index;
} nir_tex_instr;
2295
2296/**
2297 * Returns true if the texture operation requires a sampler as a general rule
2298 *
2299 * Note that the specific hw/driver backend could require to a sampler
2300 * object/configuration packet in any case, for some other reason.
2301 *
2302 * @see nir_tex_instr::sampler_index.
2303 */
2304static inline bool
2305nir_tex_instr_need_sampler(const nir_tex_instr *instr)
2306{
2307   switch (instr->op) {
2308   case nir_texop_txf:
2309   case nir_texop_txf_ms:
2310   case nir_texop_txs:
2311   case nir_texop_query_levels:
2312   case nir_texop_texture_samples:
2313   case nir_texop_samples_identical:
2314      return false;
2315   default:
2316      return true;
2317   }
2318}
2319
2320/** Returns the number of components returned by this nir_tex_instr
2321 *
2322 * Useful for code building texture instructions when you don't want to think
2323 * about how many components a particular texture op returns.  This does not
2324 * include the sparse residency code.
2325 */
2326static inline unsigned
2327nir_tex_instr_result_size(const nir_tex_instr *instr)
2328{
2329   switch (instr->op) {
2330   case nir_texop_txs: {
2331      unsigned ret;
2332      switch (instr->sampler_dim) {
2333         case GLSL_SAMPLER_DIM_1D:
2334         case GLSL_SAMPLER_DIM_BUF:
2335            ret = 1;
2336            break;
2337         case GLSL_SAMPLER_DIM_2D:
2338         case GLSL_SAMPLER_DIM_CUBE:
2339         case GLSL_SAMPLER_DIM_MS:
2340         case GLSL_SAMPLER_DIM_RECT:
2341         case GLSL_SAMPLER_DIM_EXTERNAL:
2342         case GLSL_SAMPLER_DIM_SUBPASS:
2343            ret = 2;
2344            break;
2345         case GLSL_SAMPLER_DIM_3D:
2346            ret = 3;
2347            break;
2348         default:
2349            unreachable("not reached");
2350      }
2351      if (instr->is_array)
2352         ret++;
2353      return ret;
2354   }
2355
2356   case nir_texop_lod:
2357      return 2;
2358
2359   case nir_texop_texture_samples:
2360   case nir_texop_query_levels:
2361   case nir_texop_samples_identical:
2362   case nir_texop_fragment_mask_fetch_amd:
2363      return 1;
2364
2365   default:
2366      if (instr->is_shadow && instr->is_new_style_shadow)
2367         return 1;
2368
2369      return 4;
2370   }
2371}
2372
2373/**
2374 * Returns the destination size of this nir_tex_instr including the sparse
2375 * residency code, if any.
2376 */
2377static inline unsigned
2378nir_tex_instr_dest_size(const nir_tex_instr *instr)
2379{
2380   /* One more component is needed for the residency code. */
2381   return nir_tex_instr_result_size(instr) + instr->is_sparse;
2382}
2383
2384/**
2385 * Returns true if this texture operation queries something about the texture
2386 * rather than actually sampling it.
2387 */
2388static inline bool
2389nir_tex_instr_is_query(const nir_tex_instr *instr)
2390{
2391   switch (instr->op) {
2392   case nir_texop_txs:
2393   case nir_texop_lod:
2394   case nir_texop_texture_samples:
2395   case nir_texop_query_levels:
2396      return true;
2397   case nir_texop_tex:
2398   case nir_texop_txb:
2399   case nir_texop_txl:
2400   case nir_texop_txd:
2401   case nir_texop_txf:
2402   case nir_texop_txf_ms:
2403   case nir_texop_txf_ms_fb:
2404   case nir_texop_txf_ms_mcs_intel:
2405   case nir_texop_tg4:
2406      return false;
2407   default:
2408      unreachable("Invalid texture opcode");
2409   }
2410}
2411
2412/** Returns true if this texture instruction does implicit derivatives
2413 *
2414 * This is important as there are extra control-flow rules around derivatives
2415 * and texture instructions which perform them implicitly.
2416 */
2417static inline bool
2418nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
2419{
2420   switch (instr->op) {
2421   case nir_texop_tex:
2422   case nir_texop_txb:
2423   case nir_texop_lod:
2424      return true;
2425   default:
2426      return false;
2427   }
2428}
2429
2430/** Returns the ALU type of the given texture instruction source */
static inline nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      /* Texel-fetch opcodes address with integer texel coordinates */
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      /* These take an integer mip level rather than a float LOD */
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   /* Indices, handles, and deref pointers are unsigned */
   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}
2489
2490/**
2491 * Returns the number of components required by the given texture instruction
2492 * source
2493 */
2494static inline unsigned
2495nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
2496{
2497   if (instr->src[src].src_type == nir_tex_src_coord)
2498      return instr->coord_components;
2499
2500   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
2501   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
2502      return 4;
2503
2504   if (instr->src[src].src_type == nir_tex_src_ddx ||
2505       instr->src[src].src_type == nir_tex_src_ddy) {
2506
2507      if (instr->is_array && !instr->array_is_lowered_cube)
2508         return instr->coord_components - 1;
2509      else
2510         return instr->coord_components;
2511   }
2512
2513   /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
2514    * the offset, since a cube maps to a single face.
2515    */
2516   if (instr->src[src].src_type == nir_tex_src_offset) {
2517      if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
2518         return 2;
2519      else if (instr->is_array)
2520         return instr->coord_components - 1;
2521      else
2522         return instr->coord_components;
2523   }
2524
2525   if (instr->src[src].src_type == nir_tex_src_backend1 ||
2526       instr->src[src].src_type == nir_tex_src_backend2)
2527      return nir_src_num_components(instr->src[src].src);
2528
2529   return 1;
2530}
2531
2532/**
2533 * Returns the index of the texture instruction source with the given
2534 * nir_tex_src_type or -1 if no such source exists.
2535 */
2536static inline int
2537nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
2538{
2539   for (unsigned i = 0; i < instr->num_srcs; i++)
2540      if (instr->src[i].src_type == type)
2541         return (int) i;
2542
2543   return -1;
2544}
2545
2546/** Adds a source to a texture instruction */
2547void nir_tex_instr_add_src(nir_tex_instr *tex,
2548                           nir_tex_src_type src_type,
2549                           nir_src src);
2550
2551/** Removes a source from a texture instruction */
2552void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
2553
2554bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
2555
/** An instruction producing a compile-time constant SSA value */
typedef struct {
   nir_instr instr;

   /** SSA definition holding the constant result */
   nir_ssa_def def;

   /** One nir_const_value per component of def (flexible array member) */
   nir_const_value value[];
} nir_load_const_instr;
2563
/** Kind of control-flow transfer performed by a nir_jump_instr */
typedef enum {
   /** Return from a function
    *
    * This instruction is a classic function return.  It jumps to
    * nir_function_impl::end_block.  No return value is provided in this
    * instruction.  Instead, the function is expected to write any return
    * data to a deref passed in from the caller.
    */
   nir_jump_return,

   /** Immediately exit the current shader
    *
    * This instruction is roughly the equivalent of C's "exit()" in that it
    * immediately terminates the current shader invocation.  From a CFG
    * perspective, it looks like a jump to nir_function_impl::end_block but
    * it actually jumps to the end block of the shader entrypoint.  A halt
    * instruction in the shader entrypoint itself is semantically identical
    * to a return.
    *
    * For shaders with built-in I/O, any outputs written prior to a halt
    * instruction remain written and any outputs not written prior to the
    * halt have undefined values.  It does NOT cause an implicit discard of
    * written results.  If one wants discard results in a fragment shader,
    * for instance, a discard or demote intrinsic is required.
    */
   nir_jump_halt,

   /** Break out of the inner-most loop
    *
    * This has the same semantics as C's "break" statement.
    */
   nir_jump_break,

   /** Jump back to the top of the inner-most loop
    *
    * This has the same semantics as C's "continue" statement assuming that a
    * NIR loop is implemented as "while (1) { body }".
    */
   nir_jump_continue,

   /** Jumps for unstructured CFG.
    *
    * As within an unstructured CFG we can't rely on block ordering we need to
    * place explicit jumps at the end of every block.
    */
   nir_jump_goto,
   nir_jump_goto_if,
} nir_jump_type;
2612
/** A jump instruction (see nir_jump_type) */
typedef struct {
   nir_instr instr;
   /** Which kind of jump this is */
   nir_jump_type type;
   /** Branch condition; presumably only used by nir_jump_goto_if — confirm */
   nir_src condition;
   /** Target block — NOTE(review): looks goto/goto_if specific; verify */
   struct nir_block *target;
   /** Fall-through block when the goto_if condition fails — TODO confirm */
   struct nir_block *else_target;
} nir_jump_instr;
2620
2621/* creates a new SSA variable in an undefined state */
2622
typedef struct {
   nir_instr instr;
   /** SSA definition whose contents are undefined */
   nir_ssa_def def;
} nir_ssa_undef_instr;
2627
/** One incoming value of a phi node */
typedef struct {
   struct exec_node node;

   /* The predecessor block corresponding to this source */
   struct nir_block *pred;

   /** Value the phi takes when control enters from pred */
   nir_src src;
} nir_phi_src;
2636
/** Iterates over every nir_phi_src of a phi instruction */
#define nir_foreach_phi_src(phi_src, phi) \
   foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
/** Like nir_foreach_phi_src but safe against removal of the current entry */
#define nir_foreach_phi_src_safe(phi_src, phi) \
   foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
2641
/** A phi instruction: merges values arriving from different predecessors */
typedef struct {
   nir_instr instr;

   struct exec_list srcs; /** < list of nir_phi_src */

   /** Destination written with the source matching the entered-from block */
   nir_dest dest;
} nir_phi_instr;
2649
2650static inline nir_phi_src *
2651nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
2652{
2653   nir_foreach_phi_src(src, phi) {
2654      if (src->pred == block)
2655         return src;
2656   }
2657
2658   assert(!"Block is not a predecessor of phi.");
2659   return NULL;
2660}
2661
/** One src -> dest copy of a parallel-copy instruction */
typedef struct {
   struct exec_node node;
   nir_src src;
   nir_dest dest;
} nir_parallel_copy_entry;
2667
/** Iterates over every nir_parallel_copy_entry of a parallel copy */
#define nir_foreach_parallel_copy_entry(entry, pcopy) \
   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
2670
/** An instruction performing a set of copies "in parallel" */
typedef struct {
   nir_instr instr;

   /* A list of nir_parallel_copy_entrys.  The sources of all of the
    * entries are copied to the corresponding destinations "in parallel".
    * In other words, if we have two entries: a -> b and b -> a, the values
    * get swapped.
    */
   struct exec_list entries;
} nir_parallel_copy_instr;
2681
/* Downcast helpers from a generic nir_instr to each concrete instruction
 * struct, keyed on nir_instr::type (see NIR_DEFINE_CAST for the generated
 * signatures).
 */
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
                type, nir_instr_type_alu)
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
                type, nir_instr_type_deref)
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
                type, nir_instr_type_call)
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
                type, nir_instr_type_jump)
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
                type, nir_instr_type_tex)
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
                type, nir_instr_type_intrinsic)
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
                type, nir_instr_type_load_const)
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
                type, nir_instr_type_ssa_undef)
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
                type, nir_instr_type_phi)
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                nir_parallel_copy_instr, instr,
                type, nir_instr_type_parallel_copy)
2703
2704
/* Defines nir_src_comp_as_<suffix>() (read one component of a constant
 * nir_src as a C scalar) and nir_src_as_<suffix>() (same, but requires a
 * single-component source).  Both assert that the source is constant.
 */
#define NIR_DEFINE_SRC_AS_CONST(type, suffix)               \
static inline type                                          \
nir_src_comp_as_##suffix(nir_src src, unsigned comp)        \
{                                                           \
   assert(nir_src_is_const(src));                           \
   nir_load_const_instr *load =                             \
      nir_instr_as_load_const(src.ssa->parent_instr);       \
   assert(comp < load->def.num_components);                 \
   return nir_const_value_as_##suffix(load->value[comp],    \
                                      load->def.bit_size);  \
}                                                           \
                                                            \
static inline type                                          \
nir_src_as_##suffix(nir_src src)                            \
{                                                           \
   assert(nir_src_num_components(src) == 1);                \
   return nir_src_comp_as_##suffix(src, 0);                 \
}

NIR_DEFINE_SRC_AS_CONST(int64_t,    int)
NIR_DEFINE_SRC_AS_CONST(uint64_t,   uint)
NIR_DEFINE_SRC_AS_CONST(bool,       bool)
NIR_DEFINE_SRC_AS_CONST(double,     float)

#undef NIR_DEFINE_SRC_AS_CONST
2730
2731
/** A single component of an SSA definition */
typedef struct {
   nir_ssa_def *def;
   /** Component index into def; must be < def->num_components */
   unsigned comp;
} nir_ssa_scalar;
2736
2737static inline bool
2738nir_ssa_scalar_is_const(nir_ssa_scalar s)
2739{
2740   return s.def->parent_instr->type == nir_instr_type_load_const;
2741}
2742
2743static inline nir_const_value
2744nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
2745{
2746   assert(s.comp < s.def->num_components);
2747   nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
2748   return load->value[s.comp];
2749}
2750
/* Defines nir_ssa_scalar_as_<suffix>(): reads a constant nir_ssa_scalar as
 * a C scalar of the given type.
 */
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix)                     \
static inline type                                                   \
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s)                         \
{                                                                    \
   return nir_const_value_as_##suffix(                               \
      nir_ssa_scalar_as_const_value(s), s.def->bit_size);            \
}

NIR_DEFINE_SCALAR_AS_CONST(int64_t,    int)
NIR_DEFINE_SCALAR_AS_CONST(uint64_t,   uint)
NIR_DEFINE_SCALAR_AS_CONST(bool,       bool)
NIR_DEFINE_SCALAR_AS_CONST(double,     float)

#undef NIR_DEFINE_SCALAR_AS_CONST
2765
2766static inline bool
2767nir_ssa_scalar_is_alu(nir_ssa_scalar s)
2768{
2769   return s.def->parent_instr->type == nir_instr_type_alu;
2770}
2771
2772static inline nir_op
2773nir_ssa_scalar_alu_op(nir_ssa_scalar s)
2774{
2775   return nir_instr_as_alu(s.def->parent_instr)->op;
2776}
2777
/** Returns the scalar source feeding component s.comp of an ALU result
 *
 * s must be defined by an ALU instruction whose write mask covers s.comp.
 * The returned scalar accounts for the source swizzle.
 */
static inline nir_ssa_scalar
nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
{
   nir_ssa_scalar out = { NULL, 0 };

   nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
   assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);

   /* Our component must be written */
   assert(s.comp < s.def->num_components);
   assert(alu->dest.write_mask & (1u << s.comp));

   assert(alu->src[alu_src_idx].src.is_ssa);
   out.def = alu->src[alu_src_idx].src.ssa;

   if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
      /* The ALU src is unsized so the source component follows the
       * destination component.
       */
      out.comp = alu->src[alu_src_idx].swizzle[s.comp];
   } else {
      /* This is a sized source so all source components work together to
       * produce all the destination components.  Since we need to return a
       * scalar, this only works if the source is a scalar.
       */
      assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
      out.comp = alu->src[alu_src_idx].swizzle[0];
   }
   assert(out.comp < out.def->num_components);

   return out;
}
2810
2811nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);
2812
2813/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
2814static inline nir_ssa_scalar
2815nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
2816{
2817   nir_ssa_scalar s = { def, channel };
2818   return nir_ssa_scalar_chase_movs(s);
2819}
2820
2821
/** Result of nir_chase_binding() */
typedef struct {
   /** Whether the resource could be traced back to a binding */
   bool success;

   nir_variable *var;
   unsigned desc_set;
   unsigned binding;
   /** Number of valid entries in indices[] */
   unsigned num_indices;
   nir_src indices[4];
   /* NOTE(review): presumably set when the chased source passed through a
    * read_first_invocation — confirm against nir_chase_binding().
    */
   bool read_first_invocation;
} nir_binding;
2832
2833nir_binding nir_chase_binding(nir_src rsrc);
2834nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);
2835
2836
2837/*
2838 * Control flow
2839 *
2840 * Control flow consists of a tree of control flow nodes, which include
2841 * if-statements and loops. The leaves of the tree are basic blocks, lists of
2842 * instructions that always run start-to-finish. Each basic block also keeps
2843 * track of its successors (blocks which may run immediately after the current
2844 * block) and predecessors (blocks which could have run immediately before the
2845 * current block). Each function also has a start block and an end block which
2846 * all return statements point to (which is always empty). Together, all the
2847 * blocks with their predecessors and successors make up the control flow
2848 * graph (CFG) of the function. There are helpers that modify the tree of
2849 * control flow nodes while modifying the CFG appropriately; these should be
2850 * used instead of modifying the tree directly.
2851 */
2852
/** Discriminator for nir_cf_node::type. */
typedef enum {
   nir_cf_node_block,
   nir_cf_node_if,
   nir_cf_node_loop,
   nir_cf_node_function
} nir_cf_node_type;

/** Base "class" of every node in the control-flow tree. */
typedef struct nir_cf_node {
   struct exec_node node;
   nir_cf_node_type type;
   /* Enclosing CF node (if/loop/function impl) in the tree. */
   struct nir_cf_node *parent;
} nir_cf_node;
2865
/** A basic block: a leaf of the CF tree holding a straight-line list of
 * instructions, plus CFG (successor/predecessor), dominance, and liveness
 * data computed by analysis passes.
 */
typedef struct nir_block {
   nir_cf_node cf_node;

   struct exec_list instr_list; /** < list of nir_instr */

   /** generic block index; generated by nir_index_blocks */
   unsigned index;

   /*
    * Each block can only have up to 2 successors, so we put them in a simple
    * array - no need for anything more complicated.
    */
   struct nir_block *successors[2];

   /* Set of nir_block predecessors in the CFG */
   struct set *predecessors;

   /*
    * this node's immediate dominator in the dominance tree - set to NULL for
    * the start block.
    */
   struct nir_block *imm_dom;

   /* This node's children in the dominance tree */
   unsigned num_dom_children;
   struct nir_block **dom_children;

   /* Set of nir_blocks on the dominance frontier of this block */
   struct set *dom_frontier;

   /*
    * These two indices have the property that dom_{pre,post}_index for each
    * child of this block in the dominance tree will always be between
    * dom_pre_index and dom_post_index for this block, which makes testing if
    * a given block is dominated by another block an O(1) operation.
    */
   uint32_t dom_pre_index, dom_post_index;

   /**
    * Value just before the first nir_instr->index in the block, but after
    * the end_ip of any predecessor block.
    */
   uint32_t start_ip;
   /**
    * Value just after the last nir_instr->index in the block, but before the
    * start_ip of any successor block.
    */
   uint32_t end_ip;

   /* SSA def live in and out for this block; used for liveness analysis.
    * Indexed by ssa_def->index
    */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;
} nir_block;
2921
2922static inline bool
2923nir_block_is_reachable(nir_block *b)
2924{
2925   /* See also nir_block_dominates */
2926   return b->dom_post_index != 0;
2927}
2928
2929static inline nir_instr *
2930nir_block_first_instr(nir_block *block)
2931{
2932   struct exec_node *head = exec_list_get_head(&block->instr_list);
2933   return exec_node_data(nir_instr, head, node);
2934}
2935
2936static inline nir_instr *
2937nir_block_last_instr(nir_block *block)
2938{
2939   struct exec_node *tail = exec_list_get_tail(&block->instr_list);
2940   return exec_node_data(nir_instr, tail, node);
2941}
2942
2943static inline bool
2944nir_block_ends_in_jump(nir_block *block)
2945{
2946   return !exec_list_is_empty(&block->instr_list) &&
2947          nir_block_last_instr(block)->type == nir_instr_type_jump;
2948}
2949
2950static inline bool
2951nir_block_ends_in_return_or_halt(nir_block *block)
2952{
2953   if (exec_list_is_empty(&block->instr_list))
2954      return false;
2955
2956   nir_instr *instr = nir_block_last_instr(block);
2957   if (instr->type != nir_instr_type_jump)
2958      return false;
2959
2960   nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
2961   return jump_instr->type == nir_jump_return ||
2962          jump_instr->type == nir_jump_halt;
2963}
2964
2965static inline bool
2966nir_block_ends_in_break(nir_block *block)
2967{
2968   if (exec_list_is_empty(&block->instr_list))
2969      return false;
2970
2971   nir_instr *instr = nir_block_last_instr(block);
2972   return instr->type == nir_instr_type_jump &&
2973      nir_instr_as_jump(instr)->type == nir_jump_break;
2974}
2975
/** Iterate the instructions of @block in order. */
#define nir_foreach_instr(instr, block) \
   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
/** Iterate the instructions of @block in reverse order. */
#define nir_foreach_instr_reverse(instr, block) \
   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
/** Iteration safe against removing the current instruction. */
#define nir_foreach_instr_safe(instr, block) \
   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
/** Reverse iteration safe against removing the current instruction. */
#define nir_foreach_instr_reverse_safe(instr, block) \
   foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
2984
2985static inline nir_phi_instr *
2986nir_block_last_phi_instr(nir_block *block)
2987{
2988   nir_phi_instr *last_phi = NULL;
2989   nir_foreach_instr(instr, block) {
2990      if (instr->type == nir_instr_type_phi)
2991         last_phi = nir_instr_as_phi(instr);
2992      else
2993         return last_phi;
2994   }
2995   return last_phi;
2996}
2997
/** Hints controlling how backends/opts may flatten a selection. */
typedef enum {
   nir_selection_control_none = 0x0,
   nir_selection_control_flatten = 0x1,
   nir_selection_control_dont_flatten = 0x2,
} nir_selection_control;

/** An if-statement CF node with a condition and two child CF lists. */
typedef struct nir_if {
   nir_cf_node cf_node;
   nir_src condition;
   nir_selection_control control;

   struct exec_list then_list; /** < list of nir_cf_node */
   struct exec_list else_list; /** < list of nir_cf_node */
} nir_if;
3012
/** Describes an if-break construct that can terminate a loop. */
typedef struct {
   nir_if *nif;

   /** Instruction that generates nif::condition. */
   nir_instr *conditional_instr;

   /** Block within ::nif that has the break instruction. */
   nir_block *break_block;

   /** Last block for the then- or else-path that does not contain the break. */
   nir_block *continue_from_block;

   /** True when ::break_block is in the else-path of ::nif. */
   bool continue_from_then;
   bool induction_rhs;

   /* This is true if the terminator's exact trip count is unknown. For
    * example:
    *
    *    for (int i = 0; i < imin(x, 4); i++)
    *       ...
    *
    * Here loop analysis would have set a max_trip_count of 4 however we don't
    * know for sure that this is the exact trip count.
    */
   bool exact_trip_count_unknown;

   /* Link in nir_loop_info::loop_terminator_list. */
   struct list_head loop_terminator_link;
} nir_loop_terminator;
3042
/** An induction variable detected by loop analysis. */
typedef struct {
   /* Induction variable. */
   nir_ssa_def *def;

   /* Init statement with only uniform. */
   nir_src *init_src;

   /* Update statement with only uniform. */
   nir_alu_src *update_src;
} nir_loop_induction_variable;
3053
/** Results of loop analysis, attached to nir_loop::info. */
typedef struct {
   /* Estimated cost (in number of instructions) of the loop */
   unsigned instr_cost;

   /* Guessed trip count based on array indexing */
   unsigned guessed_trip_count;

   /* Maximum number of times the loop is run (if known) */
   unsigned max_trip_count;

   /* Do we know the exact number of times the loop will be run */
   bool exact_trip_count_known;

   /* Unroll the loop regardless of its size */
   bool force_unroll;

   /* Does the loop contain complex loop terminators, continues or other
    * complex behaviours? If this is true we can't rely on
    * loop_terminator_list to be complete or accurate.
    */
   bool complex_loop;

   /* The terminator that bounds the loop's trip count. */
   nir_loop_terminator *limiting_terminator;

   /* A list of loop_terminators terminating this loop. */
   struct list_head loop_terminator_list;

   /* array of induction variables for this loop */
   nir_loop_induction_variable *induction_vars;
   unsigned num_induction_vars;
} nir_loop_info;
3085
/** Hints controlling whether a loop should be unrolled. */
typedef enum {
   nir_loop_control_none = 0x0,
   nir_loop_control_unroll = 0x1,
   nir_loop_control_dont_unroll = 0x2,
} nir_loop_control;

/** A loop CF node; its body is a list of child CF nodes. */
typedef struct {
   nir_cf_node cf_node;

   struct exec_list body; /** < list of nir_cf_node */

   /* Filled in by loop analysis; validity tracked by
    * nir_metadata_loop_analysis. */
   nir_loop_info *info;
   nir_loop_control control;
   bool partially_unrolled;
   bool divergent;
} nir_loop;
3102
3103/**
3104 * Various bits of metadata that can may be created or required by
3105 * optimization and analysis passes
3106 */
3107typedef enum {
3108   nir_metadata_none = 0x0,
3109
3110   /** Indicates that nir_block::index values are valid.
3111    *
3112    * The start block has index 0 and they increase through a natural walk of
3113    * the CFG.  nir_function_impl::num_blocks is the number of blocks and
3114    * every block index is in the range [0, nir_function_impl::num_blocks].
3115    *
3116    * A pass can preserve this metadata type if it doesn't touch the CFG.
3117    */
3118   nir_metadata_block_index = 0x1,
3119
3120   /** Indicates that block dominance information is valid
3121    *
3122    * This includes:
3123    *
3124    *   - nir_block::num_dom_children
3125    *   - nir_block::dom_children
3126    *   - nir_block::dom_frontier
3127    *   - nir_block::dom_pre_index
3128    *   - nir_block::dom_post_index
3129    *
3130    * A pass can preserve this metadata type if it doesn't touch the CFG.
3131    */
3132   nir_metadata_dominance = 0x2,
3133
3134   /** Indicates that SSA def data-flow liveness information is valid
3135    *
3136    * This includes:
3137    *
3138    *   - nir_block::live_in
3139    *   - nir_block::live_out
3140    *
3141    * A pass can preserve this metadata type if it never adds or removes any
3142    * SSA defs or uses of SSA defs (most passes shouldn't preserve this
3143    * metadata type).
3144    */
3145   nir_metadata_live_ssa_defs = 0x4,
3146
3147   /** A dummy metadata value to track when a pass forgot to call
3148    * nir_metadata_preserve.
3149    *
3150    * A pass should always clear this value even if it doesn't make any
3151    * progress to indicate that it thought about preserving metadata.
3152    */
3153   nir_metadata_not_properly_reset = 0x8,
3154
3155   /** Indicates that loop analysis information is valid.
3156    *
3157    * This includes everything pointed to by nir_loop::info.
3158    *
3159    * A pass can preserve this metadata type if it is guaranteed to not affect
3160    * any loop metadata.  However, since loop metadata includes things like
3161    * loop counts which depend on arithmetic in the loop, this is very hard to
3162    * determine.  Most passes shouldn't preserve this metadata type.
3163    */
3164   nir_metadata_loop_analysis = 0x10,
3165
3166   /** Indicates that nir_instr::index values are valid.
3167    *
3168    * The start instruction has index 0 and they increase through a natural
3169    * walk of instructions in blocks in the CFG.  The indices my have holes
3170    * after passes such as DCE.
3171    *
3172    * A pass can preserve this metadata type if it never adds or moves any
3173    * instructions (most passes shouldn't preserve this metadata type), but
3174    * can preserve it if it only removes instructions.
3175    */
3176   nir_metadata_instr_index = 0x20,
3177
3178   /** All metadata
3179    *
3180    * This includes all nir_metadata flags except not_properly_reset.  Passes
3181    * which do not change the shader in any way should call
3182    *
3183    *    nir_metadata_preserve(impl, nir_metadata_all);
3184    */
3185   nir_metadata_all = ~nir_metadata_not_properly_reset,
3186} nir_metadata;
3187MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
3188
/** The body of a function: the root of its CF tree plus per-function state
 * (locals, registers, SSA/register index allocators, metadata validity).
 */
typedef struct {
   nir_cf_node cf_node;

   /** pointer to the function of which this is an implementation */
   struct nir_function *function;

   struct exec_list body; /** < list of nir_cf_node */

   /* Empty block that all return statements point to. */
   nir_block *end_block;

   /** list for all local variables in the function */
   struct exec_list locals;

   /** list of local registers in the function */
   struct exec_list registers;

   /** next available local register index */
   unsigned reg_alloc;

   /** next available SSA value index */
   unsigned ssa_alloc;

   /* total number of basic blocks, only valid when block_index_dirty = false */
   unsigned num_blocks;

   /** True if this nir_function_impl uses structured control-flow
    *
    * Structured nir_function_impls have different validation rules.
    */
   bool structured;

   /* Which analyses are currently up to date; see nir_metadata. */
   nir_metadata valid_metadata;
} nir_function_impl;
3222
/** Iterate the local (function-temp) variables of @impl. */
#define nir_foreach_function_temp_variable(var, impl) \
   foreach_list_typed(nir_variable, var, node, &(impl)->locals)

/** Iteration safe against removing the current variable. */
#define nir_foreach_function_temp_variable_safe(var, impl) \
   foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
3228
3229ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3230nir_start_block(nir_function_impl *impl)
3231{
3232   return (nir_block *) impl->body.head_sentinel.next;
3233}
3234
3235ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3236nir_impl_last_block(nir_function_impl *impl)
3237{
3238   return (nir_block *) impl->body.tail_sentinel.prev;
3239}
3240
3241static inline nir_cf_node *
3242nir_cf_node_next(nir_cf_node *node)
3243{
3244   struct exec_node *next = exec_node_get_next(&node->node);
3245   if (exec_node_is_tail_sentinel(next))
3246      return NULL;
3247   else
3248      return exec_node_data(nir_cf_node, next, node);
3249}
3250
3251static inline nir_cf_node *
3252nir_cf_node_prev(nir_cf_node *node)
3253{
3254   struct exec_node *prev = exec_node_get_prev(&node->node);
3255   if (exec_node_is_head_sentinel(prev))
3256      return NULL;
3257   else
3258      return exec_node_data(nir_cf_node, prev, node);
3259}
3260
3261static inline bool
3262nir_cf_node_is_first(const nir_cf_node *node)
3263{
3264   return exec_node_is_head_sentinel(node->node.prev);
3265}
3266
3267static inline bool
3268nir_cf_node_is_last(const nir_cf_node *node)
3269{
3270   return exec_node_is_tail_sentinel(node->node.next);
3271}
3272
/* Checked downcasts from the generic nir_cf_node to the concrete CF node
 * types, discriminated by nir_cf_node::type.
 */
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
                type, nir_cf_node_block)
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
                type, nir_cf_node_if)
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
                type, nir_cf_node_loop)
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
                nir_function_impl, cf_node, type, nir_cf_node_function)
3281
3282static inline nir_block *
3283nir_if_first_then_block(nir_if *if_stmt)
3284{
3285   struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
3286   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3287}
3288
3289static inline nir_block *
3290nir_if_last_then_block(nir_if *if_stmt)
3291{
3292   struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
3293   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3294}
3295
3296static inline nir_block *
3297nir_if_first_else_block(nir_if *if_stmt)
3298{
3299   struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
3300   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3301}
3302
3303static inline nir_block *
3304nir_if_last_else_block(nir_if *if_stmt)
3305{
3306   struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
3307   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3308}
3309
3310static inline nir_block *
3311nir_loop_first_block(nir_loop *loop)
3312{
3313   struct exec_node *head = exec_list_get_head(&loop->body);
3314   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3315}
3316
3317static inline nir_block *
3318nir_loop_last_block(nir_loop *loop)
3319{
3320   struct exec_node *tail = exec_list_get_tail(&loop->body);
3321   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3322}
3323
3324/**
3325 * Return true if this list of cf_nodes contains a single empty block.
3326 */
3327static inline bool
3328nir_cf_list_is_empty_block(struct exec_list *cf_list)
3329{
3330   if (exec_list_is_singular(cf_list)) {
3331      struct exec_node *head = exec_list_get_head(cf_list);
3332      nir_block *block =
3333         nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3334      return exec_list_is_empty(&block->instr_list);
3335   }
3336   return false;
3337}
3338
/** Shape of a single function parameter (see nir_function::params). */
typedef struct {
   uint8_t num_components;
   uint8_t bit_size;
} nir_parameter;

/** Format strings and argument sizes collected for printf lowering. */
typedef struct nir_printf_info {
   unsigned num_args;
   /* Per-argument sizes; array of length num_args. */
   unsigned *arg_sizes;
   /* Total size of the packed strings buffer, in bytes. */
   unsigned string_size;
   char *strings;
} nir_printf_info;
3350
/** A function declaration, optionally with an implementation. */
typedef struct nir_function {
   struct exec_node node;

   const char *name;
   struct nir_shader *shader;

   unsigned num_params;
   nir_parameter *params;

   /** The implementation of this function.
    *
    * If the function is only declared and not implemented, this is NULL.
    */
   nir_function_impl *impl;

   bool is_entrypoint;
} nir_function;
3368
/** Bitmask selecting which 64-bit integer ops should be lowered. */
typedef enum {
   nir_lower_imul64 = (1 << 0),
   nir_lower_isign64 = (1 << 1),
   /** Lower all int64 modulus and division opcodes */
   nir_lower_divmod64 = (1 << 2),
   /** Lower all 64-bit umul_high and imul_high opcodes */
   nir_lower_imul_high64 = (1 << 3),
   nir_lower_mov64 = (1 << 4),
   nir_lower_icmp64 = (1 << 5),
   nir_lower_iadd64 = (1 << 6),
   nir_lower_iabs64 = (1 << 7),
   nir_lower_ineg64 = (1 << 8),
   nir_lower_logic64 = (1 << 9),
   nir_lower_minmax64 = (1 << 10),
   nir_lower_shift64 = (1 << 11),
   nir_lower_imul_2x32_64 = (1 << 12),
   nir_lower_extract64 = (1 << 13),
   nir_lower_ufind_msb64 = (1 << 14),
   nir_lower_bit_count64 = (1 << 15),
   nir_lower_subgroup_shuffle64 = (1 << 16),
   nir_lower_scan_reduce_bitwise64 = (1 << 17),
   nir_lower_scan_reduce_iadd64 = (1 << 18),
   nir_lower_vote_ieq64 = (1 << 19),
} nir_lower_int64_options;

/** Bitmask selecting which double-precision ops should be lowered. */
typedef enum {
   nir_lower_drcp = (1 << 0),
   nir_lower_dsqrt = (1 << 1),
   nir_lower_drsq = (1 << 2),
   nir_lower_dtrunc = (1 << 3),
   nir_lower_dfloor = (1 << 4),
   nir_lower_dceil = (1 << 5),
   nir_lower_dfract = (1 << 6),
   nir_lower_dround_even = (1 << 7),
   nir_lower_dmod = (1 << 8),
   nir_lower_dsub = (1 << 9),
   nir_lower_ddiv = (1 << 10),
   nir_lower_fp64_full_software = (1 << 11),
} nir_lower_doubles_options;

/** Hardware/driver facts that divergence analysis may rely on. */
typedef enum {
   nir_divergence_single_prim_per_subgroup = (1 << 0),
   nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
   nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
   nir_divergence_view_index_uniform = (1 << 3),
   nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
   nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
} nir_divergence_options;

/** Which interpolation modes/locations may share a packed varying slot. */
typedef enum {
   nir_pack_varying_interp_mode_none          = (1 << 0),
   nir_pack_varying_interp_mode_smooth        = (1 << 1),
   nir_pack_varying_interp_mode_flat          = (1 << 2),
   nir_pack_varying_interp_mode_noperspective = (1 << 3),
   nir_pack_varying_interp_loc_sample         = (1 << 16),
   nir_pack_varying_interp_loc_centroid       = (1 << 17),
   nir_pack_varying_interp_loc_center         = (1 << 18),
} nir_pack_varying_options;

/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
 */
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
3433
/** Driver-supplied knobs describing which ops to lower and which hardware
 * features exist; consulted by lowering and optimization passes.
 */
typedef struct nir_shader_compiler_options {
   bool lower_fdiv;
   bool lower_ffma16;
   bool lower_ffma32;
   bool lower_ffma64;
   bool fuse_ffma16;
   bool fuse_ffma32;
   bool fuse_ffma64;
   bool lower_flrp16;
   bool lower_flrp32;
   /** Lowers flrp when it does not support doubles */
   bool lower_flrp64;
   bool lower_fpow;
   bool lower_fsat;
   bool lower_fsqrt;
   bool lower_sincos;
   bool lower_fmod;
   /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
   bool lower_bitfield_extract;
   /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
   bool lower_bitfield_extract_to_shifts;
   /** Lowers bitfield_insert to bfi/bfm */
   bool lower_bitfield_insert;
   /** Lowers bitfield_insert to compares, and shifts. */
   bool lower_bitfield_insert_to_shifts;
   /** Lowers bitfield_insert to bfm/bitfield_select. */
   bool lower_bitfield_insert_to_bitfield_select;
   /** Lowers bitfield_reverse to shifts. */
   bool lower_bitfield_reverse;
   /** Lowers bit_count to shifts. */
   bool lower_bit_count;
   /** Lowers ifind_msb to compare and ufind_msb */
   bool lower_ifind_msb;
   /** Lowers ifind_msb and ufind_msb to reverse variants */
   bool lower_find_msb_to_reverse;
   /** Lowers find_lsb to ufind_msb and logic ops */
   bool lower_find_lsb;
   bool lower_uadd_carry;
   bool lower_usub_borrow;
   /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
   bool lower_mul_high;
   /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
   bool lower_fneg;
   /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
   bool lower_ineg;
   /** lowers fisnormal to alu ops. */
   bool lower_fisnormal;

   /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
   bool lower_scmp;

   /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
   bool lower_vector_cmp;

   /** enable rules to avoid bit ops */
   bool lower_bitops;

   /** enables rules to lower isign to imin+imax */
   bool lower_isign;

   /** enables rules to lower fsign to fsub and flt */
   bool lower_fsign;

   /** enables rules to lower iabs to ineg+imax */
   bool lower_iabs;

   /** enable rules that avoid generating umax from signed integer ops */
   bool lower_umax;

   /** enable rules that avoid generating umin from signed integer ops */
   bool lower_umin;

   /* lower fdph to fdot4 */
   bool lower_fdph;

   /** lower fdot to fmul and fsum/fadd. */
   bool lower_fdot;

   /* Does the native fdot instruction replicate its result for four
    * components?  If so, then opt_algebraic_late will turn all fdotN
    * instructions into fdotN_replicated instructions.
    */
   bool fdot_replicates;

   /** lowers ffloor to fsub+ffract: */
   bool lower_ffloor;

   /** lowers ffract to fsub+ffloor: */
   bool lower_ffract;

   /** lowers fceil to fneg+ffloor+fneg: */
   bool lower_fceil;

   bool lower_ftrunc;

   bool lower_ldexp;

   bool lower_pack_half_2x16;
   bool lower_pack_unorm_2x16;
   bool lower_pack_snorm_2x16;
   bool lower_pack_unorm_4x8;
   bool lower_pack_snorm_4x8;
   bool lower_pack_64_2x32;
   bool lower_pack_64_4x16;
   bool lower_pack_32_2x16;
   bool lower_pack_64_2x32_split;
   bool lower_pack_32_2x16_split;
   bool lower_unpack_half_2x16;
   bool lower_unpack_unorm_2x16;
   bool lower_unpack_snorm_2x16;
   bool lower_unpack_unorm_4x8;
   bool lower_unpack_snorm_4x8;
   bool lower_unpack_64_2x32_split;
   bool lower_unpack_32_2x16_split;

   bool lower_pack_split;

   bool lower_extract_byte;
   bool lower_extract_word;
   bool lower_insert_byte;
   bool lower_insert_word;

   bool lower_all_io_to_temps;
   bool lower_all_io_to_elements;

   /* Indicates that the driver only has zero-based vertex id */
   bool vertex_id_zero_based;

   /**
    * If enabled, gl_BaseVertex will be lowered as:
    * is_indexed_draw (~0/0) & firstvertex
    */
   bool lower_base_vertex;

   /**
    * If enabled, gl_HelperInvocation will be lowered as:
    *
    *   !((1 << sample_id) & sample_mask_in))
    *
    * This depends on some possibly hw implementation details, which may
    * not be true for all hw.  In particular that the FS is only executed
    * for covered samples or for helper invocations.  So, do not blindly
    * enable this option.
    *
    * Note: See also issue #22 in ARB_shader_image_load_store
    */
   bool lower_helper_invocation;

   /**
    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
    *
    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
    */
   bool optimize_sample_mask_in;

   bool lower_cs_local_index_from_id;
   bool lower_cs_local_id_from_index;

   /* Prevents lowering global_invocation_id to be in terms of workgroup_id */
   bool has_cs_global_id;

   bool lower_device_index_to_zero;

   /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
    * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
    * is GL_LOWER_LEFT.
    */
   bool lower_wpos_pntc;

   /**
    * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
    * lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    *
    * \sa ::lower_hadd64
    */
   bool lower_hadd;

   /**
    * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
    * should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to only 64-bit
    * versions of these instructions.
    *
    * \sa ::lower_hadd
    */
   bool lower_hadd64;

   /**
    * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
    * arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    *
    * \sa ::lower_usub_sat64
    */
   bool lower_uadd_sat;

   /**
    * Set if only 64-bit nir_op_usub_sat should be lowered to simple
    * arithmetic.
    *
    * \sa ::lower_uadd_sat
    */
   bool lower_usub_sat64;

   /**
    * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
    * arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_iadd_sat;

   /**
    * Should IO be re-vectorized?  Some scalar ISAs still operate on vec4's
    * for IO purposes and would prefer loads/stores be vectorized.
    */
   bool vectorize_io;
   bool lower_to_scalar;
   nir_instr_filter_cb lower_to_scalar_filter;

   /**
    * Whether nir_opt_vectorize should only create 16-bit 2D vectors.
    */
   bool vectorize_vec2_16bit;

   /**
    * Should the linker unify inputs_read/outputs_written between adjacent
    * shader stages which are linked into a single program?
    */
   bool unify_interfaces;

   /**
    * Should nir_lower_io() create load_interpolated_input intrinsics?
    *
    * If not, it generates regular load_input intrinsics and interpolation
    * information must be inferred from the list of input nir_variables.
    */
   bool use_interpolated_input_intrinsics;


   /**
    * Whether nir_lower_io() will lower interpolateAt functions to
    * load_interpolated_input intrinsics.
    *
    * Unlike use_interpolated_input_intrinsics this will only lower these
    * functions and leave input load intrinsics untouched.
    */
   bool lower_interpolate_at;

   /* Lowers when 32x32->64 bit multiplication is not supported */
   bool lower_mul_2x32_64;

   /* Lowers when rotate instruction is not supported */
   bool lower_rotate;

   /** Backend supports ternary addition */
   bool has_iadd3;

   /**
    * Backend supports imul24, and would like to use it (when possible)
    * for address/offset calculation.  If true, driver should call
    * nir_lower_amul().  (If not set, amul will automatically be lowered
    * to imul.)
    */
   bool has_imul24;

   /** Backend supports umul24, if not set  umul24 will automatically be lowered
    * to imul with masked inputs */
   bool has_umul24;

   /** Backend supports umad24, if not set  umad24 will automatically be lowered
    * to imul with masked inputs and iadd */
   bool has_umad24;

   /* Backend supports fused compare against zero and csel */
   bool has_fused_comp_and_csel;

   /** Backend supports fsub, if not set fsub will automatically be lowered to
    * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_fsub;

   /** Backend supports isub, if not set isub will automatically be lowered to
    * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_isub;

   /** Backend supports pack_32_4x8 or pack_32_4x8_split. */
   bool has_pack_32_4x8;

   /** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants
    * for rect texture lowering. */
   bool has_txs;

   /** Backend supports sdot_4x8 and udot_4x8 opcodes. */
   bool has_dot_4x8;

   /** Backend supports sudot_4x8 opcodes. */
   bool has_sudot_4x8;

   /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
   bool has_dot_2x16;

   /* Whether to generate only scoped_barrier intrinsics instead of the set of
    * memory and control barrier intrinsics based on GLSL.
    */
   bool use_scoped_barrier;

   /**
    * Is this the Intel vec4 backend?
    *
    * Used to inhibit algebraic optimizations that are known to be harmful on
    * the Intel vec4 backend.  This is generally applicable to any
    * optimization that might cause more immediate values to be used in
    * 3-source (e.g., ffma and flrp) instructions.
    */
   bool intel_vec4;

   /**
    * For most Intel GPUs, all ternary operations such as FMA and BFE cannot
    * have immediates, so two to three instructions may eventually be needed.
    */
   bool avoid_ternary_with_two_constants;

   /** Whether 8-bit ALU is supported. */
   bool support_8bit_alu;

   /** Whether 16-bit ALU is supported. */
   bool support_16bit_alu;

   unsigned max_unroll_iterations;
   unsigned max_unroll_iterations_aggressive;

   bool lower_uniforms_to_ubo;

   /* If the precision is ignored, backends that don't handle
    * different precisions when passing data between stages and use
    * vectorized IO can pack more varyings when linking. */
   bool linker_ignore_precision;

   /**
    * Specifies which type of indirectly accessed variables should force
    * loop unrolling.
    */
   nir_variable_mode force_indirect_unrolling;

   nir_lower_int64_options lower_int64_options;
   nir_lower_doubles_options lower_doubles_options;
   nir_divergence_options divergence_analysis_options;

   /**
    * Support pack varyings with different interpolation location
    * (center, centroid, sample) and mode (flat, noperspective, smooth)
    * into same slot.
    */
   nir_pack_varying_options pack_varying_options;
} nir_shader_compiler_options;
3797
typedef struct nir_shader {
   /** list of uniforms (nir_variable) */
   struct exec_list variables;

   /** Set of driver-specific options for the shader.
    *
    * The memory for the options is expected to be kept in a single static
    * copy by the driver.
    */
   const struct nir_shader_compiler_options *options;

   /** Various bits of compile-time information about a given shader */
   struct shader_info info;

   struct exec_list functions; /**< list of nir_function */

   struct list_head gc_list; /**< list of all nir_instrs allocated on the shader but not yet freed. */

   /**
    * The size of the variable space for load_input_*, load_uniform_*, etc.
    * intrinsics.  This is in back-end specific units which is likely one of
    * bytes, dwords, or vec4s depending on context and back-end.
    */
   unsigned num_inputs, num_uniforms, num_outputs;

   /** Size in bytes of required scratch space */
   unsigned scratch_size;

   /** Constant data associated with this shader.
    *
    * Constant data is loaded through load_constant intrinsics (as compared to
    * the NIR load_const instructions which have the constant value inlined
    * into them).  This is usually generated by nir_opt_large_constants (so
    * shaders don't have to load_const into a temporary array when they want
    * to indirect on a const array).
    */
   void *constant_data;
   /** Size of the constant data associated with the shader, in bytes */
   unsigned constant_data_size;

   /** Number of entries in \c printf_info */
   unsigned printf_info_count;
   /** Per-printf-call metadata array (presumably one entry per printf in the
    *  shader — confirm against nir_printf_info's definition). */
   nir_printf_info *printf_info;
} nir_shader;
3841
/** Iterates \c func over every nir_function in \p shader's function list. */
#define nir_foreach_function(func, shader) \
   foreach_list_typed(nir_function, func, node, &(shader)->functions)
3844
/** Returns the nir_function_impl of the shader's entrypoint function.
 *
 * Returns NULL when no function is flagged \c is_entrypoint.  The entrypoint
 * is required to take no parameters and to have an implementation attached
 * (asserted below).
 */
static inline nir_function_impl *
nir_shader_get_entrypoint(nir_shader *shader)
{
   nir_function *func = NULL;

   nir_foreach_function(function, shader) {
      assert(func == NULL);
      if (function->is_entrypoint) {
         func = function;
         /* NOTE(review): the break below is compiled only when asserts are
          * enabled (NDEBUG undefined), so the loop-top assert can never
          * observe a second iteration after func is set; in release builds
          * the assert is compiled out and a later entrypoint would win.
          * Looks intentional-but-subtle upstream — confirm the intended
          * debug/release split before changing it.
          */
#ifndef NDEBUG
         break;
#endif
      }
   }

   if (!func)
      return NULL;

   assert(func->num_params == 0);
   assert(func->impl);
   return func->impl;
}
3867
/** Start/end instruction indices (nir_instr::index) bounding one SSA def's
 *  live interval.  See nir_instr_liveness::defs for the exact semantics. */
typedef struct nir_liveness_bounds {
   uint32_t start;
   uint32_t end;
} nir_liveness_bounds;

/** Per-SSA-def liveness intervals, as computed by nir_live_ssa_defs_per_instr. */
typedef struct nir_instr_liveness {
   /**
    * nir_instr->index for the start and end of a single live interval for SSA
    * defs.  ssa values last used by a nir_if condition will have an interval
    * ending at the first instruction after the last one before the if
    * condition.
    *
    * Indexed by def->index (impl->ssa_alloc elements).
    */
   struct nir_liveness_bounds *defs;
} nir_instr_liveness;
3884
3885nir_instr_liveness *
3886nir_live_ssa_defs_per_instr(nir_function_impl *impl);
3887
3888nir_shader *nir_shader_create(void *mem_ctx,
3889                              gl_shader_stage stage,
3890                              const nir_shader_compiler_options *options,
3891                              shader_info *si);
3892
3893nir_register *nir_local_reg_create(nir_function_impl *impl);
3894
3895void nir_reg_remove(nir_register *reg);
3896
3897/** Adds a variable to the appropriate list in nir_shader */
3898void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
3899
/** Adds a function-local variable to \p impl's locals list.
 *
 * Only nir_var_function_temp variables live on an impl; every other mode
 * belongs on the nir_shader's variable list (see nir_shader_add_variable).
 */
static inline void
nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
{
   assert(var->data.mode == nir_var_function_temp);
   exec_list_push_tail(&impl->locals, &var->node);
}
3906
3907/** creates a variable, sets a few defaults, and adds it to the list */
3908nir_variable *nir_variable_create(nir_shader *shader,
3909                                  nir_variable_mode mode,
3910                                  const struct glsl_type *type,
3911                                  const char *name);
3912/** creates a local variable and adds it to the list */
3913nir_variable *nir_local_variable_create(nir_function_impl *impl,
3914                                        const struct glsl_type *type,
3915                                        const char *name);
3916
3917nir_variable *nir_find_variable_with_location(nir_shader *shader,
3918                                              nir_variable_mode mode,
3919                                              unsigned location);
3920
3921nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
3922                                                     nir_variable_mode mode,
3923                                                     unsigned location);
3924
3925void nir_sort_variables_with_modes(nir_shader *shader,
3926                                   int (*compar)(const nir_variable *,
3927                                                 const nir_variable *),
3928                                   nir_variable_mode modes);
3929
3930/** creates a function and adds it to the shader's list of functions */
3931nir_function *nir_function_create(nir_shader *shader, const char *name);
3932
3933nir_function_impl *nir_function_impl_create(nir_function *func);
3934/** creates a function_impl that isn't tied to any particular function */
3935nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
3936
3937nir_block *nir_block_create(nir_shader *shader);
3938nir_if *nir_if_create(nir_shader *shader);
3939nir_loop *nir_loop_create(nir_shader *shader);
3940
3941nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
3942
3943/** requests that the given pieces of metadata be generated */
3944void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
3945/** dirties all but the preserved metadata */
3946void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
3947/** Preserves all metadata for the given shader */
3948void nir_shader_preserve_all_metadata(nir_shader *shader);
3949
3950/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
3951nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
3952
3953nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
3954                                        nir_deref_type deref_type);
3955
3956nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
3957
3958nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
3959                                                  unsigned num_components,
3960                                                  unsigned bit_size);
3961
3962nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
3963                                                nir_intrinsic_op op);
3964
3965nir_call_instr *nir_call_instr_create(nir_shader *shader,
3966                                      nir_function *callee);
3967
3968/** Creates a NIR texture instruction */
3969nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
3970
3971nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
3972nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src);
3973
3974nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
3975
3976nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
3977                                                unsigned num_components,
3978                                                unsigned bit_size);
3979
3980nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
3981
3982/**
3983 * NIR Cursors and Instruction Insertion API
3984 * @{
3985 *
3986 * A tiny struct representing a point to insert/extract instructions or
3987 * control flow nodes.  Helps reduce the combinatorial explosion of possible
3988 * points to insert/extract.
3989 *
3990 * \sa nir_control_flow.h
3991 */
/** Where a cursor sits relative to its block or instruction. */
typedef enum {
   nir_cursor_before_block, /**< before the first instruction of a block */
   nir_cursor_after_block,  /**< after the last instruction of a block */
   nir_cursor_before_instr, /**< immediately before a given instruction */
   nir_cursor_after_instr,  /**< immediately after a given instruction */
} nir_cursor_option;

/** A point between instructions.  Which union member is valid is determined
 *  by \c option: \c block for the *_block options, \c instr for *_instr. */
typedef struct {
   nir_cursor_option option;
   union {
      nir_block *block;
      nir_instr *instr;
   };
} nir_cursor;
4006
4007static inline nir_block *
4008nir_cursor_current_block(nir_cursor cursor)
4009{
4010   if (cursor.option == nir_cursor_before_instr ||
4011       cursor.option == nir_cursor_after_instr) {
4012      return cursor.instr->block;
4013   } else {
4014      return cursor.block;
4015   }
4016}
4017
4018bool nir_cursors_equal(nir_cursor a, nir_cursor b);
4019
4020static inline nir_cursor
4021nir_before_block(nir_block *block)
4022{
4023   nir_cursor cursor;
4024   cursor.option = nir_cursor_before_block;
4025   cursor.block = block;
4026   return cursor;
4027}
4028
4029static inline nir_cursor
4030nir_after_block(nir_block *block)
4031{
4032   nir_cursor cursor;
4033   cursor.option = nir_cursor_after_block;
4034   cursor.block = block;
4035   return cursor;
4036}
4037
4038static inline nir_cursor
4039nir_before_instr(nir_instr *instr)
4040{
4041   nir_cursor cursor;
4042   cursor.option = nir_cursor_before_instr;
4043   cursor.instr = instr;
4044   return cursor;
4045}
4046
4047static inline nir_cursor
4048nir_after_instr(nir_instr *instr)
4049{
4050   nir_cursor cursor;
4051   cursor.option = nir_cursor_after_instr;
4052   cursor.instr = instr;
4053   return cursor;
4054}
4055
4056static inline nir_cursor
4057nir_before_block_after_phis(nir_block *block)
4058{
4059   nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
4060   if (last_phi)
4061      return nir_after_instr(&last_phi->instr);
4062   else
4063      return nir_before_block(block);
4064}
4065
4066static inline nir_cursor
4067nir_after_block_before_jump(nir_block *block)
4068{
4069   nir_instr *last_instr = nir_block_last_instr(block);
4070   if (last_instr && last_instr->type == nir_instr_type_jump) {
4071      return nir_before_instr(last_instr);
4072   } else {
4073      return nir_after_block(block);
4074   }
4075}
4076
/** Returns a cursor at which code feeding \p src may legally be inserted.
 *
 * - If-condition source: the end of the block preceding the if (which is
 *   asserted not to end in a jump).
 * - Phi source: the end of the corresponding predecessor block, before any
 *   trailing jump.
 * - Otherwise: immediately before the consuming instruction.
 */
static inline nir_cursor
nir_before_src(nir_src *src, bool is_if_condition)
{
   if (is_if_condition) {
      nir_block *prev_block =
         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
      assert(!nir_block_ends_in_jump(prev_block));
      return nir_after_block(prev_block);
   } else if (src->parent_instr->type == nir_instr_type_phi) {
#ifndef NDEBUG
      /* Debug-only sanity check: \p src must really be one of the phi's
       * sources before we container-of our way back to the nir_phi_src. */
      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
      bool found = false;
      nir_foreach_phi_src(phi_src, cond_phi) {
         if (phi_src->src.ssa == src->ssa) {
            found = true;
            break;
         }
      }
      assert(found);
#endif
      /* The LIST_ENTRY macro is a generic container-of macro, it just happens
       * to have a more specific name.
       */
      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
      return nir_after_block_before_jump(phi_src->pred);
   } else {
      return nir_before_instr(src->parent_instr);
   }
}
4106
4107static inline nir_cursor
4108nir_before_cf_node(nir_cf_node *node)
4109{
4110   if (node->type == nir_cf_node_block)
4111      return nir_before_block(nir_cf_node_as_block(node));
4112
4113   return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
4114}
4115
4116static inline nir_cursor
4117nir_after_cf_node(nir_cf_node *node)
4118{
4119   if (node->type == nir_cf_node_block)
4120      return nir_after_block(nir_cf_node_as_block(node));
4121
4122   return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
4123}
4124
4125static inline nir_cursor
4126nir_after_phis(nir_block *block)
4127{
4128   nir_foreach_instr(instr, block) {
4129      if (instr->type != nir_instr_type_phi)
4130         return nir_before_instr(instr);
4131   }
4132   return nir_after_block(block);
4133}
4134
4135static inline nir_cursor
4136nir_after_instr_and_phis(nir_instr *instr)
4137{
4138   if (instr->type == nir_instr_type_phi)
4139      return nir_after_phis(instr->block);
4140   else
4141      return nir_after_instr(instr);
4142}
4143
4144static inline nir_cursor
4145nir_after_cf_node_and_phis(nir_cf_node *node)
4146{
4147   if (node->type == nir_cf_node_block)
4148      return nir_after_block(nir_cf_node_as_block(node));
4149
4150   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
4151
4152   return nir_after_phis(block);
4153}
4154
4155static inline nir_cursor
4156nir_before_cf_list(struct exec_list *cf_list)
4157{
4158   nir_cf_node *first_node = exec_node_data(nir_cf_node,
4159                                            exec_list_get_head(cf_list), node);
4160   return nir_before_cf_node(first_node);
4161}
4162
4163static inline nir_cursor
4164nir_after_cf_list(struct exec_list *cf_list)
4165{
4166   nir_cf_node *last_node = exec_node_data(nir_cf_node,
4167                                           exec_list_get_tail(cf_list), node);
4168   return nir_after_cf_node(last_node);
4169}
4170
4171/**
4172 * Insert a NIR instruction at the given cursor.
4173 *
4174 * Note: This does not update the cursor.
4175 */
4176void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
4177
4178bool nir_instr_move(nir_cursor cursor, nir_instr *instr);
4179
/** Inserts \p before immediately before \p instr. */
static inline void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
{
   nir_instr_insert(nir_before_instr(instr), before);
}

/** Inserts \p after immediately after \p instr. */
static inline void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
   nir_instr_insert(nir_after_instr(instr), after);
}

/** Inserts \p before at the start of \p block. */
static inline void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
   nir_instr_insert(nir_before_block(block), before);
}

/** Inserts \p after at the end of \p block. */
static inline void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
   nir_instr_insert(nir_after_block(block), after);
}

/** Inserts \p before just before the CF node \p node. */
static inline void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_node(node), before);
}

/** Inserts \p after just after the CF node \p node. */
static inline void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_node(node), after);
}

/** Inserts \p before at the start of the CF list \p list. */
static inline void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_list(list), before);
}

/** Inserts \p after at the end of the CF list \p list. */
static inline void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_list(list), after);
}
4227
4228void nir_instr_remove_v(nir_instr *instr);
4229void nir_instr_free(nir_instr *instr);
4230void nir_instr_free_list(struct exec_list *list);
4231
4232static inline nir_cursor
4233nir_instr_remove(nir_instr *instr)
4234{
4235   nir_cursor cursor;
4236   nir_instr *prev = nir_instr_prev(instr);
4237   if (prev) {
4238      cursor = nir_after_instr(prev);
4239   } else {
4240      cursor = nir_before_block(instr->block);
4241   }
4242   nir_instr_remove_v(instr);
4243   return cursor;
4244}
4245
4246nir_cursor nir_instr_free_and_dce(nir_instr *instr);
4247
4248/** @} */
4249
4250nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
4251
4252typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
4253typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
4254typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
4255bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
4256                         void *state);
4257static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
4258static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
4259bool nir_foreach_phi_src_leaving_block(nir_block *instr,
4260                                       nir_foreach_src_cb cb,
4261                                       void *state);
4262
4263nir_const_value *nir_src_as_const_value(nir_src src);
4264
/* Defines nir_src_as_<name>(): returns the source's parent instruction
 * downcast to `c_type` when the source is SSA and its parent has the
 * expected instruction type, NULL otherwise. */
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro)                \
static inline c_type *                                                  \
nir_src_as_ ## name (nir_src src)                                       \
{                                                                       \
    return src.is_ssa && src.ssa->parent_instr->type == type_enum       \
           ? cast_macro(src.ssa->parent_instr) : NULL;                  \
}

NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
            nir_instr_type_intrinsic, nir_instr_as_intrinsic)
NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
4277
4278bool nir_src_is_dynamically_uniform(nir_src src);
4279bool nir_srcs_equal(nir_src src1, nir_src src2);
4280bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
4281
/** Repoints an instruction source at a new SSA def, maintaining use lists.
 *
 * \p instr is only used to assert that \p src belongs to that instruction.
 */
static inline void
nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
                          nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_instr == instr);
   assert(src->is_ssa && src->ssa);
   /* Order matters: unlink from the old def's use list before repointing. */
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->uses);
}
4292
4293void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
4294void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
4295
/** Repoints an if-condition source at a new SSA def, maintaining use lists.
 *
 * Same as nir_instr_rewrite_src_ssa() but for if-conditions, which live on
 * the def's separate \c if_uses list.  \p if_stmt is only used to assert
 * that \p src belongs to that if.
 */
static inline void
nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt,
                             nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_if == if_stmt);
   assert(src->is_ssa && src->ssa);
   /* Order matters: unlink from the old def's if-use list before repointing. */
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->if_uses);
}
4306
4307void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
4308void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
4309                            nir_dest new_dest);
4310
4311void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
4312                       unsigned num_components, unsigned bit_size,
4313                       const char *name);
4314void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
4315                      unsigned num_components, unsigned bit_size);
/** Initializes \p dest as an SSA destination whose component count and bit
 *  size are taken from a vector-or-scalar GLSL type. */
static inline void
nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
                           const struct glsl_type *type,
                           const char *name)
{
   assert(glsl_type_is_vector_or_scalar(type));
   nir_ssa_dest_init(instr, dest, glsl_get_components(type),
                     glsl_get_bit_size(type), name);
}
4325void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
4326void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
4327void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
4328                                    nir_instr *after_me);
4329
4330nir_component_mask_t nir_src_components_read(const nir_src *src);
4331nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
4332
4333static inline bool
4334nir_ssa_def_is_unused(nir_ssa_def *ssa)
4335{
4336   return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses);
4337}
4338
4339
4340/** Returns the next block, disregarding structure
4341 *
4342 * The ordering is deterministic but has no guarantees beyond that.  In
4343 * particular, it is not guaranteed to be dominance-preserving.
4344 */
4345nir_block *nir_block_unstructured_next(nir_block *block);
4346nir_block *nir_unstructured_start_block(nir_function_impl *impl);
4347
4348#define nir_foreach_block_unstructured(block, impl) \
4349   for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
4350        block = nir_block_unstructured_next(block))
4351
4352#define nir_foreach_block_unstructured_safe(block, impl) \
4353   for (nir_block *block = nir_unstructured_start_block(impl), \
4354        *next = nir_block_unstructured_next(block); \
4355        block != NULL; \
4356        block = next, next = nir_block_unstructured_next(block))
4357
4358/*
4359 * finds the next basic block in source-code order, returns NULL if there is
4360 * none
4361 */
4362
4363nir_block *nir_block_cf_tree_next(nir_block *block);
4364
4365/* Performs the opposite of nir_block_cf_tree_next() */
4366
4367nir_block *nir_block_cf_tree_prev(nir_block *block);
4368
4369/* Gets the first block in a CF node in source-code order */
4370
4371nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);
4372
4373/* Gets the last block in a CF node in source-code order */
4374
4375nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);
4376
4377/* Gets the next block after a CF node in source-code order */
4378
4379nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);
4380
4381/* Macros for loops that visit blocks in source-code order */
4382
4383#define nir_foreach_block(block, impl) \
4384   for (nir_block *block = nir_start_block(impl); block != NULL; \
4385        block = nir_block_cf_tree_next(block))
4386
4387#define nir_foreach_block_safe(block, impl) \
4388   for (nir_block *block = nir_start_block(impl), \
4389        *next = nir_block_cf_tree_next(block); \
4390        block != NULL; \
4391        block = next, next = nir_block_cf_tree_next(block))
4392
4393#define nir_foreach_block_reverse(block, impl) \
4394   for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
4395        block = nir_block_cf_tree_prev(block))
4396
4397#define nir_foreach_block_reverse_safe(block, impl) \
4398   for (nir_block *block = nir_impl_last_block(impl), \
4399        *prev = nir_block_cf_tree_prev(block); \
4400        block != NULL; \
4401        block = prev, prev = nir_block_cf_tree_prev(block))
4402
4403#define nir_foreach_block_in_cf_node(block, node) \
4404   for (nir_block *block = nir_cf_node_cf_tree_first(node); \
4405        block != nir_cf_node_cf_tree_next(node); \
4406        block = nir_block_cf_tree_next(block))
4407
4408/* If the following CF node is an if, this function returns that if.
4409 * Otherwise, it returns NULL.
4410 */
4411nir_if *nir_block_get_following_if(nir_block *block);
4412
4413nir_loop *nir_block_get_following_loop(nir_block *block);
4414
4415nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);
4416
4417void nir_index_local_regs(nir_function_impl *impl);
4418void nir_index_ssa_defs(nir_function_impl *impl);
4419unsigned nir_index_instrs(nir_function_impl *impl);
4420
4421void nir_index_blocks(nir_function_impl *impl);
4422
4423unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
4424unsigned nir_function_impl_index_vars(nir_function_impl *impl);
4425
4426void nir_print_shader(nir_shader *shader, FILE *fp);
4427void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
4428void nir_print_instr(const nir_instr *instr, FILE *fp);
4429void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
4430void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
4431#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
4432#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
4433#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
4434#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)
4435
4436char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
4437char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
4438
4439/** Shallow clone of a single instruction. */
4440nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);
4441
4442/** Shallow clone of a single ALU instruction. */
4443nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
4444
4445nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
4446nir_function_impl *nir_function_impl_clone(nir_shader *shader,
4447                                           const nir_function_impl *fi);
4448nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
4449nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
4450
4451void nir_shader_replace(nir_shader *dest, nir_shader *src);
4452
4453void nir_shader_serialize_deserialize(nir_shader *s);
4454
4455#ifndef NDEBUG
4456void nir_validate_shader(nir_shader *shader, const char *when);
4457void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
4458void nir_metadata_set_validation_flag(nir_shader *shader);
4459void nir_metadata_check_validation_flag(nir_shader *shader);
4460
4461static inline bool
4462should_skip_nir(const char *name)
4463{
4464   static const char *list = NULL;
4465   if (!list) {
4466      /* Comma separated list of names to skip. */
4467      list = getenv("NIR_SKIP");
4468      if (!list)
4469         list = "";
4470   }
4471
4472   if (!list[0])
4473      return false;
4474
4475   return comma_separated_list_contains(list, name);
4476}
4477
4478static inline bool
4479should_clone_nir(void)
4480{
4481   static int should_clone = -1;
4482   if (should_clone < 0)
4483      should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
4484
4485   return should_clone;
4486}
4487
4488static inline bool
4489should_serialize_deserialize_nir(void)
4490{
4491   static int test_serialize = -1;
4492   if (test_serialize < 0)
4493      test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false);
4494
4495   return test_serialize;
4496}
4497
4498static inline bool
4499should_print_nir(nir_shader *shader)
4500{
4501   static int should_print = -1;
4502   if (should_print < 0)
4503      should_print = env_var_as_unsigned("NIR_PRINT", 0);
4504
4505   if (should_print == 1)
4506      return !shader->info.internal;
4507
4508   return should_print;
4509}
4510#else
/* Release (NDEBUG) builds: the validation and NIR_TEST_* helpers above
 * compile to no-ops so they cost nothing in production drivers. */
static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; }
static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; }
static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
static inline bool should_clone_nir(void) { return false; }
static inline bool should_serialize_deserialize_nir(void) { return false; }
static inline bool should_print_nir(nir_shader *shader) { return false; }
4519#endif /* NDEBUG */
4520
/* Shared driver for the NIR_PASS* macros: honors NIR_SKIP (skipping the
 * pass by name), runs the pass body, then optionally round-trips the shader
 * through clone and/or serialize-deserialize (NIR_TEST_CLONE /
 * NIR_TEST_SERIALIZE) to flush out bugs in those paths. */
#define _PASS(pass, nir, do_pass) do {                               \
   if (should_skip_nir(#pass)) {                                     \
      printf("skipping %s\n", #pass);                                \
      break;                                                         \
   }                                                                 \
   do_pass                                                           \
   if (should_clone_nir()) {                                         \
      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
      nir_shader_replace(nir, clone);                                \
   }                                                                 \
   if (should_serialize_deserialize_nir()) {                         \
      nir_shader_serialize_deserialize(nir);                         \
   }                                                                 \
} while (0)
4535
/* Runs \c pass on \c nir; when the pass reports changes it sets
 * \c progress = true, validates the shader, and checks that the pass
 * properly invalidated non-preserved metadata. */
#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir,          \
   nir_metadata_set_validation_flag(nir);                            \
   if (should_print_nir(nir))                                           \
      printf("%s\n", #pass);                                         \
   if (pass(nir, ##__VA_ARGS__)) {                                   \
      nir_validate_shader(nir, "after " #pass);                      \
      progress = true;                                               \
      if (should_print_nir(nir))                                        \
         nir_print_shader(nir, stdout);                              \
      nir_metadata_check_validation_flag(nir);                       \
   }                                                                 \
)
4548
/* Variant of NIR_PASS for passes whose progress return is void or ignored;
 * always validates the shader afterwards. */
#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir,                  \
   if (should_print_nir(nir))                                           \
      printf("%s\n", #pass);                                         \
   pass(nir, ##__VA_ARGS__);                                         \
   nir_validate_shader(nir, "after " #pass);                         \
   if (should_print_nir(nir))                                           \
      nir_print_shader(nir, stdout);                                 \
)
4557
4558#define NIR_SKIP(name) should_skip_nir(#name)
4559
4560/** An instruction filtering callback with writemask
4561 *
4562 * Returns true if the instruction should be processed with the associated
4563 * writemask and false otherwise.
4564 */
4565typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
4566                                              unsigned writemask, const void *);
4567
4568/** A simple instruction lowering callback
4569 *
4570 * Many instruction lowering passes can be written as a simple function which
4571 * takes an instruction as its input and returns a sequence of instructions
4572 * that implement the consumed instruction.  This function type represents
4573 * such a lowering function.  When called, a function with this prototype
4574 * should either return NULL indicating that no lowering needs to be done or
4575 * emit a sequence of instructions using the provided builder (whose cursor
4576 * will already be placed after the instruction to be lowered) and return the
4577 * resulting nir_ssa_def.
4578 */
4579typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
4580                                           nir_instr *, void *);
4581
4582/**
4583 * Special return value for nir_lower_instr_cb when some progress occurred
4584 * (like changing an input to the instr) that didn't result in a replacement
4585 * SSA def being generated.
4586 */
4587#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
4588
4589/**
4590 * Special return value for nir_lower_instr_cb when some progress occurred
4591 * that should remove the current instruction that doesn't create an output
4592 * (like a store)
4593 */
4594
4595#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2)
4596
4597/** Iterate over all the instructions in a nir_function_impl and lower them
4598 *  using the provided callbacks
4599 *
4600 * This function implements the guts of a standard lowering pass for you.  It
4601 * iterates over all of the instructions in a nir_function_impl and calls the
4602 * filter callback on each one.  If the filter callback returns true, it then
4603 * calls the lowering call back on the instruction.  (Splitting it this way
4604 * allows us to avoid some save/restore work for instructions we know won't be
4605 * lowered.)  If the instruction is dead after the lowering is complete, it
4606 * will be removed.  If new instructions are added, the lowering callback will
4607 * also be called on them in case multiple lowerings are required.
4608 *
4609 * If the callback indicates that the original instruction is replaced (either
4610 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
4611 * instruction is removed along with any now-dead SSA defs it used.
4612 *
4613 * The metadata for the nir_function_impl will also be updated.  If any blocks
4614 * are added (they cannot be removed), dominance and block indices will be
4615 * invalidated.
4616 */
4617bool nir_function_impl_lower_instructions(nir_function_impl *impl,
4618                                          nir_instr_filter_cb filter,
4619                                          nir_lower_instr_cb lower,
4620                                          void *cb_data);
4621bool nir_shader_lower_instructions(nir_shader *shader,
4622                                   nir_instr_filter_cb filter,
4623                                   nir_lower_instr_cb lower,
4624                                   void *cb_data);
4625
4626void nir_calc_dominance_impl(nir_function_impl *impl);
4627void nir_calc_dominance(nir_shader *shader);
4628
4629nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
4630bool nir_block_dominates(nir_block *parent, nir_block *child);
4631bool nir_block_is_unreachable(nir_block *block);
4632
4633void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
4634void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
4635
4636void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
4637void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
4638
4639void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
4640void nir_dump_cfg(nir_shader *shader, FILE *fp);
4641
4642void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
4643                                          int *out_vtxcnt,
4644                                          int *out_prmcnt,
4645                                          unsigned num_streams);
4646
4647bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
4648bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
4649bool nir_split_var_copies(nir_shader *shader);
4650bool nir_split_per_member_structs(nir_shader *shader);
4651bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
4652
4653bool nir_lower_returns_impl(nir_function_impl *impl);
4654bool nir_lower_returns(nir_shader *shader);
4655
4656void nir_inline_function_impl(struct nir_builder *b,
4657                              const nir_function_impl *impl,
4658                              nir_ssa_def **params,
4659                              struct hash_table *shader_var_remap);
4660bool nir_inline_functions(nir_shader *shader);
4661
4662void nir_find_inlinable_uniforms(nir_shader *shader);
4663void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
4664                         const uint32_t *uniform_values,
4665                         const uint16_t *uniform_dw_offsets);
4666
4667bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);
4668
4669void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
4670void nir_lower_deref_copy_instr(struct nir_builder *b,
4671                                nir_intrinsic_instr *copy);
4672bool nir_lower_var_copies(nir_shader *shader);
4673
4674bool nir_opt_memcpy(nir_shader *shader);
4675bool nir_lower_memcpy(nir_shader *shader);
4676
4677void nir_fixup_deref_modes(nir_shader *shader);
4678
4679bool nir_lower_global_vars_to_local(nir_shader *shader);
4680
/** Bitmask controlling which array-deref-of-vector cases
 * nir_lower_array_deref_of_vec() rewrites.
 */
typedef enum {
   /* Lower loads through a constant-index array deref of a vector. */
   nir_lower_direct_array_deref_of_vec_load     = (1 << 0),
   /* Lower loads through an indirect (non-constant) index. */
   nir_lower_indirect_array_deref_of_vec_load   = (1 << 1),
   /* Lower stores through a constant-index array deref of a vector. */
   nir_lower_direct_array_deref_of_vec_store    = (1 << 2),
   /* Lower stores through an indirect (non-constant) index. */
   nir_lower_indirect_array_deref_of_vec_store  = (1 << 3),
} nir_lower_array_deref_of_vec_options;
4687
4688bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
4689                                  nir_lower_array_deref_of_vec_options options);
4690
4691bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
4692                               uint32_t max_lower_array_len);
4693
4694bool nir_lower_indirect_builtin_uniform_derefs(nir_shader *shader);
4695
4696bool nir_lower_locals_to_regs(nir_shader *shader);
4697
4698void nir_lower_io_to_temporaries(nir_shader *shader,
4699                                 nir_function_impl *entrypoint,
4700                                 bool outputs, bool inputs);
4701
4702bool nir_lower_vars_to_scratch(nir_shader *shader,
4703                               nir_variable_mode modes,
4704                               int size_threshold,
4705                               glsl_type_size_align_func size_align);
4706
4707void nir_lower_clip_halfz(nir_shader *shader);
4708
4709void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
4710
4711void nir_gather_ssa_types(nir_function_impl *impl,
4712                          BITSET_WORD *float_types,
4713                          BITSET_WORD *int_types);
4714
4715void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
4716                              unsigned *size,
4717                              int (*type_size)(const struct glsl_type *, bool));
4718
4719/* Some helpers to do very simple linking */
4720bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
4721bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
4722                               uint64_t *used_by_other_stage,
4723                               uint64_t *used_by_other_stage_patches);
4724void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
4725                          bool default_to_smooth_interp);
4726void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
4727bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
4728void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);
4729
4730bool nir_lower_amul(nir_shader *shader,
4731                    int (*type_size)(const struct glsl_type *, bool));
4732
4733bool nir_lower_ubo_vec4(nir_shader *shader);
4734
4735void nir_assign_io_var_locations(nir_shader *shader,
4736                                 nir_variable_mode mode,
4737                                 unsigned *size,
4738                                 gl_shader_stage stage);
4739
/** Counts returned by nir_assign_linked_io_var_locations(). */
typedef struct {
   /* Number of linked non-patch I/O variables. */
   uint8_t num_linked_io_vars;
   /* Number of linked per-patch I/O variables. */
   uint8_t num_linked_patch_io_vars;
} nir_linked_io_var_info;
4744
4745nir_linked_io_var_info
4746nir_assign_linked_io_var_locations(nir_shader *producer,
4747                                   nir_shader *consumer);
4748
/** Option flags for nir_lower_io(). */
typedef enum {
   /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
    * to 32-bit operations.  This is only valid for nir_var_shader_in/out
    * modes.
    */
   nir_lower_io_lower_64bit_to_32 = (1 << 0),

   /* If set, this forces all non-flat fragment shader inputs to be
    * interpolated as if with the "sample" qualifier.  This requires
    * nir_shader_compiler_options::use_interpolated_input_intrinsics.
    */
   nir_lower_io_force_sample_interpolation = (1 << 1),
} nir_lower_io_options;
4762bool nir_lower_io(nir_shader *shader,
4763                  nir_variable_mode modes,
4764                  int (*type_size)(const struct glsl_type *, bool),
4765                  nir_lower_io_options);
4766
4767bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
4768
4769bool
4770nir_lower_vars_to_explicit_types(nir_shader *shader,
4771                                 nir_variable_mode modes,
4772                                 glsl_type_size_align_func type_info);
4773void
4774nir_gather_explicit_io_initializers(nir_shader *shader,
4775                                    void *dst, size_t dst_size,
4776                                    nir_variable_mode mode);
4777
4778bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);
4779
4780typedef enum {
4781   /**
4782    * An address format which is a simple 32-bit global GPU address.
4783    */
4784   nir_address_format_32bit_global,
4785
4786   /**
4787    * An address format which is a simple 64-bit global GPU address.
4788    */
4789   nir_address_format_64bit_global,
4790
4791   /**
4792    * An address format which is a 64-bit global base address and a 32-bit
4793    * offset.
4794    *
4795    * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
4796    * address stored with the low bits in .x and high bits in .y, .z is
4797    * undefined, and .w is an offset.  This is intended to match
4798    * 64bit_bounded_global but without the bounds checking.
4799    */
4800   nir_address_format_64bit_global_32bit_offset,
4801
4802   /**
4803    * An address format which is a bounds-checked 64-bit global GPU address.
4804    *
4805    * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
4806    * address stored with the low bits in .x and high bits in .y, .z is a
4807    * size, and .w is an offset.  When the final I/O operation is lowered, .w
4808    * is checked against .z and the operation is predicated on the result.
4809    */
4810   nir_address_format_64bit_bounded_global,
4811
4812   /**
4813    * An address format which is comprised of a vec2 where the first
4814    * component is a buffer index and the second is an offset.
4815    */
4816   nir_address_format_32bit_index_offset,
4817
4818   /**
4819    * An address format which is a 64-bit value, where the high 32 bits
4820    * are a buffer index, and the low 32 bits are an offset.
4821    */
4822    nir_address_format_32bit_index_offset_pack64,
4823
4824   /**
4825    * An address format which is comprised of a vec3 where the first two
4826    * components specify the buffer and the third is an offset.
4827    */
4828   nir_address_format_vec2_index_32bit_offset,
4829
4830   /**
4831    * An address format which represents generic pointers with a 62-bit
4832    * pointer and a 2-bit enum in the top two bits.  The top two bits have
4833    * the following meanings:
4834    *
4835    *  - 0x0: Global memory
4836    *  - 0x1: Shared memory
4837    *  - 0x2: Scratch memory
4838    *  - 0x3: Global memory
4839    *
4840    * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of
4841    * addresses.  Valid global memory addresses may naturally have either 0 or
4842    * ~0 as their high bits.
4843    *
4844    * Shared and scratch pointers are represented as 32-bit offsets with the
4845    * top 32 bits only being used for the enum.  This allows us to avoid
4846    * 64-bit address calculations in a bunch of cases.
4847    */
4848   nir_address_format_62bit_generic,
4849
4850   /**
4851    * An address format which is a simple 32-bit offset.
4852    */
4853   nir_address_format_32bit_offset,
4854
4855   /**
4856    * An address format which is a simple 32-bit offset cast to 64-bit.
4857    */
4858    nir_address_format_32bit_offset_as_64bit,
4859
4860   /**
4861    * An address format representing a purely logical addressing model.  In
4862    * this model, all deref chains must be complete from the dereference
4863    * operation to the variable.  Cast derefs are not allowed.  These
4864    * addresses will be 32-bit scalars but the format is immaterial because
4865    * you can always chase the chain.
4866    */
4867   nir_address_format_logical,
4868} nir_address_format;
4869
4870static inline unsigned
4871nir_address_format_bit_size(nir_address_format addr_format)
4872{
4873   switch (addr_format) {
4874   case nir_address_format_32bit_global:              return 32;
4875   case nir_address_format_64bit_global:              return 64;
4876   case nir_address_format_64bit_global_32bit_offset: return 32;
4877   case nir_address_format_64bit_bounded_global:      return 32;
4878   case nir_address_format_32bit_index_offset:        return 32;
4879   case nir_address_format_32bit_index_offset_pack64: return 64;
4880   case nir_address_format_vec2_index_32bit_offset:   return 32;
4881   case nir_address_format_62bit_generic:             return 64;
4882   case nir_address_format_32bit_offset:              return 32;
4883   case nir_address_format_32bit_offset_as_64bit:     return 64;
4884   case nir_address_format_logical:                   return 32;
4885   }
4886   unreachable("Invalid address format");
4887}
4888
4889static inline unsigned
4890nir_address_format_num_components(nir_address_format addr_format)
4891{
4892   switch (addr_format) {
4893   case nir_address_format_32bit_global:              return 1;
4894   case nir_address_format_64bit_global:              return 1;
4895   case nir_address_format_64bit_global_32bit_offset: return 4;
4896   case nir_address_format_64bit_bounded_global:      return 4;
4897   case nir_address_format_32bit_index_offset:        return 2;
4898   case nir_address_format_32bit_index_offset_pack64: return 1;
4899   case nir_address_format_vec2_index_32bit_offset:   return 3;
4900   case nir_address_format_62bit_generic:             return 1;
4901   case nir_address_format_32bit_offset:              return 1;
4902   case nir_address_format_32bit_offset_as_64bit:     return 1;
4903   case nir_address_format_logical:                   return 1;
4904   }
4905   unreachable("Invalid address format");
4906}
4907
4908static inline const struct glsl_type *
4909nir_address_format_to_glsl_type(nir_address_format addr_format)
4910{
4911   unsigned bit_size = nir_address_format_bit_size(addr_format);
4912   assert(bit_size == 32 || bit_size == 64);
4913   return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
4914                           nir_address_format_num_components(addr_format));
4915}
4916
4917const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
4918
4919nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
4920                                nir_address_format addr_format);
4921
4922nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
4923                                 nir_address_format addr_format);
4924
4925nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b,
4926                                                 nir_deref_instr *deref,
4927                                                 nir_ssa_def *base_addr,
4928                                                 nir_address_format addr_format);
4929
4930bool nir_get_explicit_deref_align(nir_deref_instr *deref,
4931                                  bool default_to_type_align,
4932                                  uint32_t *align_mul,
4933                                  uint32_t *align_offset);
4934
4935void nir_lower_explicit_io_instr(struct nir_builder *b,
4936                                 nir_intrinsic_instr *io_instr,
4937                                 nir_ssa_def *addr,
4938                                 nir_address_format addr_format);
4939
4940bool nir_lower_explicit_io(nir_shader *shader,
4941                           nir_variable_mode modes,
4942                           nir_address_format);
4943
4944bool
4945nir_lower_shader_calls(nir_shader *shader,
4946                       nir_address_format address_format,
4947                       unsigned stack_alignment,
4948                       nir_shader ***resume_shaders_out,
4949                       uint32_t *num_resume_shaders_out,
4950                       void *mem_ctx);
4951
4952nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
4953nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
4954nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
4955
4956bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);
4957
4958bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl);
4959bool nir_lower_regs_to_ssa(nir_shader *shader);
4960bool nir_lower_vars_to_ssa(nir_shader *shader);
4961
4962bool nir_remove_dead_derefs(nir_shader *shader);
4963bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
4964
/** Options for nir_remove_dead_variables(). */
typedef struct nir_remove_dead_variables_options {
   /* Optional filter: returns whether the given variable may be removed.
    * NOTE(review): presumably a NULL callback means "no filtering" —
    * confirm against the pass implementation. */
   bool (*can_remove_var)(nir_variable *var, void *data);
   /* User pointer passed as the second argument to can_remove_var. */
   void *can_remove_var_data;
} nir_remove_dead_variables_options;
4969
4970bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
4971                               const nir_remove_dead_variables_options *options);
4972
4973bool nir_lower_variable_initializers(nir_shader *shader,
4974                                     nir_variable_mode modes);
4975bool nir_zero_initialize_shared_memory(nir_shader *shader,
4976                                       const unsigned shared_size,
4977                                       const unsigned chunk_size);
4978
4979bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
4980bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
4981                           const void *_data);
4982void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
4983                          bool alpha_to_one,
4984                          const gl_state_index16 *alpha_ref_state_tokens);
4985bool nir_lower_alu(nir_shader *shader);
4986
4987bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
4988                    bool always_precise);
4989
4990bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
4991bool nir_lower_bool_to_bitsize(nir_shader *shader);
4992bool nir_lower_bool_to_float(nir_shader *shader);
4993bool nir_lower_bool_to_int32(nir_shader *shader);
4994bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
4995bool nir_lower_convert_alu_types(nir_shader *shader,
4996                                 bool (*should_lower)(nir_intrinsic_instr *));
4997bool nir_lower_constant_convert_alu_types(nir_shader *shader);
4998bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
4999bool nir_lower_int_to_float(nir_shader *shader);
5000bool nir_lower_load_const_to_scalar(nir_shader *shader);
5001bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
5002bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
5003void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
5004void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
5005                                                  bool outputs_only);
5006void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
5007bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
5008bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
5009bool nir_vectorize_tess_levels(nir_shader *shader);
5010
5011bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
5012bool nir_lower_fragcoord_wtrans(nir_shader *shader);
5013void nir_lower_viewport_transform(nir_shader *shader);
5014bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
5015
5016bool nir_lower_is_helper_invocation(nir_shader *shader);
5017
/** Options for nir_lower_subgroups().
 *
 * The bool:1 flags each enable lowering of the correspondingly-named
 * subgroup operation; the uint8_t fields describe the target's subgroup
 * and ballot configuration used by those lowerings.
 */
typedef struct nir_lower_subgroups_options {
   /* Target subgroup size (assumed known at compile time). */
   uint8_t subgroup_size;
   /* Bit size of each ballot component. */
   uint8_t ballot_bit_size;
   /* Number of components in a ballot value. */
   uint8_t ballot_components;
   bool lower_to_scalar:1;
   bool lower_vote_trivial:1;
   bool lower_vote_eq:1;
   bool lower_subgroup_masks:1;
   bool lower_shuffle:1;
   bool lower_shuffle_to_32bit:1;
   bool lower_shuffle_to_swizzle_amd:1;
   bool lower_quad:1;
   bool lower_quad_broadcast_dynamic:1;
   bool lower_quad_broadcast_dynamic_to_const:1;
   bool lower_elect:1;
   bool lower_read_invocation_to_cond:1;
} nir_lower_subgroups_options;
5035
5036bool nir_lower_subgroups(nir_shader *shader,
5037                         const nir_lower_subgroups_options *options);
5038
5039bool nir_lower_system_values(nir_shader *shader);
5040
/** Options for nir_lower_compute_system_values().
 *
 * The has_* flags advertise system values natively provided by the target;
 * the lower_/shuffle_ flags request specific rewrites of compute built-ins.
 */
typedef struct nir_lower_compute_system_values_options {
   bool has_base_global_invocation_id:1;
   bool has_base_workgroup_id:1;
   bool shuffle_local_ids_for_quad_derivatives:1;
   bool lower_local_invocation_index:1;
} nir_lower_compute_system_values_options;
5047
5048bool nir_lower_compute_system_values(nir_shader *shader,
5049                                     const nir_lower_compute_system_values_options *options);
5050
/** Selects which system values nir_lower_sysvals_to_varyings() converts
 * into ordinary input varyings.
 */
struct nir_lower_sysvals_to_varyings_options {
   bool frag_coord:1;
   bool front_face:1;
   bool point_coord:1;
};
5056
5057bool
5058nir_lower_sysvals_to_varyings(nir_shader *shader,
5059                              const struct nir_lower_sysvals_to_varyings_options *options);
5060
5061enum PACKED nir_lower_tex_packing {
5062   /** No packing */
5063   nir_lower_tex_packing_none = 0,
5064   /**
5065    * The sampler returns up to 2 32-bit words of half floats or 16-bit signed
5066    * or unsigned ints based on the sampler type
5067    */
5068   nir_lower_tex_packing_16,
5069   /** The sampler returns 1 32-bit word of 4x8 unorm */
5070   nir_lower_tex_packing_8,
5071};
5072
5073typedef struct nir_lower_tex_options {
5074   /**
5075    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
5076    * sampler types a texture projector is lowered.
5077    */
5078   unsigned lower_txp;
5079
5080   /**
5081    * If true, lower away nir_tex_src_offset for all texelfetch instructions.
5082    */
5083   bool lower_txf_offset;
5084
5085   /**
5086    * If true, lower away nir_tex_src_offset for all rect textures.
5087    */
5088   bool lower_rect_offset;
5089
5090   /**
5091    * If true, lower rect textures to 2D, using txs to fetch the
5092    * texture dimensions and dividing the texture coords by the
5093    * texture dims to normalize.
5094    */
5095   bool lower_rect;
5096
5097   /**
5098    * If true, convert yuv to rgb.
5099    */
5100   unsigned lower_y_uv_external;
5101   unsigned lower_y_u_v_external;
5102   unsigned lower_yx_xuxv_external;
5103   unsigned lower_xy_uxvx_external;
5104   unsigned lower_ayuv_external;
5105   unsigned lower_xyuv_external;
5106   unsigned lower_yuv_external;
5107   unsigned lower_yu_yv_external;
5108   unsigned lower_y41x_external;
5109   unsigned bt709_external;
5110   unsigned bt2020_external;
5111
5112   /**
5113    * To emulate certain texture wrap modes, this can be used
5114    * to saturate the specified tex coord to [0.0, 1.0].  The
5115    * bits are according to sampler #, ie. if, for example:
5116    *
5117    *   (conf->saturate_s & (1 << n))
5118    *
5119    * is true, then the s coord for sampler n is saturated.
5120    *
5121    * Note that clamping must happen *after* projector lowering
5122    * so any projected texture sample instruction with a clamped
5123    * coordinate gets automatically lowered, regardless of the
5124    * 'lower_txp' setting.
5125    */
5126   unsigned saturate_s;
5127   unsigned saturate_t;
5128   unsigned saturate_r;
5129
5130   /* Bitmask of textures that need swizzling.
5131    *
5132    * If (swizzle_result & (1 << texture_index)), then the swizzle in
5133    * swizzles[texture_index] is applied to the result of the texturing
5134    * operation.
5135    */
5136   unsigned swizzle_result;
5137
5138   /* A swizzle for each texture.  Values 0-3 represent x, y, z, or w swizzles
5139    * while 4 and 5 represent 0 and 1 respectively.
5140    *
5141    * Indexed by texture-id.
5142    */
5143   uint8_t swizzles[32][4];
5144
5145   /* Can be used to scale sampled values in range required by the
5146    * format.
5147    *
5148    * Indexed by texture-id.
5149    */
5150   float scale_factors[32];
5151
5152   /**
5153    * Bitmap of textures that need srgb to linear conversion.  If
5154    * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
5155    * of the texture are lowered to linear.
5156    */
5157   unsigned lower_srgb;
5158
5159   /**
5160    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
5161    */
5162   bool lower_txd_cube_map;
5163
5164   /**
5165    * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
5166    */
5167   bool lower_txd_3d;
5168
5169   /**
5170    * If true, lower nir_texop_txd on shadow samplers (except cube maps)
5171    * with nir_texop_txl. Notice that cube map shadow samplers are lowered
5172    * with lower_txd_cube_map.
5173    */
5174   bool lower_txd_shadow;
5175
5176   /**
5177    * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
5178    * Implies lower_txd_cube_map and lower_txd_shadow.
5179    */
5180   bool lower_txd;
5181
5182   /**
5183    * If true, lower nir_texop_txb that try to use shadow compare and min_lod
5184    * at the same time to a nir_texop_lod, some math, and nir_texop_tex.
5185    */
5186   bool lower_txb_shadow_clamp;
5187
5188   /**
5189    * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
5190    * with nir_texop_txl.  This includes cube maps.
5191    */
5192   bool lower_txd_shadow_clamp;
5193
5194   /**
5195    * If true, lower nir_texop_txd on when it uses both offset and min_lod
5196    * with nir_texop_txl.  This includes cube maps.
5197    */
5198   bool lower_txd_offset_clamp;
5199
5200   /**
5201    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5202    * sampler is bindless.
5203    */
5204   bool lower_txd_clamp_bindless_sampler;
5205
5206   /**
5207    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5208    * sampler index is not statically determinable to be less than 16.
5209    */
5210   bool lower_txd_clamp_if_sampler_index_not_lt_16;
5211
5212   /**
5213    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
5214    * 0-lod followed by a nir_ishr.
5215    */
5216   bool lower_txs_lod;
5217
5218   /**
5219    * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
5220    * 2D array type followed by a nir_idiv by 6.
5221    */
5222   bool lower_txs_cube_array;
5223
5224   /**
5225    * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
5226    * mixed-up tg4 locations.
5227    */
5228   bool lower_tg4_broadcom_swizzle;
5229
5230   /**
5231    * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
5232    */
5233   bool lower_tg4_offsets;
5234
5235   /**
5236    * Lower txf_ms to fragment_mask_fetch and fragment_fetch and samples_identical to
5237    * fragment_mask_fetch.
5238    */
5239   bool lower_to_fragment_fetch_amd;
5240
5241   /**
5242    * To lower packed sampler return formats.
5243    *
5244    * Indexed by sampler-id.
5245    */
5246   enum nir_lower_tex_packing lower_tex_packing[32];
5247} nir_lower_tex_options;
5248
5249/** Lowers complex texture instructions to simpler ones */
5250bool nir_lower_tex(nir_shader *shader,
5251                   const nir_lower_tex_options *options);
5252
5253typedef struct nir_lower_image_options {
5254   /**
5255    * If true, lower cube size operations.
5256    */
5257   bool lower_cube_size;
5258} nir_lower_image_options;
5259
5260bool nir_lower_image(nir_shader *nir,
5261                     const nir_lower_image_options *options);
5262
5263bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);
5264
/** Bitmask of resource-access kinds handled by
 * nir_lower_non_uniform_access().
 */
enum nir_lower_non_uniform_access_type {
   nir_lower_non_uniform_ubo_access     = (1 << 0),
   nir_lower_non_uniform_ssbo_access    = (1 << 1),
   nir_lower_non_uniform_texture_access = (1 << 2),
   nir_lower_non_uniform_image_access   = (1 << 3),
};
5271
5272/* Given the nir_src used for the resource, return the channels which might be non-uniform. */
5273typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);
5274
/** Options for nir_lower_non_uniform_access(). */
typedef struct nir_lower_non_uniform_access_options {
   /* Which access kinds to lower (bitmask). */
   enum nir_lower_non_uniform_access_type types;
   /* Optional: given the resource nir_src, returns which channels might be
    * non-uniform (see nir_lower_non_uniform_access_callback). */
   nir_lower_non_uniform_access_callback callback;
   /* User pointer passed to callback. */
   void *callback_data;
} nir_lower_non_uniform_access_options;
5280
5281bool nir_lower_non_uniform_access(nir_shader *shader,
5282                                  const nir_lower_non_uniform_access_options *options);
5283
5284typedef struct {
5285   /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
5286    * is used. It is the faster of the two but it is not exact in some cases
5287    * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
5288    *
5289    * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
5290    * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
5291    * instructions than the nv50 path and many of them are integer
5292    * multiplications, so it is probably slower. It should always return the
5293    * correct result, though.
5294    */
5295   bool imprecise_32bit_lowering;
5296
5297   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
5298    * division lowering
5299    */
5300   bool allow_fp16;
5301} nir_lower_idiv_options;
5302
5303bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
5304
/** Options for nir_lower_input_attachments().
 *
 * The use_*_sysval flags select system values (rather than varyings) as the
 * source of the fragment coordinate / layer id during lowering.
 */
typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   /* If set, use the view id in place of the layer id. */
   bool use_view_id_for_layer;
} nir_input_attachment_options;
5310
5311bool nir_lower_input_attachments(nir_shader *shader,
5312                                 const nir_input_attachment_options *options);
5313
5314bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
5315                       bool use_vars,
5316                       bool use_clipdist_array,
5317                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5318bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
5319                       bool use_clipdist_array,
5320                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5321bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
5322                       bool use_clipdist_array);
5323bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
5324bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
5325
5326void nir_lower_point_size_mov(nir_shader *shader,
5327                              const gl_state_index16 *pointsize_state_tokens);
5328
5329bool nir_lower_frexp(nir_shader *nir);
5330
5331void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
5332
5333bool nir_lower_clamp_color_outputs(nir_shader *shader);
5334
5335bool nir_lower_flatshade(nir_shader *shader);
5336
5337void nir_lower_passthrough_edgeflags(nir_shader *shader);
5338bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
5339                              const gl_state_index16 *uniform_state_tokens);
5340
5341typedef struct nir_lower_wpos_ytransform_options {
5342   gl_state_index16 state_tokens[STATE_LENGTH];
5343   bool fs_coord_origin_upper_left :1;
5344   bool fs_coord_origin_lower_left :1;
5345   bool fs_coord_pixel_center_integer :1;
5346   bool fs_coord_pixel_center_half_integer :1;
5347} nir_lower_wpos_ytransform_options;
5348
5349bool nir_lower_wpos_ytransform(nir_shader *shader,
5350                               const nir_lower_wpos_ytransform_options *options);
5351bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
5352
5353bool nir_lower_pntc_ytransform(nir_shader *shader,
5354                               const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5355
5356bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
5357
5358bool nir_lower_fb_read(nir_shader *shader);
5359
/* Options for nir_lower_drawpixels(). */
typedef struct nir_lower_drawpixels_options {
   /* State slot tokens locating the driver uniforms the pass reads. */
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   /* Sampler bindings used by the generated texture instructions. */
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps :1;      /* enable the pixel-map lookup path */
   bool scale_and_bias :1;  /* apply scale/bias to the fetched color */
} nir_lower_drawpixels_options;
5369
5370void nir_lower_drawpixels(nir_shader *shader,
5371                          const nir_lower_drawpixels_options *options);
5372
/* Options for nir_lower_bitmap(). */
typedef struct nir_lower_bitmap_options {
   unsigned sampler;   /* sampler binding used for the bitmap fetch */
   bool swizzle_xxxx;  /* presumably broadcast .x to all channels — confirm
                        * against nir_lower_bitmap.c */
} nir_lower_bitmap_options;
5377
5378void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
5379
5380bool nir_lower_atomics_to_ssbo(nir_shader *shader);
5381
/* Bitmask selecting which classes of source modifiers
 * nir_lower_to_source_mods() is allowed to introduce.
 */
typedef enum {
   nir_lower_int_source_mods = 0x1,
   nir_lower_float_source_mods = 0x2,
   nir_lower_64bit_source_mods = 0x4,
   nir_lower_triop_abs = 0x8,
   /* Union of all of the individual flags above. */
   nir_lower_all_source_mods = 0xf
} nir_lower_to_source_mods_flags;
5389
5390
5391bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);
5392
/* Bitmask controlling optional behaviors of nir_lower_gs_intrinsics(). */
typedef enum {
   nir_lower_gs_intrinsics_per_stream = 1 << 0,
   nir_lower_gs_intrinsics_count_primitives = 1 << 1,
   nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
   nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
} nir_lower_gs_intrinsics_flags;
5399
5400bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);
5401
/* Callback for nir_lower_bit_size(): given an instruction, return the bit
 * size to lower it to (presumably 0 means "leave unchanged" — confirm in
 * the pass implementation).  The void* is the caller's callback_data.
 */
typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);
5403
5404bool nir_lower_bit_size(nir_shader *shader,
5405                        nir_lower_bit_size_callback callback,
5406                        void *callback_data);
5407bool nir_lower_64bit_phis(nir_shader *shader);
5408
5409nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
5410bool nir_lower_int64(nir_shader *shader);
5411
5412nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
5413bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
5414                       nir_lower_doubles_options options);
5415bool nir_lower_pack(nir_shader *shader);
5416
5417bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
5418bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
5419                          uint64_t varying_mask, bool use_16bit_slots);
5420bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
5421                          nir_alu_type types);
5422bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
5423bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
5424                                        unsigned tex_src_types);
5425
/* Per-source bit-size constraint for nir_legalize_16bit_sampler_srcs().
 * An array of these, indexed by nir_tex_src_type, forms the constraint
 * table (nir_tex_src_type_constraints).
 */
typedef struct {
   bool legalize_type;         /* whether this src should be legalized */
   uint8_t bit_size;           /* bit_size to enforce */
   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
5431
5432bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
5433                                     nir_tex_src_type_constraints constraints);
5434
5435bool nir_lower_point_size(nir_shader *shader, float min, float max);
5436
5437void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
5438                                bool point_coord_is_sysval, bool yinvert);
5439
/* Bitmask of interpolation modes for nir_lower_interpolation() to lower.
 * NOTE(review): flag values start at (1 << 1); bit 0 is unused — appears
 * intentional, confirm before repacking.
 */
typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid  = (1 << 3),
   nir_lower_interpolation_pixel     = (1 << 4),
   nir_lower_interpolation_sample    = (1 << 5),
} nir_lower_interpolation_options;
5447
5448bool nir_lower_interpolation(nir_shader *shader,
5449                             nir_lower_interpolation_options options);
5450
5451bool nir_lower_discard_or_demote(nir_shader *shader,
5452                                 bool force_correct_quad_ops_after_discard);
5453
5454bool nir_lower_memory_model(nir_shader *shader);
5455
5456bool nir_lower_goto_ifs(nir_shader *shader);
5457
5458bool nir_shader_uses_view_index(nir_shader *shader);
5459bool nir_can_lower_multiview(nir_shader *shader);
5460bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
5461
5462
5463bool nir_lower_fp16_casts(nir_shader *shader);
5464bool nir_normalize_cubemap_coords(nir_shader *shader);
5465
5466bool nir_shader_supports_implicit_lod(nir_shader *shader);
5467
5468void nir_live_ssa_defs_impl(nir_function_impl *impl);
5469
5470const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);
5471
5472void nir_loop_analyze_impl(nir_function_impl *impl,
5473                           nir_variable_mode indirect_mask);
5474
5475bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
5476
5477bool nir_repair_ssa_impl(nir_function_impl *impl);
5478bool nir_repair_ssa(nir_shader *shader);
5479
5480void nir_convert_loop_to_lcssa(nir_loop *loop);
5481bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
5482void nir_divergence_analysis(nir_shader *shader);
5483bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
5484
5485/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
5486 * registers.  If false, convert all values (even those not involved in a phi
5487 * node) to registers.
5488 */
5489bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
5490
5491bool nir_lower_phis_to_regs_block(nir_block *block);
5492bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
5493bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
5494
5495bool nir_lower_samplers(nir_shader *shader);
5496bool nir_lower_ssbo(nir_shader *shader);
5497
/* Options for nir_lower_printf(). */
typedef struct nir_lower_printf_options {
   bool treat_doubles_as_floats : 1; /* demote double arguments to float —
                                      * confirm exact semantics in the pass */
   unsigned max_buffer_size;         /* cap on the printf buffer (bytes,
                                      * presumably) */
} nir_lower_printf_options;
5502
5503bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
5504
5505/* This is here for unit tests. */
5506bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
5507
5508bool nir_opt_comparison_pre(nir_shader *shader);
5509
/* Options for nir_opt_access(), which infers access qualifiers. */
typedef struct nir_opt_access_options {
   bool is_vulkan;           /* shader follows Vulkan (not GL) rules */
   bool infer_non_readable;  /* also infer "not read" (write-only) access */
} nir_opt_access_options;
5514
5515bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
5516bool nir_opt_algebraic(nir_shader *shader);
5517bool nir_opt_algebraic_before_ffma(nir_shader *shader);
5518bool nir_opt_algebraic_late(nir_shader *shader);
5519bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
5520bool nir_opt_constant_folding(nir_shader *shader);
5521
/* Callback for nir_opt_combine_memory_barriers().
 *
 * Try to combine a and b into a.  Return true if combination was possible,
 * which will result in b being removed by the pass.  Return false if
 * combination wasn't possible.  `data` is the caller's opaque pointer.
 */
typedef bool (*nir_combine_memory_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
5528
5529bool nir_opt_combine_memory_barriers(nir_shader *shader,
5530                                     nir_combine_memory_barrier_cb combine_cb,
5531                                     void *data);
5532
5533bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
5534
5535bool nir_copy_prop_impl(nir_function_impl *impl);
5536bool nir_copy_prop(nir_shader *shader);
5537
5538bool nir_opt_copy_prop_vars(nir_shader *shader);
5539
5540bool nir_opt_cse(nir_shader *shader);
5541
5542bool nir_opt_dce(nir_shader *shader);
5543
5544bool nir_opt_dead_cf(nir_shader *shader);
5545
5546bool nir_opt_dead_write_vars(nir_shader *shader);
5547
5548bool nir_opt_deref_impl(nir_function_impl *impl);
5549bool nir_opt_deref(nir_shader *shader);
5550
5551bool nir_opt_find_array_copies(nir_shader *shader);
5552
5553bool nir_opt_fragdepth(nir_shader *shader);
5554
5555bool nir_opt_gcm(nir_shader *shader, bool value_number);
5556
5557bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
5558
5559bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
5560
5561bool nir_opt_intrinsics(nir_shader *shader);
5562
5563bool nir_opt_large_constants(nir_shader *shader,
5564                             glsl_type_size_align_func size_align,
5565                             unsigned threshold);
5566
5567bool nir_opt_loop_unroll(nir_shader *shader);
5568
/* Classes of instructions that nir_opt_sink() / nir_opt_move() are allowed
 * to reschedule.  (Indentation normalized to the file's 3-space style.)
 */
typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo = (1 << 1),
   nir_move_load_input = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies = (1 << 4),
   nir_move_load_ssbo = (1 << 5),
} nir_move_options;
5577
5578bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
5579
5580bool nir_opt_sink(nir_shader *shader, nir_move_options options);
5581
5582bool nir_opt_move(nir_shader *shader, nir_move_options options);
5583
5584bool nir_opt_offsets(nir_shader *shader);
5585
5586bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
5587                             bool indirect_load_ok, bool expensive_alu_ok);
5588
5589bool nir_opt_rematerialize_compares(nir_shader *shader);
5590
5591bool nir_opt_remove_phis(nir_shader *shader);
5592bool nir_opt_remove_phis_block(nir_block *block);
5593
5594bool nir_opt_phi_precision(nir_shader *shader);
5595
5596bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);
5597
5598bool nir_opt_trivial_continues(nir_shader *shader);
5599
5600bool nir_opt_undef(nir_shader *shader);
5601
5602bool nir_lower_undef_to_zero(nir_shader *shader);
5603
5604bool nir_opt_uniform_atomics(nir_shader *shader);
5605
/* Filter callback for nir_opt_vectorize(): presumably returns true when the
 * instruction may be vectorized.  `data` is the caller's opaque pointer.
 */
typedef bool (*nir_opt_vectorize_cb)(const nir_instr *instr, void *data);
5607
5608bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
5609                       void *data);
5610
5611bool nir_opt_conditional_discard(nir_shader *shader);
5612bool nir_opt_move_discards_to_top(nir_shader *shader);
5613
/* Driver callback for nir_opt_load_store_vectorize(): decide whether the
 * two accesses `low` and `high` may be merged into one access with the
 * given alignment (align_mul/align_offset), bit size, and component count.
 */
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                              unsigned align_offset,
                                              unsigned bit_size,
                                              unsigned num_components,
                                              nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                              void *data);
5620
/* Options for nir_opt_load_store_vectorize(). */
typedef struct {
   nir_should_vectorize_mem_func callback; /* driver policy hook */
   nir_variable_mode modes;        /* variable modes to consider */
   nir_variable_mode robust_modes; /* modes needing robust-access handling —
                                    * confirm exact meaning in the pass */
   void *cb_data;                  /* passed through to callback */
} nir_load_store_vectorize_options;
5627
5628bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
5629
5630void nir_sweep(nir_shader *shader);
5631
5632void nir_remap_dual_slot_attributes(nir_shader *shader,
5633                                    uint64_t *dual_slot_inputs);
5634uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
5635
5636nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
5637gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
5638
5639static inline bool
5640nir_variable_is_in_ubo(const nir_variable *var)
5641{
5642   return (var->data.mode == nir_var_mem_ubo &&
5643           var->interface_type != NULL);
5644}
5645
5646static inline bool
5647nir_variable_is_in_ssbo(const nir_variable *var)
5648{
5649   return (var->data.mode == nir_var_mem_ssbo &&
5650           var->interface_type != NULL);
5651}
5652
5653static inline bool
5654nir_variable_is_in_block(const nir_variable *var)
5655{
5656   return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
5657}
5658
/* Device limits consulted by nir_unsigned_upper_bound() when bounding
 * values derived from system values (subgroup/workgroup IDs, vertex
 * attributes, ...).
 */
typedef struct nir_unsigned_upper_bound_config {
   unsigned min_subgroup_size;
   unsigned max_subgroup_size;
   unsigned max_workgroup_invocations;
   unsigned max_workgroup_count[3];  /* per-dimension dispatch limits */
   unsigned max_workgroup_size[3];   /* per-dimension local-size limits */

   /* Per-attribute maximum value; indexed by vertex attribute slot. */
   uint32_t vertex_attrib_max[32];
} nir_unsigned_upper_bound_config;
5668
5669uint32_t
5670nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
5671                         nir_ssa_scalar scalar,
5672                         const nir_unsigned_upper_bound_config *config);
5673
5674bool
5675nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
5676                            nir_ssa_scalar ssa, unsigned const_val,
5677                            const nir_unsigned_upper_bound_config *config);
5678
5679#include "nir_inline_helpers.h"
5680
5681#ifdef __cplusplus
5682} /* extern "C" */
5683#endif
5684
5685#endif /* NIR_H */
5686