101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Connor Abbott 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "GL/gl.h" /* GLenum */
#include "util/list.h"
#include "util/log.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/compiler.h"
#include "util/enum_operators.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "compiler/nir_types.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include <stdio.h>

#ifndef NDEBUG
#include "util/debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Canonical NIR boolean values: false is all-zeros, true is all-ones. */
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
/* Maximum number of components a NIR vector value may have. */
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
/* Flag bit in nir_variable_data::stream marking packed per-component streams. */
#define NIR_STREAM_PACKED (1 << 8)

/* One bit per vector component; 16 bits covers NIR_MAX_VEC_COMPONENTS. */
typedef uint16_t nir_component_mask_t;

/**
 * Returns true if \p num_components is a vector size NIR supports:
 * 1-5 components, 8, or 16.
 */
static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1 &&
           num_components <= 5) ||
          num_components == 8 ||
          num_components == 16;
}

/** Returns true if \p mask can be losslessly re-expressed at \p new_bit_size. */
bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                        unsigned old_bit_size,
                                        unsigned new_bit_size);
/** Re-expresses \p mask for channels of \p new_bit_size instead of \p old_bit_size. */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size);

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;


/**
 * Description of built-in state associated with a uniform
 *
 * \sa nir_variable::state_slots
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
   uint16_t swizzle;
} nir_state_slot;

/** Storage classes ("modes") a nir_variable can live in; usable as a bitmask. */
typedef enum {
   nir_var_shader_in       = (1 << 0),
   nir_var_shader_out      = (1 << 1),
   nir_var_shader_temp     = (1 << 2),
   nir_var_function_temp   = (1 << 3),
   nir_var_uniform         = (1 << 4),
   nir_var_mem_ubo         = (1 << 5),
   nir_var_system_value    = (1 << 6),
   nir_var_mem_ssbo        = (1 << 7),
   nir_var_mem_shared      = (1 << 8),
   nir_var_mem_global      = (1 << 9),
   /** Union of the modes that generic (untyped) pointers may address */
   nir_var_mem_generic     = (nir_var_shader_temp |
                              nir_var_function_temp |
                              nir_var_mem_shared |
                              nir_var_mem_global),
   nir_var_mem_push_const  = (1 << 10), /* not actually used for variables */
   nir_var_mem_constant    = (1 << 11),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data = (1 << 12),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib  = (1 << 13),
   /** Modes that shaders may only read, never write */
   nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
                             nir_var_system_value | nir_var_mem_constant |
                             nir_var_mem_ubo,
   /** Modes where vector derefs can be indexed as arrays */
   nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo |
                                 nir_var_mem_shared | nir_var_mem_global |
                                 nir_var_mem_push_const,
   nir_num_variable_modes  = 14,
   nir_var_all             = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)

/**
 * Rounding modes.
15701e04c3fSmrg */ 15801e04c3fSmrgtypedef enum { 15901e04c3fSmrg nir_rounding_mode_undef = 0, 16001e04c3fSmrg nir_rounding_mode_rtne = 1, /* round to nearest even */ 16101e04c3fSmrg nir_rounding_mode_ru = 2, /* round up */ 16201e04c3fSmrg nir_rounding_mode_rd = 3, /* round down */ 16301e04c3fSmrg nir_rounding_mode_rtz = 4, /* round towards zero */ 16401e04c3fSmrg} nir_rounding_mode; 16501e04c3fSmrg 16601e04c3fSmrgtypedef union { 1677e102996Smaya bool b; 1687e102996Smaya float f32; 1697e102996Smaya double f64; 1707e102996Smaya int8_t i8; 1717e102996Smaya uint8_t u8; 1727e102996Smaya int16_t i16; 1737e102996Smaya uint16_t u16; 1747e102996Smaya int32_t i32; 1757e102996Smaya uint32_t u32; 1767e102996Smaya int64_t i64; 1777e102996Smaya uint64_t u64; 17801e04c3fSmrg} nir_const_value; 17901e04c3fSmrg 1807e102996Smaya#define nir_const_value_to_array(arr, c, components, m) \ 1817e102996Smaya{ \ 1827e102996Smaya for (unsigned i = 0; i < components; ++i) \ 1837e102996Smaya arr[i] = c[i].m; \ 1847e102996Smaya} while (false) 1857e102996Smaya 1867e102996Smayastatic inline nir_const_value 1877e102996Smayanir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) 1887e102996Smaya{ 1897e102996Smaya nir_const_value v; 1907e102996Smaya memset(&v, 0, sizeof(v)); 1917e102996Smaya 1927e102996Smaya switch (bit_size) { 1937e102996Smaya case 1: v.b = x; break; 1947e102996Smaya case 8: v.u8 = x; break; 1957e102996Smaya case 16: v.u16 = x; break; 1967e102996Smaya case 32: v.u32 = x; break; 1977e102996Smaya case 64: v.u64 = x; break; 1987e102996Smaya default: 1997e102996Smaya unreachable("Invalid bit size"); 2007e102996Smaya } 2017e102996Smaya 2027e102996Smaya return v; 2037e102996Smaya} 2047e102996Smaya 2057e102996Smayastatic inline nir_const_value 2067e102996Smayanir_const_value_for_int(int64_t i, unsigned bit_size) 2077e102996Smaya{ 2087e102996Smaya nir_const_value v; 2097e102996Smaya memset(&v, 0, sizeof(v)); 2107e102996Smaya 2117e102996Smaya assert(bit_size <= 64); 2127e102996Smaya if 
(bit_size < 64) { 2137e102996Smaya assert(i >= (-(1ll << (bit_size - 1)))); 2147e102996Smaya assert(i < (1ll << (bit_size - 1))); 2157e102996Smaya } 2167e102996Smaya 2177e102996Smaya return nir_const_value_for_raw_uint(i, bit_size); 2187e102996Smaya} 2197e102996Smaya 2207e102996Smayastatic inline nir_const_value 2217e102996Smayanir_const_value_for_uint(uint64_t u, unsigned bit_size) 2227e102996Smaya{ 2237e102996Smaya nir_const_value v; 2247e102996Smaya memset(&v, 0, sizeof(v)); 2257e102996Smaya 2267e102996Smaya assert(bit_size <= 64); 2277e102996Smaya if (bit_size < 64) 2287e102996Smaya assert(u < (1ull << bit_size)); 2297e102996Smaya 2307e102996Smaya return nir_const_value_for_raw_uint(u, bit_size); 2317e102996Smaya} 2327e102996Smaya 2337e102996Smayastatic inline nir_const_value 2347e102996Smayanir_const_value_for_bool(bool b, unsigned bit_size) 2357e102996Smaya{ 2367e102996Smaya /* Booleans use a 0/-1 convention */ 2377e102996Smaya return nir_const_value_for_int(-(int)b, bit_size); 2387e102996Smaya} 2397e102996Smaya 2407e102996Smaya/* This one isn't inline because it requires half-float conversion */ 2417e102996Smayanir_const_value nir_const_value_for_float(double b, unsigned bit_size); 2427e102996Smaya 2437e102996Smayastatic inline int64_t 2447e102996Smayanir_const_value_as_int(nir_const_value value, unsigned bit_size) 2457e102996Smaya{ 2467e102996Smaya switch (bit_size) { 2477e102996Smaya /* int1_t uses 0/-1 convention */ 2487e102996Smaya case 1: return -(int)value.b; 2497e102996Smaya case 8: return value.i8; 2507e102996Smaya case 16: return value.i16; 2517e102996Smaya case 32: return value.i32; 2527e102996Smaya case 64: return value.i64; 2537e102996Smaya default: 2547e102996Smaya unreachable("Invalid bit size"); 2557e102996Smaya } 2567e102996Smaya} 2577e102996Smaya 2587ec681f3Smrgstatic inline uint64_t 2597e102996Smayanir_const_value_as_uint(nir_const_value value, unsigned bit_size) 2607e102996Smaya{ 2617e102996Smaya switch (bit_size) { 2627e102996Smaya case 
1: return value.b; 2637e102996Smaya case 8: return value.u8; 2647e102996Smaya case 16: return value.u16; 2657e102996Smaya case 32: return value.u32; 2667e102996Smaya case 64: return value.u64; 2677e102996Smaya default: 2687e102996Smaya unreachable("Invalid bit size"); 2697e102996Smaya } 2707e102996Smaya} 2717e102996Smaya 2727e102996Smayastatic inline bool 2737e102996Smayanir_const_value_as_bool(nir_const_value value, unsigned bit_size) 2747e102996Smaya{ 2757e102996Smaya int64_t i = nir_const_value_as_int(value, bit_size); 2767e102996Smaya 2777e102996Smaya /* Booleans of any size use 0/-1 convention */ 2787e102996Smaya assert(i == 0 || i == -1); 2797e102996Smaya 2807e102996Smaya return i; 2817e102996Smaya} 2827e102996Smaya 2837e102996Smaya/* This one isn't inline because it requires half-float conversion */ 2847e102996Smayadouble nir_const_value_as_float(nir_const_value value, unsigned bit_size); 2857e102996Smaya 28601e04c3fSmrgtypedef struct nir_constant { 28701e04c3fSmrg /** 28801e04c3fSmrg * Value of the constant. 28901e04c3fSmrg * 29001e04c3fSmrg * The field used to back the values supplied by the constant is determined 29101e04c3fSmrg * by the type associated with the \c nir_variable. Constants may be 29201e04c3fSmrg * scalars, vectors, or matrices. 29301e04c3fSmrg */ 2947ec681f3Smrg nir_const_value values[NIR_MAX_VEC_COMPONENTS]; 29501e04c3fSmrg 29601e04c3fSmrg /* we could get this from the var->type but makes clone *much* easier to 29701e04c3fSmrg * not have to care about the type. 29801e04c3fSmrg */ 29901e04c3fSmrg unsigned num_elements; 30001e04c3fSmrg 30101e04c3fSmrg /* Array elements / Structure Fields */ 30201e04c3fSmrg struct nir_constant **elements; 30301e04c3fSmrg} nir_constant; 30401e04c3fSmrg 30501e04c3fSmrg/** 30601e04c3fSmrg * \brief Layout qualifiers for gl_FragDepth. 30701e04c3fSmrg * 30801e04c3fSmrg * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 30901e04c3fSmrg * with a layout qualifier. 
 */
typedef enum {
   nir_depth_layout_none, /**< No depth layout is specified. */
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;

/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */

typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * \sa nir_variable_mode
       */
      unsigned mode:14;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as \c const, shader inputs,
       * and uniforms.
       */
      unsigned read_only:1;
      unsigned centroid:1;
      unsigned sample:1;
      unsigned patch:1;
      unsigned invariant:1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all, in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision:2;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce:1;

      /**
       * When separate shader programs are enabled, only input/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other input/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io:1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * \sa glsl_interp_mode
       */
      unsigned interpolation:3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components. For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by \c location.
       */
      unsigned location_frac:2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed. In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact:1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output:1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless:1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding:1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it \b cannot be changed
       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
       * no effect).
       */
      unsigned explicit_location:1;

      /**
       * Was a transfer feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer:1;

      /**
       * Was a transfer feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride:1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset:1;

      /**
       * Layout of the matrix.  Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout:2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block:1;

      /**
       * How the variable was declared.  See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so should
       * not be visible via the API.
       */
      unsigned how_declared:2;

      /**
       * Is this variable per-view?  If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view:1;

      /**
       * Whether the variable is per-primitive.
       * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
       */
      unsigned per_primitive:1;

      /**
       * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to \c ir_depth_layout_none if and only if this
       * variable is \c gl_FragDepth and a layout qualifier is specified.
       */
      unsigned depth_layout:3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream:9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access:8;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set:5;

      /**
       * output index for dual source blending.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
       *   - Vertex shader output: one of the values from \c gl_varying_slot.
       *   - Geometry shader input: one of the values from \c gl_varying_slot.
       *   - Geometry shader output: one of the values from \c gl_varying_slot.
       *   - Fragment shader input: one of the values from \c gl_varying_slot.
       *   - Fragment shader output: one of the values from \c gl_frag_result.
       *   - Task shader output: one of the values from \c gl_varying_slot.
       *   - Mesh shader input: one of the values from \c gl_varying_slot.
       *   - Mesh shader output: one of the values from \c gl_varying_slot.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, uniforms (including samplers and images), and for UBO and SSBO
       * variables in GLSL.
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and
             * cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer:2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, \c state_slots must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree.  In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, \c num_state_slots will be zero and
    * \c state_slots will be \c NULL.
    */
   /*@{*/
   uint16_t num_state_slots;    /**< Number of state slots used */
   nir_state_slot *state_slots; /**< State descriptors. */
   /*@}*/

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
    *
    * \sa ir_variable::location
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier.  This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

/** Returns true if \p var has one of the (non-function-local) modes in \p modes. */
static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)

/** Returns true unless \p var is function-local (nir_var_function_temp). */
static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}

typedef struct nir_register {
   struct exec_node node;

   unsigned num_components; /** < number of vector components */
   unsigned num_array_elems; /** < size of array (0 for no array) */

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this register may have different values in different SIMD
    * invocations of the shader.
    */
   bool divergent;

   /** generic register index.
    */
   unsigned index;

   /** set of nir_srcs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_dests where this register is defined (written to) */
   struct list_head defs;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;
} nir_register;

#define nir_foreach_register(reg, reg_list) \
   foreach_list_typed(nir_register, reg, node, reg_list)
#define nir_foreach_register_safe(reg, reg_list) \
   foreach_list_typed_safe(nir_register, reg, node, reg_list)

/** The kind of a nir_instr; selects which nir_*_instr wrapper is valid. */
typedef enum PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_ssa_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct list_head gc_node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags.  For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index.
*/ 7817ec681f3Smrg uint32_t index; 78201e04c3fSmrg} nir_instr; 78301e04c3fSmrg 78401e04c3fSmrgstatic inline nir_instr * 78501e04c3fSmrgnir_instr_next(nir_instr *instr) 78601e04c3fSmrg{ 78701e04c3fSmrg struct exec_node *next = exec_node_get_next(&instr->node); 78801e04c3fSmrg if (exec_node_is_tail_sentinel(next)) 78901e04c3fSmrg return NULL; 79001e04c3fSmrg else 79101e04c3fSmrg return exec_node_data(nir_instr, next, node); 79201e04c3fSmrg} 79301e04c3fSmrg 79401e04c3fSmrgstatic inline nir_instr * 79501e04c3fSmrgnir_instr_prev(nir_instr *instr) 79601e04c3fSmrg{ 79701e04c3fSmrg struct exec_node *prev = exec_node_get_prev(&instr->node); 79801e04c3fSmrg if (exec_node_is_head_sentinel(prev)) 79901e04c3fSmrg return NULL; 80001e04c3fSmrg else 80101e04c3fSmrg return exec_node_data(nir_instr, prev, node); 80201e04c3fSmrg} 80301e04c3fSmrg 80401e04c3fSmrgstatic inline bool 80501e04c3fSmrgnir_instr_is_first(const nir_instr *instr) 80601e04c3fSmrg{ 80701e04c3fSmrg return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node)); 80801e04c3fSmrg} 80901e04c3fSmrg 81001e04c3fSmrgstatic inline bool 81101e04c3fSmrgnir_instr_is_last(const nir_instr *instr) 81201e04c3fSmrg{ 81301e04c3fSmrg return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); 81401e04c3fSmrg} 81501e04c3fSmrg 81601e04c3fSmrgtypedef struct nir_ssa_def { 81701e04c3fSmrg /** Instruction which produces this SSA value. */ 81801e04c3fSmrg nir_instr *parent_instr; 81901e04c3fSmrg 82001e04c3fSmrg /** set of nir_instrs where this register is used (read from) */ 82101e04c3fSmrg struct list_head uses; 82201e04c3fSmrg 82301e04c3fSmrg /** set of nir_ifs where this register is used as a condition */ 82401e04c3fSmrg struct list_head if_uses; 82501e04c3fSmrg 8267ec681f3Smrg /** generic SSA definition index. 
*/ 8277ec681f3Smrg unsigned index; 8287ec681f3Smrg 82901e04c3fSmrg uint8_t num_components; 83001e04c3fSmrg 83101e04c3fSmrg /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 83201e04c3fSmrg uint8_t bit_size; 8337ec681f3Smrg 8347ec681f3Smrg /** 8357ec681f3Smrg * True if this SSA value may have different values in different SIMD 8367ec681f3Smrg * invocations of the shader. This is set by nir_divergence_analysis. 8377ec681f3Smrg */ 8387ec681f3Smrg bool divergent; 83901e04c3fSmrg} nir_ssa_def; 84001e04c3fSmrg 84101e04c3fSmrgstruct nir_src; 84201e04c3fSmrg 84301e04c3fSmrgtypedef struct { 84401e04c3fSmrg nir_register *reg; 84501e04c3fSmrg struct nir_src *indirect; /** < NULL for no indirect offset */ 84601e04c3fSmrg unsigned base_offset; 84701e04c3fSmrg 84801e04c3fSmrg /* TODO use-def chain goes here */ 84901e04c3fSmrg} nir_reg_src; 85001e04c3fSmrg 85101e04c3fSmrgtypedef struct { 85201e04c3fSmrg nir_instr *parent_instr; 85301e04c3fSmrg struct list_head def_link; 85401e04c3fSmrg 85501e04c3fSmrg nir_register *reg; 85601e04c3fSmrg struct nir_src *indirect; /** < NULL for no indirect offset */ 85701e04c3fSmrg unsigned base_offset; 85801e04c3fSmrg 85901e04c3fSmrg /* TODO def-use chain goes here */ 86001e04c3fSmrg} nir_reg_dest; 86101e04c3fSmrg 86201e04c3fSmrgstruct nir_if; 86301e04c3fSmrg 86401e04c3fSmrgtypedef struct nir_src { 86501e04c3fSmrg union { 86601e04c3fSmrg /** Instruction that consumes this value as a source. 
*/ 86701e04c3fSmrg nir_instr *parent_instr; 86801e04c3fSmrg struct nir_if *parent_if; 86901e04c3fSmrg }; 87001e04c3fSmrg 87101e04c3fSmrg struct list_head use_link; 87201e04c3fSmrg 87301e04c3fSmrg union { 87401e04c3fSmrg nir_reg_src reg; 87501e04c3fSmrg nir_ssa_def *ssa; 87601e04c3fSmrg }; 87701e04c3fSmrg 87801e04c3fSmrg bool is_ssa; 87901e04c3fSmrg} nir_src; 88001e04c3fSmrg 88101e04c3fSmrgstatic inline nir_src 88201e04c3fSmrgnir_src_init(void) 88301e04c3fSmrg{ 88401e04c3fSmrg nir_src src = { { NULL } }; 88501e04c3fSmrg return src; 88601e04c3fSmrg} 88701e04c3fSmrg 88801e04c3fSmrg#define NIR_SRC_INIT nir_src_init() 88901e04c3fSmrg 89001e04c3fSmrg#define nir_foreach_use(src, reg_or_ssa_def) \ 89101e04c3fSmrg list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 89201e04c3fSmrg 89301e04c3fSmrg#define nir_foreach_use_safe(src, reg_or_ssa_def) \ 89401e04c3fSmrg list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 89501e04c3fSmrg 89601e04c3fSmrg#define nir_foreach_if_use(src, reg_or_ssa_def) \ 89701e04c3fSmrg list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 89801e04c3fSmrg 89901e04c3fSmrg#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 90001e04c3fSmrg list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 90101e04c3fSmrg 90201e04c3fSmrgtypedef struct { 90301e04c3fSmrg union { 90401e04c3fSmrg nir_reg_dest reg; 90501e04c3fSmrg nir_ssa_def ssa; 90601e04c3fSmrg }; 90701e04c3fSmrg 90801e04c3fSmrg bool is_ssa; 90901e04c3fSmrg} nir_dest; 91001e04c3fSmrg 91101e04c3fSmrgstatic inline nir_dest 91201e04c3fSmrgnir_dest_init(void) 91301e04c3fSmrg{ 91401e04c3fSmrg nir_dest dest = { { { NULL } } }; 91501e04c3fSmrg return dest; 91601e04c3fSmrg} 91701e04c3fSmrg 91801e04c3fSmrg#define NIR_DEST_INIT nir_dest_init() 91901e04c3fSmrg 92001e04c3fSmrg#define nir_foreach_def(dest, reg) \ 92101e04c3fSmrg list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 92201e04c3fSmrg 92301e04c3fSmrg#define 
nir_foreach_def_safe(dest, reg) \ 92401e04c3fSmrg list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 92501e04c3fSmrg 92601e04c3fSmrgstatic inline nir_src 92701e04c3fSmrgnir_src_for_ssa(nir_ssa_def *def) 92801e04c3fSmrg{ 92901e04c3fSmrg nir_src src = NIR_SRC_INIT; 93001e04c3fSmrg 93101e04c3fSmrg src.is_ssa = true; 93201e04c3fSmrg src.ssa = def; 93301e04c3fSmrg 93401e04c3fSmrg return src; 93501e04c3fSmrg} 93601e04c3fSmrg 93701e04c3fSmrgstatic inline nir_src 93801e04c3fSmrgnir_src_for_reg(nir_register *reg) 93901e04c3fSmrg{ 94001e04c3fSmrg nir_src src = NIR_SRC_INIT; 94101e04c3fSmrg 94201e04c3fSmrg src.is_ssa = false; 94301e04c3fSmrg src.reg.reg = reg; 94401e04c3fSmrg src.reg.indirect = NULL; 94501e04c3fSmrg src.reg.base_offset = 0; 94601e04c3fSmrg 94701e04c3fSmrg return src; 94801e04c3fSmrg} 94901e04c3fSmrg 95001e04c3fSmrgstatic inline nir_dest 95101e04c3fSmrgnir_dest_for_reg(nir_register *reg) 95201e04c3fSmrg{ 95301e04c3fSmrg nir_dest dest = NIR_DEST_INIT; 95401e04c3fSmrg 95501e04c3fSmrg dest.reg.reg = reg; 95601e04c3fSmrg 95701e04c3fSmrg return dest; 95801e04c3fSmrg} 95901e04c3fSmrg 96001e04c3fSmrgstatic inline unsigned 96101e04c3fSmrgnir_src_bit_size(nir_src src) 96201e04c3fSmrg{ 96301e04c3fSmrg return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 96401e04c3fSmrg} 96501e04c3fSmrg 96601e04c3fSmrgstatic inline unsigned 96701e04c3fSmrgnir_src_num_components(nir_src src) 96801e04c3fSmrg{ 96901e04c3fSmrg return src.is_ssa ? 
src.ssa->num_components : src.reg.reg->num_components; 97001e04c3fSmrg} 97101e04c3fSmrg 97201e04c3fSmrgstatic inline bool 97301e04c3fSmrgnir_src_is_const(nir_src src) 97401e04c3fSmrg{ 97501e04c3fSmrg return src.is_ssa && 97601e04c3fSmrg src.ssa->parent_instr->type == nir_instr_type_load_const; 97701e04c3fSmrg} 97801e04c3fSmrg 9797ec681f3Smrgstatic inline bool 9807ec681f3Smrgnir_src_is_undef(nir_src src) 9817ec681f3Smrg{ 9827ec681f3Smrg return src.is_ssa && 9837ec681f3Smrg src.ssa->parent_instr->type == nir_instr_type_ssa_undef; 9847ec681f3Smrg} 9857ec681f3Smrg 9867ec681f3Smrgstatic inline bool 9877ec681f3Smrgnir_src_is_divergent(nir_src src) 9887ec681f3Smrg{ 9897ec681f3Smrg return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent; 9907ec681f3Smrg} 99101e04c3fSmrg 99201e04c3fSmrgstatic inline unsigned 99301e04c3fSmrgnir_dest_bit_size(nir_dest dest) 99401e04c3fSmrg{ 99501e04c3fSmrg return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size; 99601e04c3fSmrg} 99701e04c3fSmrg 99801e04c3fSmrgstatic inline unsigned 99901e04c3fSmrgnir_dest_num_components(nir_dest dest) 100001e04c3fSmrg{ 100101e04c3fSmrg return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; 100201e04c3fSmrg} 100301e04c3fSmrg 10047ec681f3Smrgstatic inline bool 10057ec681f3Smrgnir_dest_is_divergent(nir_dest dest) 10067ec681f3Smrg{ 10077ec681f3Smrg return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent; 10087ec681f3Smrg} 10097ec681f3Smrg 10107ec681f3Smrg/* Are all components the same, ie. .xxxx */ 10117ec681f3Smrgstatic inline bool 10127ec681f3Smrgnir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 10137ec681f3Smrg{ 10147ec681f3Smrg for (unsigned i = 1; i < nr_comp; i++) 10157ec681f3Smrg if (swiz[i] != swiz[0]) 10167ec681f3Smrg return false; 10177ec681f3Smrg return true; 10187ec681f3Smrg} 10197ec681f3Smrg 10207ec681f3Smrg/* Are all components sequential, ie. 
.yzw */ 10217ec681f3Smrgstatic inline bool 10227ec681f3Smrgnir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 10237ec681f3Smrg{ 10247ec681f3Smrg for (unsigned i = 1; i < nr_comp; i++) 10257ec681f3Smrg if (swiz[i] != (swiz[0] + i)) 10267ec681f3Smrg return false; 10277ec681f3Smrg return true; 10287ec681f3Smrg} 10297ec681f3Smrg 10307ec681f3Smrgvoid nir_src_copy(nir_src *dest, const nir_src *src); 10317ec681f3Smrgvoid nir_dest_copy(nir_dest *dest, const nir_dest *src); 103201e04c3fSmrg 103301e04c3fSmrgtypedef struct { 10347ec681f3Smrg /** Base source */ 103501e04c3fSmrg nir_src src; 103601e04c3fSmrg 103701e04c3fSmrg /** 103801e04c3fSmrg * \name input modifiers 103901e04c3fSmrg */ 104001e04c3fSmrg /*@{*/ 104101e04c3fSmrg /** 104201e04c3fSmrg * For inputs interpreted as floating point, flips the sign bit. For 104301e04c3fSmrg * inputs interpreted as integers, performs the two's complement negation. 104401e04c3fSmrg */ 104501e04c3fSmrg bool negate; 104601e04c3fSmrg 104701e04c3fSmrg /** 104801e04c3fSmrg * Clears the sign bit for floating point values, and computes the integer 104901e04c3fSmrg * absolute value for integers. Note that the negate modifier acts after 105001e04c3fSmrg * the absolute value modifier, therefore if both are set then all inputs 105101e04c3fSmrg * will become negative. 105201e04c3fSmrg */ 105301e04c3fSmrg bool abs; 105401e04c3fSmrg /*@}*/ 105501e04c3fSmrg 105601e04c3fSmrg /** 105701e04c3fSmrg * For each input component, says which component of the register it is 10587ec681f3Smrg * chosen from. 10597ec681f3Smrg * 10607ec681f3Smrg * Note that which elements of the swizzle are used and which are ignored 10617ec681f3Smrg * are based on the write mask for most opcodes - for example, a statement 10627ec681f3Smrg * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle 10637ec681f3Smrg * of {2, 1, x, 0} where x means "don't care." 
106401e04c3fSmrg */ 106501e04c3fSmrg uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; 106601e04c3fSmrg} nir_alu_src; 106701e04c3fSmrg 106801e04c3fSmrgtypedef struct { 10697ec681f3Smrg /** Base destination */ 107001e04c3fSmrg nir_dest dest; 107101e04c3fSmrg 107201e04c3fSmrg /** 10737ec681f3Smrg * Saturate output modifier 107401e04c3fSmrg * 107501e04c3fSmrg * Only valid for opcodes that output floating-point numbers. Clamps the 107601e04c3fSmrg * output to between 0.0 and 1.0 inclusive. 107701e04c3fSmrg */ 107801e04c3fSmrg bool saturate; 107901e04c3fSmrg 10807ec681f3Smrg /** 10817ec681f3Smrg * Write-mask 10827ec681f3Smrg * 10837ec681f3Smrg * Ignored if dest.is_ssa is true 10847ec681f3Smrg */ 10857ec681f3Smrg unsigned write_mask : NIR_MAX_VEC_COMPONENTS; 108601e04c3fSmrg} nir_alu_dest; 108701e04c3fSmrg 10887e102996Smaya/** NIR sized and unsized types 10897e102996Smaya * 10907e102996Smaya * The values in this enum are carefully chosen so that the sized type is 10917e102996Smaya * just the unsized type OR the number of bits. 
10927e102996Smaya */ 10937ec681f3Smrgtypedef enum PACKED { 109401e04c3fSmrg nir_type_invalid = 0, /* Not a valid type */ 10957e102996Smaya nir_type_int = 2, 10967e102996Smaya nir_type_uint = 4, 10977e102996Smaya nir_type_bool = 6, 10987e102996Smaya nir_type_float = 128, 10997e102996Smaya nir_type_bool1 = 1 | nir_type_bool, 11007ec681f3Smrg nir_type_bool8 = 8 | nir_type_bool, 11017ec681f3Smrg nir_type_bool16 = 16 | nir_type_bool, 110201e04c3fSmrg nir_type_bool32 = 32 | nir_type_bool, 11037e102996Smaya nir_type_int1 = 1 | nir_type_int, 110401e04c3fSmrg nir_type_int8 = 8 | nir_type_int, 110501e04c3fSmrg nir_type_int16 = 16 | nir_type_int, 110601e04c3fSmrg nir_type_int32 = 32 | nir_type_int, 110701e04c3fSmrg nir_type_int64 = 64 | nir_type_int, 11087e102996Smaya nir_type_uint1 = 1 | nir_type_uint, 110901e04c3fSmrg nir_type_uint8 = 8 | nir_type_uint, 111001e04c3fSmrg nir_type_uint16 = 16 | nir_type_uint, 111101e04c3fSmrg nir_type_uint32 = 32 | nir_type_uint, 111201e04c3fSmrg nir_type_uint64 = 64 | nir_type_uint, 111301e04c3fSmrg nir_type_float16 = 16 | nir_type_float, 111401e04c3fSmrg nir_type_float32 = 32 | nir_type_float, 111501e04c3fSmrg nir_type_float64 = 64 | nir_type_float, 111601e04c3fSmrg} nir_alu_type; 111701e04c3fSmrg 11187e102996Smaya#define NIR_ALU_TYPE_SIZE_MASK 0x79 11197e102996Smaya#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86 112001e04c3fSmrg 112101e04c3fSmrgstatic inline unsigned 112201e04c3fSmrgnir_alu_type_get_type_size(nir_alu_type type) 112301e04c3fSmrg{ 112401e04c3fSmrg return type & NIR_ALU_TYPE_SIZE_MASK; 112501e04c3fSmrg} 112601e04c3fSmrg 11277ec681f3Smrgstatic inline nir_alu_type 112801e04c3fSmrgnir_alu_type_get_base_type(nir_alu_type type) 112901e04c3fSmrg{ 11307ec681f3Smrg return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); 113101e04c3fSmrg} 113201e04c3fSmrg 113301e04c3fSmrgstatic inline nir_alu_type 113401e04c3fSmrgnir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) 113501e04c3fSmrg{ 113601e04c3fSmrg switch (base_type) { 
113701e04c3fSmrg case GLSL_TYPE_BOOL: 11387e102996Smaya return nir_type_bool1; 113901e04c3fSmrg break; 114001e04c3fSmrg case GLSL_TYPE_UINT: 114101e04c3fSmrg return nir_type_uint32; 114201e04c3fSmrg break; 114301e04c3fSmrg case GLSL_TYPE_INT: 114401e04c3fSmrg return nir_type_int32; 114501e04c3fSmrg break; 114601e04c3fSmrg case GLSL_TYPE_UINT16: 114701e04c3fSmrg return nir_type_uint16; 114801e04c3fSmrg break; 114901e04c3fSmrg case GLSL_TYPE_INT16: 115001e04c3fSmrg return nir_type_int16; 115101e04c3fSmrg break; 115201e04c3fSmrg case GLSL_TYPE_UINT8: 115301e04c3fSmrg return nir_type_uint8; 115401e04c3fSmrg case GLSL_TYPE_INT8: 115501e04c3fSmrg return nir_type_int8; 115601e04c3fSmrg case GLSL_TYPE_UINT64: 115701e04c3fSmrg return nir_type_uint64; 115801e04c3fSmrg break; 115901e04c3fSmrg case GLSL_TYPE_INT64: 116001e04c3fSmrg return nir_type_int64; 116101e04c3fSmrg break; 116201e04c3fSmrg case GLSL_TYPE_FLOAT: 116301e04c3fSmrg return nir_type_float32; 116401e04c3fSmrg break; 116501e04c3fSmrg case GLSL_TYPE_FLOAT16: 116601e04c3fSmrg return nir_type_float16; 116701e04c3fSmrg break; 116801e04c3fSmrg case GLSL_TYPE_DOUBLE: 116901e04c3fSmrg return nir_type_float64; 117001e04c3fSmrg break; 11717ec681f3Smrg 11727ec681f3Smrg case GLSL_TYPE_SAMPLER: 11737ec681f3Smrg case GLSL_TYPE_IMAGE: 11747ec681f3Smrg case GLSL_TYPE_ATOMIC_UINT: 11757ec681f3Smrg case GLSL_TYPE_STRUCT: 11767ec681f3Smrg case GLSL_TYPE_INTERFACE: 11777ec681f3Smrg case GLSL_TYPE_ARRAY: 11787ec681f3Smrg case GLSL_TYPE_VOID: 11797ec681f3Smrg case GLSL_TYPE_SUBROUTINE: 11807ec681f3Smrg case GLSL_TYPE_FUNCTION: 11817ec681f3Smrg case GLSL_TYPE_ERROR: 11827ec681f3Smrg return nir_type_invalid; 118301e04c3fSmrg } 11847ec681f3Smrg 11857ec681f3Smrg unreachable("unknown type"); 118601e04c3fSmrg} 118701e04c3fSmrg 118801e04c3fSmrgstatic inline nir_alu_type 118901e04c3fSmrgnir_get_nir_type_for_glsl_type(const struct glsl_type *type) 119001e04c3fSmrg{ 119101e04c3fSmrg return 
nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type)); 119201e04c3fSmrg} 119301e04c3fSmrg 11947ec681f3Smrgstatic inline enum glsl_base_type 11957ec681f3Smrgnir_get_glsl_base_type_for_nir_type(nir_alu_type base_type) 11967ec681f3Smrg{ 11977ec681f3Smrg switch (base_type) { 11987ec681f3Smrg case nir_type_bool1: 11997ec681f3Smrg return GLSL_TYPE_BOOL; 12007ec681f3Smrg case nir_type_uint32: 12017ec681f3Smrg return GLSL_TYPE_UINT; 12027ec681f3Smrg case nir_type_int32: 12037ec681f3Smrg return GLSL_TYPE_INT; 12047ec681f3Smrg case nir_type_uint16: 12057ec681f3Smrg return GLSL_TYPE_UINT16; 12067ec681f3Smrg case nir_type_int16: 12077ec681f3Smrg return GLSL_TYPE_INT16; 12087ec681f3Smrg case nir_type_uint8: 12097ec681f3Smrg return GLSL_TYPE_UINT8; 12107ec681f3Smrg case nir_type_int8: 12117ec681f3Smrg return GLSL_TYPE_INT8; 12127ec681f3Smrg case nir_type_uint64: 12137ec681f3Smrg return GLSL_TYPE_UINT64; 12147ec681f3Smrg case nir_type_int64: 12157ec681f3Smrg return GLSL_TYPE_INT64; 12167ec681f3Smrg case nir_type_float32: 12177ec681f3Smrg return GLSL_TYPE_FLOAT; 12187ec681f3Smrg case nir_type_float16: 12197ec681f3Smrg return GLSL_TYPE_FLOAT16; 12207ec681f3Smrg case nir_type_float64: 12217ec681f3Smrg return GLSL_TYPE_DOUBLE; 12227ec681f3Smrg 12237ec681f3Smrg default: unreachable("Not a sized nir_alu_type"); 12247ec681f3Smrg } 12257ec681f3Smrg} 12267ec681f3Smrg 122701e04c3fSmrgnir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 122801e04c3fSmrg nir_rounding_mode rnd); 122901e04c3fSmrg 12307e102996Smayastatic inline nir_op 12317e102996Smayanir_op_vec(unsigned components) 12327e102996Smaya{ 12337e102996Smaya switch (components) { 12347ec681f3Smrg case 1: return nir_op_mov; 12357e102996Smaya case 2: return nir_op_vec2; 12367e102996Smaya case 3: return nir_op_vec3; 12377e102996Smaya case 4: return nir_op_vec4; 12387ec681f3Smrg case 5: return nir_op_vec5; 12397ec681f3Smrg case 8: return nir_op_vec8; 12407ec681f3Smrg case 16: return nir_op_vec16; 12417e102996Smaya 
default: unreachable("bad component count"); 12427e102996Smaya } 12437e102996Smaya} 12447e102996Smaya 12457ec681f3Smrgstatic inline bool 12467ec681f3Smrgnir_op_is_vec(nir_op op) 12477ec681f3Smrg{ 12487ec681f3Smrg switch (op) { 12497ec681f3Smrg case nir_op_mov: 12507ec681f3Smrg case nir_op_vec2: 12517ec681f3Smrg case nir_op_vec3: 12527ec681f3Smrg case nir_op_vec4: 12537ec681f3Smrg case nir_op_vec5: 12547ec681f3Smrg case nir_op_vec8: 12557ec681f3Smrg case nir_op_vec16: 12567ec681f3Smrg return true; 12577ec681f3Smrg default: 12587ec681f3Smrg return false; 12597ec681f3Smrg } 12607ec681f3Smrg} 12617ec681f3Smrg 12627ec681f3Smrgstatic inline bool 12637ec681f3Smrgnir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) 12647ec681f3Smrg{ 12657ec681f3Smrg return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) || 12667ec681f3Smrg (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) || 12677ec681f3Smrg (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64); 12687ec681f3Smrg} 12697ec681f3Smrg 12707ec681f3Smrgstatic inline bool 12717ec681f3Smrgnir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size) 12727ec681f3Smrg{ 12737ec681f3Smrg return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) || 12747ec681f3Smrg (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) || 12757ec681f3Smrg (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64); 12767ec681f3Smrg} 12777ec681f3Smrg 12787ec681f3Smrgstatic inline bool 12797ec681f3Smrgnir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size) 12807ec681f3Smrg{ 12817ec681f3Smrg return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) || 12827ec681f3Smrg (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) || 12837ec681f3Smrg (64 == bit_size && execution_mode & 
FLOAT_CONTROLS_DENORM_PRESERVE_FP64); 12847ec681f3Smrg} 12857ec681f3Smrg 12867ec681f3Smrgstatic inline bool 12877ec681f3Smrgnir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size) 12887ec681f3Smrg{ 12897ec681f3Smrg return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || 12907ec681f3Smrg (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || 12917ec681f3Smrg (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); 12927ec681f3Smrg} 12937ec681f3Smrg 12947ec681f3Smrgstatic inline bool 12957ec681f3Smrgnir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size) 12967ec681f3Smrg{ 12977ec681f3Smrg return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || 12987ec681f3Smrg (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || 12997ec681f3Smrg (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); 13007ec681f3Smrg} 13017ec681f3Smrg 13027ec681f3Smrgstatic inline bool 13037ec681f3Smrgnir_has_any_rounding_mode_rtz(unsigned execution_mode) 13047ec681f3Smrg{ 13057ec681f3Smrg return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || 13067ec681f3Smrg (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || 13077ec681f3Smrg (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); 13087ec681f3Smrg} 13097ec681f3Smrg 13107ec681f3Smrgstatic inline bool 13117ec681f3Smrgnir_has_any_rounding_mode_rtne(unsigned execution_mode) 13127ec681f3Smrg{ 13137ec681f3Smrg return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || 13147ec681f3Smrg (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || 13157ec681f3Smrg (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); 13167ec681f3Smrg} 13177ec681f3Smrg 13187ec681f3Smrgstatic inline nir_rounding_mode 13197ec681f3Smrgnir_get_rounding_mode_from_float_controls(unsigned execution_mode, 13207ec681f3Smrg nir_alu_type type) 13217ec681f3Smrg{ 13227ec681f3Smrg if 
(nir_alu_type_get_base_type(type) != nir_type_float) 13237ec681f3Smrg return nir_rounding_mode_undef; 13247ec681f3Smrg 13257ec681f3Smrg unsigned bit_size = nir_alu_type_get_type_size(type); 13267ec681f3Smrg 13277ec681f3Smrg if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) 13287ec681f3Smrg return nir_rounding_mode_rtz; 13297ec681f3Smrg if (nir_is_rounding_mode_rtne(execution_mode, bit_size)) 13307ec681f3Smrg return nir_rounding_mode_rtne; 13317ec681f3Smrg return nir_rounding_mode_undef; 13327ec681f3Smrg} 13337ec681f3Smrg 13347ec681f3Smrgstatic inline bool 13357ec681f3Smrgnir_has_any_rounding_mode_enabled(unsigned execution_mode) 13367ec681f3Smrg{ 13377ec681f3Smrg bool result = 13387ec681f3Smrg nir_has_any_rounding_mode_rtne(execution_mode) || 13397ec681f3Smrg nir_has_any_rounding_mode_rtz(execution_mode); 13407ec681f3Smrg return result; 13417ec681f3Smrg} 13427ec681f3Smrg 134301e04c3fSmrgtypedef enum { 13447ec681f3Smrg /** 13457ec681f3Smrg * Operation where the first two sources are commutative. 13467ec681f3Smrg * 13477ec681f3Smrg * For 2-source operations, this just mathematical commutativity. Some 13487ec681f3Smrg * 3-source operations, like ffma, are only commutative in the first two 13497ec681f3Smrg * sources. 13507ec681f3Smrg */ 13517ec681f3Smrg NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0), 13527ec681f3Smrg 13537ec681f3Smrg /** 13547ec681f3Smrg * Operation is associative 13557ec681f3Smrg */ 135601e04c3fSmrg NIR_OP_IS_ASSOCIATIVE = (1 << 1), 135701e04c3fSmrg} nir_op_algebraic_property; 135801e04c3fSmrg 13597ec681f3Smrg/* vec16 is the widest ALU op in NIR, making the max number of input of ALU 13607ec681f3Smrg * instructions to be the same as NIR_MAX_VEC_COMPONENTS. 
13617ec681f3Smrg */ 13627ec681f3Smrg#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS 13637ec681f3Smrg 13647ec681f3Smrgtypedef struct nir_op_info { 13657ec681f3Smrg /** Name of the NIR ALU opcode */ 136601e04c3fSmrg const char *name; 136701e04c3fSmrg 13687ec681f3Smrg /** Number of inputs (sources) */ 13697ec681f3Smrg uint8_t num_inputs; 137001e04c3fSmrg 137101e04c3fSmrg /** 137201e04c3fSmrg * The number of components in the output 137301e04c3fSmrg * 137401e04c3fSmrg * If non-zero, this is the size of the output and input sizes are 137501e04c3fSmrg * explicitly given; swizzle and writemask are still in effect, but if 137601e04c3fSmrg * the output component is masked out, then the input component may 137701e04c3fSmrg * still be in use. 137801e04c3fSmrg * 137901e04c3fSmrg * If zero, the opcode acts in the standard, per-component manner; the 138001e04c3fSmrg * operation is performed on each component (except the ones that are 138101e04c3fSmrg * masked out) with the input being taken from the input swizzle for 138201e04c3fSmrg * that component. 138301e04c3fSmrg * 138401e04c3fSmrg * The size of some of the inputs may be given (i.e. non-zero) even 138501e04c3fSmrg * though output_size is zero; in that case, the inputs with a zero 138601e04c3fSmrg * size act per-component, while the inputs with non-zero size don't. 138701e04c3fSmrg */ 13887ec681f3Smrg uint8_t output_size; 138901e04c3fSmrg 139001e04c3fSmrg /** 139101e04c3fSmrg * The type of vector that the instruction outputs. Note that the 139201e04c3fSmrg * staurate modifier is only allowed on outputs with the float type. 139301e04c3fSmrg */ 139401e04c3fSmrg nir_alu_type output_type; 139501e04c3fSmrg 139601e04c3fSmrg /** 139701e04c3fSmrg * The number of components in each input 13987ec681f3Smrg * 13997ec681f3Smrg * See nir_op_infos::output_size for more detail about the relationship 14007ec681f3Smrg * between input and output sizes. 
140101e04c3fSmrg */ 14027ec681f3Smrg uint8_t input_sizes[NIR_ALU_MAX_INPUTS]; 140301e04c3fSmrg 140401e04c3fSmrg /** 140501e04c3fSmrg * The type of vector that each input takes. Note that negate and 140601e04c3fSmrg * absolute value are only allowed on inputs with int or float type and 140701e04c3fSmrg * behave differently on the two. 140801e04c3fSmrg */ 14097ec681f3Smrg nir_alu_type input_types[NIR_ALU_MAX_INPUTS]; 141001e04c3fSmrg 14117ec681f3Smrg /** Algebraic properties of this opcode */ 141201e04c3fSmrg nir_op_algebraic_property algebraic_properties; 14137e102996Smaya 14147ec681f3Smrg /** Whether this represents a numeric conversion opcode */ 14157e102996Smaya bool is_conversion; 141601e04c3fSmrg} nir_op_info; 141701e04c3fSmrg 14187ec681f3Smrg/** Metadata for each nir_op, indexed by opcode */ 141901e04c3fSmrgextern const nir_op_info nir_op_infos[nir_num_opcodes]; 142001e04c3fSmrg 142101e04c3fSmrgtypedef struct nir_alu_instr { 14227ec681f3Smrg /** Base instruction */ 142301e04c3fSmrg nir_instr instr; 14247ec681f3Smrg 14257ec681f3Smrg /** Opcode */ 142601e04c3fSmrg nir_op op; 142701e04c3fSmrg 142801e04c3fSmrg /** Indicates that this ALU instruction generates an exact value 142901e04c3fSmrg * 143001e04c3fSmrg * This is kind of a mixture of GLSL "precise" and "invariant" and not 143101e04c3fSmrg * really equivalent to either. This indicates that the value generated by 143201e04c3fSmrg * this operation is high-precision and any code transformations that touch 143301e04c3fSmrg * it must ensure that the resulting value is bit-for-bit identical to the 143401e04c3fSmrg * original. 143501e04c3fSmrg */ 14367ec681f3Smrg bool exact:1; 14377ec681f3Smrg 14387ec681f3Smrg /** 14397ec681f3Smrg * Indicates that this instruction doese not cause signed integer wrapping 14407ec681f3Smrg * to occur, in the form of overflow or underflow. 
14417ec681f3Smrg */ 14427ec681f3Smrg bool no_signed_wrap:1; 144301e04c3fSmrg 14447ec681f3Smrg /** 14457ec681f3Smrg * Indicates that this instruction does not cause unsigned integer wrapping 14467ec681f3Smrg * to occur, in the form of overflow or underflow. 14477ec681f3Smrg */ 14487ec681f3Smrg bool no_unsigned_wrap:1; 14497ec681f3Smrg 14507ec681f3Smrg /** Destination */ 145101e04c3fSmrg nir_alu_dest dest; 14527ec681f3Smrg 14537ec681f3Smrg /** Sources 14547ec681f3Smrg * 14557ec681f3Smrg * The size of the array is given by nir_op_info::num_inputs. 14567ec681f3Smrg */ 145701e04c3fSmrg nir_alu_src src[]; 145801e04c3fSmrg} nir_alu_instr; 145901e04c3fSmrg 14607ec681f3Smrgvoid nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src); 14617ec681f3Smrgvoid nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src); 14627ec681f3Smrg 14637ec681f3Smrgbool nir_alu_instr_is_copy(nir_alu_instr *instr); 146401e04c3fSmrg 146501e04c3fSmrg/* is this source channel used? */ 146601e04c3fSmrgstatic inline bool 146701e04c3fSmrgnir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src, 146801e04c3fSmrg unsigned channel) 146901e04c3fSmrg{ 147001e04c3fSmrg if (nir_op_infos[instr->op].input_sizes[src] > 0) 147101e04c3fSmrg return channel < nir_op_infos[instr->op].input_sizes[src]; 147201e04c3fSmrg 147301e04c3fSmrg return (instr->dest.write_mask >> channel) & 1; 147401e04c3fSmrg} 147501e04c3fSmrg 14767e102996Smayastatic inline nir_component_mask_t 14777e102996Smayanir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src) 14787e102996Smaya{ 14797e102996Smaya nir_component_mask_t read_mask = 0; 14807e102996Smaya for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) { 14817e102996Smaya if (!nir_alu_instr_channel_used(instr, src, c)) 14827e102996Smaya continue; 14837e102996Smaya 14847e102996Smaya read_mask |= (1 << instr->src[src].swizzle[c]); 14857e102996Smaya } 14867e102996Smaya return read_mask; 14877e102996Smaya} 14887e102996Smaya 14897ec681f3Smrg/** 
14907ec681f3Smrg * Get the number of channels used for a source 149101e04c3fSmrg */ 149201e04c3fSmrgstatic inline unsigned 149301e04c3fSmrgnir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) 149401e04c3fSmrg{ 149501e04c3fSmrg if (nir_op_infos[instr->op].input_sizes[src] > 0) 149601e04c3fSmrg return nir_op_infos[instr->op].input_sizes[src]; 149701e04c3fSmrg 14987ec681f3Smrg return nir_dest_num_components(instr->dest.dest); 149901e04c3fSmrg} 150001e04c3fSmrg 15017ec681f3Smrgstatic inline bool 15027ec681f3Smrgnir_alu_instr_is_comparison(const nir_alu_instr *instr) 15037ec681f3Smrg{ 15047ec681f3Smrg switch (instr->op) { 15057ec681f3Smrg case nir_op_flt: 15067ec681f3Smrg case nir_op_fge: 15077ec681f3Smrg case nir_op_feq: 15087ec681f3Smrg case nir_op_fneu: 15097ec681f3Smrg case nir_op_ilt: 15107ec681f3Smrg case nir_op_ult: 15117ec681f3Smrg case nir_op_ige: 15127ec681f3Smrg case nir_op_uge: 15137ec681f3Smrg case nir_op_ieq: 15147ec681f3Smrg case nir_op_ine: 15157ec681f3Smrg case nir_op_i2b1: 15167ec681f3Smrg case nir_op_f2b1: 15177ec681f3Smrg case nir_op_inot: 15187ec681f3Smrg return true; 15197ec681f3Smrg default: 15207ec681f3Smrg return false; 15217ec681f3Smrg } 15227ec681f3Smrg} 15237ec681f3Smrg 15247ec681f3Smrgbool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2, 15257ec681f3Smrg nir_alu_type full_type); 15267ec681f3Smrg 15277ec681f3Smrgbool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, 15287ec681f3Smrg unsigned src1, unsigned src2); 15297ec681f3Smrg 15307ec681f3Smrgbool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, 15317ec681f3Smrg const nir_alu_instr *alu2, 15327ec681f3Smrg unsigned src1, unsigned src2); 15337ec681f3Smrg 15347ec681f3Smrgbool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn); 15357ec681f3Smrg 15367ec681f3Smrgtypedef enum { 15377ec681f3Smrg nir_deref_type_var, 15387ec681f3Smrg nir_deref_type_array, 15397ec681f3Smrg nir_deref_type_array_wildcard, 
15407ec681f3Smrg nir_deref_type_ptr_as_array, 15417ec681f3Smrg nir_deref_type_struct, 15427ec681f3Smrg nir_deref_type_cast, 15437ec681f3Smrg} nir_deref_type; 15447ec681f3Smrg 15457ec681f3Smrgtypedef struct { 154601e04c3fSmrg nir_instr instr; 154701e04c3fSmrg 154801e04c3fSmrg /** The type of this deref instruction */ 154901e04c3fSmrg nir_deref_type deref_type; 155001e04c3fSmrg 15517ec681f3Smrg /** Bitmask what modes the underlying variable might be 15527ec681f3Smrg * 15537ec681f3Smrg * For OpenCL-style generic pointers, we may not know exactly what mode it 15547ec681f3Smrg * is at any given point in time in the compile process. This bitfield 15557ec681f3Smrg * contains the set of modes which it MAY be. 15567ec681f3Smrg * 15577ec681f3Smrg * Generally, this field should not be accessed directly. Use one of the 15587ec681f3Smrg * nir_deref_mode_ helpers instead. 15597ec681f3Smrg */ 15607ec681f3Smrg nir_variable_mode modes; 156101e04c3fSmrg 156201e04c3fSmrg /** The dereferenced type of the resulting pointer value */ 156301e04c3fSmrg const struct glsl_type *type; 156401e04c3fSmrg 156501e04c3fSmrg union { 156601e04c3fSmrg /** Variable being dereferenced if deref_type is a deref_var */ 156701e04c3fSmrg nir_variable *var; 156801e04c3fSmrg 156901e04c3fSmrg /** Parent deref if deref_type is not deref_var */ 157001e04c3fSmrg nir_src parent; 157101e04c3fSmrg }; 157201e04c3fSmrg 157301e04c3fSmrg /** Additional deref parameters */ 157401e04c3fSmrg union { 157501e04c3fSmrg struct { 157601e04c3fSmrg nir_src index; 157701e04c3fSmrg } arr; 157801e04c3fSmrg 157901e04c3fSmrg struct { 158001e04c3fSmrg unsigned index; 158101e04c3fSmrg } strct; 15827e102996Smaya 15837e102996Smaya struct { 15847e102996Smaya unsigned ptr_stride; 15857ec681f3Smrg unsigned align_mul; 15867ec681f3Smrg unsigned align_offset; 15877e102996Smaya } cast; 158801e04c3fSmrg }; 158901e04c3fSmrg 159001e04c3fSmrg /** Destination to store the resulting "pointer" */ 159101e04c3fSmrg nir_dest dest; 159201e04c3fSmrg} 
nir_deref_instr; 159301e04c3fSmrg 15947ec681f3Smrg/** Returns true if deref might have one of the given modes 15957ec681f3Smrg * 15967ec681f3Smrg * For multi-mode derefs, this returns true if any of the possible modes for 15977ec681f3Smrg * the deref to have any of the specified modes. This function returning true 15987ec681f3Smrg * does NOT mean that the deref definitely has one of those modes. It simply 15997ec681f3Smrg * means that, with the best information we have at the time, it might. 16007ec681f3Smrg */ 16017ec681f3Smrgstatic inline bool 16027ec681f3Smrgnir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes) 16037ec681f3Smrg{ 16047ec681f3Smrg assert(!(modes & ~nir_var_all)); 16057ec681f3Smrg assert(deref->modes != 0); 16067ec681f3Smrg return deref->modes & modes; 16077ec681f3Smrg} 16087ec681f3Smrg 16097ec681f3Smrg/** Returns true if deref must have one of the given modes 16107ec681f3Smrg * 16117ec681f3Smrg * For multi-mode derefs, this returns true if NIR can prove that the given 16127ec681f3Smrg * deref has one of the specified modes. This function returning false does 16137ec681f3Smrg * NOT mean that deref doesn't have one of the given mode. It very well may 16147ec681f3Smrg * have one of those modes, we just don't have enough information to prove 16157ec681f3Smrg * that it does for sure. 16167ec681f3Smrg */ 16177ec681f3Smrgstatic inline bool 16187ec681f3Smrgnir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes) 16197ec681f3Smrg{ 16207ec681f3Smrg assert(!(modes & ~nir_var_all)); 16217ec681f3Smrg assert(deref->modes != 0); 16227ec681f3Smrg return !(deref->modes & ~modes); 16237ec681f3Smrg} 16247ec681f3Smrg 16257ec681f3Smrg/** Returns true if deref has the given mode 16267ec681f3Smrg * 16277ec681f3Smrg * This returns true if the deref has exactly the mode specified. If the 16287ec681f3Smrg * deref may have that mode but may also have a different mode (i.e. 
modes has 16297ec681f3Smrg * multiple bits set), this will assert-fail. 16307ec681f3Smrg * 16317ec681f3Smrg * If you're confused about which nir_deref_mode_ helper to use, use this one 16327ec681f3Smrg * or nir_deref_mode_is_one_of below. 16337ec681f3Smrg */ 16347ec681f3Smrgstatic inline bool 16357ec681f3Smrgnir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode) 16367ec681f3Smrg{ 16377ec681f3Smrg assert(util_bitcount(mode) == 1 && (mode & nir_var_all)); 16387ec681f3Smrg assert(deref->modes != 0); 16397ec681f3Smrg 16407ec681f3Smrg /* This is only for "simple" cases so, if modes might interact with this 16417ec681f3Smrg * deref then the deref has to have a single mode. 16427ec681f3Smrg */ 16437ec681f3Smrg if (nir_deref_mode_may_be(deref, mode)) { 16447ec681f3Smrg assert(util_bitcount(deref->modes) == 1); 16457ec681f3Smrg assert(deref->modes == mode); 16467ec681f3Smrg } 16477ec681f3Smrg 16487ec681f3Smrg return deref->modes == mode; 16497ec681f3Smrg} 16507ec681f3Smrg 16517ec681f3Smrg/** Returns true if deref has one of the given modes 16527ec681f3Smrg * 16537ec681f3Smrg * This returns true if the deref has exactly one possible mode and that mode 16547ec681f3Smrg * is one of the modes specified. If the deref may have one of those modes 16557ec681f3Smrg * but may also have a different mode (i.e. modes has multiple bits set), this 16567ec681f3Smrg * will assert-fail. 16577ec681f3Smrg */ 16587ec681f3Smrgstatic inline bool 16597ec681f3Smrgnir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes) 16607ec681f3Smrg{ 16617ec681f3Smrg /* This is only for "simple" cases so, if modes might interact with this 16627ec681f3Smrg * deref then the deref has to have a single mode. 
16637ec681f3Smrg */ 16647ec681f3Smrg if (nir_deref_mode_may_be(deref, modes)) { 16657ec681f3Smrg assert(util_bitcount(deref->modes) == 1); 16667ec681f3Smrg assert(nir_deref_mode_must_be(deref, modes)); 16677ec681f3Smrg } 16687ec681f3Smrg 16697ec681f3Smrg return nir_deref_mode_may_be(deref, modes); 16707ec681f3Smrg} 16717ec681f3Smrg 16727ec681f3Smrg/** Returns true if deref's possible modes lie in the given set of modes 16737ec681f3Smrg * 16747ec681f3Smrg * This returns true if the deref's modes lie in the given set of modes. If 16757ec681f3Smrg * the deref's modes overlap with the specified modes but aren't entirely 16767ec681f3Smrg * contained in the specified set of modes, this will assert-fail. In 16777ec681f3Smrg * particular, if this is used in a generic pointers scenario, the specified 16787ec681f3Smrg * modes has to contain all or none of the possible generic pointer modes. 16797ec681f3Smrg * 16807ec681f3Smrg * This is intended mostly for mass-lowering of derefs which might have 16817ec681f3Smrg * generic pointers. 
16827ec681f3Smrg */ 16837ec681f3Smrgstatic inline bool 16847ec681f3Smrgnir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes) 16857ec681f3Smrg{ 16867ec681f3Smrg if (nir_deref_mode_may_be(deref, modes)) 16877ec681f3Smrg assert(nir_deref_mode_must_be(deref, modes)); 16887ec681f3Smrg 16897ec681f3Smrg return nir_deref_mode_may_be(deref, modes); 16907ec681f3Smrg} 16917ec681f3Smrg 16927e102996Smayastatic inline nir_deref_instr *nir_src_as_deref(nir_src src); 169301e04c3fSmrg 169401e04c3fSmrgstatic inline nir_deref_instr * 169501e04c3fSmrgnir_deref_instr_parent(const nir_deref_instr *instr) 169601e04c3fSmrg{ 169701e04c3fSmrg if (instr->deref_type == nir_deref_type_var) 169801e04c3fSmrg return NULL; 169901e04c3fSmrg else 170001e04c3fSmrg return nir_src_as_deref(instr->parent); 170101e04c3fSmrg} 170201e04c3fSmrg 170301e04c3fSmrgstatic inline nir_variable * 170401e04c3fSmrgnir_deref_instr_get_variable(const nir_deref_instr *instr) 170501e04c3fSmrg{ 170601e04c3fSmrg while (instr->deref_type != nir_deref_type_var) { 170701e04c3fSmrg if (instr->deref_type == nir_deref_type_cast) 170801e04c3fSmrg return NULL; 170901e04c3fSmrg 171001e04c3fSmrg instr = nir_deref_instr_parent(instr); 171101e04c3fSmrg } 171201e04c3fSmrg 171301e04c3fSmrg return instr->var; 171401e04c3fSmrg} 171501e04c3fSmrg 171601e04c3fSmrgbool nir_deref_instr_has_indirect(nir_deref_instr *instr); 17177ec681f3Smrgbool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr); 17187ec681f3Smrgbool nir_deref_instr_has_complex_use(nir_deref_instr *instr); 171901e04c3fSmrg 172001e04c3fSmrgbool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); 172101e04c3fSmrg 17227ec681f3Smrgunsigned nir_deref_instr_array_stride(nir_deref_instr *instr); 17237e102996Smaya 172401e04c3fSmrgtypedef struct { 172501e04c3fSmrg nir_instr instr; 172601e04c3fSmrg 172701e04c3fSmrg struct nir_function *callee; 172801e04c3fSmrg 172901e04c3fSmrg unsigned num_params; 173001e04c3fSmrg nir_src params[]; 
173101e04c3fSmrg} nir_call_instr; 173201e04c3fSmrg 173301e04c3fSmrg#include "nir_intrinsics.h" 173401e04c3fSmrg 17357ec681f3Smrg#define NIR_INTRINSIC_MAX_CONST_INDEX 5 173601e04c3fSmrg 173701e04c3fSmrg/** Represents an intrinsic 173801e04c3fSmrg * 173901e04c3fSmrg * An intrinsic is an instruction type for handling things that are 174001e04c3fSmrg * more-or-less regular operations but don't just consume and produce SSA 174101e04c3fSmrg * values like ALU operations do. Intrinsics are not for things that have 174201e04c3fSmrg * special semantic meaning such as phi nodes and parallel copies. 174301e04c3fSmrg * Examples of intrinsics include variable load/store operations, system 174401e04c3fSmrg * value loads, and the like. Even though texturing more-or-less falls 174501e04c3fSmrg * under this category, texturing is its own instruction type because 174601e04c3fSmrg * trying to represent texturing with intrinsics would lead to a 174701e04c3fSmrg * combinatorial explosion of intrinsic opcodes. 174801e04c3fSmrg * 174901e04c3fSmrg * By having a single instruction type for handling a lot of different 175001e04c3fSmrg * cases, optimization passes can look for intrinsics and, for the most 175101e04c3fSmrg * part, completely ignore them. Each intrinsic type also has a few 175201e04c3fSmrg * possible flags that govern whether or not they can be reordered or 175301e04c3fSmrg * eliminated. That way passes like dead code elimination can still work 175401e04c3fSmrg * on intrisics without understanding the meaning of each. 175501e04c3fSmrg * 175601e04c3fSmrg * Each intrinsic has some number of constant indices, some number of 175701e04c3fSmrg * variables, and some number of sources. What these sources, variables, 175801e04c3fSmrg * and indices mean depends on the intrinsic and is documented with the 175901e04c3fSmrg * intrinsic declaration in nir_intrinsics.h. 
Intrinsics and texture 176001e04c3fSmrg * instructions are the only types of instruction that can operate on 176101e04c3fSmrg * variables. 176201e04c3fSmrg */ 176301e04c3fSmrgtypedef struct { 176401e04c3fSmrg nir_instr instr; 176501e04c3fSmrg 176601e04c3fSmrg nir_intrinsic_op intrinsic; 176701e04c3fSmrg 176801e04c3fSmrg nir_dest dest; 176901e04c3fSmrg 177001e04c3fSmrg /** number of components if this is a vectorized intrinsic 177101e04c3fSmrg * 177201e04c3fSmrg * Similarly to ALU operations, some intrinsics are vectorized. 177301e04c3fSmrg * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. 177401e04c3fSmrg * For vectorized intrinsics, the num_components field specifies the 177501e04c3fSmrg * number of destination components and the number of source components 177601e04c3fSmrg * for all sources with nir_intrinsic_infos.src_components[i] == 0. 177701e04c3fSmrg */ 177801e04c3fSmrg uint8_t num_components; 177901e04c3fSmrg 178001e04c3fSmrg int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; 178101e04c3fSmrg 178201e04c3fSmrg nir_src src[]; 178301e04c3fSmrg} nir_intrinsic_instr; 178401e04c3fSmrg 178501e04c3fSmrgstatic inline nir_variable * 178601e04c3fSmrgnir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) 178701e04c3fSmrg{ 178801e04c3fSmrg return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); 178901e04c3fSmrg} 179001e04c3fSmrg 17917ec681f3Smrgtypedef enum { 17927ec681f3Smrg /* Memory ordering. */ 17937ec681f3Smrg NIR_MEMORY_ACQUIRE = 1 << 0, 17947ec681f3Smrg NIR_MEMORY_RELEASE = 1 << 1, 17957ec681f3Smrg NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, 17967ec681f3Smrg 17977ec681f3Smrg /* Memory visibility operations. 
*/ 17987ec681f3Smrg NIR_MEMORY_MAKE_AVAILABLE = 1 << 2, 17997ec681f3Smrg NIR_MEMORY_MAKE_VISIBLE = 1 << 3, 18007ec681f3Smrg} nir_memory_semantics; 18017ec681f3Smrg 18027ec681f3Smrgtypedef enum { 18037ec681f3Smrg NIR_SCOPE_NONE, 18047ec681f3Smrg NIR_SCOPE_INVOCATION, 18057ec681f3Smrg NIR_SCOPE_SUBGROUP, 18067ec681f3Smrg NIR_SCOPE_SHADER_CALL, 18077ec681f3Smrg NIR_SCOPE_WORKGROUP, 18087ec681f3Smrg NIR_SCOPE_QUEUE_FAMILY, 18097ec681f3Smrg NIR_SCOPE_DEVICE, 18107ec681f3Smrg} nir_scope; 18117ec681f3Smrg 181201e04c3fSmrg/** 181301e04c3fSmrg * \name NIR intrinsics semantic flags 181401e04c3fSmrg * 181501e04c3fSmrg * information about what the compiler can do with the intrinsics. 181601e04c3fSmrg * 181701e04c3fSmrg * \sa nir_intrinsic_info::flags 181801e04c3fSmrg */ 181901e04c3fSmrgtypedef enum { 182001e04c3fSmrg /** 182101e04c3fSmrg * whether the intrinsic can be safely eliminated if none of its output 182201e04c3fSmrg * value is not being used. 182301e04c3fSmrg */ 182401e04c3fSmrg NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), 182501e04c3fSmrg 182601e04c3fSmrg /** 182701e04c3fSmrg * Whether the intrinsic can be reordered with respect to any other 182801e04c3fSmrg * intrinsic, i.e. whether the only reordering dependencies of the 182901e04c3fSmrg * intrinsic are due to the register reads/writes. 183001e04c3fSmrg */ 183101e04c3fSmrg NIR_INTRINSIC_CAN_REORDER = (1 << 1), 183201e04c3fSmrg} nir_intrinsic_semantic_flag; 183301e04c3fSmrg 183401e04c3fSmrg/** 18357ec681f3Smrg * Maximum valid value for a nir align_mul value (in intrinsics or derefs). 183601e04c3fSmrg * 18377ec681f3Smrg * Offsets can be signed, so this is the largest power of two in int32_t. 
183801e04c3fSmrg */ 18397ec681f3Smrg#define NIR_ALIGN_MUL_MAX 0x40000000 18407ec681f3Smrg 18417ec681f3Smrgtypedef struct nir_io_semantics { 18427ec681f3Smrg unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */ 18437ec681f3Smrg unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */ 18447ec681f3Smrg unsigned dual_source_blend_index:1; 18457ec681f3Smrg unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */ 18467ec681f3Smrg unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */ 18477ec681f3Smrg unsigned medium_precision:1; /* GLSL mediump qualifier */ 18487ec681f3Smrg unsigned per_view:1; 18497ec681f3Smrg unsigned high_16bits:1; /* whether accessing low or high half of the slot */ 18507ec681f3Smrg unsigned _pad:6; 18517ec681f3Smrg} nir_io_semantics; 18527ec681f3Smrg 18537ec681f3Smrg#define NIR_INTRINSIC_MAX_INPUTS 11 185401e04c3fSmrg 185501e04c3fSmrgtypedef struct { 185601e04c3fSmrg const char *name; 185701e04c3fSmrg 18587ec681f3Smrg uint8_t num_srcs; /** < number of register/SSA inputs */ 185901e04c3fSmrg 186001e04c3fSmrg /** number of components of each input register 186101e04c3fSmrg * 186201e04c3fSmrg * If this value is 0, the number of components is given by the 18637e102996Smaya * num_components field of nir_intrinsic_instr. If this value is -1, the 18647e102996Smaya * intrinsic consumes however many components are provided and it is not 18657e102996Smaya * validated at all. 186601e04c3fSmrg */ 18677ec681f3Smrg int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; 186801e04c3fSmrg 186901e04c3fSmrg bool has_dest; 187001e04c3fSmrg 187101e04c3fSmrg /** number of components of the output register 187201e04c3fSmrg * 187301e04c3fSmrg * If this value is 0, the number of components is given by the 187401e04c3fSmrg * num_components field of nir_intrinsic_instr. 
187501e04c3fSmrg */ 18767ec681f3Smrg uint8_t dest_components; 187701e04c3fSmrg 18787e102996Smaya /** bitfield of legal bit sizes */ 18797ec681f3Smrg uint8_t dest_bit_sizes; 18807ec681f3Smrg 18817ec681f3Smrg /** source which the destination bit size must match 18827ec681f3Smrg * 18837ec681f3Smrg * Some intrinsics, such as subgroup intrinsics, are data manipulation 18847ec681f3Smrg * intrinsics and they have similar bit-size rules to ALU ops. This enables 18857ec681f3Smrg * validation to validate a bit more and enables auto-generated builder code 18867ec681f3Smrg * to properly determine destination bit sizes automatically. 18877ec681f3Smrg */ 18887ec681f3Smrg int8_t bit_size_src; 18897e102996Smaya 189001e04c3fSmrg /** the number of constant indices used by the intrinsic */ 18917ec681f3Smrg uint8_t num_indices; 18927ec681f3Smrg 18937ec681f3Smrg /** list of indices */ 18947ec681f3Smrg uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX]; 189501e04c3fSmrg 189601e04c3fSmrg /** indicates the usage of intr->const_index[n] */ 18977ec681f3Smrg uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 189801e04c3fSmrg 189901e04c3fSmrg /** semantic flags for calls to this intrinsic */ 190001e04c3fSmrg nir_intrinsic_semantic_flag flags; 190101e04c3fSmrg} nir_intrinsic_info; 190201e04c3fSmrg 190301e04c3fSmrgextern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 190401e04c3fSmrg 190501e04c3fSmrgstatic inline unsigned 19067ec681f3Smrgnir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) 190701e04c3fSmrg{ 190801e04c3fSmrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 190901e04c3fSmrg assert(srcn < info->num_srcs); 19107e102996Smaya if (info->src_components[srcn] > 0) 191101e04c3fSmrg return info->src_components[srcn]; 19127e102996Smaya else if (info->src_components[srcn] == 0) 191301e04c3fSmrg return intr->num_components; 19147e102996Smaya else 19157e102996Smaya return nir_src_num_components(intr->src[srcn]); 191601e04c3fSmrg} 
191701e04c3fSmrg 191801e04c3fSmrgstatic inline unsigned 191901e04c3fSmrgnir_intrinsic_dest_components(nir_intrinsic_instr *intr) 192001e04c3fSmrg{ 192101e04c3fSmrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 192201e04c3fSmrg if (!info->has_dest) 192301e04c3fSmrg return 0; 192401e04c3fSmrg else if (info->dest_components) 192501e04c3fSmrg return info->dest_components; 192601e04c3fSmrg else 192701e04c3fSmrg return intr->num_components; 192801e04c3fSmrg} 192901e04c3fSmrg 19307ec681f3Smrg/** 19317ec681f3Smrg * Helper to copy const_index[] from src to dst, without assuming they 19327ec681f3Smrg * match in order. 19337ec681f3Smrg */ 19347ec681f3Smrgstatic inline void 19357ec681f3Smrgnir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) 19367ec681f3Smrg{ 19377ec681f3Smrg if (src->intrinsic == dst->intrinsic) { 19387ec681f3Smrg memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); 19397ec681f3Smrg return; 19407ec681f3Smrg } 19417ec681f3Smrg 19427ec681f3Smrg const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; 19437ec681f3Smrg const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; 19447ec681f3Smrg 19457ec681f3Smrg for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { 19467ec681f3Smrg if (src_info->index_map[i] == 0) 19477ec681f3Smrg continue; 19487ec681f3Smrg 19497ec681f3Smrg /* require that dst instruction also uses the same const_index[]: */ 19507ec681f3Smrg assert(dst_info->index_map[i] > 0); 19517ec681f3Smrg 19527ec681f3Smrg dst->const_index[dst_info->index_map[i] - 1] = 19537ec681f3Smrg src->const_index[src_info->index_map[i] - 1]; 19547ec681f3Smrg } 19557ec681f3Smrg} 19567ec681f3Smrg 19577ec681f3Smrg#include "nir_intrinsics_indices.h" 19587e102996Smaya 19597e102996Smayastatic inline void 19607e102996Smayanir_intrinsic_set_align(nir_intrinsic_instr *intrin, 19617e102996Smaya unsigned align_mul, unsigned align_offset) 19627e102996Smaya{ 
19637e102996Smaya assert(util_is_power_of_two_nonzero(align_mul)); 19647e102996Smaya assert(align_offset < align_mul); 19657e102996Smaya nir_intrinsic_set_align_mul(intrin, align_mul); 19667e102996Smaya nir_intrinsic_set_align_offset(intrin, align_offset); 19677e102996Smaya} 19687e102996Smaya 19697e102996Smaya/** Returns a simple alignment for a load/store intrinsic offset 19707e102996Smaya * 19717e102996Smaya * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 19727e102996Smaya * and ALIGN_OFFSET parameters, this helper takes both into account and 19737e102996Smaya * provides a single simple alignment parameter. The offset X is guaranteed 19747e102996Smaya * to satisfy X % align == 0. 19757e102996Smaya */ 19767e102996Smayastatic inline unsigned 19777e102996Smayanir_intrinsic_align(const nir_intrinsic_instr *intrin) 19787e102996Smaya{ 19797e102996Smaya const unsigned align_mul = nir_intrinsic_align_mul(intrin); 19807e102996Smaya const unsigned align_offset = nir_intrinsic_align_offset(intrin); 19817e102996Smaya assert(align_offset < align_mul); 19827e102996Smaya return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; 19837e102996Smaya} 19847e102996Smaya 19857ec681f3Smrgstatic inline bool 19867ec681f3Smrgnir_intrinsic_has_align(const nir_intrinsic_instr *intrin) 19877ec681f3Smrg{ 19887ec681f3Smrg return nir_intrinsic_has_align_mul(intrin) && 19897ec681f3Smrg nir_intrinsic_has_align_offset(intrin); 19907ec681f3Smrg} 19917ec681f3Smrg 19927ec681f3Smrgunsigned 19937ec681f3Smrgnir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); 19947ec681f3Smrg 19957e102996Smaya/* Converts a image_deref_* intrinsic into a image_* one */ 19967e102996Smayavoid nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 19977e102996Smaya nir_ssa_def *handle, bool bindless); 19987e102996Smaya 19997e102996Smaya/* Determine if an intrinsic can be arbitrarily reordered and eliminated. 
*/ 20007e102996Smayastatic inline bool 20017e102996Smayanir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 20027e102996Smaya{ 20037ec681f3Smrg if (instr->intrinsic == nir_intrinsic_load_deref) { 20047ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 20057ec681f3Smrg return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) || 20067ec681f3Smrg (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER); 20077ec681f3Smrg } else if (instr->intrinsic == nir_intrinsic_load_ssbo || 20087ec681f3Smrg instr->intrinsic == nir_intrinsic_bindless_image_load || 20097ec681f3Smrg instr->intrinsic == nir_intrinsic_image_deref_load || 20107ec681f3Smrg instr->intrinsic == nir_intrinsic_image_load) { 20117ec681f3Smrg return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; 20127ec681f3Smrg } else { 20137ec681f3Smrg const nir_intrinsic_info *info = 20147ec681f3Smrg &nir_intrinsic_infos[instr->intrinsic]; 20157ec681f3Smrg return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 20167ec681f3Smrg (info->flags & NIR_INTRINSIC_CAN_REORDER); 20177ec681f3Smrg } 20187e102996Smaya} 201901e04c3fSmrg 20207ec681f3Smrgbool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr); 202101e04c3fSmrg 20227ec681f3Smrg/** Texture instruction source type */ 202301e04c3fSmrgtypedef enum { 20247ec681f3Smrg /** Texture coordinate 20257ec681f3Smrg * 20267ec681f3Smrg * Must have nir_tex_instr::coord_components components. 20277ec681f3Smrg */ 202801e04c3fSmrg nir_tex_src_coord, 20297ec681f3Smrg 20307ec681f3Smrg /** Projector 20317ec681f3Smrg * 20327ec681f3Smrg * The texture coordinate (except for the array component, if any) is 20337ec681f3Smrg * divided by this value before LOD computation and sampling. 20347ec681f3Smrg * 20357ec681f3Smrg * Must be a float scalar. 
20367ec681f3Smrg */ 203701e04c3fSmrg nir_tex_src_projector, 20387ec681f3Smrg 20397ec681f3Smrg /** Shadow comparator 20407ec681f3Smrg * 20417ec681f3Smrg * For shadow sampling, the fetched texel values are compared against the 20427ec681f3Smrg * shadow comparator using the compare op specified by the sampler object 20437ec681f3Smrg * and converted to 1.0 if the comparison succeeds and 0.0 if it fails. 20447ec681f3Smrg * Interpolation happens after this conversion so the actual result may be 20457ec681f3Smrg * anywhere in the range [0.0, 1.0]. 20467ec681f3Smrg * 20477ec681f3Smrg * Only valid if nir_tex_instr::is_shadow and must be a float scalar. 20487ec681f3Smrg */ 20497ec681f3Smrg nir_tex_src_comparator, 20507ec681f3Smrg 20517ec681f3Smrg /** Coordinate offset 20527ec681f3Smrg * 20537ec681f3Smrg * An integer value that is added to the texel address before sampling. 20547ec681f3Smrg * This is only allowed with operations that take an explicit LOD as it is 20557ec681f3Smrg * applied in integer texel space after LOD selection and not normalized 20567ec681f3Smrg * coordinate space. 20577ec681f3Smrg */ 205801e04c3fSmrg nir_tex_src_offset, 20597ec681f3Smrg 20607ec681f3Smrg /** LOD bias 20617ec681f3Smrg * 20627ec681f3Smrg * This value is added to the computed LOD before mip-mapping. 20637ec681f3Smrg */ 206401e04c3fSmrg nir_tex_src_bias, 20657ec681f3Smrg 20667ec681f3Smrg /** Explicit LOD */ 206701e04c3fSmrg nir_tex_src_lod, 20687ec681f3Smrg 20697ec681f3Smrg /** Min LOD 20707ec681f3Smrg * 20717ec681f3Smrg * The computed LOD is clamped to be at least as large as min_lod before 20727ec681f3Smrg * mip-mapping. 
20737ec681f3Smrg */ 20747e102996Smaya nir_tex_src_min_lod, 20757ec681f3Smrg 20767ec681f3Smrg /** MSAA sample index */ 20777ec681f3Smrg nir_tex_src_ms_index, 20787ec681f3Smrg 20797ec681f3Smrg /** Intel-specific MSAA compression data */ 20807ec681f3Smrg nir_tex_src_ms_mcs_intel, 20817ec681f3Smrg 20827ec681f3Smrg /** Explicit horizontal (X-major) coordinate derivative */ 208301e04c3fSmrg nir_tex_src_ddx, 20847ec681f3Smrg 20857ec681f3Smrg /** Explicit vertical (Y-major) coordinate derivative */ 208601e04c3fSmrg nir_tex_src_ddy, 20877ec681f3Smrg 20887ec681f3Smrg /** Texture variable dereference */ 20897ec681f3Smrg nir_tex_src_texture_deref, 20907ec681f3Smrg 20917ec681f3Smrg /** Sampler variable dereference */ 20927ec681f3Smrg nir_tex_src_sampler_deref, 20937ec681f3Smrg 20947ec681f3Smrg /** Texture index offset 20957ec681f3Smrg * 20967ec681f3Smrg * This is added to nir_tex_instr::texture_index. Unless 20977ec681f3Smrg * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be 20987ec681f3Smrg * dynamically uniform. 20997ec681f3Smrg */ 21007ec681f3Smrg nir_tex_src_texture_offset, 21017ec681f3Smrg 21027ec681f3Smrg /** Dynamically uniform sampler index offset 21037ec681f3Smrg * 21047ec681f3Smrg * This is added to nir_tex_instr::sampler_index. Unless 21057ec681f3Smrg * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be 21067ec681f3Smrg * dynamically uniform. 21077ec681f3Smrg */ 21087ec681f3Smrg nir_tex_src_sampler_offset, 21097ec681f3Smrg 21107ec681f3Smrg /** Bindless texture handle 21117ec681f3Smrg * 21127ec681f3Smrg * This is, unfortunately, a bit overloaded at the moment. There are 21137ec681f3Smrg * generally two types of bindless handles: 21147ec681f3Smrg * 21157ec681f3Smrg * 1. For GL_ARB_bindless bindless handles. These are part of the 21167ec681f3Smrg * GL/Gallium-level API and are always a 64-bit integer. 21177ec681f3Smrg * 21187ec681f3Smrg * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to 21197ec681f3Smrg * these. 
Also, these are used by many Vulkan drivers to implement 21207ec681f3Smrg * descriptor sets, especially for UPDATE_AFTER_BIND descriptors. 21217ec681f3Smrg * The details of hardware handles (bit size, format, etc.) is 21227ec681f3Smrg * HW-specific. 21237ec681f3Smrg * 21247ec681f3Smrg * Because of this overloading and the resulting ambiguity, we currently 21257ec681f3Smrg * don't validate anything for these. 21267ec681f3Smrg */ 21277ec681f3Smrg nir_tex_src_texture_handle, 21287ec681f3Smrg 21297ec681f3Smrg /** Bindless sampler handle 21307ec681f3Smrg * 21317ec681f3Smrg * See nir_tex_src_texture_handle, 21327ec681f3Smrg */ 21337ec681f3Smrg nir_tex_src_sampler_handle, 21347ec681f3Smrg 21357ec681f3Smrg /** Plane index for multi-plane YCbCr textures */ 21367ec681f3Smrg nir_tex_src_plane, 21377ec681f3Smrg 21387ec681f3Smrg /** 21397ec681f3Smrg * Backend-specific vec4 tex src argument. 21407ec681f3Smrg * 21417ec681f3Smrg * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs) 21427ec681f3Smrg * apply to the packing of the tex srcs. This lowering must only happen 21437ec681f3Smrg * after nir_lower_tex(). 21447ec681f3Smrg * 21457ec681f3Smrg * The nir_tex_instr_src_type() of this argument is float, so no lowering 21467ec681f3Smrg * will happen if nir_lower_int_to_float is used. 21477ec681f3Smrg */ 21487ec681f3Smrg nir_tex_src_backend1, 21497ec681f3Smrg 21507ec681f3Smrg /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. 
*/ 21517ec681f3Smrg nir_tex_src_backend2, 21527ec681f3Smrg 215301e04c3fSmrg nir_num_tex_src_types 215401e04c3fSmrg} nir_tex_src_type; 215501e04c3fSmrg 21567ec681f3Smrg/** A texture instruction source */ 215701e04c3fSmrgtypedef struct { 21587ec681f3Smrg /** Base source */ 215901e04c3fSmrg nir_src src; 21607ec681f3Smrg 21617ec681f3Smrg /** Type of this source */ 216201e04c3fSmrg nir_tex_src_type src_type; 216301e04c3fSmrg} nir_tex_src; 216401e04c3fSmrg 21657ec681f3Smrg/** Texture instruction opcode */ 216601e04c3fSmrgtypedef enum { 216701e04c3fSmrg nir_texop_tex, /**< Regular texture look-up */ 216801e04c3fSmrg nir_texop_txb, /**< Texture look-up with LOD bias */ 216901e04c3fSmrg nir_texop_txl, /**< Texture look-up with explicit LOD */ 217001e04c3fSmrg nir_texop_txd, /**< Texture look-up with partial derivatives */ 217101e04c3fSmrg nir_texop_txf, /**< Texel fetch with explicit LOD */ 21727e102996Smaya nir_texop_txf_ms, /**< Multisample texture fetch */ 21737e102996Smaya nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 21747ec681f3Smrg nir_texop_txf_ms_mcs_intel, /**< Multisample compression value fetch */ 217501e04c3fSmrg nir_texop_txs, /**< Texture size */ 217601e04c3fSmrg nir_texop_lod, /**< Texture lod query */ 217701e04c3fSmrg nir_texop_tg4, /**< Texture gather */ 217801e04c3fSmrg nir_texop_query_levels, /**< Texture levels query */ 217901e04c3fSmrg nir_texop_texture_samples, /**< Texture samples query */ 218001e04c3fSmrg nir_texop_samples_identical, /**< Query whether all samples are definitely 218101e04c3fSmrg * identical. 
218201e04c3fSmrg */ 21837ec681f3Smrg nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ 21847ec681f3Smrg nir_texop_fragment_fetch_amd, /**< Multisample fragment color texture fetch */ 21857ec681f3Smrg nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */ 218601e04c3fSmrg} nir_texop; 218701e04c3fSmrg 21887ec681f3Smrg/** Represents a texture instruction */ 218901e04c3fSmrgtypedef struct { 21907ec681f3Smrg /** Base instruction */ 219101e04c3fSmrg nir_instr instr; 219201e04c3fSmrg 21937ec681f3Smrg /** Dimensionality of the texture operation 21947ec681f3Smrg * 21957ec681f3Smrg * This will typically match the dimensionality of the texture deref type 21967ec681f3Smrg * if a nir_tex_src_texture_deref is present. However, it may not if 21977ec681f3Smrg * texture lowering has occurred. 21987ec681f3Smrg */ 219901e04c3fSmrg enum glsl_sampler_dim sampler_dim; 22007ec681f3Smrg 22017ec681f3Smrg /** ALU type of the destination 22027ec681f3Smrg * 22037ec681f3Smrg * This is the canonical sampled type for this texture operation and may 22047ec681f3Smrg * not exactly match the sampled type of the deref type when a 22057ec681f3Smrg * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of 22067ec681f3Smrg * the texture deref will be GLSL_TYPE_VOID and this is allowed to be 22077ec681f3Smrg * anything. With SPIR-V, the signedness of integer types is allowed to 22087ec681f3Smrg * differ. For all APIs, the bit size may differ if the driver has done 22097ec681f3Smrg * any sort of mediump or similar lowering since texture types always have 22107ec681f3Smrg * 32-bit sampled types. 
22117ec681f3Smrg */ 221201e04c3fSmrg nir_alu_type dest_type; 221301e04c3fSmrg 22147ec681f3Smrg /** Texture opcode */ 221501e04c3fSmrg nir_texop op; 22167ec681f3Smrg 22177ec681f3Smrg /** Destination */ 221801e04c3fSmrg nir_dest dest; 22197ec681f3Smrg 22207ec681f3Smrg /** Array of sources 22217ec681f3Smrg * 22227ec681f3Smrg * This array has nir_tex_instr::num_srcs elements 22237ec681f3Smrg */ 222401e04c3fSmrg nir_tex_src *src; 22257ec681f3Smrg 22267ec681f3Smrg /** Number of sources */ 22277ec681f3Smrg unsigned num_srcs; 22287ec681f3Smrg 22297ec681f3Smrg /** Number of components in the coordinate, if any */ 22307ec681f3Smrg unsigned coord_components; 22317ec681f3Smrg 22327ec681f3Smrg /** True if the texture instruction acts on an array texture */ 22337ec681f3Smrg bool is_array; 22347ec681f3Smrg 22357ec681f3Smrg /** True if the texture instruction performs a shadow comparison 22367ec681f3Smrg * 22377ec681f3Smrg * If this is true, the texture instruction must have a 22387ec681f3Smrg * nir_tex_src_comparator. 22397ec681f3Smrg */ 22407ec681f3Smrg bool is_shadow; 224101e04c3fSmrg 224201e04c3fSmrg /** 22437ec681f3Smrg * If is_shadow is true, whether this is the old-style shadow that outputs 22447ec681f3Smrg * 4 components or the new-style shadow that outputs 1 component. 224501e04c3fSmrg */ 224601e04c3fSmrg bool is_new_style_shadow; 224701e04c3fSmrg 22487ec681f3Smrg /** 22497ec681f3Smrg * True if this texture instruction should return a sparse residency code. 22507ec681f3Smrg * The code is in the last component of the result. 22517ec681f3Smrg */ 22527ec681f3Smrg bool is_sparse; 22537ec681f3Smrg 22547ec681f3Smrg /** nir_texop_tg4 component selector 22557ec681f3Smrg * 22567ec681f3Smrg * This determines which RGBA component is gathered. 
22577ec681f3Smrg */ 225801e04c3fSmrg unsigned component : 2; 225901e04c3fSmrg 22607ec681f3Smrg /** Validation needs to know this for gradient component count */ 22617ec681f3Smrg unsigned array_is_lowered_cube : 1; 22627ec681f3Smrg 22637ec681f3Smrg /** Gather offsets */ 22647e102996Smaya int8_t tg4_offsets[4][2]; 22657e102996Smaya 22667ec681f3Smrg /** True if the texture index or handle is not dynamically uniform */ 22677e102996Smaya bool texture_non_uniform; 22687e102996Smaya 22697ec681f3Smrg /** True if the sampler index or handle is not dynamically uniform */ 22707e102996Smaya bool sampler_non_uniform; 22717e102996Smaya 227201e04c3fSmrg /** The texture index 227301e04c3fSmrg * 227401e04c3fSmrg * If this texture instruction has a nir_tex_src_texture_offset source, 227501e04c3fSmrg * then the texture index is given by texture_index + texture_offset. 227601e04c3fSmrg */ 227701e04c3fSmrg unsigned texture_index; 227801e04c3fSmrg 227901e04c3fSmrg /** The sampler index 228001e04c3fSmrg * 228101e04c3fSmrg * The following operations do not require a sampler and, as such, this 228201e04c3fSmrg * field should be ignored: 228301e04c3fSmrg * - nir_texop_txf 228401e04c3fSmrg * - nir_texop_txf_ms 228501e04c3fSmrg * - nir_texop_txs 228601e04c3fSmrg * - nir_texop_query_levels 228701e04c3fSmrg * - nir_texop_texture_samples 228801e04c3fSmrg * - nir_texop_samples_identical 228901e04c3fSmrg * 229001e04c3fSmrg * If this texture instruction has a nir_tex_src_sampler_offset source, 229101e04c3fSmrg * then the sampler index is given by sampler_index + sampler_offset. 229201e04c3fSmrg */ 229301e04c3fSmrg unsigned sampler_index; 229401e04c3fSmrg} nir_tex_instr; 229501e04c3fSmrg 22967ec681f3Smrg/** 22977ec681f3Smrg * Returns true if the texture operation requires a sampler as a general rule 22987ec681f3Smrg * 22997ec681f3Smrg * Note that the specific hw/driver backend could require to a sampler 23007ec681f3Smrg * object/configuration packet in any case, for some other reason. 
23017ec681f3Smrg * 23027ec681f3Smrg * @see nir_tex_instr::sampler_index. 23037ec681f3Smrg */ 23047ec681f3Smrgstatic inline bool 23057ec681f3Smrgnir_tex_instr_need_sampler(const nir_tex_instr *instr) 23067ec681f3Smrg{ 23077ec681f3Smrg switch (instr->op) { 23087ec681f3Smrg case nir_texop_txf: 23097ec681f3Smrg case nir_texop_txf_ms: 23107ec681f3Smrg case nir_texop_txs: 23117ec681f3Smrg case nir_texop_query_levels: 23127ec681f3Smrg case nir_texop_texture_samples: 23137ec681f3Smrg case nir_texop_samples_identical: 23147ec681f3Smrg return false; 23157ec681f3Smrg default: 23167ec681f3Smrg return true; 23177ec681f3Smrg } 23187ec681f3Smrg} 23197ec681f3Smrg 23207ec681f3Smrg/** Returns the number of components returned by this nir_tex_instr 23217ec681f3Smrg * 23227ec681f3Smrg * Useful for code building texture instructions when you don't want to think 23237ec681f3Smrg * about how many components a particular texture op returns. This does not 23247ec681f3Smrg * include the sparse residency code. 23257ec681f3Smrg */ 232601e04c3fSmrgstatic inline unsigned 23277ec681f3Smrgnir_tex_instr_result_size(const nir_tex_instr *instr) 232801e04c3fSmrg{ 232901e04c3fSmrg switch (instr->op) { 233001e04c3fSmrg case nir_texop_txs: { 233101e04c3fSmrg unsigned ret; 233201e04c3fSmrg switch (instr->sampler_dim) { 233301e04c3fSmrg case GLSL_SAMPLER_DIM_1D: 233401e04c3fSmrg case GLSL_SAMPLER_DIM_BUF: 233501e04c3fSmrg ret = 1; 233601e04c3fSmrg break; 233701e04c3fSmrg case GLSL_SAMPLER_DIM_2D: 233801e04c3fSmrg case GLSL_SAMPLER_DIM_CUBE: 233901e04c3fSmrg case GLSL_SAMPLER_DIM_MS: 234001e04c3fSmrg case GLSL_SAMPLER_DIM_RECT: 234101e04c3fSmrg case GLSL_SAMPLER_DIM_EXTERNAL: 234201e04c3fSmrg case GLSL_SAMPLER_DIM_SUBPASS: 234301e04c3fSmrg ret = 2; 234401e04c3fSmrg break; 234501e04c3fSmrg case GLSL_SAMPLER_DIM_3D: 234601e04c3fSmrg ret = 3; 234701e04c3fSmrg break; 234801e04c3fSmrg default: 234901e04c3fSmrg unreachable("not reached"); 235001e04c3fSmrg } 235101e04c3fSmrg if (instr->is_array) 235201e04c3fSmrg 
ret++; 235301e04c3fSmrg return ret; 235401e04c3fSmrg } 235501e04c3fSmrg 235601e04c3fSmrg case nir_texop_lod: 235701e04c3fSmrg return 2; 235801e04c3fSmrg 235901e04c3fSmrg case nir_texop_texture_samples: 236001e04c3fSmrg case nir_texop_query_levels: 236101e04c3fSmrg case nir_texop_samples_identical: 23627ec681f3Smrg case nir_texop_fragment_mask_fetch_amd: 236301e04c3fSmrg return 1; 236401e04c3fSmrg 236501e04c3fSmrg default: 236601e04c3fSmrg if (instr->is_shadow && instr->is_new_style_shadow) 236701e04c3fSmrg return 1; 236801e04c3fSmrg 236901e04c3fSmrg return 4; 237001e04c3fSmrg } 237101e04c3fSmrg} 237201e04c3fSmrg 23737ec681f3Smrg/** 23747ec681f3Smrg * Returns the destination size of this nir_tex_instr including the sparse 23757ec681f3Smrg * residency code, if any. 23767ec681f3Smrg */ 23777ec681f3Smrgstatic inline unsigned 23787ec681f3Smrgnir_tex_instr_dest_size(const nir_tex_instr *instr) 23797ec681f3Smrg{ 23807ec681f3Smrg /* One more component is needed for the residency code. */ 23817ec681f3Smrg return nir_tex_instr_result_size(instr) + instr->is_sparse; 23827ec681f3Smrg} 23837ec681f3Smrg 23847ec681f3Smrg/** 23857ec681f3Smrg * Returns true if this texture operation queries something about the texture 238601e04c3fSmrg * rather than actually sampling it. 
238701e04c3fSmrg */ 238801e04c3fSmrgstatic inline bool 238901e04c3fSmrgnir_tex_instr_is_query(const nir_tex_instr *instr) 239001e04c3fSmrg{ 239101e04c3fSmrg switch (instr->op) { 239201e04c3fSmrg case nir_texop_txs: 239301e04c3fSmrg case nir_texop_lod: 239401e04c3fSmrg case nir_texop_texture_samples: 239501e04c3fSmrg case nir_texop_query_levels: 239601e04c3fSmrg return true; 239701e04c3fSmrg case nir_texop_tex: 239801e04c3fSmrg case nir_texop_txb: 239901e04c3fSmrg case nir_texop_txl: 240001e04c3fSmrg case nir_texop_txd: 240101e04c3fSmrg case nir_texop_txf: 240201e04c3fSmrg case nir_texop_txf_ms: 24037e102996Smaya case nir_texop_txf_ms_fb: 24047ec681f3Smrg case nir_texop_txf_ms_mcs_intel: 240501e04c3fSmrg case nir_texop_tg4: 240601e04c3fSmrg return false; 240701e04c3fSmrg default: 240801e04c3fSmrg unreachable("Invalid texture opcode"); 240901e04c3fSmrg } 241001e04c3fSmrg} 241101e04c3fSmrg 24127ec681f3Smrg/** Returns true if this texture instruction does implicit derivatives 24137ec681f3Smrg * 24147ec681f3Smrg * This is important as there are extra control-flow rules around derivatives 24157ec681f3Smrg * and texture instructions which perform them implicitly. 
 */
static inline bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   default:
      return false;
   }
}

/** Returns the ALU type of the given texture instruction source */
static inline nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      /* Fetch-style ops address the texture with integer texel coordinates;
       * everything else uses floating-point normalized coordinates.
       */
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      /* Size queries and fetches take an integer LOD; sampling ops take a
       * float LOD.
       */
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

/**
 * Returns the number of components required by the given texture instruction
 * source
 */
static inline unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      /* Gradients apply to the non-array coordinate components only (unless
       * the array dimension comes from cube lowering).
       */
      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
    * the offset, since a cube maps to a single face.
    */
   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         return 2;
      else if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   /* Backend-opaque sources keep whatever size the backend gave them. */
   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* Everything else (LOD, bias, comparator, indices, ...) is scalar. */
   return 1;
}

/**
 * Returns the index of the texture instruction source with the given
 * nir_tex_src_type or -1 if no such source exists.
 */
static inline int
nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
{
   for (unsigned i = 0; i < instr->num_srcs; i++)
      if (instr->src[i].src_type == type)
         return (int) i;

   return -1;
}

/** Adds a source to a texture instruction */
void nir_tex_instr_add_src(nir_tex_instr *tex,
                           nir_tex_src_type src_type,
                           nir_src src);

/** Removes a source from a texture instruction */
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);

/** Returns true if any of tex's tg4_offsets is non-default (non-zero). */
bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);

/** An instruction that materializes an SSA constant. */
typedef struct {
   nir_instr instr;

   /** SSA definition holding the constant value */
   nir_ssa_def def;

   /* Trailing flexible array: one nir_const_value per component of ::def. */
   nir_const_value value[];
} nir_load_const_instr;

typedef enum {
   /** Return from a function
    *
    * This instruction is a classic function return.  It jumps to
    * nir_function_impl::end_block.  No return value is provided in this
    * instruction.  Instead, the function is expected to write any return
    * data to a deref passed in from the caller.
    */
   nir_jump_return,

   /** Immediately exit the current shader
    *
    * This instruction is roughly the equivalent of C's "exit()" in that it
    * immediately terminates the current shader invocation.  From a CFG
    * perspective, it looks like a jump to nir_function_impl::end_block but
    * it actually jumps to the end block of the shader entrypoint.  A halt
    * instruction in the shader entrypoint itself is semantically identical
    * to a return.
    *
    * For shaders with built-in I/O, any outputs written prior to a halt
    * instruction remain written and any outputs not written prior to the
    * halt have undefined values.  It does NOT cause an implicit discard of
    * written results.  If one wants discard results in a fragment shader,
    * for instance, a discard or demote intrinsic is required.
    */
   nir_jump_halt,

   /** Break out of the inner-most loop
    *
    * This has the same semantics as C's "break" statement.
    */
   nir_jump_break,

   /** Jump back to the top of the inner-most loop
    *
    * This has the same semantics as C's "continue" statement assuming that a
    * NIR loop is implemented as "while (1) { body }".
    */
   nir_jump_continue,

   /** Jumps for unstructured CFG.
    *
    * As within an unstructured CFG we can't rely on block ordering we need to
    * place explicit jumps at the end of every block.
    */
   nir_jump_goto,
   nir_jump_goto_if,
} nir_jump_type;

/** A jump instruction (return, halt, break, continue, goto, goto_if). */
typedef struct {
   nir_instr instr;
   nir_jump_type type;
   /* NOTE(review): condition/target/else_target appear to be used only by
    * the unstructured goto_if/goto jumps -- confirm against nir_validate.
    */
   nir_src condition;
   struct nir_block *target;
   struct nir_block *else_target;
} nir_jump_instr;

/* creates a new SSA variable in an undefined state */

typedef struct {
   nir_instr instr;
   nir_ssa_def def;
} nir_ssa_undef_instr;

/** A single source of a phi instruction, tied to one predecessor block. */
typedef struct {
   struct exec_node node;

   /* The predecessor block corresponding to this source */
   struct nir_block *pred;

   nir_src src;
} nir_phi_src;

#define nir_foreach_phi_src(phi_src, phi) \
   foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
#define nir_foreach_phi_src_safe(phi_src, phi) \
   foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)

/** An SSA phi instruction: selects a value based on the predecessor block. */
typedef struct {
   nir_instr instr;

   struct exec_list srcs; /** < list of nir_phi_src */

   nir_dest dest;
} nir_phi_instr;

/**
 * Returns the phi source corresponding to the given predecessor block, or
 * NULL (after a debug assertion failure) if the block is not a predecessor.
 */
static inline nir_phi_src *
nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
{
   nir_foreach_phi_src(src, phi) {
      if (src->pred == block)
         return src;
   }

   assert(!"Block is not a predecessor of phi.");
   return NULL;
}

/** One source/destination pair within a parallel copy. */
typedef struct {
   struct exec_node node;
   nir_src src;
   nir_dest dest;
} nir_parallel_copy_entry;

#define nir_foreach_parallel_copy_entry(entry, pcopy) \
   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)

typedef struct {
   nir_instr instr;

   /* A list of nir_parallel_copy_entrys.  The sources of all of the
    * entries are copied to the corresponding destinations "in parallel".
    * In other words, if we have two entries: a -> b and b -> a, the values
    * get swapped.
 */
   struct exec_list entries;
} nir_parallel_copy_instr;

/* Checked downcasts from nir_instr to each concrete instruction type. */
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
                type, nir_instr_type_alu)
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
                type, nir_instr_type_deref)
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
                type, nir_instr_type_call)
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
                type, nir_instr_type_jump)
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
                type, nir_instr_type_tex)
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
                type, nir_instr_type_intrinsic)
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
                type, nir_instr_type_load_const)
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
                type, nir_instr_type_ssa_undef)
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
                type, nir_instr_type_phi)
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                nir_parallel_copy_instr, instr,
                type, nir_instr_type_parallel_copy)

/* Generates nir_src_comp_as_<suffix>/nir_src_as_<suffix> accessors that
 * extract a known-constant source as the given C type.  Both assert that the
 * source really is a load_const.
 */
#define NIR_DEFINE_SRC_AS_CONST(type, suffix)               \
static inline type                                          \
nir_src_comp_as_##suffix(nir_src src, unsigned comp)        \
{                                                           \
   assert(nir_src_is_const(src));                           \
   nir_load_const_instr *load =                             \
      nir_instr_as_load_const(src.ssa->parent_instr);       \
   assert(comp < load->def.num_components);                 \
   return nir_const_value_as_##suffix(load->value[comp],    \
                                      load->def.bit_size);  \
}                                                           \
                                                            \
static inline type                                          \
nir_src_as_##suffix(nir_src src)                            \
{                                                           \
   assert(nir_src_num_components(src) == 1);                \
   return nir_src_comp_as_##suffix(src, 0);                 \
}

NIR_DEFINE_SRC_AS_CONST(int64_t, int)
NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
NIR_DEFINE_SRC_AS_CONST(bool, bool)
NIR_DEFINE_SRC_AS_CONST(double, float)

#undef NIR_DEFINE_SRC_AS_CONST

/** A single component of a single SSA definition. */
typedef struct {
   nir_ssa_def *def;
   unsigned comp;
} nir_ssa_scalar;

/** Returns true if the scalar comes from a load_const instruction. */
static inline bool
nir_ssa_scalar_is_const(nir_ssa_scalar s)
{
   return s.def->parent_instr->type == nir_instr_type_load_const;
}

/** Returns the constant value of the scalar; s must satisfy
 * nir_ssa_scalar_is_const().
 */
static inline nir_const_value
nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
{
   assert(s.comp < s.def->num_components);
   nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
   return load->value[s.comp];
}

/* Generates nir_ssa_scalar_as_<suffix> accessors mirroring the nir_src
 * variants above.
 */
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix)            \
static inline type                                          \
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s)                \
{                                                           \
   return nir_const_value_as_##suffix(                      \
             nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
}

NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
27627e102996SmayaNIR_DEFINE_SCALAR_AS_CONST(double, float) 27637e102996Smaya 27647e102996Smaya#undef NIR_DEFINE_SCALAR_AS_CONST 27657e102996Smaya 27667e102996Smayastatic inline bool 27677e102996Smayanir_ssa_scalar_is_alu(nir_ssa_scalar s) 27687e102996Smaya{ 27697e102996Smaya return s.def->parent_instr->type == nir_instr_type_alu; 27707e102996Smaya} 27717e102996Smaya 27727e102996Smayastatic inline nir_op 27737e102996Smayanir_ssa_scalar_alu_op(nir_ssa_scalar s) 27747e102996Smaya{ 27757e102996Smaya return nir_instr_as_alu(s.def->parent_instr)->op; 27767e102996Smaya} 27777e102996Smaya 27787e102996Smayastatic inline nir_ssa_scalar 27797e102996Smayanir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 27807e102996Smaya{ 27817e102996Smaya nir_ssa_scalar out = { NULL, 0 }; 27827e102996Smaya 27837e102996Smaya nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 27847e102996Smaya assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 27857e102996Smaya 27867e102996Smaya /* Our component must be written */ 27877e102996Smaya assert(s.comp < s.def->num_components); 27887e102996Smaya assert(alu->dest.write_mask & (1u << s.comp)); 27897e102996Smaya 27907e102996Smaya assert(alu->src[alu_src_idx].src.is_ssa); 27917e102996Smaya out.def = alu->src[alu_src_idx].src.ssa; 27927e102996Smaya 27937e102996Smaya if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 27947e102996Smaya /* The ALU src is unsized so the source component follows the 27957e102996Smaya * destination component. 27967e102996Smaya */ 27977e102996Smaya out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 27987e102996Smaya } else { 27997e102996Smaya /* This is a sized source so all source components work together to 28007e102996Smaya * produce all the destination components. Since we need to return a 28017e102996Smaya * scalar, this only works if the source is a scalar. 
28027e102996Smaya */ 28037e102996Smaya assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 28047e102996Smaya out.comp = alu->src[alu_src_idx].swizzle[0]; 28057e102996Smaya } 28067e102996Smaya assert(out.comp < out.def->num_components); 28077e102996Smaya 28087e102996Smaya return out; 28097e102996Smaya} 28107e102996Smaya 28117ec681f3Smrgnir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s); 28127ec681f3Smrg 28137ec681f3Smrg/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */ 28147ec681f3Smrgstatic inline nir_ssa_scalar 28157ec681f3Smrgnir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel) 28167ec681f3Smrg{ 28177ec681f3Smrg nir_ssa_scalar s = { def, channel }; 28187ec681f3Smrg return nir_ssa_scalar_chase_movs(s); 28197ec681f3Smrg} 28207ec681f3Smrg 28217ec681f3Smrg 28227ec681f3Smrgtypedef struct { 28237ec681f3Smrg bool success; 28247ec681f3Smrg 28257ec681f3Smrg nir_variable *var; 28267ec681f3Smrg unsigned desc_set; 28277ec681f3Smrg unsigned binding; 28287ec681f3Smrg unsigned num_indices; 28297ec681f3Smrg nir_src indices[4]; 28307ec681f3Smrg bool read_first_invocation; 28317ec681f3Smrg} nir_binding; 28327ec681f3Smrg 28337ec681f3Smrgnir_binding nir_chase_binding(nir_src rsrc); 28347ec681f3Smrgnir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding); 28357ec681f3Smrg 28367ec681f3Smrg 283701e04c3fSmrg/* 283801e04c3fSmrg * Control flow 283901e04c3fSmrg * 284001e04c3fSmrg * Control flow consists of a tree of control flow nodes, which include 284101e04c3fSmrg * if-statements and loops. The leaves of the tree are basic blocks, lists of 284201e04c3fSmrg * instructions that always run start-to-finish. Each basic block also keeps 284301e04c3fSmrg * track of its successors (blocks which may run immediately after the current 284401e04c3fSmrg * block) and predecessors (blocks which could have run immediately before the 284501e04c3fSmrg * current block). 
 * Each function also has a start block and an end block which
 * all return statements point to (which is always empty). Together, all the
 * blocks with their predecessors and successors make up the control flow
 * graph (CFG) of the function. There are helpers that modify the tree of
 * control flow nodes while modifying the CFG appropriately; these should be
 * used instead of modifying the tree directly.
 */

typedef enum {
   nir_cf_node_block,
   nir_cf_node_if,
   nir_cf_node_loop,
   nir_cf_node_function
} nir_cf_node_type;

/** Common header embedded in every control-flow node. */
typedef struct nir_cf_node {
   struct exec_node node;
   nir_cf_node_type type;
   struct nir_cf_node *parent;
} nir_cf_node;

/** A basic block: a straight-line list of instructions in the CFG. */
typedef struct nir_block {
   nir_cf_node cf_node;

   struct exec_list instr_list; /** < list of nir_instr */

   /** generic block index; generated by nir_index_blocks */
   unsigned index;

   /*
    * Each block can only have up to 2 successors, so we put them in a simple
    * array - no need for anything more complicated.
    */
   struct nir_block *successors[2];

   /* Set of nir_block predecessors in the CFG */
   struct set *predecessors;

   /*
    * this node's immediate dominator in the dominance tree - set to NULL for
    * the start block.
    */
   struct nir_block *imm_dom;

   /* This node's children in the dominance tree */
   unsigned num_dom_children;
   struct nir_block **dom_children;

   /* Set of nir_blocks on the dominance frontier of this block */
   struct set *dom_frontier;

   /*
    * These two indices have the property that dom_{pre,post}_index for each
    * child of this block in the dominance tree will always be between
    * dom_pre_index and dom_post_index for this block, which makes testing if
    * a given block is dominated by another block an O(1) operation.
    */
   uint32_t dom_pre_index, dom_post_index;

   /**
    * Value just before the first nir_instr->index in the block, but after
    * end_ip that of any predecessor block.
    */
   uint32_t start_ip;
   /**
    * Value just after the last nir_instr->index in the block, but before the
    * start_ip of any successor block.
    */
   uint32_t end_ip;

   /* SSA def live in and out for this block; used for liveness analysis.
    * Indexed by ssa_def->index
    */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;
} nir_block;

/** Returns true if the block is reachable from the start block. */
static inline bool
nir_block_is_reachable(nir_block *b)
{
   /* See also nir_block_dominates */
   return b->dom_post_index != 0;
}

/** Returns the first instruction of the block (sentinel if empty). */
static inline nir_instr *
nir_block_first_instr(nir_block *block)
{
   struct exec_node *head = exec_list_get_head(&block->instr_list);
   return exec_node_data(nir_instr, head, node);
}

/** Returns the last instruction of the block (sentinel if empty). */
static inline nir_instr *
nir_block_last_instr(nir_block *block)
{
   struct exec_node *tail = exec_list_get_tail(&block->instr_list);
   return exec_node_data(nir_instr, tail, node);
}

/** Returns true if the block's final instruction is any kind of jump. */
static inline bool
nir_block_ends_in_jump(nir_block *block)
{
   return !exec_list_is_empty(&block->instr_list) &&
          nir_block_last_instr(block)->type == nir_instr_type_jump;
}

/** Returns true if the block ends in a return or halt jump. */
static inline bool
nir_block_ends_in_return_or_halt(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   if (instr->type != nir_instr_type_jump)
      return false;

   nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
   return jump_instr->type == nir_jump_return ||
          jump_instr->type == nir_jump_halt;
}

static inline bool
nir_block_ends_in_break(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   return instr->type == nir_instr_type_jump &&
          nir_instr_as_jump(instr)->type == nir_jump_break;
}

#define nir_foreach_instr(instr, block) \
   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_reverse(instr, block) \
   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_safe(instr, block) \
   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_reverse_safe(instr, block) \
   foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)

/**
 * Returns the last phi instruction of the block, or NULL if it has none.
 * Relies on all phis being clustered at the top of the block.
 */
static inline nir_phi_instr *
nir_block_last_phi_instr(nir_block *block)
{
   nir_phi_instr *last_phi = NULL;
   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         last_phi = nir_instr_as_phi(instr);
      else
         return last_phi;
   }
   return last_phi;
}

typedef enum {
   nir_selection_control_none = 0x0,
   nir_selection_control_flatten = 0x1,
   nir_selection_control_dont_flatten = 0x2,
} nir_selection_control;

/** An if-statement control-flow node. */
typedef struct nir_if {
   nir_cf_node cf_node;
   nir_src condition;
   nir_selection_control control;

   struct exec_list then_list; /** < list of nir_cf_node */
   struct exec_list else_list; /** < list of nir_cf_node */
} nir_if;

/** Describes one if-statement that can terminate a loop. */
typedef struct {
   nir_if *nif;

   /** Instruction that generates nif::condition. */
   nir_instr *conditional_instr;

   /** Block within ::nif that has the break instruction. */
   nir_block *break_block;

   /** Last block for the then- or else-path that does not contain the break. */
   nir_block *continue_from_block;

   /** True when ::break_block is in the else-path of ::nif. */
   bool continue_from_then;
   bool induction_rhs;

   /* This is true if the terminator's exact trip count is unknown. For
    * example:
    *
    *    for (int i = 0; i < imin(x, 4); i++)
    *       ...
    *
    * Here loop analysis would have set a max_trip_count of 4 however we don't
    * know for sure that this is the exact trip count.
    */
   bool exact_trip_count_unknown;

   struct list_head loop_terminator_link;
} nir_loop_terminator;

typedef struct {
   /* Induction variable. */
   nir_ssa_def *def;

   /* Init statement with only uniform. */
   nir_src *init_src;

   /* Update statement with only uniform. */
   nir_alu_src *update_src;
} nir_loop_induction_variable;

/** Analysis results for a loop, filled in by loop analysis. */
typedef struct {
   /* Estimated cost (in number of instructions) of the loop */
   unsigned instr_cost;

   /* Guessed trip count based on array indexing */
   unsigned guessed_trip_count;

   /* Maximum number of times the loop is run (if known) */
   unsigned max_trip_count;

   /* Do we know the exact number of times the loop will be run */
   bool exact_trip_count_known;

   /* Unroll the loop regardless of its size */
   bool force_unroll;

   /* Does the loop contain complex loop terminators, continues or other
    * complex behaviours? If this is true we can't rely on
    * loop_terminator_list to be complete or accurate.
    */
   bool complex_loop;

   nir_loop_terminator *limiting_terminator;

   /* A list of loop_terminators terminating this loop. */
   struct list_head loop_terminator_list;

   /* array of induction variables for this loop */
   nir_loop_induction_variable *induction_vars;
   unsigned num_induction_vars;
} nir_loop_info;

typedef enum {
   nir_loop_control_none = 0x0,
   nir_loop_control_unroll = 0x1,
   nir_loop_control_dont_unroll = 0x2,
} nir_loop_control;

/** A loop control-flow node. */
typedef struct {
   nir_cf_node cf_node;

   struct exec_list body; /** < list of nir_cf_node */

   nir_loop_info *info;
   nir_loop_control control;
   bool partially_unrolled;
   bool divergent;
} nir_loop;

/**
 * Various bits of metadata that may be created or required by
 * optimization and analysis passes
 */
typedef enum {
   nir_metadata_none = 0x0,

   /** Indicates that nir_block::index values are valid.
    *
    * The start block has index 0 and they increase through a natural walk of
    * the CFG.  nir_function_impl::num_blocks is the number of blocks and
    * every block index is in the range [0, nir_function_impl::num_blocks].
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_block_index = 0x1,

   /** Indicates that block dominance information is valid
    *
    * This includes:
    *
    *   - nir_block::num_dom_children
    *   - nir_block::dom_children
    *   - nir_block::dom_frontier
    *   - nir_block::dom_pre_index
    *   - nir_block::dom_post_index
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_dominance = 0x2,

   /** Indicates that SSA def data-flow liveness information is valid
    *
    * This includes:
    *
    *   - nir_block::live_in
    *   - nir_block::live_out
    *
    * A pass can preserve this metadata type if it never adds or removes any
    * SSA defs or uses of SSA defs (most passes shouldn't preserve this
    * metadata type).
    */
   nir_metadata_live_ssa_defs = 0x4,

   /** A dummy metadata value to track when a pass forgot to call
    * nir_metadata_preserve.
    *
    * A pass should always clear this value even if it doesn't make any
    * progress to indicate that it thought about preserving metadata.
    */
   nir_metadata_not_properly_reset = 0x8,

   /** Indicates that loop analysis information is valid.
    *
    * This includes everything pointed to by nir_loop::info.
    *
    * A pass can preserve this metadata type if it is guaranteed to not affect
    * any loop metadata.  However, since loop metadata includes things like
    * loop counts which depend on arithmetic in the loop, this is very hard to
    * determine.  Most passes shouldn't preserve this metadata type.
    */
   nir_metadata_loop_analysis = 0x10,

   /** Indicates that nir_instr::index values are valid.
    *
    * The start instruction has index 0 and they increase through a natural
    * walk of instructions in blocks in the CFG.  The indices my have holes
    * after passes such as DCE.
    *
    * A pass can preserve this metadata type if it never adds or moves any
    * instructions (most passes shouldn't preserve this metadata type), but
    * can preserve it if it only removes instructions.
    */
   nir_metadata_instr_index = 0x20,

   /** All metadata
    *
    * This includes all nir_metadata flags except not_properly_reset.  Passes
    * which do not change the shader in any way should call
    *
    *    nir_metadata_preserve(impl, nir_metadata_all);
    */
   nir_metadata_all = ~nir_metadata_not_properly_reset,
} nir_metadata;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)

/* The implementation (body) of a function: CFG, locals, registers, and
 * bookkeeping for SSA/register index allocation.
 */
typedef struct {
   nir_cf_node cf_node;

   /** pointer to the function of which this is an implementation */
   struct nir_function *function;

   struct exec_list body; /** < list of nir_cf_node */

   nir_block *end_block;

   /** list for all local variables in the function */
   struct exec_list locals;

   /** list of local registers in the function */
   struct exec_list registers;

   /** next available local register index */
   unsigned reg_alloc;

   /** next available SSA value index */
   unsigned ssa_alloc;

   /* total number of basic blocks, only valid when block_index_dirty = false */
   unsigned num_blocks;

   /** True if this nir_function_impl uses structured control-flow
    *
    * Structured nir_function_impls have different validation rules.
    */
   bool structured;

   nir_metadata valid_metadata;
} nir_function_impl;

/* Iterate the function-local (temporary) variables of an impl. */
#define nir_foreach_function_temp_variable(var, impl) \
   foreach_list_typed(nir_variable, var, node, &(impl)->locals)

#define nir_foreach_function_temp_variable_safe(var, impl) \
   foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)

/* First basic block of the function body (never NULL). */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_start_block(nir_function_impl *impl)
{
   return (nir_block *) impl->body.head_sentinel.next;
}

/* Last basic block of the function body (never NULL). */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_impl_last_block(nir_function_impl *impl)
{
   return (nir_block *) impl->body.tail_sentinel.prev;
}

/* Next control-flow node in the same CF list, or NULL at the end. */
static inline nir_cf_node *
nir_cf_node_next(nir_cf_node *node)
{
   struct exec_node *next = exec_node_get_next(&node->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_cf_node, next, node);
}

/* Previous control-flow node in the same CF list, or NULL at the start. */
static inline nir_cf_node *
nir_cf_node_prev(nir_cf_node *node)
{
   struct exec_node *prev = exec_node_get_prev(&node->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_cf_node, prev, node);
}

static inline bool
nir_cf_node_is_first(const nir_cf_node *node)
{
   return exec_node_is_head_sentinel(node->node.prev);
}

static inline bool
nir_cf_node_is_last(const nir_cf_node *node)
{
   return exec_node_is_tail_sentinel(node->node.next);
}

/* Checked downcasts from nir_cf_node to the concrete CF node types. */
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
                type, nir_cf_node_block)
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
                type, nir_cf_node_if)
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
                type, nir_cf_node_loop)
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
                nir_function_impl, cf_node, type, nir_cf_node_function)

static inline nir_block *
nir_if_first_then_block(nir_if *if_stmt)
{
   struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
}

static inline nir_block *
nir_if_last_then_block(nir_if *if_stmt)
{
   struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
}

static inline nir_block *
nir_if_first_else_block(nir_if *if_stmt)
{
   struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
}

static inline nir_block *
nir_if_last_else_block(nir_if *if_stmt)
{
   struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
}

static inline nir_block *
nir_loop_first_block(nir_loop *loop)
{
   struct exec_node *head = exec_list_get_head(&loop->body);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
}

static inline nir_block *
nir_loop_last_block(nir_loop *loop)
{
   struct exec_node *tail = exec_list_get_tail(&loop->body);
   return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
}

/**
 * Return true if this list of cf_nodes contains a single empty block.
 */
static inline bool
nir_cf_list_is_empty_block(struct exec_list *cf_list)
{
   if (exec_list_is_singular(cf_list)) {
      struct exec_node *head = exec_list_get_head(cf_list);
      nir_block *block =
         nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
      return exec_list_is_empty(&block->instr_list);
   }
   return false;
}

/* Shape of one function parameter: component count and bit size. */
typedef struct {
   uint8_t num_components;
   uint8_t bit_size;
} nir_parameter;

/* Format strings plus per-argument sizes gathered for printf support. */
typedef struct nir_printf_info {
   unsigned num_args;
   unsigned *arg_sizes;
   unsigned string_size;
   char *strings;
} nir_printf_info;

/* A function declaration (and optionally its implementation). */
typedef struct nir_function {
   struct exec_node node;

   const char *name;
   struct nir_shader *shader;

   unsigned num_params;
   nir_parameter *params;

   /** The implementation of this function.
    *
    * If the function is only declared and not implemented, this is NULL.
    */
   nir_function_impl *impl;

   bool is_entrypoint;
} nir_function;

/* Bitmask selecting which 64-bit integer opcodes a driver wants lowered. */
typedef enum {
   nir_lower_imul64 = (1 << 0),
   nir_lower_isign64 = (1 << 1),
   /** Lower all int64 modulus and division opcodes */
   nir_lower_divmod64 = (1 << 2),
   /** Lower all 64-bit umul_high and imul_high opcodes */
   nir_lower_imul_high64 = (1 << 3),
   nir_lower_mov64 = (1 << 4),
   nir_lower_icmp64 = (1 << 5),
   nir_lower_iadd64 = (1 << 6),
   nir_lower_iabs64 = (1 << 7),
   nir_lower_ineg64 = (1 << 8),
   nir_lower_logic64 = (1 << 9),
   nir_lower_minmax64 = (1 << 10),
   nir_lower_shift64 = (1 << 11),
   nir_lower_imul_2x32_64 = (1 << 12),
   nir_lower_extract64 = (1 << 13),
   nir_lower_ufind_msb64 = (1 << 14),
   nir_lower_bit_count64 = (1 << 15),
   nir_lower_subgroup_shuffle64 = (1 << 16),
   nir_lower_scan_reduce_bitwise64 = (1 << 17),
   nir_lower_scan_reduce_iadd64 = (1 << 18),
   nir_lower_vote_ieq64 = (1 << 19),
} nir_lower_int64_options;

/* Bitmask selecting which double-precision opcodes a driver wants lowered. */
typedef enum {
   nir_lower_drcp = (1 << 0),
   nir_lower_dsqrt = (1 << 1),
   nir_lower_drsq = (1 << 2),
   nir_lower_dtrunc = (1 << 3),
   nir_lower_dfloor = (1 << 4),
   nir_lower_dceil = (1 << 5),
   nir_lower_dfract = (1 << 6),
   nir_lower_dround_even = (1 << 7),
   nir_lower_dmod = (1 << 8),
   nir_lower_dsub = (1 << 9),
   nir_lower_ddiv = (1 << 10),
   nir_lower_fp64_full_software = (1 << 11),
} nir_lower_doubles_options;

/* Assumptions a driver can grant to divergence analysis. */
typedef enum {
   nir_divergence_single_prim_per_subgroup = (1 << 0),
   nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
   nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
   nir_divergence_view_index_uniform = (1 << 3),
   nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
   nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
} nir_divergence_options;

/* Which interpolation modes/locations may share a packed varying slot. */
typedef enum {
   nir_pack_varying_interp_mode_none = (1 << 0),
   nir_pack_varying_interp_mode_smooth = (1 << 1),
   nir_pack_varying_interp_mode_flat = (1 << 2),
   nir_pack_varying_interp_mode_noperspective = (1 << 3),
   nir_pack_varying_interp_loc_sample = (1 << 16),
   nir_pack_varying_interp_loc_centroid = (1 << 17),
   nir_pack_varying_interp_loc_center = (1 << 18),
} nir_pack_varying_options;

/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
 */
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);

/* Per-driver compiler configuration.  The driver keeps one static copy and
 * points every shader at it (see nir_shader::options).
 */
typedef struct nir_shader_compiler_options {
   bool lower_fdiv;
   bool lower_ffma16;
   bool lower_ffma32;
   bool lower_ffma64;
   bool fuse_ffma16;
   bool fuse_ffma32;
   bool fuse_ffma64;
   bool lower_flrp16;
   bool lower_flrp32;
   /** Lowers flrp when it does not support doubles */
   bool lower_flrp64;
   bool lower_fpow;
   bool lower_fsat;
   bool lower_fsqrt;
   bool lower_sincos;
   bool lower_fmod;
   /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
   bool lower_bitfield_extract;
   /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
   bool lower_bitfield_extract_to_shifts;
   /** Lowers bitfield_insert to bfi/bfm */
   bool lower_bitfield_insert;
   /** Lowers bitfield_insert to compares, and shifts. */
   bool lower_bitfield_insert_to_shifts;
   /** Lowers bitfield_insert to bfm/bitfield_select. */
   bool lower_bitfield_insert_to_bitfield_select;
   /** Lowers bitfield_reverse to shifts. */
   bool lower_bitfield_reverse;
   /** Lowers bit_count to shifts.
    */
   bool lower_bit_count;
   /** Lowers ifind_msb to compare and ufind_msb */
   bool lower_ifind_msb;
   /** Lowers ifind_msb and ufind_msb to reverse variants */
   bool lower_find_msb_to_reverse;
   /** Lowers find_lsb to ufind_msb and logic ops */
   bool lower_find_lsb;
   bool lower_uadd_carry;
   bool lower_usub_borrow;
   /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
   bool lower_mul_high;
   /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
   bool lower_fneg;
   /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
   bool lower_ineg;
   /** lowers fisnormal to alu ops. */
   bool lower_fisnormal;

   /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
   bool lower_scmp;

   /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
   bool lower_vector_cmp;

   /** enable rules to avoid bit ops */
   bool lower_bitops;

   /** enables rules to lower isign to imin+imax */
   bool lower_isign;

   /** enables rules to lower fsign to fsub and flt */
   bool lower_fsign;

   /** enables rules to lower iabs to ineg+imax */
   bool lower_iabs;

   /** enable rules that avoid generating umax from signed integer ops */
   bool lower_umax;

   /** enable rules that avoid generating umin from signed integer ops */
   bool lower_umin;

   /* lower fdph to fdot4 */
   bool lower_fdph;

   /** lower fdot to fmul and fsum/fadd. */
   bool lower_fdot;

   /* Does the native fdot instruction replicate its result for four
    * components?  If so, then opt_algebraic_late will turn all fdotN
    * instructions into fdotN_replicated instructions.
    */
   bool fdot_replicates;

   /** lowers ffloor to fsub+ffract: */
   bool lower_ffloor;

   /** lowers ffract to fsub+ffloor: */
   bool lower_ffract;

   /** lowers fceil to fneg+ffloor+fneg: */
   bool lower_fceil;

   bool lower_ftrunc;

   bool lower_ldexp;

   /* Pack/unpack opcode lowering flags. */
   bool lower_pack_half_2x16;
   bool lower_pack_unorm_2x16;
   bool lower_pack_snorm_2x16;
   bool lower_pack_unorm_4x8;
   bool lower_pack_snorm_4x8;
   bool lower_pack_64_2x32;
   bool lower_pack_64_4x16;
   bool lower_pack_32_2x16;
   bool lower_pack_64_2x32_split;
   bool lower_pack_32_2x16_split;
   bool lower_unpack_half_2x16;
   bool lower_unpack_unorm_2x16;
   bool lower_unpack_snorm_2x16;
   bool lower_unpack_unorm_4x8;
   bool lower_unpack_snorm_4x8;
   bool lower_unpack_64_2x32_split;
   bool lower_unpack_32_2x16_split;

   bool lower_pack_split;

   bool lower_extract_byte;
   bool lower_extract_word;
   bool lower_insert_byte;
   bool lower_insert_word;

   bool lower_all_io_to_temps;
   bool lower_all_io_to_elements;

   /* Indicates that the driver only has zero-based vertex id */
   bool vertex_id_zero_based;

   /**
    * If enabled, gl_BaseVertex will be lowered as:
    * is_indexed_draw (~0/0) & firstvertex
    */
   bool lower_base_vertex;

   /**
    * If enabled, gl_HelperInvocation will be lowered as:
    *
    *   !((1 << sample_id) & sample_mask_in))
    *
    * This depends on some possibly hw implementation details, which may
    * not be true for all hw.  In particular that the FS is only executed
    * for covered samples or for helper invocations.  So, do not blindly
    * enable this option.
    *
    * Note: See also issue #22 in ARB_shader_image_load_store
    */
   bool lower_helper_invocation;

   /**
    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
    *
    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
    */
   bool optimize_sample_mask_in;

   bool lower_cs_local_index_from_id;
   bool lower_cs_local_id_from_index;

   /* Prevents lowering global_invocation_id to be in terms of workgroup_id */
   bool has_cs_global_id;

   bool lower_device_index_to_zero;

   /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
    * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
    * is GL_LOWER_LEFT.
    */
   bool lower_wpos_pntc;

   /**
    * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
    * lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    *
    * \sa ::lower_hadd64
    */
   bool lower_hadd;

   /**
    * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
    * should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to only 64-bit
    * versions of these instructions.
    *
    * \sa ::lower_hadd
    */
   bool lower_hadd64;

   /**
    * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
    * arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    *
    * \sa ::lower_usub_sat64
    */
   bool lower_uadd_sat;

   /**
    * Set if only 64-bit nir_op_usub_sat should be lowered to simple
    * arithmetic.
    *
    * \sa ::lower_add_sat
    */
   bool lower_usub_sat64;

   /**
    * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
    * arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_iadd_sat;

   /**
    * Should IO be re-vectorized?  Some scalar ISAs still operate on vec4's
    * for IO purposes and would prefer loads/stores be vectorized.
    */
   bool vectorize_io;
   bool lower_to_scalar;
   nir_instr_filter_cb lower_to_scalar_filter;

   /**
    * Whether nir_opt_vectorize should only create 16-bit 2D vectors.
    */
   bool vectorize_vec2_16bit;

   /**
    * Should the linker unify inputs_read/outputs_written between adjacent
    * shader stages which are linked into a single program?
    */
   bool unify_interfaces;

   /**
    * Should nir_lower_io() create load_interpolated_input intrinsics?
    *
    * If not, it generates regular load_input intrinsics and interpolation
    * information must be inferred from the list of input nir_variables.
    */
   bool use_interpolated_input_intrinsics;


   /**
    * Whether nir_lower_io() will lower interpolateAt functions to
    * load_interpolated_input intrinsics.
    *
    * Unlike use_interpolated_input_intrinsics this will only lower these
    * functions and leave input load intrinsics untouched.
    */
   bool lower_interpolate_at;

   /* Lowers when 32x32->64 bit multiplication is not supported */
   bool lower_mul_2x32_64;

   /* Lowers when rotate instruction is not supported */
   bool lower_rotate;

   /** Backend supports ternary addition */
   bool has_iadd3;

   /**
    * Backend supports imul24, and would like to use it (when possible)
    * for address/offset calculation.  If true, driver should call
    * nir_lower_amul().  (If not set, amul will automatically be lowered
    * to imul.)
    */
   bool has_imul24;

   /** Backend supports umul24, if not set umul24 will automatically be lowered
    * to imul with masked inputs */
   bool has_umul24;

   /** Backend supports umad24, if not set umad24 will automatically be lowered
    * to imul with masked inputs and iadd */
   bool has_umad24;

   /* Backend supports fused comapre against zero and csel */
   bool has_fused_comp_and_csel;

   /** Backend supports fsub, if not set fsub will automatically be lowered to
    * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_fsub;

   /** Backend supports isub, if not set isub will automatically be lowered to
    * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_isub;

   /** Backend supports pack_32_4x8 or pack_32_4x8_split.
    */
   bool has_pack_32_4x8;

   /** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants
    * for rect texture lowering. */
   bool has_txs;

   /** Backend supports sdot_4x8 and udot_4x8 opcodes. */
   bool has_dot_4x8;

   /** Backend supports sudot_4x8 opcodes. */
   bool has_sudot_4x8;

   /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
   bool has_dot_2x16;

   /* Whether to generate only scoped_barrier intrinsics instead of the set of
    * memory and control barrier intrinsics based on GLSL.
    */
   bool use_scoped_barrier;

   /**
    * Is this the Intel vec4 backend?
    *
    * Used to inhibit algebraic optimizations that are known to be harmful on
    * the Intel vec4 backend.  This is generally applicable to any
    * optimization that might cause more immediate values to be used in
    * 3-source (e.g., ffma and flrp) instructions.
    */
   bool intel_vec4;

   /**
    * For most Intel GPUs, all ternary operations such as FMA and BFE cannot
    * have immediates, so two to three instructions may eventually be needed.
    */
   bool avoid_ternary_with_two_constants;

   /** Whether 8-bit ALU is supported. */
   bool support_8bit_alu;

   /** Whether 16-bit ALU is supported.
    */
   bool support_16bit_alu;

   unsigned max_unroll_iterations;
   unsigned max_unroll_iterations_aggressive;

   bool lower_uniforms_to_ubo;

   /* If the precision is ignored, backends that don't handle
    * different precisions when passing data between stages and use
    * vectorized IO can pack more varyings when linking. */
   bool linker_ignore_precision;

   /**
    * Specifies which type of indirectly accessed variables should force
    * loop unrolling.
    */
   nir_variable_mode force_indirect_unrolling;

   nir_lower_int64_options lower_int64_options;
   nir_lower_doubles_options lower_doubles_options;
   nir_divergence_options divergence_analysis_options;

   /**
    * Support pack varyings with different interpolation location
    * (center, centroid, sample) and mode (flat, noperspective, smooth)
    * into same slot.
    */
   nir_pack_varying_options pack_varying_options;
} nir_shader_compiler_options;

/* Top-level container for one shader: variables, functions, and metadata. */
typedef struct nir_shader {
   /** list of uniforms (nir_variable) */
   struct exec_list variables;

   /** Set of driver-specific options for the shader.
    *
    * The memory for the options is expected to be kept in a single static
    * copy by the driver.
    */
   const struct nir_shader_compiler_options *options;

   /** Various bits of compile-time information about a given shader */
   struct shader_info info;

   struct exec_list functions; /** < list of nir_function */

   struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. */

   /**
    * The size of the variable space for load_input_*, load_uniform_*, etc.
    * intrinsics.  This is in back-end specific units which is likely one of
    * bytes, dwords, or vec4s depending on context and back-end.
    */
   unsigned num_inputs, num_uniforms, num_outputs;

   /** Size in bytes of required scratch space */
   unsigned scratch_size;

   /** Constant data associated with this shader.
    *
    * Constant data is loaded through load_constant intrinsics (as compared to
    * the NIR load_const instructions which have the constant value inlined
    * into them).  This is usually generated by nir_opt_large_constants (so
    * shaders don't have to load_const into a temporary array when they want
    * to indirect on a const array).
    */
   void *constant_data;
   /** Size of the constant data associated with the shader, in bytes */
   unsigned constant_data_size;

   unsigned printf_info_count;
   nir_printf_info *printf_info;
} nir_shader;

/* Iterate all functions (declared or implemented) of a shader. */
#define nir_foreach_function(func, shader) \
   foreach_list_typed(nir_function, func, node, &(shader)->functions)

/* Returns the implementation of the shader's entrypoint function, or NULL
 * if no function is marked is_entrypoint.  The entrypoint must be
 * implemented and take no parameters (asserted below).
 *
 * NOTE(review): the assert at the top of the loop combined with the
 * debug-only break looks inverted relative to upstream Mesa (where the
 * assert sits inside the if and the break is release-only) — verify against
 * the canonical nir.h before relying on the uniqueness check.
 */
static inline nir_function_impl *
nir_shader_get_entrypoint(nir_shader *shader)
{
   nir_function *func = NULL;

   nir_foreach_function(function, shader) {
      assert(func == NULL);
      if (function->is_entrypoint) {
         func = function;
#ifndef NDEBUG
         break;
#endif
      }
   }

   if (!func)
      return NULL;

   assert(func->num_params == 0);
   assert(func->impl);
   return func->impl;
}

/* Bounds of one live interval, expressed in instruction indices. */
typedef struct nir_liveness_bounds {
   uint32_t start;
   uint32_t end;
} nir_liveness_bounds;

typedef struct nir_instr_liveness {
   /**
    * nir_instr->index for the start and end of a single live interval for SSA
    * defs.  ssa values last used by a nir_if condition will have an interval
    * ending at the first instruction after the last one before the if
    * condition.
    *
    * Indexed by def->index (impl->ssa_alloc elements).
38817ec681f3Smrg */ 38827ec681f3Smrg struct nir_liveness_bounds *defs; 38837ec681f3Smrg} nir_instr_liveness; 38847ec681f3Smrg 38857ec681f3Smrgnir_instr_liveness * 38867ec681f3Smrgnir_live_ssa_defs_per_instr(nir_function_impl *impl); 38877ec681f3Smrg 388801e04c3fSmrgnir_shader *nir_shader_create(void *mem_ctx, 388901e04c3fSmrg gl_shader_stage stage, 389001e04c3fSmrg const nir_shader_compiler_options *options, 389101e04c3fSmrg shader_info *si); 389201e04c3fSmrg 389301e04c3fSmrgnir_register *nir_local_reg_create(nir_function_impl *impl); 389401e04c3fSmrg 389501e04c3fSmrgvoid nir_reg_remove(nir_register *reg); 389601e04c3fSmrg 389701e04c3fSmrg/** Adds a variable to the appropriate list in nir_shader */ 389801e04c3fSmrgvoid nir_shader_add_variable(nir_shader *shader, nir_variable *var); 389901e04c3fSmrg 390001e04c3fSmrgstatic inline void 390101e04c3fSmrgnir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 390201e04c3fSmrg{ 39037e102996Smaya assert(var->data.mode == nir_var_function_temp); 390401e04c3fSmrg exec_list_push_tail(&impl->locals, &var->node); 390501e04c3fSmrg} 390601e04c3fSmrg 390701e04c3fSmrg/** creates a variable, sets a few defaults, and adds it to the list */ 390801e04c3fSmrgnir_variable *nir_variable_create(nir_shader *shader, 390901e04c3fSmrg nir_variable_mode mode, 391001e04c3fSmrg const struct glsl_type *type, 391101e04c3fSmrg const char *name); 391201e04c3fSmrg/** creates a local variable and adds it to the list */ 391301e04c3fSmrgnir_variable *nir_local_variable_create(nir_function_impl *impl, 391401e04c3fSmrg const struct glsl_type *type, 391501e04c3fSmrg const char *name); 391601e04c3fSmrg 39177ec681f3Smrgnir_variable *nir_find_variable_with_location(nir_shader *shader, 39187ec681f3Smrg nir_variable_mode mode, 39197ec681f3Smrg unsigned location); 39207ec681f3Smrg 39217ec681f3Smrgnir_variable *nir_find_variable_with_driver_location(nir_shader *shader, 39227ec681f3Smrg nir_variable_mode mode, 39237ec681f3Smrg unsigned location); 
39247ec681f3Smrg 39257ec681f3Smrgvoid nir_sort_variables_with_modes(nir_shader *shader, 39267ec681f3Smrg int (*compar)(const nir_variable *, 39277ec681f3Smrg const nir_variable *), 39287ec681f3Smrg nir_variable_mode modes); 39297ec681f3Smrg 393001e04c3fSmrg/** creates a function and adds it to the shader's list of functions */ 393101e04c3fSmrgnir_function *nir_function_create(nir_shader *shader, const char *name); 393201e04c3fSmrg 393301e04c3fSmrgnir_function_impl *nir_function_impl_create(nir_function *func); 393401e04c3fSmrg/** creates a function_impl that isn't tied to any particular function */ 393501e04c3fSmrgnir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 393601e04c3fSmrg 393701e04c3fSmrgnir_block *nir_block_create(nir_shader *shader); 393801e04c3fSmrgnir_if *nir_if_create(nir_shader *shader); 393901e04c3fSmrgnir_loop *nir_loop_create(nir_shader *shader); 394001e04c3fSmrg 394101e04c3fSmrgnir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 394201e04c3fSmrg 394301e04c3fSmrg/** requests that the given pieces of metadata be generated */ 394401e04c3fSmrgvoid nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 394501e04c3fSmrg/** dirties all but the preserved metadata */ 394601e04c3fSmrgvoid nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 39477ec681f3Smrg/** Preserves all metadata for the given shader */ 39487ec681f3Smrgvoid nir_shader_preserve_all_metadata(nir_shader *shader); 394901e04c3fSmrg 395001e04c3fSmrg/** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ 395101e04c3fSmrgnir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 395201e04c3fSmrg 395301e04c3fSmrgnir_deref_instr *nir_deref_instr_create(nir_shader *shader, 395401e04c3fSmrg nir_deref_type deref_type); 395501e04c3fSmrg 395601e04c3fSmrgnir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 395701e04c3fSmrg 395801e04c3fSmrgnir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 395901e04c3fSmrg unsigned num_components, 396001e04c3fSmrg unsigned bit_size); 396101e04c3fSmrg 396201e04c3fSmrgnir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 396301e04c3fSmrg nir_intrinsic_op op); 396401e04c3fSmrg 396501e04c3fSmrgnir_call_instr *nir_call_instr_create(nir_shader *shader, 396601e04c3fSmrg nir_function *callee); 396701e04c3fSmrg 39687ec681f3Smrg/** Creates a NIR texture instruction */ 396901e04c3fSmrgnir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 397001e04c3fSmrg 397101e04c3fSmrgnir_phi_instr *nir_phi_instr_create(nir_shader *shader); 39727ec681f3Smrgnir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src); 397301e04c3fSmrg 397401e04c3fSmrgnir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 397501e04c3fSmrg 397601e04c3fSmrgnir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 397701e04c3fSmrg unsigned num_components, 397801e04c3fSmrg unsigned bit_size); 397901e04c3fSmrg 398001e04c3fSmrgnir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 398101e04c3fSmrg 398201e04c3fSmrg/** 398301e04c3fSmrg * NIR Cursors and Instruction Insertion API 398401e04c3fSmrg * @{ 398501e04c3fSmrg * 398601e04c3fSmrg * A tiny struct representing a point to insert/extract instructions or 398701e04c3fSmrg * control flow nodes. Helps reduce the combinatorial explosion of possible 398801e04c3fSmrg * points to insert/extract. 
398901e04c3fSmrg * 399001e04c3fSmrg * \sa nir_control_flow.h 399101e04c3fSmrg */ 399201e04c3fSmrgtypedef enum { 399301e04c3fSmrg nir_cursor_before_block, 399401e04c3fSmrg nir_cursor_after_block, 399501e04c3fSmrg nir_cursor_before_instr, 399601e04c3fSmrg nir_cursor_after_instr, 399701e04c3fSmrg} nir_cursor_option; 399801e04c3fSmrg 399901e04c3fSmrgtypedef struct { 400001e04c3fSmrg nir_cursor_option option; 400101e04c3fSmrg union { 400201e04c3fSmrg nir_block *block; 400301e04c3fSmrg nir_instr *instr; 400401e04c3fSmrg }; 400501e04c3fSmrg} nir_cursor; 400601e04c3fSmrg 400701e04c3fSmrgstatic inline nir_block * 400801e04c3fSmrgnir_cursor_current_block(nir_cursor cursor) 400901e04c3fSmrg{ 401001e04c3fSmrg if (cursor.option == nir_cursor_before_instr || 401101e04c3fSmrg cursor.option == nir_cursor_after_instr) { 401201e04c3fSmrg return cursor.instr->block; 401301e04c3fSmrg } else { 401401e04c3fSmrg return cursor.block; 401501e04c3fSmrg } 401601e04c3fSmrg} 401701e04c3fSmrg 401801e04c3fSmrgbool nir_cursors_equal(nir_cursor a, nir_cursor b); 401901e04c3fSmrg 402001e04c3fSmrgstatic inline nir_cursor 402101e04c3fSmrgnir_before_block(nir_block *block) 402201e04c3fSmrg{ 402301e04c3fSmrg nir_cursor cursor; 402401e04c3fSmrg cursor.option = nir_cursor_before_block; 402501e04c3fSmrg cursor.block = block; 402601e04c3fSmrg return cursor; 402701e04c3fSmrg} 402801e04c3fSmrg 402901e04c3fSmrgstatic inline nir_cursor 403001e04c3fSmrgnir_after_block(nir_block *block) 403101e04c3fSmrg{ 403201e04c3fSmrg nir_cursor cursor; 403301e04c3fSmrg cursor.option = nir_cursor_after_block; 403401e04c3fSmrg cursor.block = block; 403501e04c3fSmrg return cursor; 403601e04c3fSmrg} 403701e04c3fSmrg 403801e04c3fSmrgstatic inline nir_cursor 403901e04c3fSmrgnir_before_instr(nir_instr *instr) 404001e04c3fSmrg{ 404101e04c3fSmrg nir_cursor cursor; 404201e04c3fSmrg cursor.option = nir_cursor_before_instr; 404301e04c3fSmrg cursor.instr = instr; 404401e04c3fSmrg return cursor; 404501e04c3fSmrg} 404601e04c3fSmrg 
/** Builds a cursor pointing immediately after \p instr. */
static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_instr;
   cursor.instr = instr;
   return cursor;
}

/** Cursor at the top of \p block but after any phi instructions. */
static inline nir_cursor
nir_before_block_after_phis(nir_block *block)
{
   nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
   if (last_phi)
      return nir_after_instr(&last_phi->instr);
   else
      return nir_before_block(block);
}

/** Cursor at the end of \p block but before a trailing jump, if any.
 *
 * Use this when inserting instructions that must execute before control
 * leaves the block.
 */
static inline nir_cursor
nir_after_block_before_jump(nir_block *block)
{
   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr && last_instr->type == nir_instr_type_jump) {
      return nir_before_instr(last_instr);
   } else {
      return nir_after_block(block);
   }
}

/** Cursor at the point where the value consumed by \p src is computed.
 *
 * For an if-condition use, that is the end of the block preceding the if;
 * for a phi source, the end (before any jump) of the corresponding
 * predecessor block; otherwise just before the consuming instruction.
 */
static inline nir_cursor
nir_before_src(nir_src *src, bool is_if_condition)
{
   if (is_if_condition) {
      nir_block *prev_block =
         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
      assert(!nir_block_ends_in_jump(prev_block));
      return nir_after_block(prev_block);
   } else if (src->parent_instr->type == nir_instr_type_phi) {
#ifndef NDEBUG
      /* Debug-only: verify that src really is one of the phi's sources. */
      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
      bool found = false;
      nir_foreach_phi_src(phi_src, cond_phi) {
         if (phi_src->src.ssa == src->ssa) {
            found = true;
            break;
         }
      }
      assert(found);
#endif
      /* The LIST_ENTRY macro is a generic container-of macro, it just happens
       * to have a more specific name.
       */
      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
      return nir_after_block_before_jump(phi_src->pred);
   } else {
      return nir_before_instr(src->parent_instr);
   }
}

/** Cursor just before CF node \p node in source-code order. */
static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_before_block(nir_cf_node_as_block(node));

   return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
}

/** Cursor just after CF node \p node in source-code order. */
static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
}

/** Cursor after the phi instructions at the top of \p block. */
static inline nir_cursor
nir_after_phis(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         return nir_before_instr(instr);
   }
   return nir_after_block(block);
}

/** Cursor after \p instr, skipping past the block's phis if it is one. */
static inline nir_cursor
nir_after_instr_and_phis(nir_instr *instr)
{
   if (instr->type == nir_instr_type_phi)
      return nir_after_phis(instr->block);
   else
      return nir_after_instr(instr);
}

/** Cursor after CF node \p node and past any phis of the following block. */
static inline nir_cursor
nir_after_cf_node_and_phis(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));

   return nir_after_phis(block);
}

/** Cursor before the first CF node of an exec_list of CF nodes. */
static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *first_node = exec_node_data(nir_cf_node,
                                            exec_list_get_head(cf_list), node);
   return nir_before_cf_node(first_node);
}

/** Cursor after the last CF node of an exec_list of CF nodes. */
static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *last_node = exec_node_data(nir_cf_node,
                                           exec_list_get_tail(cf_list), node);
   return nir_after_cf_node(last_node);
}

/**
 * Insert a NIR instruction at the given cursor.
 *
 * Note: This does not update the cursor.
 */
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);

bool nir_instr_move(nir_cursor cursor, nir_instr *instr);

static inline void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
{
   nir_instr_insert(nir_before_instr(instr), before);
}

static inline void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
   nir_instr_insert(nir_after_instr(instr), after);
}

static inline void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
   nir_instr_insert(nir_before_block(block), before);
}

static inline void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
   nir_instr_insert(nir_after_block(block), after);
}

static inline void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_node(node), before);
}

static inline void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_node(node), after);
}

static inline void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_list(list), before);
}

static inline void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_list(list), after);
}

void nir_instr_remove_v(nir_instr *instr);
void nir_instr_free(nir_instr *instr);
void nir_instr_free_list(struct exec_list *list);

/** Removes \p instr and returns a cursor at the position it occupied,
 * suitable for inserting a replacement.
 */
static inline nir_cursor
nir_instr_remove(nir_instr *instr)
{
   nir_cursor cursor;
   nir_instr *prev = nir_instr_prev(instr);
   if (prev) {
      cursor = nir_after_instr(prev);
   } else {
      cursor = nir_before_block(instr->block);
   }
   nir_instr_remove_v(instr);
   return cursor;
}

nir_cursor nir_instr_free_and_dce(nir_instr *instr);

/** @} */

nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);

/* Callback types for walking an instruction's defs, dests and srcs. */
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
                         void *state);
static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
                                       nir_foreach_src_cb cb,
                                       void *state);

nir_const_value *nir_src_as_const_value(nir_src src);

/* Defines nir_src_as_<name>(): downcast a src to the instruction type that
 * produced it, or NULL if the src is not SSA or came from another type.
 */
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
static inline c_type * \
nir_src_as_ ## name (nir_src src) \
{ \
    return src.is_ssa && src.ssa->parent_instr->type == type_enum \
           ? cast_macro(src.ssa->parent_instr) : NULL; \
}

NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
            nir_instr_type_intrinsic, nir_instr_as_intrinsic)
NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)

bool nir_src_is_dynamically_uniform(nir_src src);
bool nir_srcs_equal(nir_src src1, nir_src src2);
bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);

/** Repoints an SSA src of \p instr at \p new_ssa, updating use lists. */
static inline void
nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
                          nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_instr == instr);
   assert(src->is_ssa && src->ssa);
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->uses);
}

void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);

/** Repoints an if-condition src at \p new_ssa, updating if-use lists. */
static inline void
nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt,
                             nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_if == if_stmt);
   assert(src->is_ssa && src->ssa);
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->if_uses);
}

void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
                            nir_dest new_dest);

void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                       unsigned num_components, unsigned bit_size,
                       const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                      unsigned num_components, unsigned bit_size);
/** Initializes an SSA dest sized after a vector-or-scalar GLSL type. */
static inline void
nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
                           const struct glsl_type *type,
                           const char *name)
{
   assert(glsl_type_is_vector_or_scalar(type));
   nir_ssa_dest_init(instr, dest, glsl_get_components(type),
                     glsl_get_bit_size(type), name);
}
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
                                    nir_instr *after_me);

nir_component_mask_t nir_src_components_read(const nir_src *src);
nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);

/** True when \p ssa has no instruction uses and no if-condition uses. */
static inline bool
nir_ssa_def_is_unused(nir_ssa_def *ssa)
{
   return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses);
}


/** Returns the next block, disregarding structure
 *
 * The ordering is deterministic but has no guarantees beyond that.  In
 * particular, it is not guaranteed to be dominance-preserving.
 */
nir_block *nir_block_unstructured_next(nir_block *block);
nir_block *nir_unstructured_start_block(nir_function_impl *impl);

#define nir_foreach_block_unstructured(block, impl) \
   for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
        block = nir_block_unstructured_next(block))

#define nir_foreach_block_unstructured_safe(block, impl) \
   for (nir_block *block = nir_unstructured_start_block(impl), \
        *next = nir_block_unstructured_next(block); \
        block != NULL; \
        block = next, next = nir_block_unstructured_next(block))

/*
 * finds the next basic block in source-code order, returns NULL if there is
 * none
 */

nir_block *nir_block_cf_tree_next(nir_block *block);

/* Performs the opposite of nir_block_cf_tree_next() */

nir_block *nir_block_cf_tree_prev(nir_block *block);

/* Gets the first block in a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);

/* Gets the last block in a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);

/* Gets the next block after a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);

/* Macros for loops that visit blocks in source-code order.  The _safe
 * variants pre-fetch the successor so the current block may be removed.
 */

#define nir_foreach_block(block, impl) \
   for (nir_block *block = nir_start_block(impl); block != NULL; \
        block = nir_block_cf_tree_next(block))

#define nir_foreach_block_safe(block, impl) \
   for (nir_block *block = nir_start_block(impl), \
        *next = nir_block_cf_tree_next(block); \
        block != NULL; \
        block = next, next = nir_block_cf_tree_next(block))

#define nir_foreach_block_reverse(block, impl) \
   for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
        block = nir_block_cf_tree_prev(block))

#define nir_foreach_block_reverse_safe(block, impl) \
   for (nir_block *block = nir_impl_last_block(impl), \
        *prev = nir_block_cf_tree_prev(block); \
        block != NULL; \
        block = prev, prev = nir_block_cf_tree_prev(block))

#define nir_foreach_block_in_cf_node(block, node) \
   for (nir_block *block = nir_cf_node_cf_tree_first(node); \
        block != nir_cf_node_cf_tree_next(node); \
        block = nir_block_cf_tree_next(block))

/* If the following CF node is an if, this function returns that if.
 * Otherwise, it returns NULL.
 */
nir_if *nir_block_get_following_if(nir_block *block);

nir_loop *nir_block_get_following_loop(nir_block *block);

nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);

void nir_index_local_regs(nir_function_impl *impl);
void nir_index_ssa_defs(nir_function_impl *impl);
unsigned nir_index_instrs(nir_function_impl *impl);

void nir_index_blocks(nir_function_impl *impl);

unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
unsigned nir_function_impl_index_vars(nir_function_impl *impl);

void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
void nir_print_instr(const nir_instr *instr, FILE *fp);
void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)

char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
44387ec681f3Smrg 44397ec681f3Smrg/** Shallow clone of a single instruction. */ 44407ec681f3Smrgnir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 44417ec681f3Smrg 44427ec681f3Smrg/** Shallow clone of a single ALU instruction. */ 44437ec681f3Smrgnir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig); 444401e04c3fSmrg 444501e04c3fSmrgnir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 44467e102996Smayanir_function_impl *nir_function_impl_clone(nir_shader *shader, 44477e102996Smaya const nir_function_impl *fi); 444801e04c3fSmrgnir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 444901e04c3fSmrgnir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 445001e04c3fSmrg 44517ec681f3Smrgvoid nir_shader_replace(nir_shader *dest, nir_shader *src); 44527ec681f3Smrg 44537ec681f3Smrgvoid nir_shader_serialize_deserialize(nir_shader *s); 445401e04c3fSmrg 445501e04c3fSmrg#ifndef NDEBUG 445601e04c3fSmrgvoid nir_validate_shader(nir_shader *shader, const char *when); 44577ec681f3Smrgvoid nir_validate_ssa_dominance(nir_shader *shader, const char *when); 445801e04c3fSmrgvoid nir_metadata_set_validation_flag(nir_shader *shader); 445901e04c3fSmrgvoid nir_metadata_check_validation_flag(nir_shader *shader); 446001e04c3fSmrg 44617e102996Smayastatic inline bool 44627e102996Smayashould_skip_nir(const char *name) 44637e102996Smaya{ 44647e102996Smaya static const char *list = NULL; 44657e102996Smaya if (!list) { 44667e102996Smaya /* Comma separated list of names to skip. 
*/ 44677e102996Smaya list = getenv("NIR_SKIP"); 44687e102996Smaya if (!list) 44697e102996Smaya list = ""; 44707e102996Smaya } 44717e102996Smaya 44727e102996Smaya if (!list[0]) 44737e102996Smaya return false; 44747e102996Smaya 44757e102996Smaya return comma_separated_list_contains(list, name); 44767e102996Smaya} 44777e102996Smaya 447801e04c3fSmrgstatic inline bool 447901e04c3fSmrgshould_clone_nir(void) 448001e04c3fSmrg{ 448101e04c3fSmrg static int should_clone = -1; 448201e04c3fSmrg if (should_clone < 0) 448301e04c3fSmrg should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 448401e04c3fSmrg 448501e04c3fSmrg return should_clone; 448601e04c3fSmrg} 448701e04c3fSmrg 448801e04c3fSmrgstatic inline bool 448901e04c3fSmrgshould_serialize_deserialize_nir(void) 449001e04c3fSmrg{ 449101e04c3fSmrg static int test_serialize = -1; 449201e04c3fSmrg if (test_serialize < 0) 449301e04c3fSmrg test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false); 449401e04c3fSmrg 449501e04c3fSmrg return test_serialize; 449601e04c3fSmrg} 449701e04c3fSmrg 449801e04c3fSmrgstatic inline bool 44997ec681f3Smrgshould_print_nir(nir_shader *shader) 450001e04c3fSmrg{ 450101e04c3fSmrg static int should_print = -1; 450201e04c3fSmrg if (should_print < 0) 45037ec681f3Smrg should_print = env_var_as_unsigned("NIR_PRINT", 0); 45047ec681f3Smrg 45057ec681f3Smrg if (should_print == 1) 45067ec681f3Smrg return !shader->info.internal; 450701e04c3fSmrg 450801e04c3fSmrg return should_print; 450901e04c3fSmrg} 451001e04c3fSmrg#else 451101e04c3fSmrgstatic inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } 45127ec681f3Smrgstatic inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; } 451301e04c3fSmrgstatic inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 451401e04c3fSmrgstatic inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 
static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
static inline bool should_clone_nir(void) { return false; }
static inline bool should_serialize_deserialize_nir(void) { return false; }
static inline bool should_print_nir(nir_shader *shader) { return false; }
#endif /* NDEBUG */

/* Common wrapper used by NIR_PASS/NIR_PASS_V: optionally skips the pass
 * entirely (NIR_SKIP list), then runs `do_pass`, then optionally round-trips
 * the shader through clone (NIR_TEST_CLONE) and serialize/deserialize
 * (NIR_TEST_SERIALIZE) to shake out bugs in those paths.  `break` exits the
 * enclosing do/while(0), so a skipped pass performs no work at all.
 */
#define _PASS(pass, nir, do_pass) do {                               \
   if (should_skip_nir(#pass)) {                                     \
      printf("skipping %s\n", #pass);                                \
      break;                                                         \
   }                                                                 \
   do_pass                                                           \
   if (should_clone_nir()) {                                         \
      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
      nir_shader_replace(nir, clone);                                \
   }                                                                 \
   if (should_serialize_deserialize_nir()) {                         \
      nir_shader_serialize_deserialize(nir);                         \
   }                                                                 \
} while (0)

/* Runs a boolean-returning pass.  If the pass reports changes, the shader is
 * validated, `progress` is set to true, and the metadata validation flag is
 * checked (passes must declare which analyses they preserve).
 */
#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \
   nir_metadata_set_validation_flag(nir);                   \
   if (should_print_nir(nir))                               \
      printf("%s\n", #pass);                                \
   if (pass(nir, ##__VA_ARGS__)) {                          \
      nir_validate_shader(nir, "after " #pass);             \
      progress = true;                                      \
      if (should_print_nir(nir))                            \
         nir_print_shader(nir, stdout);                     \
      nir_metadata_check_validation_flag(nir);              \
   }                                                        \
)

/* Runs a pass unconditionally, ignoring any return value, and validates the
 * shader afterwards.  Use when no progress tracking is needed.
 */
#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \
   if (should_print_nir(nir))                       \
      printf("%s\n", #pass);                        \
   pass(nir, ##__VA_ARGS__);                        \
   nir_validate_shader(nir, "after " #pass);        \
   if (should_print_nir(nir))                       \
      nir_print_shader(nir, stdout);                \
)

/* True if pass `name` is listed in the NIR_SKIP environment variable. */
#define NIR_SKIP(name) should_skip_nir(#name)

/** An instruction filtering callback with writemask
 *
 * Returns true if the instruction should be processed with the associated
 * writemask and false otherwise.
 */
typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
                                              unsigned writemask, const void *);

/** A simple instruction lowering callback
 *
 * Many instruction lowering passes can be written as a simple function which
 * takes an instruction as its input and returns a sequence of instructions
 * that implement the consumed instruction.  This function type represents
 * such a lowering function.  When called, a function with this prototype
 * should either return NULL indicating that no lowering needs to be done or
 * emit a sequence of instructions using the provided builder (whose cursor
 * will already be placed after the instruction to be lowered) and return the
 * resulting nir_ssa_def.
 */
typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                           nir_instr *, void *);

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
45867ec681f3Smrg */ 45877ec681f3Smrg#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1) 45887ec681f3Smrg 45897ec681f3Smrg/** 45907ec681f3Smrg * Special return value for nir_lower_instr_cb when some progress occurred 45917ec681f3Smrg * that should remove the current instruction that doesn't create an output 45927ec681f3Smrg * (like a store) 45937ec681f3Smrg */ 45947ec681f3Smrg 45957ec681f3Smrg#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2) 45967ec681f3Smrg 45977ec681f3Smrg/** Iterate over all the instructions in a nir_function_impl and lower them 45987ec681f3Smrg * using the provided callbacks 45997ec681f3Smrg * 46007ec681f3Smrg * This function implements the guts of a standard lowering pass for you. It 46017ec681f3Smrg * iterates over all of the instructions in a nir_function_impl and calls the 46027ec681f3Smrg * filter callback on each one. If the filter callback returns true, it then 46037ec681f3Smrg * calls the lowering call back on the instruction. (Splitting it this way 46047ec681f3Smrg * allows us to avoid some save/restore work for instructions we know won't be 46057ec681f3Smrg * lowered.) If the instruction is dead after the lowering is complete, it 46067ec681f3Smrg * will be removed. If new instructions are added, the lowering callback will 46077ec681f3Smrg * also be called on them in case multiple lowerings are required. 46087ec681f3Smrg * 46097ec681f3Smrg * If the callback indicates that the original instruction is replaced (either 46107ec681f3Smrg * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the 46117ec681f3Smrg * instruction is removed along with any now-dead SSA defs it used. 46127ec681f3Smrg * 46137ec681f3Smrg * The metadata for the nir_function_impl will also be updated. If any blocks 46147ec681f3Smrg * are added (they cannot be removed), dominance and block indices will be 46157ec681f3Smrg * invalidated. 
46167ec681f3Smrg */ 46177ec681f3Smrgbool nir_function_impl_lower_instructions(nir_function_impl *impl, 46187ec681f3Smrg nir_instr_filter_cb filter, 46197ec681f3Smrg nir_lower_instr_cb lower, 46207ec681f3Smrg void *cb_data); 46217ec681f3Smrgbool nir_shader_lower_instructions(nir_shader *shader, 46227ec681f3Smrg nir_instr_filter_cb filter, 46237ec681f3Smrg nir_lower_instr_cb lower, 46247ec681f3Smrg void *cb_data); 46257ec681f3Smrg 462601e04c3fSmrgvoid nir_calc_dominance_impl(nir_function_impl *impl); 462701e04c3fSmrgvoid nir_calc_dominance(nir_shader *shader); 462801e04c3fSmrg 462901e04c3fSmrgnir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 463001e04c3fSmrgbool nir_block_dominates(nir_block *parent, nir_block *child); 46317e102996Smayabool nir_block_is_unreachable(nir_block *block); 463201e04c3fSmrg 463301e04c3fSmrgvoid nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 463401e04c3fSmrgvoid nir_dump_dom_tree(nir_shader *shader, FILE *fp); 463501e04c3fSmrg 463601e04c3fSmrgvoid nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 463701e04c3fSmrgvoid nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 463801e04c3fSmrg 463901e04c3fSmrgvoid nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 464001e04c3fSmrgvoid nir_dump_cfg(nir_shader *shader, FILE *fp); 464101e04c3fSmrg 46427ec681f3Smrgvoid nir_gs_count_vertices_and_primitives(const nir_shader *shader, 46437ec681f3Smrg int *out_vtxcnt, 46447ec681f3Smrg int *out_prmcnt, 46457ec681f3Smrg unsigned num_streams); 464601e04c3fSmrg 464701e04c3fSmrgbool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 464801e04c3fSmrgbool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 464901e04c3fSmrgbool nir_split_var_copies(nir_shader *shader); 465001e04c3fSmrgbool nir_split_per_member_structs(nir_shader *shader); 465101e04c3fSmrgbool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 465201e04c3fSmrg 465301e04c3fSmrgbool 
nir_lower_returns_impl(nir_function_impl *impl); 465401e04c3fSmrgbool nir_lower_returns(nir_shader *shader); 465501e04c3fSmrg 46567e102996Smayavoid nir_inline_function_impl(struct nir_builder *b, 46577e102996Smaya const nir_function_impl *impl, 46587ec681f3Smrg nir_ssa_def **params, 46597ec681f3Smrg struct hash_table *shader_var_remap); 466001e04c3fSmrgbool nir_inline_functions(nir_shader *shader); 466101e04c3fSmrg 46627ec681f3Smrgvoid nir_find_inlinable_uniforms(nir_shader *shader); 46637ec681f3Smrgvoid nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms, 46647ec681f3Smrg const uint32_t *uniform_values, 46657ec681f3Smrg const uint16_t *uniform_dw_offsets); 46667ec681f3Smrg 46677ec681f3Smrgbool nir_propagate_invariant(nir_shader *shader, bool invariant_prim); 466801e04c3fSmrg 466901e04c3fSmrgvoid nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 467001e04c3fSmrgvoid nir_lower_deref_copy_instr(struct nir_builder *b, 467101e04c3fSmrg nir_intrinsic_instr *copy); 467201e04c3fSmrgbool nir_lower_var_copies(nir_shader *shader); 467301e04c3fSmrg 46747ec681f3Smrgbool nir_opt_memcpy(nir_shader *shader); 46757ec681f3Smrgbool nir_lower_memcpy(nir_shader *shader); 46767ec681f3Smrg 467701e04c3fSmrgvoid nir_fixup_deref_modes(nir_shader *shader); 467801e04c3fSmrg 467901e04c3fSmrgbool nir_lower_global_vars_to_local(nir_shader *shader); 468001e04c3fSmrg 46817e102996Smayatypedef enum { 46827e102996Smaya nir_lower_direct_array_deref_of_vec_load = (1 << 0), 46837e102996Smaya nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 46847e102996Smaya nir_lower_direct_array_deref_of_vec_store = (1 << 2), 46857e102996Smaya nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 46867e102996Smaya} nir_lower_array_deref_of_vec_options; 46877e102996Smaya 46887e102996Smayabool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 46897e102996Smaya nir_lower_array_deref_of_vec_options options); 46907e102996Smaya 46917ec681f3Smrgbool 
nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes, 46927ec681f3Smrg uint32_t max_lower_array_len); 46937ec681f3Smrg 46947ec681f3Smrgbool nir_lower_indirect_builtin_uniform_derefs(nir_shader *shader); 469501e04c3fSmrg 469601e04c3fSmrgbool nir_lower_locals_to_regs(nir_shader *shader); 469701e04c3fSmrg 469801e04c3fSmrgvoid nir_lower_io_to_temporaries(nir_shader *shader, 469901e04c3fSmrg nir_function_impl *entrypoint, 470001e04c3fSmrg bool outputs, bool inputs); 470101e04c3fSmrg 47027e102996Smayabool nir_lower_vars_to_scratch(nir_shader *shader, 47037e102996Smaya nir_variable_mode modes, 47047e102996Smaya int size_threshold, 47057e102996Smaya glsl_type_size_align_func size_align); 47067e102996Smaya 47077ec681f3Smrgvoid nir_lower_clip_halfz(nir_shader *shader); 47087ec681f3Smrg 470901e04c3fSmrgvoid nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 471001e04c3fSmrg 47117e102996Smayavoid nir_gather_ssa_types(nir_function_impl *impl, 47127e102996Smaya BITSET_WORD *float_types, 47137e102996Smaya BITSET_WORD *int_types); 47147e102996Smaya 47157ec681f3Smrgvoid nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 47167ec681f3Smrg unsigned *size, 47177e102996Smaya int (*type_size)(const struct glsl_type *, bool)); 471801e04c3fSmrg 471901e04c3fSmrg/* Some helpers to do very simple linking */ 472001e04c3fSmrgbool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 47217ec681f3Smrgbool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, 472201e04c3fSmrg uint64_t *used_by_other_stage, 472301e04c3fSmrg uint64_t *used_by_other_stage_patches); 472401e04c3fSmrgvoid nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 472501e04c3fSmrg bool default_to_smooth_interp); 472601e04c3fSmrgvoid nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 47277e102996Smayabool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 47287ec681f3Smrgvoid 
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer); 47297ec681f3Smrg 47307ec681f3Smrgbool nir_lower_amul(nir_shader *shader, 47317ec681f3Smrg int (*type_size)(const struct glsl_type *, bool)); 47327ec681f3Smrg 47337ec681f3Smrgbool nir_lower_ubo_vec4(nir_shader *shader); 47347ec681f3Smrg 47357ec681f3Smrgvoid nir_assign_io_var_locations(nir_shader *shader, 47367ec681f3Smrg nir_variable_mode mode, 47377ec681f3Smrg unsigned *size, 47387ec681f3Smrg gl_shader_stage stage); 47397ec681f3Smrg 47407ec681f3Smrgtypedef struct { 47417ec681f3Smrg uint8_t num_linked_io_vars; 47427ec681f3Smrg uint8_t num_linked_patch_io_vars; 47437ec681f3Smrg} nir_linked_io_var_info; 47447ec681f3Smrg 47457ec681f3Smrgnir_linked_io_var_info 47467ec681f3Smrgnir_assign_linked_io_var_locations(nir_shader *producer, 47477ec681f3Smrg nir_shader *consumer); 474801e04c3fSmrg 474901e04c3fSmrgtypedef enum { 47507ec681f3Smrg /* If set, this causes all 64-bit IO operations to be lowered on-the-fly 47517ec681f3Smrg * to 32-bit operations. This is only valid for nir_var_shader_in/out 47527ec681f3Smrg * modes. 47537ec681f3Smrg */ 47547ec681f3Smrg nir_lower_io_lower_64bit_to_32 = (1 << 0), 47557ec681f3Smrg 475601e04c3fSmrg /* If set, this forces all non-flat fragment shader inputs to be 475701e04c3fSmrg * interpolated as if with the "sample" qualifier. This requires 475801e04c3fSmrg * nir_shader_compiler_options::use_interpolated_input_intrinsics. 
475901e04c3fSmrg */ 476001e04c3fSmrg nir_lower_io_force_sample_interpolation = (1 << 1), 476101e04c3fSmrg} nir_lower_io_options; 476201e04c3fSmrgbool nir_lower_io(nir_shader *shader, 476301e04c3fSmrg nir_variable_mode modes, 47647e102996Smaya int (*type_size)(const struct glsl_type *, bool), 476501e04c3fSmrg nir_lower_io_options); 47667e102996Smaya 47677ec681f3Smrgbool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes); 47687ec681f3Smrg 47697ec681f3Smrgbool 47707ec681f3Smrgnir_lower_vars_to_explicit_types(nir_shader *shader, 47717ec681f3Smrg nir_variable_mode modes, 47727ec681f3Smrg glsl_type_size_align_func type_info); 47737ec681f3Smrgvoid 47747ec681f3Smrgnir_gather_explicit_io_initializers(nir_shader *shader, 47757ec681f3Smrg void *dst, size_t dst_size, 47767ec681f3Smrg nir_variable_mode mode); 47777ec681f3Smrg 47787ec681f3Smrgbool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes); 47797ec681f3Smrg 47807e102996Smayatypedef enum { 47817e102996Smaya /** 47827e102996Smaya * An address format which is a simple 32-bit global GPU address. 47837e102996Smaya */ 47847e102996Smaya nir_address_format_32bit_global, 47857e102996Smaya 47867e102996Smaya /** 47877e102996Smaya * An address format which is a simple 64-bit global GPU address. 47887e102996Smaya */ 47897e102996Smaya nir_address_format_64bit_global, 47907e102996Smaya 47917ec681f3Smrg /** 47927ec681f3Smrg * An address format which is a 64-bit global base address and a 32-bit 47937ec681f3Smrg * offset. 47947ec681f3Smrg * 47957ec681f3Smrg * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 47967ec681f3Smrg * address stored with the low bits in .x and high bits in .y, .z is 47977ec681f3Smrg * undefined, and .w is an offset. This is intended to match 47987ec681f3Smrg * 64bit_bounded_global but without the bounds checking. 
47997ec681f3Smrg */ 48007ec681f3Smrg nir_address_format_64bit_global_32bit_offset, 48017ec681f3Smrg 48027e102996Smaya /** 48037e102996Smaya * An address format which is a bounds-checked 64-bit global GPU address. 48047e102996Smaya * 48057e102996Smaya * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 48067e102996Smaya * address stored with the low bits in .x and high bits in .y, .z is a 48077e102996Smaya * size, and .w is an offset. When the final I/O operation is lowered, .w 48087e102996Smaya * is checked against .z and the operation is predicated on the result. 48097e102996Smaya */ 48107e102996Smaya nir_address_format_64bit_bounded_global, 48117e102996Smaya 48127e102996Smaya /** 48137e102996Smaya * An address format which is comprised of a vec2 where the first 48147e102996Smaya * component is a buffer index and the second is an offset. 48157e102996Smaya */ 48167e102996Smaya nir_address_format_32bit_index_offset, 48177ec681f3Smrg 48187ec681f3Smrg /** 48197ec681f3Smrg * An address format which is a 64-bit value, where the high 32 bits 48207ec681f3Smrg * are a buffer index, and the low 32 bits are an offset. 48217ec681f3Smrg */ 48227ec681f3Smrg nir_address_format_32bit_index_offset_pack64, 48237ec681f3Smrg 48247ec681f3Smrg /** 48257ec681f3Smrg * An address format which is comprised of a vec3 where the first two 48267ec681f3Smrg * components specify the buffer and the third is an offset. 48277ec681f3Smrg */ 48287ec681f3Smrg nir_address_format_vec2_index_32bit_offset, 48297ec681f3Smrg 48307ec681f3Smrg /** 48317ec681f3Smrg * An address format which represents generic pointers with a 62-bit 48327ec681f3Smrg * pointer and a 2-bit enum in the top two bits. 
The top two bits have 48337ec681f3Smrg * the following meanings: 48347ec681f3Smrg * 48357ec681f3Smrg * - 0x0: Global memory 48367ec681f3Smrg * - 0x1: Shared memory 48377ec681f3Smrg * - 0x2: Scratch memory 48387ec681f3Smrg * - 0x3: Global memory 48397ec681f3Smrg * 48407ec681f3Smrg * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of 48417ec681f3Smrg * addresses. Valid global memory addresses may naturally have either 0 or 48427ec681f3Smrg * ~0 as their high bits. 48437ec681f3Smrg * 48447ec681f3Smrg * Shared and scratch pointers are represented as 32-bit offsets with the 48457ec681f3Smrg * top 32 bits only being used for the enum. This allows us to avoid 48467ec681f3Smrg * 64-bit address calculations in a bunch of cases. 48477ec681f3Smrg */ 48487ec681f3Smrg nir_address_format_62bit_generic, 48497ec681f3Smrg 48507ec681f3Smrg /** 48517ec681f3Smrg * An address format which is a simple 32-bit offset. 48527ec681f3Smrg */ 48537ec681f3Smrg nir_address_format_32bit_offset, 48547ec681f3Smrg 48557ec681f3Smrg /** 48567ec681f3Smrg * An address format which is a simple 32-bit offset cast to 64-bit. 48577ec681f3Smrg */ 48587ec681f3Smrg nir_address_format_32bit_offset_as_64bit, 48597ec681f3Smrg 48607ec681f3Smrg /** 48617ec681f3Smrg * An address format representing a purely logical addressing model. In 48627ec681f3Smrg * this model, all deref chains must be complete from the dereference 48637ec681f3Smrg * operation to the variable. Cast derefs are not allowed. These 48647ec681f3Smrg * addresses will be 32-bit scalars but the format is immaterial because 48657ec681f3Smrg * you can always chase the chain. 
 */
   nir_address_format_logical,
} nir_address_format;

/** Returns the bit size of one component of an address in \p addr_format.
 *
 * Note this is per-component: e.g. 64bit_bounded_global is a vec4 of 32-bit
 * components, so it reports 32 here even though the base address is 64-bit.
 */
static inline unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 32;
   case nir_address_format_64bit_global:              return 64;
   case nir_address_format_64bit_global_32bit_offset: return 32;
   case nir_address_format_64bit_bounded_global:      return 32;
   case nir_address_format_32bit_index_offset:        return 32;
   case nir_address_format_32bit_index_offset_pack64: return 64;
   case nir_address_format_vec2_index_32bit_offset:   return 32;
   case nir_address_format_62bit_generic:             return 64;
   case nir_address_format_32bit_offset:              return 32;
   case nir_address_format_32bit_offset_as_64bit:     return 64;
   case nir_address_format_logical:                   return 32;
   }
   unreachable("Invalid address format");
}

/** Returns the number of vector components of an address in \p addr_format. */
static inline unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 1;
   case nir_address_format_64bit_global:              return 1;
   case nir_address_format_64bit_global_32bit_offset: return 4;
   case nir_address_format_64bit_bounded_global:      return 4;
   case nir_address_format_32bit_index_offset:        return 2;
   case nir_address_format_32bit_index_offset_pack64: return 1;
   case nir_address_format_vec2_index_32bit_offset:   return 3;
   case nir_address_format_62bit_generic:             return 1;
   case nir_address_format_32bit_offset:              return 1;
   case nir_address_format_32bit_offset_as_64bit:     return 1;
   case nir_address_format_logical:                   return 1;
   }
   unreachable("Invalid address format");
}

/** Returns the uint (32-bit) or uint64 vector glsl_type matching the
 * per-component bit size and component count of \p addr_format.
 */
static inline const struct glsl_type *
nir_address_format_to_glsl_type(nir_address_format addr_format)
{
   unsigned bit_size = nir_address_format_bit_size(addr_format);
   assert(bit_size == 32 || bit_size == 64);
   return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
                           nir_address_format_num_components(addr_format));
}

const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);

nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                nir_address_format addr_format);

nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                 nir_address_format addr_format);

nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b,
                                                 nir_deref_instr *deref,
                                                 nir_ssa_def *base_addr,
                                                 nir_address_format addr_format);

bool nir_get_explicit_deref_align(nir_deref_instr *deref,
                                  bool default_to_type_align,
                                  uint32_t *align_mul,
                                  uint32_t *align_offset);

void nir_lower_explicit_io_instr(struct nir_builder *b,
                                 nir_intrinsic_instr *io_instr,
                                 nir_ssa_def *addr,
                                 nir_address_format addr_format);

bool
nir_lower_explicit_io(nir_shader *shader, 49417e102996Smaya nir_variable_mode modes, 49427e102996Smaya nir_address_format); 49437e102996Smaya 49447ec681f3Smrgbool 49457ec681f3Smrgnir_lower_shader_calls(nir_shader *shader, 49467ec681f3Smrg nir_address_format address_format, 49477ec681f3Smrg unsigned stack_alignment, 49487ec681f3Smrg nir_shader ***resume_shaders_out, 49497ec681f3Smrg uint32_t *num_resume_shaders_out, 49507ec681f3Smrg void *mem_ctx); 49517ec681f3Smrg 495201e04c3fSmrgnir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 495301e04c3fSmrgnir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 49547ec681f3Smrgnir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); 495501e04c3fSmrg 49567ec681f3Smrgbool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage); 495701e04c3fSmrg 495801e04c3fSmrgbool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 495901e04c3fSmrgbool nir_lower_regs_to_ssa(nir_shader *shader); 496001e04c3fSmrgbool nir_lower_vars_to_ssa(nir_shader *shader); 496101e04c3fSmrg 496201e04c3fSmrgbool nir_remove_dead_derefs(nir_shader *shader); 496301e04c3fSmrgbool nir_remove_dead_derefs_impl(nir_function_impl *impl); 49647ec681f3Smrg 49657ec681f3Smrgtypedef struct nir_remove_dead_variables_options { 49667ec681f3Smrg bool (*can_remove_var)(nir_variable *var, void *data); 49677ec681f3Smrg void *can_remove_var_data; 49687ec681f3Smrg} nir_remove_dead_variables_options; 49697ec681f3Smrg 49707ec681f3Smrgbool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, 49717ec681f3Smrg const nir_remove_dead_variables_options *options); 49727ec681f3Smrg 49737ec681f3Smrgbool nir_lower_variable_initializers(nir_shader *shader, 497401e04c3fSmrg nir_variable_mode modes); 49757ec681f3Smrgbool nir_zero_initialize_shared_memory(nir_shader *shader, 49767ec681f3Smrg const unsigned shared_size, 49777ec681f3Smrg const unsigned chunk_size); 497801e04c3fSmrg 497901e04c3fSmrgbool nir_move_vec_src_uses_to_dest(nir_shader 
*shader); 49807ec681f3Smrgbool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb, 49817ec681f3Smrg const void *_data); 498201e04c3fSmrgvoid nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 49837ec681f3Smrg bool alpha_to_one, 49847ec681f3Smrg const gl_state_index16 *alpha_ref_state_tokens); 498501e04c3fSmrgbool nir_lower_alu(nir_shader *shader); 49867ec681f3Smrg 49877ec681f3Smrgbool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, 49887ec681f3Smrg bool always_precise); 49897ec681f3Smrg 49907ec681f3Smrgbool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 49917ec681f3Smrgbool nir_lower_bool_to_bitsize(nir_shader *shader); 49927e102996Smayabool nir_lower_bool_to_float(nir_shader *shader); 49937e102996Smayabool nir_lower_bool_to_int32(nir_shader *shader); 49947ec681f3Smrgbool nir_opt_simplify_convert_alu_types(nir_shader *shader); 49957ec681f3Smrgbool nir_lower_convert_alu_types(nir_shader *shader, 49967ec681f3Smrg bool (*should_lower)(nir_intrinsic_instr *)); 49977ec681f3Smrgbool nir_lower_constant_convert_alu_types(nir_shader *shader); 49987ec681f3Smrgbool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); 49997ec681f3Smrgbool nir_lower_int_to_float(nir_shader *shader); 500001e04c3fSmrgbool nir_lower_load_const_to_scalar(nir_shader *shader); 500101e04c3fSmrgbool nir_lower_read_invocation_to_scalar(nir_shader *shader); 50027ec681f3Smrgbool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all); 500301e04c3fSmrgvoid nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 500401e04c3fSmrgvoid nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 500501e04c3fSmrg bool outputs_only); 500601e04c3fSmrgvoid nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 50077ec681f3Smrgbool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 50087e102996Smayabool nir_lower_io_to_vector(nir_shader *shader, 
nir_variable_mode mask); 50097ec681f3Smrgbool nir_vectorize_tess_levels(nir_shader *shader); 50107e102996Smaya 50117ec681f3Smrgbool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs); 50127ec681f3Smrgbool nir_lower_fragcoord_wtrans(nir_shader *shader); 50137e102996Smayavoid nir_lower_viewport_transform(nir_shader *shader); 50147ec681f3Smrgbool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4); 50157ec681f3Smrg 50167ec681f3Smrgbool nir_lower_is_helper_invocation(nir_shader *shader); 501701e04c3fSmrg 501801e04c3fSmrgtypedef struct nir_lower_subgroups_options { 501901e04c3fSmrg uint8_t subgroup_size; 502001e04c3fSmrg uint8_t ballot_bit_size; 50217ec681f3Smrg uint8_t ballot_components; 502201e04c3fSmrg bool lower_to_scalar:1; 502301e04c3fSmrg bool lower_vote_trivial:1; 50247ec681f3Smrg bool lower_vote_eq:1; 502501e04c3fSmrg bool lower_subgroup_masks:1; 502601e04c3fSmrg bool lower_shuffle:1; 502701e04c3fSmrg bool lower_shuffle_to_32bit:1; 50287ec681f3Smrg bool lower_shuffle_to_swizzle_amd:1; 502901e04c3fSmrg bool lower_quad:1; 50307ec681f3Smrg bool lower_quad_broadcast_dynamic:1; 50317ec681f3Smrg bool lower_quad_broadcast_dynamic_to_const:1; 50327ec681f3Smrg bool lower_elect:1; 50337ec681f3Smrg bool lower_read_invocation_to_cond:1; 503401e04c3fSmrg} nir_lower_subgroups_options; 503501e04c3fSmrg 503601e04c3fSmrgbool nir_lower_subgroups(nir_shader *shader, 503701e04c3fSmrg const nir_lower_subgroups_options *options); 503801e04c3fSmrg 503901e04c3fSmrgbool nir_lower_system_values(nir_shader *shader); 504001e04c3fSmrg 50417ec681f3Smrgtypedef struct nir_lower_compute_system_values_options { 50427ec681f3Smrg bool has_base_global_invocation_id:1; 50437ec681f3Smrg bool has_base_workgroup_id:1; 50447ec681f3Smrg bool shuffle_local_ids_for_quad_derivatives:1; 50457ec681f3Smrg bool lower_local_invocation_index:1; 50467ec681f3Smrg} nir_lower_compute_system_values_options; 50477ec681f3Smrg 50487ec681f3Smrgbool 
nir_lower_compute_system_values(nir_shader *shader, 50497ec681f3Smrg const nir_lower_compute_system_values_options *options); 50507ec681f3Smrg 50517ec681f3Smrgstruct nir_lower_sysvals_to_varyings_options { 50527ec681f3Smrg bool frag_coord:1; 50537ec681f3Smrg bool front_face:1; 50547ec681f3Smrg bool point_coord:1; 50557ec681f3Smrg}; 50567ec681f3Smrg 50577ec681f3Smrgbool 50587ec681f3Smrgnir_lower_sysvals_to_varyings(nir_shader *shader, 50597ec681f3Smrg const struct nir_lower_sysvals_to_varyings_options *options); 50607ec681f3Smrg 50617e102996Smayaenum PACKED nir_lower_tex_packing { 50627ec681f3Smrg /** No packing */ 50637e102996Smaya nir_lower_tex_packing_none = 0, 50647ec681f3Smrg /** 50657ec681f3Smrg * The sampler returns up to 2 32-bit words of half floats or 16-bit signed 50667e102996Smaya * or unsigned ints based on the sampler type 50677e102996Smaya */ 50687e102996Smaya nir_lower_tex_packing_16, 50697ec681f3Smrg /** The sampler returns 1 32-bit word of 4x8 unorm */ 50707e102996Smaya nir_lower_tex_packing_8, 50717e102996Smaya}; 50727e102996Smaya 507301e04c3fSmrgtypedef struct nir_lower_tex_options { 507401e04c3fSmrg /** 507501e04c3fSmrg * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 507601e04c3fSmrg * sampler types a texture projector is lowered. 507701e04c3fSmrg */ 507801e04c3fSmrg unsigned lower_txp; 507901e04c3fSmrg 508001e04c3fSmrg /** 508101e04c3fSmrg * If true, lower away nir_tex_src_offset for all texelfetch instructions. 508201e04c3fSmrg */ 508301e04c3fSmrg bool lower_txf_offset; 508401e04c3fSmrg 508501e04c3fSmrg /** 508601e04c3fSmrg * If true, lower away nir_tex_src_offset for all rect textures. 508701e04c3fSmrg */ 508801e04c3fSmrg bool lower_rect_offset; 508901e04c3fSmrg 509001e04c3fSmrg /** 509101e04c3fSmrg * If true, lower rect textures to 2D, using txs to fetch the 509201e04c3fSmrg * texture dimensions and dividing the texture coords by the 509301e04c3fSmrg * texture dims to normalize. 
509401e04c3fSmrg */ 509501e04c3fSmrg bool lower_rect; 509601e04c3fSmrg 509701e04c3fSmrg /** 509801e04c3fSmrg * If true, convert yuv to rgb. 509901e04c3fSmrg */ 510001e04c3fSmrg unsigned lower_y_uv_external; 510101e04c3fSmrg unsigned lower_y_u_v_external; 510201e04c3fSmrg unsigned lower_yx_xuxv_external; 510301e04c3fSmrg unsigned lower_xy_uxvx_external; 51047e102996Smaya unsigned lower_ayuv_external; 51057e102996Smaya unsigned lower_xyuv_external; 51067ec681f3Smrg unsigned lower_yuv_external; 51077ec681f3Smrg unsigned lower_yu_yv_external; 51087ec681f3Smrg unsigned lower_y41x_external; 51097ec681f3Smrg unsigned bt709_external; 51107ec681f3Smrg unsigned bt2020_external; 511101e04c3fSmrg 511201e04c3fSmrg /** 511301e04c3fSmrg * To emulate certain texture wrap modes, this can be used 511401e04c3fSmrg * to saturate the specified tex coord to [0.0, 1.0]. The 511501e04c3fSmrg * bits are according to sampler #, ie. if, for example: 511601e04c3fSmrg * 511701e04c3fSmrg * (conf->saturate_s & (1 << n)) 511801e04c3fSmrg * 511901e04c3fSmrg * is true, then the s coord for sampler n is saturated. 512001e04c3fSmrg * 512101e04c3fSmrg * Note that clamping must happen *after* projector lowering 512201e04c3fSmrg * so any projected texture sample instruction with a clamped 512301e04c3fSmrg * coordinate gets automatically lowered, regardless of the 512401e04c3fSmrg * 'lower_txp' setting. 512501e04c3fSmrg */ 512601e04c3fSmrg unsigned saturate_s; 512701e04c3fSmrg unsigned saturate_t; 512801e04c3fSmrg unsigned saturate_r; 512901e04c3fSmrg 513001e04c3fSmrg /* Bitmask of textures that need swizzling. 513101e04c3fSmrg * 513201e04c3fSmrg * If (swizzle_result & (1 << texture_index)), then the swizzle in 513301e04c3fSmrg * swizzles[texture_index] is applied to the result of the texturing 513401e04c3fSmrg * operation. 513501e04c3fSmrg */ 513601e04c3fSmrg unsigned swizzle_result; 513701e04c3fSmrg 513801e04c3fSmrg /* A swizzle for each texture. 
Values 0-3 represent x, y, z, or w swizzles 513901e04c3fSmrg * while 4 and 5 represent 0 and 1 respectively. 51407ec681f3Smrg * 51417ec681f3Smrg * Indexed by texture-id. 514201e04c3fSmrg */ 514301e04c3fSmrg uint8_t swizzles[32][4]; 514401e04c3fSmrg 51457ec681f3Smrg /* Can be used to scale sampled values in range required by the 51467ec681f3Smrg * format. 51477ec681f3Smrg * 51487ec681f3Smrg * Indexed by texture-id. 51497ec681f3Smrg */ 51507e102996Smaya float scale_factors[32]; 51517e102996Smaya 515201e04c3fSmrg /** 515301e04c3fSmrg * Bitmap of textures that need srgb to linear conversion. If 515401e04c3fSmrg * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 515501e04c3fSmrg * of the texture are lowered to linear. 515601e04c3fSmrg */ 515701e04c3fSmrg unsigned lower_srgb; 515801e04c3fSmrg 515901e04c3fSmrg /** 516001e04c3fSmrg * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 516101e04c3fSmrg */ 516201e04c3fSmrg bool lower_txd_cube_map; 516301e04c3fSmrg 51647e102996Smaya /** 51657e102996Smaya * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 51667e102996Smaya */ 51677e102996Smaya bool lower_txd_3d; 51687e102996Smaya 516901e04c3fSmrg /** 517001e04c3fSmrg * If true, lower nir_texop_txd on shadow samplers (except cube maps) 517101e04c3fSmrg * with nir_texop_txl. Notice that cube map shadow samplers are lowered 517201e04c3fSmrg * with lower_txd_cube_map. 517301e04c3fSmrg */ 517401e04c3fSmrg bool lower_txd_shadow; 517501e04c3fSmrg 517601e04c3fSmrg /** 517701e04c3fSmrg * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 517801e04c3fSmrg * Implies lower_txd_cube_map and lower_txd_shadow. 517901e04c3fSmrg */ 518001e04c3fSmrg bool lower_txd; 51817e102996Smaya 51827e102996Smaya /** 51837e102996Smaya * If true, lower nir_texop_txb that try to use shadow compare and min_lod 51847e102996Smaya * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 
51857e102996Smaya */ 51867e102996Smaya bool lower_txb_shadow_clamp; 51877e102996Smaya 51887e102996Smaya /** 51897e102996Smaya * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 51907e102996Smaya * with nir_texop_txl. This includes cube maps. 51917e102996Smaya */ 51927e102996Smaya bool lower_txd_shadow_clamp; 51937e102996Smaya 51947e102996Smaya /** 51957e102996Smaya * If true, lower nir_texop_txd on when it uses both offset and min_lod 51967e102996Smaya * with nir_texop_txl. This includes cube maps. 51977e102996Smaya */ 51987e102996Smaya bool lower_txd_offset_clamp; 51997e102996Smaya 52007e102996Smaya /** 52017e102996Smaya * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 52027e102996Smaya * sampler is bindless. 52037e102996Smaya */ 52047e102996Smaya bool lower_txd_clamp_bindless_sampler; 52057e102996Smaya 52067e102996Smaya /** 52077e102996Smaya * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 52087e102996Smaya * sampler index is not statically determinable to be less than 16. 52097e102996Smaya */ 52107e102996Smaya bool lower_txd_clamp_if_sampler_index_not_lt_16; 52117e102996Smaya 52127ec681f3Smrg /** 52137ec681f3Smrg * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with 52147ec681f3Smrg * 0-lod followed by a nir_ishr. 52157ec681f3Smrg */ 52167ec681f3Smrg bool lower_txs_lod; 52177ec681f3Smrg 52187ec681f3Smrg /** 52197ec681f3Smrg * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a 52207ec681f3Smrg * 2D array type followed by a nir_idiv by 6. 52217ec681f3Smrg */ 52227ec681f3Smrg bool lower_txs_cube_array; 52237ec681f3Smrg 52247e102996Smaya /** 52257e102996Smaya * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 52267e102996Smaya * mixed-up tg4 locations. 
52277e102996Smaya */ 52287e102996Smaya bool lower_tg4_broadcom_swizzle; 52297e102996Smaya 52307e102996Smaya /** 52317e102996Smaya * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 52327e102996Smaya */ 52337e102996Smaya bool lower_tg4_offsets; 52347e102996Smaya 52357ec681f3Smrg /** 52367ec681f3Smrg * Lower txf_ms to fragment_mask_fetch and fragment_fetch and samples_identical to 52377ec681f3Smrg * fragment_mask_fetch. 52387ec681f3Smrg */ 52397ec681f3Smrg bool lower_to_fragment_fetch_amd; 52407ec681f3Smrg 52417ec681f3Smrg /** 52427ec681f3Smrg * To lower packed sampler return formats. 52437ec681f3Smrg * 52447ec681f3Smrg * Indexed by sampler-id. 52457ec681f3Smrg */ 52467e102996Smaya enum nir_lower_tex_packing lower_tex_packing[32]; 524701e04c3fSmrg} nir_lower_tex_options; 524801e04c3fSmrg 52497ec681f3Smrg/** Lowers complex texture instructions to simpler ones */ 525001e04c3fSmrgbool nir_lower_tex(nir_shader *shader, 525101e04c3fSmrg const nir_lower_tex_options *options); 525201e04c3fSmrg 52537ec681f3Smrgtypedef struct nir_lower_image_options { 52547ec681f3Smrg /** 52557ec681f3Smrg * If true, lower cube size operations. 52567ec681f3Smrg */ 52577ec681f3Smrg bool lower_cube_size; 52587ec681f3Smrg} nir_lower_image_options; 52597ec681f3Smrg 52607ec681f3Smrgbool nir_lower_image(nir_shader *nir, 52617ec681f3Smrg const nir_lower_image_options *options); 52627ec681f3Smrg 52637ec681f3Smrgbool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable); 52647ec681f3Smrg 52657e102996Smayaenum nir_lower_non_uniform_access_type { 52667e102996Smaya nir_lower_non_uniform_ubo_access = (1 << 0), 52677e102996Smaya nir_lower_non_uniform_ssbo_access = (1 << 1), 52687e102996Smaya nir_lower_non_uniform_texture_access = (1 << 2), 52697e102996Smaya nir_lower_non_uniform_image_access = (1 << 3), 52707e102996Smaya}; 52717e102996Smaya 52727ec681f3Smrg/* Given the nir_src used for the resource, return the channels which might be non-uniform. 
 */
typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);

typedef struct nir_lower_non_uniform_access_options {
   /* Which access types (ubo/ssbo/texture/image) to lower; see
    * nir_lower_non_uniform_access_type.
    */
   enum nir_lower_non_uniform_access_type types;
   /* Optional callback reporting which channels of the resource src may be
    * non-uniform; callback_data is passed through unchanged.
    */
   nir_lower_non_uniform_access_callback callback;
   void *callback_data;
} nir_lower_non_uniform_access_options;

bool nir_lower_non_uniform_access(nir_shader *shader,
                                  const nir_lower_non_uniform_access_options *options);

/* Options for nir_lower_idiv(), selecting between the two 32-bit integer
 * division lowering strategies described below.
 */
typedef struct {
   /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
    * is used. It is the faster of the two but it is not exact in some cases
    * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
    *
    * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
    * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
    * instructions than the nv50 path and many of them are integer
    * multiplications, so it is probably slower. It should always return the
    * correct result, though.
    */
   bool imprecise_32bit_lowering;

   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
    * division lowering
    */
   bool allow_fp16;
} nir_lower_idiv_options;

bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);

/* Options for nir_lower_input_attachments(); the sysval flags presumably
 * select sysval-based sources for fragcoord/layer — confirm in the pass.
 */
typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   bool use_view_id_for_layer;
} nir_input_attachment_options;

bool nir_lower_input_attachments(nir_shader *shader,
                                 const nir_input_attachment_options *options);

/* User-clip-plane lowering for vertex, geometry and fragment stages.
 * ucp_enables is a bitmask of enabled clip planes.
 */
bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
                       bool use_vars,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array);
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);

void nir_lower_point_size_mov(nir_shader *shader,
                              const gl_state_index16 *pointsize_state_tokens);

bool nir_lower_frexp(nir_shader *nir);

void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);

bool nir_lower_clamp_color_outputs(nir_shader *shader);

bool nir_lower_flatshade(nir_shader *shader);

void nir_lower_passthrough_edgeflags(nir_shader *shader);
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                              const gl_state_index16 *uniform_state_tokens);

/* Options for nir_lower_wpos_ytransform(); exactly one origin/center pair of
 * the bit-field flags is expected to be set by the caller.
 */
typedef struct nir_lower_wpos_ytransform_options {
   gl_state_index16 state_tokens[STATE_LENGTH];
   bool fs_coord_origin_upper_left :1;
   bool fs_coord_origin_lower_left :1;
   bool fs_coord_pixel_center_integer :1;
   bool fs_coord_pixel_center_half_integer :1;
} nir_lower_wpos_ytransform_options;

bool nir_lower_wpos_ytransform(nir_shader *shader,
                               const nir_lower_wpos_ytransform_options *options);
bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);

bool nir_lower_pntc_ytransform(nir_shader *shader,
                               const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);

bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);

bool nir_lower_fb_read(nir_shader *shader);

/* Options for nir_lower_drawpixels(): state tokens locating the texcoord /
 * scale / bias state, plus sampler indices for the drawpix and pixel-map
 * textures.
 */
typedef struct nir_lower_drawpixels_options {
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps :1;
   bool scale_and_bias :1;
} nir_lower_drawpixels_options;

void nir_lower_drawpixels(nir_shader *shader,
                          const nir_lower_drawpixels_options *options);

/* Options for nir_lower_bitmap(): which sampler to use and whether the
 * sampled result should be swizzled .xxxx.
 */
typedef struct nir_lower_bitmap_options {
   unsigned sampler;
   bool swizzle_xxxx;
} nir_lower_bitmap_options;

void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);

bool nir_lower_atomics_to_ssbo(nir_shader *shader);

/* Bitmask controlling which kinds of source modifiers
 * nir_lower_to_source_mods() may introduce.
 */
typedef enum {
   nir_lower_int_source_mods = 1 << 0,
   nir_lower_float_source_mods = 1 << 1,
   nir_lower_64bit_source_mods = 1 << 2,
   nir_lower_triop_abs = 1 << 3,
   /* All of the above flags combined. */
   nir_lower_all_source_mods = (1 << 4) - 1
} nir_lower_to_source_mods_flags;


bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);

/* Bitmask of behaviors for nir_lower_gs_intrinsics(). */
typedef enum {
   nir_lower_gs_intrinsics_per_stream = 1 << 0,
   nir_lower_gs_intrinsics_count_primitives = 1 << 1,
   nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
   nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
} nir_lower_gs_intrinsics_flags;

bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);

/* Returns the bit size an instruction should be lowered to, given the
 * caller-provided data pointer.
 */
typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);

bool nir_lower_bit_size(nir_shader *shader,
                        nir_lower_bit_size_callback callback,
                        void *callback_data);
bool nir_lower_64bit_phis(nir_shader *shader);

nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
bool nir_lower_int64(nir_shader *shader);

nir_lower_doubles_options
nir_lower_doubles_op_to_options_mask(nir_op opcode);
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                       nir_lower_doubles_options options);
bool nir_lower_pack(nir_shader *shader);

bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          uint64_t varying_mask, bool use_16bit_slots);
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          nir_alu_type types);
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
                                        unsigned tex_src_types);

/* Per-src-type bit-size constraint used by nir_legalize_16bit_sampler_srcs().
 * The array form is indexed by nir_tex_src_type.
 */
typedef struct {
   bool legalize_type;         /* whether this src should be legalized */
   uint8_t bit_size;           /* bit_size to enforce */
   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];

bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
                                     nir_tex_src_type_constraints constraints);

bool nir_lower_point_size(nir_shader *shader, float min, float max);

void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
                                bool point_coord_is_sysval, bool yinvert);

/* Bitmask of interpolation modes nir_lower_interpolation() should lower.
 * Note the mask intentionally starts at bit 1.
 */
typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid = (1 << 3),
   nir_lower_interpolation_pixel = (1 << 4),
   nir_lower_interpolation_sample = (1 << 5),
} nir_lower_interpolation_options;

bool nir_lower_interpolation(nir_shader *shader,
                             nir_lower_interpolation_options options);

bool nir_lower_discard_or_demote(nir_shader *shader,
                                 bool force_correct_quad_ops_after_discard);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);

bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);


bool nir_lower_fp16_casts(nir_shader *shader);
bool nir_normalize_cubemap_coords(nir_shader *shader);

bool nir_shader_supports_implicit_lod(nir_shader *shader);

void nir_live_ssa_defs_impl(nir_function_impl *impl);

const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask);

bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis(nir_shader *shader);
bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers. If false, convert all values (even those not involved in a phi
 * node) to registers.
 */
bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_ssbo(nir_shader *shader);

/* Options for nir_lower_printf(). */
typedef struct nir_lower_printf_options {
   bool treat_doubles_as_floats : 1;
   unsigned max_buffer_size;
} nir_lower_printf_options;

bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

/* Options for nir_opt_access(). */
typedef struct nir_opt_access_options {
   bool is_vulkan;
   bool infer_non_readable;
} nir_opt_access_options;

bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);

/* Try to combine a and b into a. Return true if combination was possible,
 * which will result in b being removed by the pass. Return false if
 * combination wasn't possible.
 */
typedef bool (*nir_combine_memory_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_memory_barriers(nir_shader *shader,
                                     nir_combine_memory_barrier_cb combine_cb,
                                     void *data);

bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

bool nir_copy_prop_impl(nir_function_impl *impl);
bool nir_copy_prop(nir_shader *shader);

bool nir_opt_copy_prop_vars(nir_shader *shader);

bool nir_opt_cse(nir_shader *shader);

bool nir_opt_dce(nir_shader *shader);

bool nir_opt_dead_cf(nir_shader *shader);

bool nir_opt_dead_write_vars(nir_shader *shader);

bool nir_opt_deref_impl(nir_function_impl *impl);
bool nir_opt_deref(nir_shader *shader);

bool nir_opt_find_array_copies(nir_shader *shader);

bool nir_opt_fragdepth(nir_shader *shader);

bool nir_opt_gcm(nir_shader *shader, bool value_number);

bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);

bool nir_opt_intrinsics(nir_shader *shader);

bool nir_opt_large_constants(nir_shader *shader,
                             glsl_type_size_align_func size_align,
                             unsigned threshold);

bool nir_opt_loop_unroll(nir_shader *shader);

/* Bitmask of instruction categories that nir_opt_sink()/nir_opt_move() are
 * allowed to relocate; also accepted by nir_can_move_instr().
 */
typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo = (1 << 1),
   nir_move_load_input = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies = (1 << 4),
   nir_move_load_ssbo = (1 << 5),
} nir_move_options;

bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

bool nir_opt_sink(nir_shader *shader, nir_move_options options);

bool nir_opt_move(nir_shader *shader, nir_move_options options);

bool nir_opt_offsets(nir_shader *shader);

bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok);

bool nir_opt_rematerialize_compares(nir_shader *shader);

bool nir_opt_remove_phis(nir_shader *shader);
bool nir_opt_remove_phis_block(nir_block *block);

bool nir_opt_phi_precision(nir_shader *shader);

bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);

bool nir_opt_trivial_continues(nir_shader *shader);

bool nir_opt_undef(nir_shader *shader);

bool nir_lower_undef_to_zero(nir_shader *shader);

bool nir_opt_uniform_atomics(nir_shader *shader);

/* Filter callback for nir_opt_vectorize(); data is passed through unchanged. */
typedef bool (*nir_opt_vectorize_cb)(const nir_instr *instr, void *data);

bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
                       void *data);

bool nir_opt_conditional_discard(nir_shader *shader);
bool nir_opt_move_discards_to_top(nir_shader *shader);

/* Decides whether the load/store pair (low, high) may be vectorized into a
 * single access of the given alignment, bit size, and component count.
 */
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                              unsigned align_offset,
                                              unsigned bit_size,
                                              unsigned num_components,
                                              nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                              void *data);

/* Options for nir_opt_load_store_vectorize(). */
typedef struct {
   nir_should_vectorize_mem_func callback;
   nir_variable_mode modes;
   /* Modes for which out-of-bounds behavior must be preserved
    * (robust access) — presumably restricting vectorization; confirm in pass.
    */
   nir_variable_mode robust_modes;
   void *cb_data;
} nir_load_store_vectorize_options;

bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);

void nir_sweep(nir_shader *shader);

void nir_remap_dual_slot_attributes(nir_shader *shader,
                                    uint64_t *dual_slot_inputs);
uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);

nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);

/* Returns true if var is a UBO variable that is part of an interface block
 * (i.e. it has a non-NULL interface_type).
 */
static inline bool
nir_variable_is_in_ubo(const nir_variable *var)
{
   return (var->data.mode == nir_var_mem_ubo &&
           var->interface_type != NULL);
}

/* Returns true if var is an SSBO variable that is part of an interface block
 * (i.e. it has a non-NULL interface_type).
 */
static inline bool
nir_variable_is_in_ssbo(const nir_variable *var)
{
   return (var->data.mode == nir_var_mem_ssbo &&
           var->interface_type != NULL);
}

/* Returns true if var lives in any interface block (UBO or SSBO). */
static inline bool
nir_variable_is_in_block(const nir_variable *var)
{
   return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
}

/* Hardware/launch limits used by nir_unsigned_upper_bound() and
 * nir_addition_might_overflow() when bounding SSA values.
 */
typedef struct nir_unsigned_upper_bound_config {
   unsigned min_subgroup_size;
   unsigned max_subgroup_size;
   unsigned max_workgroup_invocations;
   unsigned max_workgroup_count[3];
   unsigned max_workgroup_size[3];

   /* Per-attribute maximum values, indexed by vertex attribute slot. */
   uint32_t vertex_attrib_max[32];
} nir_unsigned_upper_bound_config;

uint32_t
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
                         nir_ssa_scalar scalar,
                         const nir_unsigned_upper_bound_config *config);

bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
                            nir_ssa_scalar ssa, unsigned const_val,
                            const nir_unsigned_upper_bound_config *config);

#include "nir_inline_helpers.h"

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* NIR_H */