1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Connor Abbott 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Connor Abbott (cwabbott0@gmail.com) 25b8e80941Smrg * 26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#ifndef NIR_H 29b8e80941Smrg#define NIR_H 30b8e80941Smrg 31b8e80941Smrg#include "util/hash_table.h" 32b8e80941Smrg#include "compiler/glsl/list.h" 33b8e80941Smrg#include "GL/gl.h" /* GLenum */ 34b8e80941Smrg#include "util/list.h" 35b8e80941Smrg#include "util/ralloc.h" 36b8e80941Smrg#include "util/set.h" 37b8e80941Smrg#include "util/bitscan.h" 38b8e80941Smrg#include "util/bitset.h" 39b8e80941Smrg#include "util/macros.h" 40b8e80941Smrg#include "compiler/nir_types.h" 41b8e80941Smrg#include "compiler/shader_enums.h" 42b8e80941Smrg#include "compiler/shader_info.h" 43b8e80941Smrg#include <stdio.h> 44b8e80941Smrg 45b8e80941Smrg#ifndef NDEBUG 46b8e80941Smrg#include "util/debug.h" 47b8e80941Smrg#endif /* NDEBUG */ 48b8e80941Smrg 49b8e80941Smrg#include "nir_opcodes.h" 50b8e80941Smrg 51b8e80941Smrg#if defined(_WIN32) && !defined(snprintf) 52b8e80941Smrg#define snprintf _snprintf 53b8e80941Smrg#endif 54b8e80941Smrg 55b8e80941Smrg#ifdef __cplusplus 56b8e80941Smrgextern "C" { 57b8e80941Smrg#endif 58b8e80941Smrg 59b8e80941Smrg#define NIR_FALSE 0u 60b8e80941Smrg#define NIR_TRUE (~0u) 61b8e80941Smrg#define NIR_MAX_VEC_COMPONENTS 4 62b8e80941Smrg#define NIR_MAX_MATRIX_COLUMNS 4 63b8e80941Smrgtypedef uint8_t nir_component_mask_t; 64b8e80941Smrg 65b8e80941Smrg/** Defines a cast function 66b8e80941Smrg * 67b8e80941Smrg * This macro defines a cast function from in_type to out_type where 68b8e80941Smrg * out_type is some structure type that contains a field of type out_type. 69b8e80941Smrg * 70b8e80941Smrg * Note that you have to be a bit careful as the generated cast function 71b8e80941Smrg * destroys constness. 72b8e80941Smrg */ 73b8e80941Smrg#define NIR_DEFINE_CAST(name, in_type, out_type, field, \ 74b8e80941Smrg type_field, type_value) \ 75b8e80941Smrgstatic inline out_type * \ 76b8e80941Smrgname(const in_type *parent) \ 77b8e80941Smrg{ \ 78b8e80941Smrg assert(parent && parent->type_field == type_value); \ 79b8e80941Smrg return exec_node_data(out_type, parent, field); \ 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstruct nir_function; 83b8e80941Smrgstruct nir_shader; 84b8e80941Smrgstruct nir_instr; 85b8e80941Smrgstruct nir_builder; 86b8e80941Smrg 87b8e80941Smrg 88b8e80941Smrg/** 89b8e80941Smrg * Description of built-in state associated with a uniform 90b8e80941Smrg * 91b8e80941Smrg * \sa nir_variable::state_slots 92b8e80941Smrg */ 93b8e80941Smrgtypedef struct { 94b8e80941Smrg gl_state_index16 tokens[STATE_LENGTH]; 95b8e80941Smrg int swizzle; 96b8e80941Smrg} nir_state_slot; 97b8e80941Smrg 98b8e80941Smrgtypedef enum { 99b8e80941Smrg nir_var_shader_in = (1 << 0), 100b8e80941Smrg nir_var_shader_out = (1 << 1), 101b8e80941Smrg nir_var_shader_temp = (1 << 2), 102b8e80941Smrg nir_var_function_temp = (1 << 3), 103b8e80941Smrg nir_var_uniform = (1 << 4), 104b8e80941Smrg nir_var_mem_ubo = (1 << 5), 105b8e80941Smrg nir_var_system_value = (1 << 6), 106b8e80941Smrg nir_var_mem_ssbo = (1 << 7), 107b8e80941Smrg nir_var_mem_shared = (1 << 8), 108b8e80941Smrg nir_var_mem_global = (1 << 9), 109b8e80941Smrg nir_var_all = ~0, 110b8e80941Smrg} nir_variable_mode; 111b8e80941Smrg 112b8e80941Smrg/** 113b8e80941Smrg * Rounding modes. 114b8e80941Smrg */ 115b8e80941Smrgtypedef enum { 116b8e80941Smrg nir_rounding_mode_undef = 0, 117b8e80941Smrg nir_rounding_mode_rtne = 1, /* round to nearest even */ 118b8e80941Smrg nir_rounding_mode_ru = 2, /* round up */ 119b8e80941Smrg nir_rounding_mode_rd = 3, /* round down */ 120b8e80941Smrg nir_rounding_mode_rtz = 4, /* round towards zero */ 121b8e80941Smrg} nir_rounding_mode; 122b8e80941Smrg 123b8e80941Smrgtypedef union { 124b8e80941Smrg bool b; 125b8e80941Smrg float f32; 126b8e80941Smrg double f64; 127b8e80941Smrg int8_t i8; 128b8e80941Smrg uint8_t u8; 129b8e80941Smrg int16_t i16; 130b8e80941Smrg uint16_t u16; 131b8e80941Smrg int32_t i32; 132b8e80941Smrg uint32_t u32; 133b8e80941Smrg int64_t i64; 134b8e80941Smrg uint64_t u64; 135b8e80941Smrg} nir_const_value; 136b8e80941Smrg 137b8e80941Smrg#define nir_const_value_to_array(arr, c, components, m) \ 138b8e80941Smrg{ \ 139b8e80941Smrg for (unsigned i = 0; i < components; ++i) \ 140b8e80941Smrg arr[i] = c[i].m; \ 141b8e80941Smrg} while (false) 142b8e80941Smrg 143b8e80941Smrgstatic inline nir_const_value 144b8e80941Smrgnir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) 145b8e80941Smrg{ 146b8e80941Smrg nir_const_value v; 147b8e80941Smrg memset(&v, 0, sizeof(v)); 148b8e80941Smrg 149b8e80941Smrg switch (bit_size) { 150b8e80941Smrg case 1: v.b = x; break; 151b8e80941Smrg case 8: v.u8 = x; break; 152b8e80941Smrg case 16: v.u16 = x; break; 153b8e80941Smrg case 32: v.u32 = x; break; 154b8e80941Smrg case 64: v.u64 = x; break; 155b8e80941Smrg default: 156b8e80941Smrg unreachable("Invalid bit size"); 157b8e80941Smrg } 158b8e80941Smrg 159b8e80941Smrg return v; 160b8e80941Smrg} 161b8e80941Smrg 162b8e80941Smrgstatic inline nir_const_value 163b8e80941Smrgnir_const_value_for_int(int64_t i, unsigned bit_size) 164b8e80941Smrg{ 165b8e80941Smrg nir_const_value v; 166b8e80941Smrg memset(&v, 0, sizeof(v)); 167b8e80941Smrg 168b8e80941Smrg assert(bit_size <= 64); 169b8e80941Smrg if (bit_size < 64) { 170b8e80941Smrg assert(i >= (-(1ll << (bit_size - 1)))); 171b8e80941Smrg assert(i < (1ll << (bit_size - 1))); 172b8e80941Smrg } 173b8e80941Smrg 174b8e80941Smrg return nir_const_value_for_raw_uint(i, bit_size); 175b8e80941Smrg} 176b8e80941Smrg 177b8e80941Smrgstatic inline nir_const_value 178b8e80941Smrgnir_const_value_for_uint(uint64_t u, unsigned bit_size) 179b8e80941Smrg{ 180b8e80941Smrg nir_const_value v; 181b8e80941Smrg memset(&v, 0, sizeof(v)); 182b8e80941Smrg 183b8e80941Smrg assert(bit_size <= 64); 184b8e80941Smrg if (bit_size < 64) 185b8e80941Smrg assert(u < (1ull << bit_size)); 186b8e80941Smrg 187b8e80941Smrg return nir_const_value_for_raw_uint(u, bit_size); 188b8e80941Smrg} 189b8e80941Smrg 190b8e80941Smrgstatic inline nir_const_value 191b8e80941Smrgnir_const_value_for_bool(bool b, unsigned bit_size) 192b8e80941Smrg{ 193b8e80941Smrg /* Booleans use a 0/-1 convention */ 194b8e80941Smrg return nir_const_value_for_int(-(int)b, bit_size); 195b8e80941Smrg} 196b8e80941Smrg 197b8e80941Smrg/* This one isn't inline because it requires half-float conversion */ 198b8e80941Smrgnir_const_value nir_const_value_for_float(double b, unsigned bit_size); 199b8e80941Smrg 200b8e80941Smrgstatic inline int64_t 201b8e80941Smrgnir_const_value_as_int(nir_const_value value, unsigned bit_size) 202b8e80941Smrg{ 203b8e80941Smrg switch (bit_size) { 204b8e80941Smrg /* int1_t uses 0/-1 convention */ 205b8e80941Smrg case 1: return -(int)value.b; 206b8e80941Smrg case 8: return value.i8; 207b8e80941Smrg case 16: return value.i16; 208b8e80941Smrg case 32: return value.i32; 209b8e80941Smrg case 64: return value.i64; 210b8e80941Smrg default: 211b8e80941Smrg unreachable("Invalid bit size"); 212b8e80941Smrg } 213b8e80941Smrg} 214b8e80941Smrg 215b8e80941Smrgstatic inline int64_t 216b8e80941Smrgnir_const_value_as_uint(nir_const_value value, unsigned bit_size) 217b8e80941Smrg{ 218b8e80941Smrg switch (bit_size) { 219b8e80941Smrg case 1: return value.b; 220b8e80941Smrg case 8: return value.u8; 221b8e80941Smrg case 16: return value.u16; 222b8e80941Smrg case 32: return value.u32; 223b8e80941Smrg case 64: return value.u64; 224b8e80941Smrg default: 225b8e80941Smrg unreachable("Invalid bit size"); 226b8e80941Smrg } 227b8e80941Smrg} 228b8e80941Smrg 229b8e80941Smrgstatic inline bool 230b8e80941Smrgnir_const_value_as_bool(nir_const_value value, unsigned bit_size) 231b8e80941Smrg{ 232b8e80941Smrg int64_t i = nir_const_value_as_int(value, bit_size); 233b8e80941Smrg 234b8e80941Smrg /* Booleans of any size use 0/-1 convention */ 235b8e80941Smrg assert(i == 0 || i == -1); 236b8e80941Smrg 237b8e80941Smrg return i; 238b8e80941Smrg} 239b8e80941Smrg 240b8e80941Smrg/* This one isn't inline because it requires half-float conversion */ 241b8e80941Smrgdouble nir_const_value_as_float(nir_const_value value, unsigned bit_size); 242b8e80941Smrg 243b8e80941Smrgtypedef struct nir_constant { 244b8e80941Smrg /** 245b8e80941Smrg * Value of the constant. 246b8e80941Smrg * 247b8e80941Smrg * The field used to back the values supplied by the constant is determined 248b8e80941Smrg * by the type associated with the \c nir_variable. Constants may be 249b8e80941Smrg * scalars, vectors, or matrices. 250b8e80941Smrg */ 251b8e80941Smrg nir_const_value values[NIR_MAX_MATRIX_COLUMNS][NIR_MAX_VEC_COMPONENTS]; 252b8e80941Smrg 253b8e80941Smrg /* we could get this from the var->type but makes clone *much* easier to 254b8e80941Smrg * not have to care about the type. 255b8e80941Smrg */ 256b8e80941Smrg unsigned num_elements; 257b8e80941Smrg 258b8e80941Smrg /* Array elements / Structure Fields */ 259b8e80941Smrg struct nir_constant **elements; 260b8e80941Smrg} nir_constant; 261b8e80941Smrg 262b8e80941Smrg/** 263b8e80941Smrg * \brief Layout qualifiers for gl_FragDepth. 264b8e80941Smrg * 265b8e80941Smrg * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 266b8e80941Smrg * with a layout qualifier. 267b8e80941Smrg */ 268b8e80941Smrgtypedef enum { 269b8e80941Smrg nir_depth_layout_none, /**< No depth layout is specified. */ 270b8e80941Smrg nir_depth_layout_any, 271b8e80941Smrg nir_depth_layout_greater, 272b8e80941Smrg nir_depth_layout_less, 273b8e80941Smrg nir_depth_layout_unchanged 274b8e80941Smrg} nir_depth_layout; 275b8e80941Smrg 276b8e80941Smrg/** 277b8e80941Smrg * Enum keeping track of how a variable was declared. 278b8e80941Smrg */ 279b8e80941Smrgtypedef enum { 280b8e80941Smrg /** 281b8e80941Smrg * Normal declaration. 282b8e80941Smrg */ 283b8e80941Smrg nir_var_declared_normally = 0, 284b8e80941Smrg 285b8e80941Smrg /** 286b8e80941Smrg * Variable is implicitly generated by the compiler and should not be 287b8e80941Smrg * visible via the API. 288b8e80941Smrg */ 289b8e80941Smrg nir_var_hidden, 290b8e80941Smrg} nir_var_declaration_type; 291b8e80941Smrg 292b8e80941Smrg/** 293b8e80941Smrg * Either a uniform, global variable, shader input, or shader output. Based on 294b8e80941Smrg * ir_variable - it should be easy to translate between the two. 295b8e80941Smrg */ 296b8e80941Smrg 297b8e80941Smrgtypedef struct nir_variable { 298b8e80941Smrg struct exec_node node; 299b8e80941Smrg 300b8e80941Smrg /** 301b8e80941Smrg * Declared type of the variable 302b8e80941Smrg */ 303b8e80941Smrg const struct glsl_type *type; 304b8e80941Smrg 305b8e80941Smrg /** 306b8e80941Smrg * Declared name of the variable 307b8e80941Smrg */ 308b8e80941Smrg char *name; 309b8e80941Smrg 310b8e80941Smrg struct nir_variable_data { 311b8e80941Smrg /** 312b8e80941Smrg * Storage class of the variable. 313b8e80941Smrg * 314b8e80941Smrg * \sa nir_variable_mode 315b8e80941Smrg */ 316b8e80941Smrg nir_variable_mode mode; 317b8e80941Smrg 318b8e80941Smrg /** 319b8e80941Smrg * Is the variable read-only? 320b8e80941Smrg * 321b8e80941Smrg * This is set for variables declared as \c const, shader inputs, 322b8e80941Smrg * and uniforms. 323b8e80941Smrg */ 324b8e80941Smrg unsigned read_only:1; 325b8e80941Smrg unsigned centroid:1; 326b8e80941Smrg unsigned sample:1; 327b8e80941Smrg unsigned patch:1; 328b8e80941Smrg unsigned invariant:1; 329b8e80941Smrg 330b8e80941Smrg /** 331b8e80941Smrg * When separate shader programs are enabled, only input/outputs between 332b8e80941Smrg * the stages of a multi-stage separate program can be safely removed 333b8e80941Smrg * from the shader interface. Other input/outputs must remains active. 334b8e80941Smrg * 335b8e80941Smrg * This is also used to make sure xfb varyings that are unused by the 336b8e80941Smrg * fragment shader are not removed. 337b8e80941Smrg */ 338b8e80941Smrg unsigned always_active_io:1; 339b8e80941Smrg 340b8e80941Smrg /** 341b8e80941Smrg * Interpolation mode for shader inputs / outputs 342b8e80941Smrg * 343b8e80941Smrg * \sa glsl_interp_mode 344b8e80941Smrg */ 345b8e80941Smrg unsigned interpolation:2; 346b8e80941Smrg 347b8e80941Smrg /** 348b8e80941Smrg * If non-zero, then this variable may be packed along with other variables 349b8e80941Smrg * into a single varying slot, so this offset should be applied when 350b8e80941Smrg * accessing components. For example, an offset of 1 means that the x 351b8e80941Smrg * component of this variable is actually stored in component y of the 352b8e80941Smrg * location specified by \c location. 353b8e80941Smrg */ 354b8e80941Smrg unsigned location_frac:2; 355b8e80941Smrg 356b8e80941Smrg /** 357b8e80941Smrg * If true, this variable represents an array of scalars that should 358b8e80941Smrg * be tightly packed. In other words, consecutive array elements 359b8e80941Smrg * should be stored one component apart, rather than one slot apart. 360b8e80941Smrg */ 361b8e80941Smrg unsigned compact:1; 362b8e80941Smrg 363b8e80941Smrg /** 364b8e80941Smrg * Whether this is a fragment shader output implicitly initialized with 365b8e80941Smrg * the previous contents of the specified render target at the 366b8e80941Smrg * framebuffer location corresponding to this shader invocation. 367b8e80941Smrg */ 368b8e80941Smrg unsigned fb_fetch_output:1; 369b8e80941Smrg 370b8e80941Smrg /** 371b8e80941Smrg * Non-zero if this variable is considered bindless as defined by 372b8e80941Smrg * ARB_bindless_texture. 373b8e80941Smrg */ 374b8e80941Smrg unsigned bindless:1; 375b8e80941Smrg 376b8e80941Smrg /** 377b8e80941Smrg * Was an explicit binding set in the shader? 378b8e80941Smrg */ 379b8e80941Smrg unsigned explicit_binding:1; 380b8e80941Smrg 381b8e80941Smrg /** 382b8e80941Smrg * Was a transfer feedback buffer set in the shader? 383b8e80941Smrg */ 384b8e80941Smrg unsigned explicit_xfb_buffer:1; 385b8e80941Smrg 386b8e80941Smrg /** 387b8e80941Smrg * Was a transfer feedback stride set in the shader? 388b8e80941Smrg */ 389b8e80941Smrg unsigned explicit_xfb_stride:1; 390b8e80941Smrg 391b8e80941Smrg /** 392b8e80941Smrg * Was an explicit offset set in the shader? 393b8e80941Smrg */ 394b8e80941Smrg unsigned explicit_offset:1; 395b8e80941Smrg 396b8e80941Smrg /** 397b8e80941Smrg * \brief Layout qualifier for gl_FragDepth. 398b8e80941Smrg * 399b8e80941Smrg * This is not equal to \c ir_depth_layout_none if and only if this 400b8e80941Smrg * variable is \c gl_FragDepth and a layout qualifier is specified. 401b8e80941Smrg */ 402b8e80941Smrg nir_depth_layout depth_layout; 403b8e80941Smrg 404b8e80941Smrg /** 405b8e80941Smrg * Storage location of the base of this variable 406b8e80941Smrg * 407b8e80941Smrg * The precise meaning of this field depends on the nature of the variable. 408b8e80941Smrg * 409b8e80941Smrg * - Vertex shader input: one of the values from \c gl_vert_attrib. 410b8e80941Smrg * - Vertex shader output: one of the values from \c gl_varying_slot. 411b8e80941Smrg * - Geometry shader input: one of the values from \c gl_varying_slot. 412b8e80941Smrg * - Geometry shader output: one of the values from \c gl_varying_slot. 413b8e80941Smrg * - Fragment shader input: one of the values from \c gl_varying_slot. 414b8e80941Smrg * - Fragment shader output: one of the values from \c gl_frag_result. 415b8e80941Smrg * - Uniforms: Per-stage uniform slot number for default uniform block. 416b8e80941Smrg * - Uniforms: Index within the uniform block definition for UBO members. 417b8e80941Smrg * - Non-UBO Uniforms: uniform slot number. 418b8e80941Smrg * - Other: This field is not currently used. 419b8e80941Smrg * 420b8e80941Smrg * If the variable is a uniform, shader input, or shader output, and the 421b8e80941Smrg * slot has not been assigned, the value will be -1. 422b8e80941Smrg */ 423b8e80941Smrg int location; 424b8e80941Smrg 425b8e80941Smrg /** 426b8e80941Smrg * The actual location of the variable in the IR. Only valid for inputs 427b8e80941Smrg * and outputs. 428b8e80941Smrg */ 429b8e80941Smrg unsigned int driver_location; 430b8e80941Smrg 431b8e80941Smrg /** 432b8e80941Smrg * Vertex stream output identifier. 433b8e80941Smrg * 434b8e80941Smrg * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the 435b8e80941Smrg * stream of the i-th component. 436b8e80941Smrg */ 437b8e80941Smrg unsigned stream; 438b8e80941Smrg 439b8e80941Smrg /** 440b8e80941Smrg * output index for dual source blending. 441b8e80941Smrg */ 442b8e80941Smrg int index; 443b8e80941Smrg 444b8e80941Smrg /** 445b8e80941Smrg * Descriptor set binding for sampler or UBO. 446b8e80941Smrg */ 447b8e80941Smrg int descriptor_set; 448b8e80941Smrg 449b8e80941Smrg /** 450b8e80941Smrg * Initial binding point for a sampler or UBO. 451b8e80941Smrg * 452b8e80941Smrg * For array types, this represents the binding point for the first element. 453b8e80941Smrg */ 454b8e80941Smrg int binding; 455b8e80941Smrg 456b8e80941Smrg /** 457b8e80941Smrg * Location an atomic counter or transform feedback is stored at. 458b8e80941Smrg */ 459b8e80941Smrg unsigned offset; 460b8e80941Smrg 461b8e80941Smrg /** 462b8e80941Smrg * Transform feedback buffer. 463b8e80941Smrg */ 464b8e80941Smrg unsigned xfb_buffer; 465b8e80941Smrg 466b8e80941Smrg /** 467b8e80941Smrg * Transform feedback stride. 468b8e80941Smrg */ 469b8e80941Smrg unsigned xfb_stride; 470b8e80941Smrg 471b8e80941Smrg /** 472b8e80941Smrg * How the variable was declared. See nir_var_declaration_type. 473b8e80941Smrg * 474b8e80941Smrg * This is used to detect variables generated by the compiler, so should 475b8e80941Smrg * not be visible via the API. 476b8e80941Smrg */ 477b8e80941Smrg unsigned how_declared:2; 478b8e80941Smrg 479b8e80941Smrg /** 480b8e80941Smrg * ARB_shader_image_load_store qualifiers. 481b8e80941Smrg */ 482b8e80941Smrg struct { 483b8e80941Smrg enum gl_access_qualifier access; 484b8e80941Smrg 485b8e80941Smrg /** Image internal format if specified explicitly, otherwise GL_NONE. */ 486b8e80941Smrg GLenum format; 487b8e80941Smrg } image; 488b8e80941Smrg } data; 489b8e80941Smrg 490b8e80941Smrg /** 491b8e80941Smrg * Built-in state that backs this uniform 492b8e80941Smrg * 493b8e80941Smrg * Once set at variable creation, \c state_slots must remain invariant. 494b8e80941Smrg * This is because, ideally, this array would be shared by all clones of 495b8e80941Smrg * this variable in the IR tree. In other words, we'd really like for it 496b8e80941Smrg * to be a fly-weight. 497b8e80941Smrg * 498b8e80941Smrg * If the variable is not a uniform, \c num_state_slots will be zero and 499b8e80941Smrg * \c state_slots will be \c NULL. 500b8e80941Smrg */ 501b8e80941Smrg /*@{*/ 502b8e80941Smrg unsigned num_state_slots; /**< Number of state slots used */ 503b8e80941Smrg nir_state_slot *state_slots; /**< State descriptors. */ 504b8e80941Smrg /*@}*/ 505b8e80941Smrg 506b8e80941Smrg /** 507b8e80941Smrg * Constant expression assigned in the initializer of the variable 508b8e80941Smrg * 509b8e80941Smrg * This field should only be used temporarily by creators of NIR shaders 510b8e80941Smrg * and then lower_constant_initializers can be used to get rid of them. 511b8e80941Smrg * Most of the rest of NIR ignores this field or asserts that it's NULL. 512b8e80941Smrg */ 513b8e80941Smrg nir_constant *constant_initializer; 514b8e80941Smrg 515b8e80941Smrg /** 516b8e80941Smrg * For variables that are in an interface block or are an instance of an 517b8e80941Smrg * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 518b8e80941Smrg * 519b8e80941Smrg * \sa ir_variable::location 520b8e80941Smrg */ 521b8e80941Smrg const struct glsl_type *interface_type; 522b8e80941Smrg 523b8e80941Smrg /** 524b8e80941Smrg * Description of per-member data for per-member struct variables 525b8e80941Smrg * 526b8e80941Smrg * This is used for variables which are actually an amalgamation of 527b8e80941Smrg * multiple entities such as a struct of built-in values or a struct of 528b8e80941Smrg * inputs each with their own layout specifier. This is only allowed on 529b8e80941Smrg * variables with a struct or array of array of struct type. 530b8e80941Smrg */ 531b8e80941Smrg unsigned num_members; 532b8e80941Smrg struct nir_variable_data *members; 533b8e80941Smrg} nir_variable; 534b8e80941Smrg 535b8e80941Smrg#define nir_foreach_variable(var, var_list) \ 536b8e80941Smrg foreach_list_typed(nir_variable, var, node, var_list) 537b8e80941Smrg 538b8e80941Smrg#define nir_foreach_variable_safe(var, var_list) \ 539b8e80941Smrg foreach_list_typed_safe(nir_variable, var, node, var_list) 540b8e80941Smrg 541b8e80941Smrgstatic inline bool 542b8e80941Smrgnir_variable_is_global(const nir_variable *var) 543b8e80941Smrg{ 544b8e80941Smrg return var->data.mode != nir_var_function_temp; 545b8e80941Smrg} 546b8e80941Smrg 547b8e80941Smrgtypedef struct nir_register { 548b8e80941Smrg struct exec_node node; 549b8e80941Smrg 550b8e80941Smrg unsigned num_components; /** < number of vector components */ 551b8e80941Smrg unsigned num_array_elems; /** < size of array (0 for no array) */ 552b8e80941Smrg 553b8e80941Smrg /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 554b8e80941Smrg uint8_t bit_size; 555b8e80941Smrg 556b8e80941Smrg /** generic register index. */ 557b8e80941Smrg unsigned index; 558b8e80941Smrg 559b8e80941Smrg /** only for debug purposes, can be NULL */ 560b8e80941Smrg const char *name; 561b8e80941Smrg 562b8e80941Smrg /** set of nir_srcs where this register is used (read from) */ 563b8e80941Smrg struct list_head uses; 564b8e80941Smrg 565b8e80941Smrg /** set of nir_dests where this register is defined (written to) */ 566b8e80941Smrg struct list_head defs; 567b8e80941Smrg 568b8e80941Smrg /** set of nir_ifs where this register is used as a condition */ 569b8e80941Smrg struct list_head if_uses; 570b8e80941Smrg} nir_register; 571b8e80941Smrg 572b8e80941Smrg#define nir_foreach_register(reg, reg_list) \ 573b8e80941Smrg foreach_list_typed(nir_register, reg, node, reg_list) 574b8e80941Smrg#define nir_foreach_register_safe(reg, reg_list) \ 575b8e80941Smrg foreach_list_typed_safe(nir_register, reg, node, reg_list) 576b8e80941Smrg 577b8e80941Smrgtypedef enum PACKED { 578b8e80941Smrg nir_instr_type_alu, 579b8e80941Smrg nir_instr_type_deref, 580b8e80941Smrg nir_instr_type_call, 581b8e80941Smrg nir_instr_type_tex, 582b8e80941Smrg nir_instr_type_intrinsic, 583b8e80941Smrg nir_instr_type_load_const, 584b8e80941Smrg nir_instr_type_jump, 585b8e80941Smrg nir_instr_type_ssa_undef, 586b8e80941Smrg nir_instr_type_phi, 587b8e80941Smrg nir_instr_type_parallel_copy, 588b8e80941Smrg} nir_instr_type; 589b8e80941Smrg 590b8e80941Smrgtypedef struct nir_instr { 591b8e80941Smrg struct exec_node node; 592b8e80941Smrg struct nir_block *block; 593b8e80941Smrg nir_instr_type type; 594b8e80941Smrg 595b8e80941Smrg /* A temporary for optimization and analysis passes to use for storing 596b8e80941Smrg * flags. For instance, DCE uses this to store the "dead/live" info. 597b8e80941Smrg */ 598b8e80941Smrg uint8_t pass_flags; 599b8e80941Smrg 600b8e80941Smrg /** generic instruction index. */ 601b8e80941Smrg unsigned index; 602b8e80941Smrg} nir_instr; 603b8e80941Smrg 604b8e80941Smrgstatic inline nir_instr * 605b8e80941Smrgnir_instr_next(nir_instr *instr) 606b8e80941Smrg{ 607b8e80941Smrg struct exec_node *next = exec_node_get_next(&instr->node); 608b8e80941Smrg if (exec_node_is_tail_sentinel(next)) 609b8e80941Smrg return NULL; 610b8e80941Smrg else 611b8e80941Smrg return exec_node_data(nir_instr, next, node); 612b8e80941Smrg} 613b8e80941Smrg 614b8e80941Smrgstatic inline nir_instr * 615b8e80941Smrgnir_instr_prev(nir_instr *instr) 616b8e80941Smrg{ 617b8e80941Smrg struct exec_node *prev = exec_node_get_prev(&instr->node); 618b8e80941Smrg if (exec_node_is_head_sentinel(prev)) 619b8e80941Smrg return NULL; 620b8e80941Smrg else 621b8e80941Smrg return exec_node_data(nir_instr, prev, node); 622b8e80941Smrg} 623b8e80941Smrg 624b8e80941Smrgstatic inline bool 625b8e80941Smrgnir_instr_is_first(const nir_instr *instr) 626b8e80941Smrg{ 627b8e80941Smrg return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node)); 628b8e80941Smrg} 629b8e80941Smrg 630b8e80941Smrgstatic inline bool 631b8e80941Smrgnir_instr_is_last(const nir_instr *instr) 632b8e80941Smrg{ 633b8e80941Smrg return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); 634b8e80941Smrg} 635b8e80941Smrg 636b8e80941Smrgtypedef struct nir_ssa_def { 637b8e80941Smrg /** for debugging only, can be NULL */ 638b8e80941Smrg const char* name; 639b8e80941Smrg 640b8e80941Smrg /** generic SSA definition index. */ 641b8e80941Smrg unsigned index; 642b8e80941Smrg 643b8e80941Smrg /** Index into the live_in and live_out bitfields */ 644b8e80941Smrg unsigned live_index; 645b8e80941Smrg 646b8e80941Smrg /** Instruction which produces this SSA value. */ 647b8e80941Smrg nir_instr *parent_instr; 648b8e80941Smrg 649b8e80941Smrg /** set of nir_instrs where this register is used (read from) */ 650b8e80941Smrg struct list_head uses; 651b8e80941Smrg 652b8e80941Smrg /** set of nir_ifs where this register is used as a condition */ 653b8e80941Smrg struct list_head if_uses; 654b8e80941Smrg 655b8e80941Smrg uint8_t num_components; 656b8e80941Smrg 657b8e80941Smrg /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 658b8e80941Smrg uint8_t bit_size; 659b8e80941Smrg} nir_ssa_def; 660b8e80941Smrg 661b8e80941Smrgstruct nir_src; 662b8e80941Smrg 663b8e80941Smrgtypedef struct { 664b8e80941Smrg nir_register *reg; 665b8e80941Smrg struct nir_src *indirect; /** < NULL for no indirect offset */ 666b8e80941Smrg unsigned base_offset; 667b8e80941Smrg 668b8e80941Smrg /* TODO use-def chain goes here */ 669b8e80941Smrg} nir_reg_src; 670b8e80941Smrg 671b8e80941Smrgtypedef struct { 672b8e80941Smrg nir_instr *parent_instr; 673b8e80941Smrg struct list_head def_link; 674b8e80941Smrg 675b8e80941Smrg nir_register *reg; 676b8e80941Smrg struct nir_src *indirect; /** < NULL for no indirect offset */ 677b8e80941Smrg unsigned base_offset; 678b8e80941Smrg 679b8e80941Smrg /* TODO def-use chain goes here */ 680b8e80941Smrg} nir_reg_dest; 681b8e80941Smrg 682b8e80941Smrgstruct nir_if; 683b8e80941Smrg 684b8e80941Smrgtypedef struct nir_src { 685b8e80941Smrg union { 686b8e80941Smrg /** Instruction that consumes this value as a source. */ 687b8e80941Smrg nir_instr *parent_instr; 688b8e80941Smrg struct nir_if *parent_if; 689b8e80941Smrg }; 690b8e80941Smrg 691b8e80941Smrg struct list_head use_link; 692b8e80941Smrg 693b8e80941Smrg union { 694b8e80941Smrg nir_reg_src reg; 695b8e80941Smrg nir_ssa_def *ssa; 696b8e80941Smrg }; 697b8e80941Smrg 698b8e80941Smrg bool is_ssa; 699b8e80941Smrg} nir_src; 700b8e80941Smrg 701b8e80941Smrgstatic inline nir_src 702b8e80941Smrgnir_src_init(void) 703b8e80941Smrg{ 704b8e80941Smrg nir_src src = { { NULL } }; 705b8e80941Smrg return src; 706b8e80941Smrg} 707b8e80941Smrg 708b8e80941Smrg#define NIR_SRC_INIT nir_src_init() 709b8e80941Smrg 710b8e80941Smrg#define nir_foreach_use(src, reg_or_ssa_def) \ 711b8e80941Smrg list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 712b8e80941Smrg 713b8e80941Smrg#define nir_foreach_use_safe(src, reg_or_ssa_def) \ 714b8e80941Smrg list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 715b8e80941Smrg 716b8e80941Smrg#define nir_foreach_if_use(src, reg_or_ssa_def) \ 717b8e80941Smrg list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 718b8e80941Smrg 719b8e80941Smrg#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 720b8e80941Smrg list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 721b8e80941Smrg 722b8e80941Smrgtypedef struct { 723b8e80941Smrg union { 724b8e80941Smrg nir_reg_dest reg; 725b8e80941Smrg nir_ssa_def ssa; 726b8e80941Smrg }; 727b8e80941Smrg 728b8e80941Smrg bool is_ssa; 729b8e80941Smrg} nir_dest; 730b8e80941Smrg 731b8e80941Smrgstatic inline nir_dest 732b8e80941Smrgnir_dest_init(void) 733b8e80941Smrg{ 734b8e80941Smrg nir_dest dest = { { { NULL } } }; 735b8e80941Smrg return dest; 736b8e80941Smrg} 737b8e80941Smrg 738b8e80941Smrg#define NIR_DEST_INIT nir_dest_init() 739b8e80941Smrg 740b8e80941Smrg#define nir_foreach_def(dest, reg) \ 741b8e80941Smrg list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 742b8e80941Smrg 743b8e80941Smrg#define nir_foreach_def_safe(dest, reg) \ 744b8e80941Smrg list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 745b8e80941Smrg 746b8e80941Smrgstatic inline nir_src 747b8e80941Smrgnir_src_for_ssa(nir_ssa_def *def) 748b8e80941Smrg{ 749b8e80941Smrg nir_src src = NIR_SRC_INIT; 750b8e80941Smrg 751b8e80941Smrg src.is_ssa = true; 752b8e80941Smrg src.ssa = def; 753b8e80941Smrg 754b8e80941Smrg return src; 755b8e80941Smrg} 756b8e80941Smrg 757b8e80941Smrgstatic inline nir_src 758b8e80941Smrgnir_src_for_reg(nir_register *reg) 759b8e80941Smrg{ 760b8e80941Smrg nir_src src = NIR_SRC_INIT; 761b8e80941Smrg 762b8e80941Smrg src.is_ssa = false; 763b8e80941Smrg src.reg.reg = reg; 764b8e80941Smrg src.reg.indirect = NULL; 765b8e80941Smrg src.reg.base_offset = 0; 766b8e80941Smrg 767b8e80941Smrg return src; 768b8e80941Smrg} 769b8e80941Smrg 770b8e80941Smrgstatic inline nir_dest 771b8e80941Smrgnir_dest_for_reg(nir_register *reg) 772b8e80941Smrg{ 773b8e80941Smrg nir_dest dest = NIR_DEST_INIT; 774b8e80941Smrg 775b8e80941Smrg dest.reg.reg = reg; 776b8e80941Smrg 777b8e80941Smrg return dest; 778b8e80941Smrg} 779b8e80941Smrg 780b8e80941Smrgstatic inline unsigned 781b8e80941Smrgnir_src_bit_size(nir_src src) 782b8e80941Smrg{ 783b8e80941Smrg return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 784b8e80941Smrg} 785b8e80941Smrg 786b8e80941Smrgstatic inline unsigned 787b8e80941Smrgnir_src_num_components(nir_src src) 788b8e80941Smrg{ 789b8e80941Smrg return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components; 790b8e80941Smrg} 791b8e80941Smrg 792b8e80941Smrgstatic inline bool 793b8e80941Smrgnir_src_is_const(nir_src src) 794b8e80941Smrg{ 795b8e80941Smrg return src.is_ssa && 796b8e80941Smrg src.ssa->parent_instr->type == nir_instr_type_load_const; 797b8e80941Smrg} 798b8e80941Smrg 799b8e80941Smrgint64_t nir_src_as_int(nir_src src); 800b8e80941Smrguint64_t nir_src_as_uint(nir_src src); 801b8e80941Smrgbool nir_src_as_bool(nir_src src); 802b8e80941Smrgdouble nir_src_as_float(nir_src src); 803b8e80941Smrgint64_t nir_src_comp_as_int(nir_src src, unsigned component); 804b8e80941Smrguint64_t nir_src_comp_as_uint(nir_src src, unsigned component); 805b8e80941Smrgbool nir_src_comp_as_bool(nir_src src, unsigned component); 806b8e80941Smrgdouble nir_src_comp_as_float(nir_src src, unsigned component); 807b8e80941Smrg 808b8e80941Smrgstatic inline unsigned 809b8e80941Smrgnir_dest_bit_size(nir_dest dest) 810b8e80941Smrg{ 811b8e80941Smrg return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size; 812b8e80941Smrg} 813b8e80941Smrg 814b8e80941Smrgstatic inline unsigned 815b8e80941Smrgnir_dest_num_components(nir_dest dest) 816b8e80941Smrg{ 817b8e80941Smrg return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; 818b8e80941Smrg} 819b8e80941Smrg 820b8e80941Smrgvoid nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); 821b8e80941Smrgvoid nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); 822b8e80941Smrg 823b8e80941Smrgtypedef struct { 824b8e80941Smrg nir_src src; 825b8e80941Smrg 826b8e80941Smrg /** 827b8e80941Smrg * \name input modifiers 828b8e80941Smrg */ 829b8e80941Smrg /*@{*/ 830b8e80941Smrg /** 831b8e80941Smrg * For inputs interpreted as floating point, flips the sign bit. For 832b8e80941Smrg * inputs interpreted as integers, performs the two's complement negation. 833b8e80941Smrg */ 834b8e80941Smrg bool negate; 835b8e80941Smrg 836b8e80941Smrg /** 837b8e80941Smrg * Clears the sign bit for floating point values, and computes the integer 838b8e80941Smrg * absolute value for integers. Note that the negate modifier acts after 839b8e80941Smrg * the absolute value modifier, therefore if both are set then all inputs 840b8e80941Smrg * will become negative. 841b8e80941Smrg */ 842b8e80941Smrg bool abs; 843b8e80941Smrg /*@}*/ 844b8e80941Smrg 845b8e80941Smrg /** 846b8e80941Smrg * For each input component, says which component of the register it is 847b8e80941Smrg * chosen from. Note that which elements of the swizzle are used and which 848b8e80941Smrg * are ignored are based on the write mask for most opcodes - for example, 849b8e80941Smrg * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and 850b8e80941Smrg * a swizzle of {2, x, 1, 0} where x means "don't care." 851b8e80941Smrg */ 852b8e80941Smrg uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; 853b8e80941Smrg} nir_alu_src; 854b8e80941Smrg 855b8e80941Smrgtypedef struct { 856b8e80941Smrg nir_dest dest; 857b8e80941Smrg 858b8e80941Smrg /** 859b8e80941Smrg * \name saturate output modifier 860b8e80941Smrg * 861b8e80941Smrg * Only valid for opcodes that output floating-point numbers. Clamps the 862b8e80941Smrg * output to between 0.0 and 1.0 inclusive. 863b8e80941Smrg */ 864b8e80941Smrg 865b8e80941Smrg bool saturate; 866b8e80941Smrg 867b8e80941Smrg unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */ 868b8e80941Smrg} nir_alu_dest; 869b8e80941Smrg 870b8e80941Smrg/** NIR sized and unsized types 871b8e80941Smrg * 872b8e80941Smrg * The values in this enum are carefully chosen so that the sized type is 873b8e80941Smrg * just the unsized type OR the number of bits. 874b8e80941Smrg */ 875b8e80941Smrgtypedef enum { 876b8e80941Smrg nir_type_invalid = 0, /* Not a valid type */ 877b8e80941Smrg nir_type_int = 2, 878b8e80941Smrg nir_type_uint = 4, 879b8e80941Smrg nir_type_bool = 6, 880b8e80941Smrg nir_type_float = 128, 881b8e80941Smrg nir_type_bool1 = 1 | nir_type_bool, 882b8e80941Smrg nir_type_bool32 = 32 | nir_type_bool, 883b8e80941Smrg nir_type_int1 = 1 | nir_type_int, 884b8e80941Smrg nir_type_int8 = 8 | nir_type_int, 885b8e80941Smrg nir_type_int16 = 16 | nir_type_int, 886b8e80941Smrg nir_type_int32 = 32 | nir_type_int, 887b8e80941Smrg nir_type_int64 = 64 | nir_type_int, 888b8e80941Smrg nir_type_uint1 = 1 | nir_type_uint, 889b8e80941Smrg nir_type_uint8 = 8 | nir_type_uint, 890b8e80941Smrg nir_type_uint16 = 16 | nir_type_uint, 891b8e80941Smrg nir_type_uint32 = 32 | nir_type_uint, 892b8e80941Smrg nir_type_uint64 = 64 | nir_type_uint, 893b8e80941Smrg nir_type_float16 = 16 | nir_type_float, 894b8e80941Smrg nir_type_float32 = 32 | nir_type_float, 895b8e80941Smrg nir_type_float64 = 64 | nir_type_float, 896b8e80941Smrg} nir_alu_type; 897b8e80941Smrg 898b8e80941Smrg#define NIR_ALU_TYPE_SIZE_MASK 0x79 899b8e80941Smrg#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86 900b8e80941Smrg 901b8e80941Smrgstatic inline unsigned 902b8e80941Smrgnir_alu_type_get_type_size(nir_alu_type type) 903b8e80941Smrg{ 904b8e80941Smrg return type & NIR_ALU_TYPE_SIZE_MASK; 905b8e80941Smrg} 906b8e80941Smrg 907b8e80941Smrgstatic inline unsigned 908b8e80941Smrgnir_alu_type_get_base_type(nir_alu_type type) 909b8e80941Smrg{ 910b8e80941Smrg return type & NIR_ALU_TYPE_BASE_TYPE_MASK; 911b8e80941Smrg} 912b8e80941Smrg 913b8e80941Smrgstatic inline nir_alu_type 914b8e80941Smrgnir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) 915b8e80941Smrg{ 916b8e80941Smrg switch (base_type) { 917b8e80941Smrg case GLSL_TYPE_BOOL: 918b8e80941Smrg return nir_type_bool1; 919b8e80941Smrg break; 920b8e80941Smrg case GLSL_TYPE_UINT: 921b8e80941Smrg return nir_type_uint32; 922b8e80941Smrg break; 923b8e80941Smrg case GLSL_TYPE_INT: 924b8e80941Smrg return nir_type_int32; 925b8e80941Smrg break; 926b8e80941Smrg case GLSL_TYPE_UINT16: 927b8e80941Smrg return nir_type_uint16; 928b8e80941Smrg break; 929b8e80941Smrg case GLSL_TYPE_INT16: 930b8e80941Smrg return nir_type_int16; 931b8e80941Smrg break; 932b8e80941Smrg case GLSL_TYPE_UINT8: 933b8e80941Smrg return nir_type_uint8; 934b8e80941Smrg case GLSL_TYPE_INT8: 935b8e80941Smrg return nir_type_int8; 936b8e80941Smrg case GLSL_TYPE_UINT64: 937b8e80941Smrg return nir_type_uint64; 938b8e80941Smrg break; 939b8e80941Smrg case GLSL_TYPE_INT64: 940b8e80941Smrg return nir_type_int64; 941b8e80941Smrg break; 942b8e80941Smrg case GLSL_TYPE_FLOAT: 943b8e80941Smrg return nir_type_float32; 944b8e80941Smrg break; 945b8e80941Smrg case GLSL_TYPE_FLOAT16: 946b8e80941Smrg return nir_type_float16; 947b8e80941Smrg break; 948b8e80941Smrg case GLSL_TYPE_DOUBLE: 949b8e80941Smrg return nir_type_float64; 950b8e80941Smrg break; 951b8e80941Smrg default: 952b8e80941Smrg unreachable("unknown type"); 953b8e80941Smrg } 954b8e80941Smrg} 955b8e80941Smrg 956b8e80941Smrgstatic inline nir_alu_type 957b8e80941Smrgnir_get_nir_type_for_glsl_type(const struct glsl_type *type) 958b8e80941Smrg{ 959b8e80941Smrg return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type)); 960b8e80941Smrg} 961b8e80941Smrg 962b8e80941Smrgnir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 963b8e80941Smrg nir_rounding_mode rnd); 964b8e80941Smrg 965b8e80941Smrgstatic inline nir_op 966b8e80941Smrgnir_op_vec(unsigned components) 967b8e80941Smrg{ 968b8e80941Smrg switch (components) { 969b8e80941Smrg case 1: return nir_op_imov; 970b8e80941Smrg case 2: return nir_op_vec2; 971b8e80941Smrg case 3: return nir_op_vec3; 972b8e80941Smrg case 4: return nir_op_vec4; 973b8e80941Smrg default: unreachable("bad component count"); 974b8e80941Smrg } 975b8e80941Smrg} 976b8e80941Smrg 977b8e80941Smrgtypedef enum { 978b8e80941Smrg NIR_OP_IS_COMMUTATIVE = (1 << 0), 979b8e80941Smrg NIR_OP_IS_ASSOCIATIVE = (1 << 1), 980b8e80941Smrg} nir_op_algebraic_property; 981b8e80941Smrg 982b8e80941Smrgtypedef struct { 983b8e80941Smrg const char *name; 984b8e80941Smrg 985b8e80941Smrg unsigned num_inputs; 986b8e80941Smrg 987b8e80941Smrg /** 988b8e80941Smrg * The number of components in the output 989b8e80941Smrg * 990b8e80941Smrg * If non-zero, this is the size of the output and input sizes are 991b8e80941Smrg * explicitly given; swizzle and writemask are still in effect, but if 992b8e80941Smrg * the output component is masked out, then the input component may 993b8e80941Smrg * still be in use. 994b8e80941Smrg * 995b8e80941Smrg * If zero, the opcode acts in the standard, per-component manner; the 996b8e80941Smrg * operation is performed on each component (except the ones that are 997b8e80941Smrg * masked out) with the input being taken from the input swizzle for 998b8e80941Smrg * that component. 999b8e80941Smrg * 1000b8e80941Smrg * The size of some of the inputs may be given (i.e. non-zero) even 1001b8e80941Smrg * though output_size is zero; in that case, the inputs with a zero 1002b8e80941Smrg * size act per-component, while the inputs with non-zero size don't. 1003b8e80941Smrg */ 1004b8e80941Smrg unsigned output_size; 1005b8e80941Smrg 1006b8e80941Smrg /** 1007b8e80941Smrg * The type of vector that the instruction outputs. Note that the 1008b8e80941Smrg * staurate modifier is only allowed on outputs with the float type. 1009b8e80941Smrg */ 1010b8e80941Smrg 1011b8e80941Smrg nir_alu_type output_type; 1012b8e80941Smrg 1013b8e80941Smrg /** 1014b8e80941Smrg * The number of components in each input 1015b8e80941Smrg */ 1016b8e80941Smrg unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; 1017b8e80941Smrg 1018b8e80941Smrg /** 1019b8e80941Smrg * The type of vector that each input takes. Note that negate and 1020b8e80941Smrg * absolute value are only allowed on inputs with int or float type and 1021b8e80941Smrg * behave differently on the two. 1022b8e80941Smrg */ 1023b8e80941Smrg nir_alu_type input_types[NIR_MAX_VEC_COMPONENTS]; 1024b8e80941Smrg 1025b8e80941Smrg nir_op_algebraic_property algebraic_properties; 1026b8e80941Smrg 1027b8e80941Smrg /* Whether this represents a numeric conversion opcode */ 1028b8e80941Smrg bool is_conversion; 1029b8e80941Smrg} nir_op_info; 1030b8e80941Smrg 1031b8e80941Smrgextern const nir_op_info nir_op_infos[nir_num_opcodes]; 1032b8e80941Smrg 1033b8e80941Smrgtypedef struct nir_alu_instr { 1034b8e80941Smrg nir_instr instr; 1035b8e80941Smrg nir_op op; 1036b8e80941Smrg 1037b8e80941Smrg /** Indicates that this ALU instruction generates an exact value 1038b8e80941Smrg * 1039b8e80941Smrg * This is kind of a mixture of GLSL "precise" and "invariant" and not 1040b8e80941Smrg * really equivalent to either. This indicates that the value generated by 1041b8e80941Smrg * this operation is high-precision and any code transformations that touch 1042b8e80941Smrg * it must ensure that the resulting value is bit-for-bit identical to the 1043b8e80941Smrg * original. 1044b8e80941Smrg */ 1045b8e80941Smrg bool exact; 1046b8e80941Smrg 1047b8e80941Smrg nir_alu_dest dest; 1048b8e80941Smrg nir_alu_src src[]; 1049b8e80941Smrg} nir_alu_instr; 1050b8e80941Smrg 1051b8e80941Smrgvoid nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, 1052b8e80941Smrg nir_alu_instr *instr); 1053b8e80941Smrgvoid nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, 1054b8e80941Smrg nir_alu_instr *instr); 1055b8e80941Smrg 1056b8e80941Smrg/* is this source channel used? */ 1057b8e80941Smrgstatic inline bool 1058b8e80941Smrgnir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src, 1059b8e80941Smrg unsigned channel) 1060b8e80941Smrg{ 1061b8e80941Smrg if (nir_op_infos[instr->op].input_sizes[src] > 0) 1062b8e80941Smrg return channel < nir_op_infos[instr->op].input_sizes[src]; 1063b8e80941Smrg 1064b8e80941Smrg return (instr->dest.write_mask >> channel) & 1; 1065b8e80941Smrg} 1066b8e80941Smrg 1067b8e80941Smrgstatic inline nir_component_mask_t 1068b8e80941Smrgnir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src) 1069b8e80941Smrg{ 1070b8e80941Smrg nir_component_mask_t read_mask = 0; 1071b8e80941Smrg for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) { 1072b8e80941Smrg if (!nir_alu_instr_channel_used(instr, src, c)) 1073b8e80941Smrg continue; 1074b8e80941Smrg 1075b8e80941Smrg read_mask |= (1 << instr->src[src].swizzle[c]); 1076b8e80941Smrg } 1077b8e80941Smrg return read_mask; 1078b8e80941Smrg} 1079b8e80941Smrg 1080b8e80941Smrg/* 1081b8e80941Smrg * For instructions whose destinations are SSA, get the number of channels 1082b8e80941Smrg * used for a source 1083b8e80941Smrg */ 1084b8e80941Smrgstatic inline unsigned 1085b8e80941Smrgnir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) 1086b8e80941Smrg{ 1087b8e80941Smrg assert(instr->dest.dest.is_ssa); 1088b8e80941Smrg 1089b8e80941Smrg if (nir_op_infos[instr->op].input_sizes[src] > 0) 1090b8e80941Smrg return nir_op_infos[instr->op].input_sizes[src]; 1091b8e80941Smrg 1092b8e80941Smrg return instr->dest.dest.ssa.num_components; 1093b8e80941Smrg} 1094b8e80941Smrg 1095b8e80941Smrgbool nir_const_value_negative_equal(const nir_const_value *c1, 1096b8e80941Smrg const nir_const_value *c2, 1097b8e80941Smrg unsigned components, 1098b8e80941Smrg nir_alu_type base_type, 1099b8e80941Smrg unsigned bits); 1100b8e80941Smrg 1101b8e80941Smrgbool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, 1102b8e80941Smrg unsigned src1, unsigned src2); 1103b8e80941Smrg 1104b8e80941Smrgbool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, 1105b8e80941Smrg const nir_alu_instr *alu2, 1106b8e80941Smrg unsigned src1, unsigned src2); 1107b8e80941Smrg 1108b8e80941Smrgtypedef enum { 1109b8e80941Smrg nir_deref_type_var, 1110b8e80941Smrg nir_deref_type_array, 1111b8e80941Smrg nir_deref_type_array_wildcard, 1112b8e80941Smrg nir_deref_type_ptr_as_array, 1113b8e80941Smrg nir_deref_type_struct, 1114b8e80941Smrg nir_deref_type_cast, 1115b8e80941Smrg} nir_deref_type; 1116b8e80941Smrg 1117b8e80941Smrgtypedef struct { 1118b8e80941Smrg nir_instr instr; 1119b8e80941Smrg 1120b8e80941Smrg /** The type of this deref instruction */ 1121b8e80941Smrg nir_deref_type deref_type; 1122b8e80941Smrg 1123b8e80941Smrg /** The mode of the underlying variable */ 1124b8e80941Smrg nir_variable_mode mode; 1125b8e80941Smrg 1126b8e80941Smrg /** The dereferenced type of the resulting pointer value */ 1127b8e80941Smrg const struct glsl_type *type; 1128b8e80941Smrg 1129b8e80941Smrg union { 1130b8e80941Smrg /** Variable being dereferenced if deref_type is a deref_var */ 1131b8e80941Smrg nir_variable *var; 1132b8e80941Smrg 1133b8e80941Smrg /** Parent deref if deref_type is not deref_var */ 1134b8e80941Smrg nir_src parent; 1135b8e80941Smrg }; 1136b8e80941Smrg 1137b8e80941Smrg /** Additional deref parameters */ 1138b8e80941Smrg union { 1139b8e80941Smrg struct { 1140b8e80941Smrg nir_src index; 1141b8e80941Smrg } arr; 1142b8e80941Smrg 1143b8e80941Smrg struct { 1144b8e80941Smrg unsigned index; 1145b8e80941Smrg } strct; 1146b8e80941Smrg 1147b8e80941Smrg struct { 1148b8e80941Smrg unsigned ptr_stride; 1149b8e80941Smrg } cast; 1150b8e80941Smrg }; 1151b8e80941Smrg 1152b8e80941Smrg /** Destination to store the resulting "pointer" */ 1153b8e80941Smrg nir_dest dest; 1154b8e80941Smrg} nir_deref_instr; 1155b8e80941Smrg 1156b8e80941Smrgstatic inline nir_deref_instr *nir_src_as_deref(nir_src src); 1157b8e80941Smrg 1158b8e80941Smrgstatic inline nir_deref_instr * 1159b8e80941Smrgnir_deref_instr_parent(const nir_deref_instr *instr) 1160b8e80941Smrg{ 1161b8e80941Smrg if (instr->deref_type == nir_deref_type_var) 1162b8e80941Smrg return NULL; 1163b8e80941Smrg else 1164b8e80941Smrg return nir_src_as_deref(instr->parent); 1165b8e80941Smrg} 1166b8e80941Smrg 1167b8e80941Smrgstatic inline nir_variable * 1168b8e80941Smrgnir_deref_instr_get_variable(const nir_deref_instr *instr) 1169b8e80941Smrg{ 1170b8e80941Smrg while (instr->deref_type != nir_deref_type_var) { 1171b8e80941Smrg if (instr->deref_type == nir_deref_type_cast) 1172b8e80941Smrg return NULL; 1173b8e80941Smrg 1174b8e80941Smrg instr = nir_deref_instr_parent(instr); 1175b8e80941Smrg } 1176b8e80941Smrg 1177b8e80941Smrg return instr->var; 1178b8e80941Smrg} 1179b8e80941Smrg 1180b8e80941Smrgbool nir_deref_instr_has_indirect(nir_deref_instr *instr); 1181b8e80941Smrg 1182b8e80941Smrgbool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); 1183b8e80941Smrg 1184b8e80941Smrgunsigned nir_deref_instr_ptr_as_array_stride(nir_deref_instr *instr); 1185b8e80941Smrg 1186b8e80941Smrgtypedef struct { 1187b8e80941Smrg nir_instr instr; 1188b8e80941Smrg 1189b8e80941Smrg struct nir_function *callee; 1190b8e80941Smrg 1191b8e80941Smrg unsigned num_params; 1192b8e80941Smrg nir_src params[]; 1193b8e80941Smrg} nir_call_instr; 1194b8e80941Smrg 1195b8e80941Smrg#include "nir_intrinsics.h" 1196b8e80941Smrg 1197b8e80941Smrg#define NIR_INTRINSIC_MAX_CONST_INDEX 4 1198b8e80941Smrg 1199b8e80941Smrg/** Represents an intrinsic 1200b8e80941Smrg * 1201b8e80941Smrg * An intrinsic is an instruction type for handling things that are 1202b8e80941Smrg * more-or-less regular operations but don't just consume and produce SSA 1203b8e80941Smrg * values like ALU operations do. Intrinsics are not for things that have 1204b8e80941Smrg * special semantic meaning such as phi nodes and parallel copies. 1205b8e80941Smrg * Examples of intrinsics include variable load/store operations, system 1206b8e80941Smrg * value loads, and the like. Even though texturing more-or-less falls 1207b8e80941Smrg * under this category, texturing is its own instruction type because 1208b8e80941Smrg * trying to represent texturing with intrinsics would lead to a 1209b8e80941Smrg * combinatorial explosion of intrinsic opcodes. 1210b8e80941Smrg * 1211b8e80941Smrg * By having a single instruction type for handling a lot of different 1212b8e80941Smrg * cases, optimization passes can look for intrinsics and, for the most 1213b8e80941Smrg * part, completely ignore them. Each intrinsic type also has a few 1214b8e80941Smrg * possible flags that govern whether or not they can be reordered or 1215b8e80941Smrg * eliminated. That way passes like dead code elimination can still work 1216b8e80941Smrg * on intrisics without understanding the meaning of each. 1217b8e80941Smrg * 1218b8e80941Smrg * Each intrinsic has some number of constant indices, some number of 1219b8e80941Smrg * variables, and some number of sources. What these sources, variables, 1220b8e80941Smrg * and indices mean depends on the intrinsic and is documented with the 1221b8e80941Smrg * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture 1222b8e80941Smrg * instructions are the only types of instruction that can operate on 1223b8e80941Smrg * variables. 1224b8e80941Smrg */ 1225b8e80941Smrgtypedef struct { 1226b8e80941Smrg nir_instr instr; 1227b8e80941Smrg 1228b8e80941Smrg nir_intrinsic_op intrinsic; 1229b8e80941Smrg 1230b8e80941Smrg nir_dest dest; 1231b8e80941Smrg 1232b8e80941Smrg /** number of components if this is a vectorized intrinsic 1233b8e80941Smrg * 1234b8e80941Smrg * Similarly to ALU operations, some intrinsics are vectorized. 1235b8e80941Smrg * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. 1236b8e80941Smrg * For vectorized intrinsics, the num_components field specifies the 1237b8e80941Smrg * number of destination components and the number of source components 1238b8e80941Smrg * for all sources with nir_intrinsic_infos.src_components[i] == 0. 1239b8e80941Smrg */ 1240b8e80941Smrg uint8_t num_components; 1241b8e80941Smrg 1242b8e80941Smrg int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; 1243b8e80941Smrg 1244b8e80941Smrg nir_src src[]; 1245b8e80941Smrg} nir_intrinsic_instr; 1246b8e80941Smrg 1247b8e80941Smrgstatic inline nir_variable * 1248b8e80941Smrgnir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) 1249b8e80941Smrg{ 1250b8e80941Smrg return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); 1251b8e80941Smrg} 1252b8e80941Smrg 1253b8e80941Smrg/** 1254b8e80941Smrg * \name NIR intrinsics semantic flags 1255b8e80941Smrg * 1256b8e80941Smrg * information about what the compiler can do with the intrinsics. 1257b8e80941Smrg * 1258b8e80941Smrg * \sa nir_intrinsic_info::flags 1259b8e80941Smrg */ 1260b8e80941Smrgtypedef enum { 1261b8e80941Smrg /** 1262b8e80941Smrg * whether the intrinsic can be safely eliminated if none of its output 1263b8e80941Smrg * value is not being used. 1264b8e80941Smrg */ 1265b8e80941Smrg NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), 1266b8e80941Smrg 1267b8e80941Smrg /** 1268b8e80941Smrg * Whether the intrinsic can be reordered with respect to any other 1269b8e80941Smrg * intrinsic, i.e. whether the only reordering dependencies of the 1270b8e80941Smrg * intrinsic are due to the register reads/writes. 1271b8e80941Smrg */ 1272b8e80941Smrg NIR_INTRINSIC_CAN_REORDER = (1 << 1), 1273b8e80941Smrg} nir_intrinsic_semantic_flag; 1274b8e80941Smrg 1275b8e80941Smrg/** 1276b8e80941Smrg * \name NIR intrinsics const-index flag 1277b8e80941Smrg * 1278b8e80941Smrg * Indicates the usage of a const_index slot. 1279b8e80941Smrg * 1280b8e80941Smrg * \sa nir_intrinsic_info::index_map 1281b8e80941Smrg */ 1282b8e80941Smrgtypedef enum { 1283b8e80941Smrg /** 1284b8e80941Smrg * Generally instructions that take a offset src argument, can encode 1285b8e80941Smrg * a constant 'base' value which is added to the offset. 1286b8e80941Smrg */ 1287b8e80941Smrg NIR_INTRINSIC_BASE = 1, 1288b8e80941Smrg 1289b8e80941Smrg /** 1290b8e80941Smrg * For store instructions, a writemask for the store. 1291b8e80941Smrg */ 1292b8e80941Smrg NIR_INTRINSIC_WRMASK = 2, 1293b8e80941Smrg 1294b8e80941Smrg /** 1295b8e80941Smrg * The stream-id for GS emit_vertex/end_primitive intrinsics. 1296b8e80941Smrg */ 1297b8e80941Smrg NIR_INTRINSIC_STREAM_ID = 3, 1298b8e80941Smrg 1299b8e80941Smrg /** 1300b8e80941Smrg * The clip-plane id for load_user_clip_plane intrinsic. 1301b8e80941Smrg */ 1302b8e80941Smrg NIR_INTRINSIC_UCP_ID = 4, 1303b8e80941Smrg 1304b8e80941Smrg /** 1305b8e80941Smrg * The amount of data, starting from BASE, that this instruction may 1306b8e80941Smrg * access. This is used to provide bounds if the offset is not constant. 1307b8e80941Smrg */ 1308b8e80941Smrg NIR_INTRINSIC_RANGE = 5, 1309b8e80941Smrg 1310b8e80941Smrg /** 1311b8e80941Smrg * The Vulkan descriptor set for vulkan_resource_index intrinsic. 1312b8e80941Smrg */ 1313b8e80941Smrg NIR_INTRINSIC_DESC_SET = 6, 1314b8e80941Smrg 1315b8e80941Smrg /** 1316b8e80941Smrg * The Vulkan descriptor set binding for vulkan_resource_index intrinsic. 1317b8e80941Smrg */ 1318b8e80941Smrg NIR_INTRINSIC_BINDING = 7, 1319b8e80941Smrg 1320b8e80941Smrg /** 1321b8e80941Smrg * Component offset. 1322b8e80941Smrg */ 1323b8e80941Smrg NIR_INTRINSIC_COMPONENT = 8, 1324b8e80941Smrg 1325b8e80941Smrg /** 1326b8e80941Smrg * Interpolation mode (only meaningful for FS inputs). 1327b8e80941Smrg */ 1328b8e80941Smrg NIR_INTRINSIC_INTERP_MODE = 9, 1329b8e80941Smrg 1330b8e80941Smrg /** 1331b8e80941Smrg * A binary nir_op to use when performing a reduction or scan operation 1332b8e80941Smrg */ 1333b8e80941Smrg NIR_INTRINSIC_REDUCTION_OP = 10, 1334b8e80941Smrg 1335b8e80941Smrg /** 1336b8e80941Smrg * Cluster size for reduction operations 1337b8e80941Smrg */ 1338b8e80941Smrg NIR_INTRINSIC_CLUSTER_SIZE = 11, 1339b8e80941Smrg 1340b8e80941Smrg /** 1341b8e80941Smrg * Parameter index for a load_param intrinsic 1342b8e80941Smrg */ 1343b8e80941Smrg NIR_INTRINSIC_PARAM_IDX = 12, 1344b8e80941Smrg 1345b8e80941Smrg /** 1346b8e80941Smrg * Image dimensionality for image intrinsics 1347b8e80941Smrg * 1348b8e80941Smrg * One of GLSL_SAMPLER_DIM_* 1349b8e80941Smrg */ 1350b8e80941Smrg NIR_INTRINSIC_IMAGE_DIM = 13, 1351b8e80941Smrg 1352b8e80941Smrg /** 1353b8e80941Smrg * Non-zero if we are accessing an array image 1354b8e80941Smrg */ 1355b8e80941Smrg NIR_INTRINSIC_IMAGE_ARRAY = 14, 1356b8e80941Smrg 1357b8e80941Smrg /** 1358b8e80941Smrg * Image format for image intrinsics 1359b8e80941Smrg */ 1360b8e80941Smrg NIR_INTRINSIC_FORMAT = 15, 1361b8e80941Smrg 1362b8e80941Smrg /** 1363b8e80941Smrg * Access qualifiers for image and memory access intrinsics 1364b8e80941Smrg */ 1365b8e80941Smrg NIR_INTRINSIC_ACCESS = 16, 1366b8e80941Smrg 1367b8e80941Smrg /** 1368b8e80941Smrg * Alignment for offsets and addresses 1369b8e80941Smrg * 1370b8e80941Smrg * These two parameters, specify an alignment in terms of a multiplier and 1371b8e80941Smrg * an offset. The offset or address parameter X of the intrinsic is 1372b8e80941Smrg * guaranteed to satisfy the following: 1373b8e80941Smrg * 1374b8e80941Smrg * (X - align_offset) % align_mul == 0 1375b8e80941Smrg */ 1376b8e80941Smrg NIR_INTRINSIC_ALIGN_MUL = 17, 1377b8e80941Smrg NIR_INTRINSIC_ALIGN_OFFSET = 18, 1378b8e80941Smrg 1379b8e80941Smrg /** 1380b8e80941Smrg * The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. 1381b8e80941Smrg */ 1382b8e80941Smrg NIR_INTRINSIC_DESC_TYPE = 19, 1383b8e80941Smrg 1384b8e80941Smrg /* Separate source/dest access flags for copies */ 1385b8e80941Smrg NIR_INTRINSIC_SRC_ACCESS, 1386b8e80941Smrg NIR_INTRINSIC_DST_ACCESS, 1387b8e80941Smrg 1388b8e80941Smrg NIR_INTRINSIC_NUM_INDEX_FLAGS, 1389b8e80941Smrg 1390b8e80941Smrg} nir_intrinsic_index_flag; 1391b8e80941Smrg 1392b8e80941Smrg#define NIR_INTRINSIC_MAX_INPUTS 5 1393b8e80941Smrg 1394b8e80941Smrgtypedef struct { 1395b8e80941Smrg const char *name; 1396b8e80941Smrg 1397b8e80941Smrg unsigned num_srcs; /** < number of register/SSA inputs */ 1398b8e80941Smrg 1399b8e80941Smrg /** number of components of each input register 1400b8e80941Smrg * 1401b8e80941Smrg * If this value is 0, the number of components is given by the 1402b8e80941Smrg * num_components field of nir_intrinsic_instr. If this value is -1, the 1403b8e80941Smrg * intrinsic consumes however many components are provided and it is not 1404b8e80941Smrg * validated at all. 1405b8e80941Smrg */ 1406b8e80941Smrg int src_components[NIR_INTRINSIC_MAX_INPUTS]; 1407b8e80941Smrg 1408b8e80941Smrg bool has_dest; 1409b8e80941Smrg 1410b8e80941Smrg /** number of components of the output register 1411b8e80941Smrg * 1412b8e80941Smrg * If this value is 0, the number of components is given by the 1413b8e80941Smrg * num_components field of nir_intrinsic_instr. 1414b8e80941Smrg */ 1415b8e80941Smrg unsigned dest_components; 1416b8e80941Smrg 1417b8e80941Smrg /** bitfield of legal bit sizes */ 1418b8e80941Smrg unsigned dest_bit_sizes; 1419b8e80941Smrg 1420b8e80941Smrg /** the number of constant indices used by the intrinsic */ 1421b8e80941Smrg unsigned num_indices; 1422b8e80941Smrg 1423b8e80941Smrg /** indicates the usage of intr->const_index[n] */ 1424b8e80941Smrg unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1425b8e80941Smrg 1426b8e80941Smrg /** semantic flags for calls to this intrinsic */ 1427b8e80941Smrg nir_intrinsic_semantic_flag flags; 1428b8e80941Smrg} nir_intrinsic_info; 1429b8e80941Smrg 1430b8e80941Smrgextern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1431b8e80941Smrg 1432b8e80941Smrgstatic inline unsigned 1433b8e80941Smrgnir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) 1434b8e80941Smrg{ 1435b8e80941Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1436b8e80941Smrg assert(srcn < info->num_srcs); 1437b8e80941Smrg if (info->src_components[srcn] > 0) 1438b8e80941Smrg return info->src_components[srcn]; 1439b8e80941Smrg else if (info->src_components[srcn] == 0) 1440b8e80941Smrg return intr->num_components; 1441b8e80941Smrg else 1442b8e80941Smrg return nir_src_num_components(intr->src[srcn]); 1443b8e80941Smrg} 1444b8e80941Smrg 1445b8e80941Smrgstatic inline unsigned 1446b8e80941Smrgnir_intrinsic_dest_components(nir_intrinsic_instr *intr) 1447b8e80941Smrg{ 1448b8e80941Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1449b8e80941Smrg if (!info->has_dest) 1450b8e80941Smrg return 0; 1451b8e80941Smrg else if (info->dest_components) 1452b8e80941Smrg return info->dest_components; 1453b8e80941Smrg else 1454b8e80941Smrg return intr->num_components; 1455b8e80941Smrg} 1456b8e80941Smrg 1457b8e80941Smrg#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ 1458b8e80941Smrgstatic inline type \ 1459b8e80941Smrgnir_intrinsic_##name(const nir_intrinsic_instr *instr) \ 1460b8e80941Smrg{ \ 1461b8e80941Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1462b8e80941Smrg assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1463b8e80941Smrg return (type)instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \ 1464b8e80941Smrg} \ 1465b8e80941Smrgstatic inline void \ 1466b8e80941Smrgnir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ 1467b8e80941Smrg{ \ 1468b8e80941Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1469b8e80941Smrg assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1470b8e80941Smrg instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ 1471b8e80941Smrg} 1472b8e80941Smrg 1473b8e80941SmrgINTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) 1474b8e80941SmrgINTRINSIC_IDX_ACCESSORS(base, BASE, int) 1475b8e80941SmrgINTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) 1476b8e80941SmrgINTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) 1477b8e80941SmrgINTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) 1478b8e80941SmrgINTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) 1479b8e80941SmrgINTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) 1480b8e80941SmrgINTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) 1481b8e80941SmrgINTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned) 1482b8e80941SmrgINTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned) 1483b8e80941SmrgINTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned) 1484b8e80941SmrgINTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) 1485b8e80941SmrgINTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) 1486b8e80941SmrgINTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) 1487b8e80941SmrgINTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) 1488b8e80941SmrgINTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) 1489b8e80941SmrgINTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) 1490b8e80941SmrgINTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) 1491b8e80941SmrgINTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) 1492b8e80941SmrgINTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) 1493b8e80941SmrgINTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) 1494b8e80941Smrg 1495b8e80941Smrgstatic inline void 1496b8e80941Smrgnir_intrinsic_set_align(nir_intrinsic_instr *intrin, 1497b8e80941Smrg unsigned align_mul, unsigned align_offset) 1498b8e80941Smrg{ 1499b8e80941Smrg assert(util_is_power_of_two_nonzero(align_mul)); 1500b8e80941Smrg assert(align_offset < align_mul); 1501b8e80941Smrg nir_intrinsic_set_align_mul(intrin, align_mul); 1502b8e80941Smrg nir_intrinsic_set_align_offset(intrin, align_offset); 1503b8e80941Smrg} 1504b8e80941Smrg 1505b8e80941Smrg/** Returns a simple alignment for a load/store intrinsic offset 1506b8e80941Smrg * 1507b8e80941Smrg * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 1508b8e80941Smrg * and ALIGN_OFFSET parameters, this helper takes both into account and 1509b8e80941Smrg * provides a single simple alignment parameter. The offset X is guaranteed 1510b8e80941Smrg * to satisfy X % align == 0. 1511b8e80941Smrg */ 1512b8e80941Smrgstatic inline unsigned 1513b8e80941Smrgnir_intrinsic_align(const nir_intrinsic_instr *intrin) 1514b8e80941Smrg{ 1515b8e80941Smrg const unsigned align_mul = nir_intrinsic_align_mul(intrin); 1516b8e80941Smrg const unsigned align_offset = nir_intrinsic_align_offset(intrin); 1517b8e80941Smrg assert(align_offset < align_mul); 1518b8e80941Smrg return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; 1519b8e80941Smrg} 1520b8e80941Smrg 1521b8e80941Smrg/* Converts a image_deref_* intrinsic into a image_* one */ 1522b8e80941Smrgvoid nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 1523b8e80941Smrg nir_ssa_def *handle, bool bindless); 1524b8e80941Smrg 1525b8e80941Smrg/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */ 1526b8e80941Smrgstatic inline bool 1527b8e80941Smrgnir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 1528b8e80941Smrg{ 1529b8e80941Smrg const nir_intrinsic_info *info = 1530b8e80941Smrg &nir_intrinsic_infos[instr->intrinsic]; 1531b8e80941Smrg return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 1532b8e80941Smrg (info->flags & NIR_INTRINSIC_CAN_REORDER); 1533b8e80941Smrg} 1534b8e80941Smrg 1535b8e80941Smrg/** 1536b8e80941Smrg * \group texture information 1537b8e80941Smrg * 1538b8e80941Smrg * This gives semantic information about textures which is useful to the 1539b8e80941Smrg * frontend, the backend, and lowering passes, but not the optimizer. 1540b8e80941Smrg */ 1541b8e80941Smrg 1542b8e80941Smrgtypedef enum { 1543b8e80941Smrg nir_tex_src_coord, 1544b8e80941Smrg nir_tex_src_projector, 1545b8e80941Smrg nir_tex_src_comparator, /* shadow comparator */ 1546b8e80941Smrg nir_tex_src_offset, 1547b8e80941Smrg nir_tex_src_bias, 1548b8e80941Smrg nir_tex_src_lod, 1549b8e80941Smrg nir_tex_src_min_lod, 1550b8e80941Smrg nir_tex_src_ms_index, /* MSAA sample index */ 1551b8e80941Smrg nir_tex_src_ms_mcs, /* MSAA compression value */ 1552b8e80941Smrg nir_tex_src_ddx, 1553b8e80941Smrg nir_tex_src_ddy, 1554b8e80941Smrg nir_tex_src_texture_deref, /* < deref pointing to the texture */ 1555b8e80941Smrg nir_tex_src_sampler_deref, /* < deref pointing to the sampler */ 1556b8e80941Smrg nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ 1557b8e80941Smrg nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ 1558b8e80941Smrg nir_tex_src_texture_handle, /* < bindless texture handle */ 1559b8e80941Smrg nir_tex_src_sampler_handle, /* < bindless sampler handle */ 1560b8e80941Smrg nir_tex_src_plane, /* < selects plane for planar textures */ 1561b8e80941Smrg nir_num_tex_src_types 1562b8e80941Smrg} nir_tex_src_type; 1563b8e80941Smrg 1564b8e80941Smrgtypedef struct { 1565b8e80941Smrg nir_src src; 1566b8e80941Smrg nir_tex_src_type src_type; 1567b8e80941Smrg} nir_tex_src; 1568b8e80941Smrg 1569b8e80941Smrgtypedef enum { 1570b8e80941Smrg nir_texop_tex, /**< Regular texture look-up */ 1571b8e80941Smrg nir_texop_txb, /**< Texture look-up with LOD bias */ 1572b8e80941Smrg nir_texop_txl, /**< Texture look-up with explicit LOD */ 1573b8e80941Smrg nir_texop_txd, /**< Texture look-up with partial derivatives */ 1574b8e80941Smrg nir_texop_txf, /**< Texel fetch with explicit LOD */ 1575b8e80941Smrg nir_texop_txf_ms, /**< Multisample texture fetch */ 1576b8e80941Smrg nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 1577b8e80941Smrg nir_texop_txf_ms_mcs, /**< Multisample compression value fetch */ 1578b8e80941Smrg nir_texop_txs, /**< Texture size */ 1579b8e80941Smrg nir_texop_lod, /**< Texture lod query */ 1580b8e80941Smrg nir_texop_tg4, /**< Texture gather */ 1581b8e80941Smrg nir_texop_query_levels, /**< Texture levels query */ 1582b8e80941Smrg nir_texop_texture_samples, /**< Texture samples query */ 1583b8e80941Smrg nir_texop_samples_identical, /**< Query whether all samples are definitely 1584b8e80941Smrg * identical. 1585b8e80941Smrg */ 1586b8e80941Smrg} nir_texop; 1587b8e80941Smrg 1588b8e80941Smrgtypedef struct { 1589b8e80941Smrg nir_instr instr; 1590b8e80941Smrg 1591b8e80941Smrg enum glsl_sampler_dim sampler_dim; 1592b8e80941Smrg nir_alu_type dest_type; 1593b8e80941Smrg 1594b8e80941Smrg nir_texop op; 1595b8e80941Smrg nir_dest dest; 1596b8e80941Smrg nir_tex_src *src; 1597b8e80941Smrg unsigned num_srcs, coord_components; 1598b8e80941Smrg bool is_array, is_shadow; 1599b8e80941Smrg 1600b8e80941Smrg /** 1601b8e80941Smrg * If is_shadow is true, whether this is the old-style shadow that outputs 4 1602b8e80941Smrg * components or the new-style shadow that outputs 1 component. 1603b8e80941Smrg */ 1604b8e80941Smrg bool is_new_style_shadow; 1605b8e80941Smrg 1606b8e80941Smrg /* gather component selector */ 1607b8e80941Smrg unsigned component : 2; 1608b8e80941Smrg 1609b8e80941Smrg /* gather offsets */ 1610b8e80941Smrg int8_t tg4_offsets[4][2]; 1611b8e80941Smrg 1612b8e80941Smrg /* True if the texture index or handle is not dynamically uniform */ 1613b8e80941Smrg bool texture_non_uniform; 1614b8e80941Smrg 1615b8e80941Smrg /* True if the sampler index or handle is not dynamically uniform */ 1616b8e80941Smrg bool sampler_non_uniform; 1617b8e80941Smrg 1618b8e80941Smrg /** The texture index 1619b8e80941Smrg * 1620b8e80941Smrg * If this texture instruction has a nir_tex_src_texture_offset source, 1621b8e80941Smrg * then the texture index is given by texture_index + texture_offset. 1622b8e80941Smrg */ 1623b8e80941Smrg unsigned texture_index; 1624b8e80941Smrg 1625b8e80941Smrg /** The size of the texture array or 0 if it's not an array */ 1626b8e80941Smrg unsigned texture_array_size; 1627b8e80941Smrg 1628b8e80941Smrg /** The sampler index 1629b8e80941Smrg * 1630b8e80941Smrg * The following operations do not require a sampler and, as such, this 1631b8e80941Smrg * field should be ignored: 1632b8e80941Smrg * - nir_texop_txf 1633b8e80941Smrg * - nir_texop_txf_ms 1634b8e80941Smrg * - nir_texop_txs 1635b8e80941Smrg * - nir_texop_lod 1636b8e80941Smrg * - nir_texop_query_levels 1637b8e80941Smrg * - nir_texop_texture_samples 1638b8e80941Smrg * - nir_texop_samples_identical 1639b8e80941Smrg * 1640b8e80941Smrg * If this texture instruction has a nir_tex_src_sampler_offset source, 1641b8e80941Smrg * then the sampler index is given by sampler_index + sampler_offset. 1642b8e80941Smrg */ 1643b8e80941Smrg unsigned sampler_index; 1644b8e80941Smrg} nir_tex_instr; 1645b8e80941Smrg 1646b8e80941Smrgstatic inline unsigned 1647b8e80941Smrgnir_tex_instr_dest_size(const nir_tex_instr *instr) 1648b8e80941Smrg{ 1649b8e80941Smrg switch (instr->op) { 1650b8e80941Smrg case nir_texop_txs: { 1651b8e80941Smrg unsigned ret; 1652b8e80941Smrg switch (instr->sampler_dim) { 1653b8e80941Smrg case GLSL_SAMPLER_DIM_1D: 1654b8e80941Smrg case GLSL_SAMPLER_DIM_BUF: 1655b8e80941Smrg ret = 1; 1656b8e80941Smrg break; 1657b8e80941Smrg case GLSL_SAMPLER_DIM_2D: 1658b8e80941Smrg case GLSL_SAMPLER_DIM_CUBE: 1659b8e80941Smrg case GLSL_SAMPLER_DIM_MS: 1660b8e80941Smrg case GLSL_SAMPLER_DIM_RECT: 1661b8e80941Smrg case GLSL_SAMPLER_DIM_EXTERNAL: 1662b8e80941Smrg case GLSL_SAMPLER_DIM_SUBPASS: 1663b8e80941Smrg ret = 2; 1664b8e80941Smrg break; 1665b8e80941Smrg case GLSL_SAMPLER_DIM_3D: 1666b8e80941Smrg ret = 3; 1667b8e80941Smrg break; 1668b8e80941Smrg default: 1669b8e80941Smrg unreachable("not reached"); 1670b8e80941Smrg } 1671b8e80941Smrg if (instr->is_array) 1672b8e80941Smrg ret++; 1673b8e80941Smrg return ret; 1674b8e80941Smrg } 1675b8e80941Smrg 1676b8e80941Smrg case nir_texop_lod: 1677b8e80941Smrg return 2; 1678b8e80941Smrg 1679b8e80941Smrg case nir_texop_texture_samples: 1680b8e80941Smrg case nir_texop_query_levels: 1681b8e80941Smrg case nir_texop_samples_identical: 1682b8e80941Smrg return 1; 1683b8e80941Smrg 1684b8e80941Smrg default: 1685b8e80941Smrg if (instr->is_shadow && instr->is_new_style_shadow) 1686b8e80941Smrg return 1; 1687b8e80941Smrg 1688b8e80941Smrg return 4; 1689b8e80941Smrg } 1690b8e80941Smrg} 1691b8e80941Smrg 1692b8e80941Smrg/* Returns true if this texture operation queries something about the texture 1693b8e80941Smrg * rather than actually sampling it. 1694b8e80941Smrg */ 1695b8e80941Smrgstatic inline bool 1696b8e80941Smrgnir_tex_instr_is_query(const nir_tex_instr *instr) 1697b8e80941Smrg{ 1698b8e80941Smrg switch (instr->op) { 1699b8e80941Smrg case nir_texop_txs: 1700b8e80941Smrg case nir_texop_lod: 1701b8e80941Smrg case nir_texop_texture_samples: 1702b8e80941Smrg case nir_texop_query_levels: 1703b8e80941Smrg case nir_texop_txf_ms_mcs: 1704b8e80941Smrg return true; 1705b8e80941Smrg case nir_texop_tex: 1706b8e80941Smrg case nir_texop_txb: 1707b8e80941Smrg case nir_texop_txl: 1708b8e80941Smrg case nir_texop_txd: 1709b8e80941Smrg case nir_texop_txf: 1710b8e80941Smrg case nir_texop_txf_ms: 1711b8e80941Smrg case nir_texop_txf_ms_fb: 1712b8e80941Smrg case nir_texop_tg4: 1713b8e80941Smrg return false; 1714b8e80941Smrg default: 1715b8e80941Smrg unreachable("Invalid texture opcode"); 1716b8e80941Smrg } 1717b8e80941Smrg} 1718b8e80941Smrg 1719b8e80941Smrgstatic inline bool 1720b8e80941Smrgnir_alu_instr_is_comparison(const nir_alu_instr *instr) 1721b8e80941Smrg{ 1722b8e80941Smrg switch (instr->op) { 1723b8e80941Smrg case nir_op_flt: 1724b8e80941Smrg case nir_op_fge: 1725b8e80941Smrg case nir_op_feq: 1726b8e80941Smrg case nir_op_fne: 1727b8e80941Smrg case nir_op_ilt: 1728b8e80941Smrg case nir_op_ult: 1729b8e80941Smrg case nir_op_ige: 1730b8e80941Smrg case nir_op_uge: 1731b8e80941Smrg case nir_op_ieq: 1732b8e80941Smrg case nir_op_ine: 1733b8e80941Smrg case nir_op_i2b1: 1734b8e80941Smrg case nir_op_f2b1: 1735b8e80941Smrg case nir_op_inot: 1736b8e80941Smrg case nir_op_fnot: 1737b8e80941Smrg return true; 1738b8e80941Smrg default: 1739b8e80941Smrg return false; 1740b8e80941Smrg } 1741b8e80941Smrg} 1742b8e80941Smrg 1743b8e80941Smrgstatic inline nir_alu_type 1744b8e80941Smrgnir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) 1745b8e80941Smrg{ 1746b8e80941Smrg switch (instr->src[src].src_type) { 1747b8e80941Smrg case nir_tex_src_coord: 1748b8e80941Smrg switch (instr->op) { 1749b8e80941Smrg case nir_texop_txf: 1750b8e80941Smrg case nir_texop_txf_ms: 1751b8e80941Smrg case nir_texop_txf_ms_fb: 1752b8e80941Smrg case nir_texop_txf_ms_mcs: 1753b8e80941Smrg case nir_texop_samples_identical: 1754b8e80941Smrg return nir_type_int; 1755b8e80941Smrg 1756b8e80941Smrg default: 1757b8e80941Smrg return nir_type_float; 1758b8e80941Smrg } 1759b8e80941Smrg 1760b8e80941Smrg case nir_tex_src_lod: 1761b8e80941Smrg switch (instr->op) { 1762b8e80941Smrg case nir_texop_txs: 1763b8e80941Smrg case nir_texop_txf: 1764b8e80941Smrg return nir_type_int; 1765b8e80941Smrg 1766b8e80941Smrg default: 1767b8e80941Smrg return nir_type_float; 1768b8e80941Smrg } 1769b8e80941Smrg 1770b8e80941Smrg case nir_tex_src_projector: 1771b8e80941Smrg case nir_tex_src_comparator: 1772b8e80941Smrg case nir_tex_src_bias: 1773b8e80941Smrg case nir_tex_src_ddx: 1774b8e80941Smrg case nir_tex_src_ddy: 1775b8e80941Smrg return nir_type_float; 1776b8e80941Smrg 1777b8e80941Smrg case nir_tex_src_offset: 1778b8e80941Smrg case nir_tex_src_ms_index: 1779b8e80941Smrg case nir_tex_src_texture_offset: 1780b8e80941Smrg case nir_tex_src_sampler_offset: 1781b8e80941Smrg return nir_type_int; 1782b8e80941Smrg 1783b8e80941Smrg default: 1784b8e80941Smrg unreachable("Invalid texture source type"); 1785b8e80941Smrg } 1786b8e80941Smrg} 1787b8e80941Smrg 1788b8e80941Smrgstatic inline unsigned 1789b8e80941Smrgnir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) 1790b8e80941Smrg{ 1791b8e80941Smrg if (instr->src[src].src_type == nir_tex_src_coord) 1792b8e80941Smrg return instr->coord_components; 1793b8e80941Smrg 1794b8e80941Smrg /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */ 1795b8e80941Smrg if (instr->src[src].src_type == nir_tex_src_ms_mcs) 1796b8e80941Smrg return 4; 1797b8e80941Smrg 1798b8e80941Smrg if (instr->src[src].src_type == nir_tex_src_ddx || 1799b8e80941Smrg instr->src[src].src_type == nir_tex_src_ddy) { 1800b8e80941Smrg if (instr->is_array) 1801b8e80941Smrg return instr->coord_components - 1; 1802b8e80941Smrg else 1803b8e80941Smrg return instr->coord_components; 1804b8e80941Smrg } 1805b8e80941Smrg 1806b8e80941Smrg /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for 1807b8e80941Smrg * the offset, since a cube maps to a single face. 1808b8e80941Smrg */ 1809b8e80941Smrg if (instr->src[src].src_type == nir_tex_src_offset) { 1810b8e80941Smrg if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) 1811b8e80941Smrg return 2; 1812b8e80941Smrg else if (instr->is_array) 1813b8e80941Smrg return instr->coord_components - 1; 1814b8e80941Smrg else 1815b8e80941Smrg return instr->coord_components; 1816b8e80941Smrg } 1817b8e80941Smrg 1818b8e80941Smrg return 1; 1819b8e80941Smrg} 1820b8e80941Smrg 1821b8e80941Smrgstatic inline int 1822b8e80941Smrgnir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type) 1823b8e80941Smrg{ 1824b8e80941Smrg for (unsigned i = 0; i < instr->num_srcs; i++) 1825b8e80941Smrg if (instr->src[i].src_type == type) 1826b8e80941Smrg return (int) i; 1827b8e80941Smrg 1828b8e80941Smrg return -1; 1829b8e80941Smrg} 1830b8e80941Smrg 1831b8e80941Smrgvoid nir_tex_instr_add_src(nir_tex_instr *tex, 1832b8e80941Smrg nir_tex_src_type src_type, 1833b8e80941Smrg nir_src src); 1834b8e80941Smrg 1835b8e80941Smrgvoid nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 1836b8e80941Smrg 1837b8e80941Smrgbool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex); 1838b8e80941Smrg 1839b8e80941Smrgtypedef struct { 1840b8e80941Smrg nir_instr instr; 1841b8e80941Smrg 1842b8e80941Smrg nir_ssa_def def; 1843b8e80941Smrg 1844b8e80941Smrg nir_const_value value[]; 1845b8e80941Smrg} nir_load_const_instr; 1846b8e80941Smrg 1847b8e80941Smrg#define nir_const_load_to_arr(arr, l, m) \ 1848b8e80941Smrg{ \ 1849b8e80941Smrg nir_const_value_to_array(arr, l->value, l->def.num_components, m); \ 1850b8e80941Smrg} while (false); 1851b8e80941Smrg 1852b8e80941Smrgtypedef enum { 1853b8e80941Smrg nir_jump_return, 1854b8e80941Smrg nir_jump_break, 1855b8e80941Smrg nir_jump_continue, 1856b8e80941Smrg} nir_jump_type; 1857b8e80941Smrg 1858b8e80941Smrgtypedef struct { 1859b8e80941Smrg nir_instr instr; 1860b8e80941Smrg nir_jump_type type; 1861b8e80941Smrg} nir_jump_instr; 1862b8e80941Smrg 1863b8e80941Smrg/* creates a new SSA variable in an undefined state */ 1864b8e80941Smrg 1865b8e80941Smrgtypedef struct { 1866b8e80941Smrg nir_instr instr; 1867b8e80941Smrg nir_ssa_def def; 1868b8e80941Smrg} nir_ssa_undef_instr; 1869b8e80941Smrg 1870b8e80941Smrgtypedef struct { 1871b8e80941Smrg struct exec_node node; 1872b8e80941Smrg 1873b8e80941Smrg /* The predecessor block corresponding to this source */ 1874b8e80941Smrg struct nir_block *pred; 1875b8e80941Smrg 1876b8e80941Smrg nir_src src; 1877b8e80941Smrg} nir_phi_src; 1878b8e80941Smrg 1879b8e80941Smrg#define nir_foreach_phi_src(phi_src, phi) \ 1880b8e80941Smrg foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 1881b8e80941Smrg#define nir_foreach_phi_src_safe(phi_src, phi) \ 1882b8e80941Smrg foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 1883b8e80941Smrg 1884b8e80941Smrgtypedef struct { 1885b8e80941Smrg nir_instr instr; 1886b8e80941Smrg 1887b8e80941Smrg struct exec_list srcs; /** < list of nir_phi_src */ 1888b8e80941Smrg 1889b8e80941Smrg nir_dest dest; 1890b8e80941Smrg} nir_phi_instr; 1891b8e80941Smrg 1892b8e80941Smrgtypedef struct { 1893b8e80941Smrg struct exec_node node; 1894b8e80941Smrg nir_src src; 1895b8e80941Smrg nir_dest dest; 1896b8e80941Smrg} nir_parallel_copy_entry; 1897b8e80941Smrg 1898b8e80941Smrg#define nir_foreach_parallel_copy_entry(entry, pcopy) \ 1899b8e80941Smrg foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 1900b8e80941Smrg 1901b8e80941Smrgtypedef struct { 1902b8e80941Smrg nir_instr instr; 1903b8e80941Smrg 1904b8e80941Smrg /* A list of nir_parallel_copy_entrys. The sources of all of the 1905b8e80941Smrg * entries are copied to the corresponding destinations "in parallel". 1906b8e80941Smrg * In other words, if we have two entries: a -> b and b -> a, the values 1907b8e80941Smrg * get swapped. 1908b8e80941Smrg */ 1909b8e80941Smrg struct exec_list entries; 1910b8e80941Smrg} nir_parallel_copy_instr; 1911b8e80941Smrg 1912b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 1913b8e80941Smrg type, nir_instr_type_alu) 1914b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 1915b8e80941Smrg type, nir_instr_type_deref) 1916b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 1917b8e80941Smrg type, nir_instr_type_call) 1918b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 1919b8e80941Smrg type, nir_instr_type_jump) 1920b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 1921b8e80941Smrg type, nir_instr_type_tex) 1922b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 1923b8e80941Smrg type, nir_instr_type_intrinsic) 1924b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 1925b8e80941Smrg type, nir_instr_type_load_const) 1926b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 1927b8e80941Smrg type, nir_instr_type_ssa_undef) 1928b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 1929b8e80941Smrg type, nir_instr_type_phi) 1930b8e80941SmrgNIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 1931b8e80941Smrg nir_parallel_copy_instr, instr, 1932b8e80941Smrg type, nir_instr_type_parallel_copy) 1933b8e80941Smrg 1934b8e80941Smrgtypedef struct { 1935b8e80941Smrg nir_ssa_def *def; 1936b8e80941Smrg unsigned comp; 1937b8e80941Smrg} nir_ssa_scalar; 1938b8e80941Smrg 1939b8e80941Smrgstatic inline bool 1940b8e80941Smrgnir_ssa_scalar_is_const(nir_ssa_scalar s) 1941b8e80941Smrg{ 1942b8e80941Smrg return s.def->parent_instr->type == nir_instr_type_load_const; 1943b8e80941Smrg} 1944b8e80941Smrg 1945b8e80941Smrgstatic inline nir_const_value 1946b8e80941Smrgnir_ssa_scalar_as_const_value(nir_ssa_scalar s) 1947b8e80941Smrg{ 1948b8e80941Smrg assert(s.comp < s.def->num_components); 1949b8e80941Smrg nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 1950b8e80941Smrg return load->value[s.comp]; 1951b8e80941Smrg} 1952b8e80941Smrg 1953b8e80941Smrg#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 1954b8e80941Smrgstatic inline type \ 1955b8e80941Smrgnir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ 1956b8e80941Smrg{ \ 1957b8e80941Smrg return nir_const_value_as_##suffix( \ 1958b8e80941Smrg nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ 1959b8e80941Smrg} 1960b8e80941Smrg 1961b8e80941SmrgNIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 1962b8e80941SmrgNIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 1963b8e80941SmrgNIR_DEFINE_SCALAR_AS_CONST(bool, bool) 1964b8e80941SmrgNIR_DEFINE_SCALAR_AS_CONST(double, float) 1965b8e80941Smrg 1966b8e80941Smrg#undef NIR_DEFINE_SCALAR_AS_CONST 1967b8e80941Smrg 1968b8e80941Smrgstatic inline bool 1969b8e80941Smrgnir_ssa_scalar_is_alu(nir_ssa_scalar s) 1970b8e80941Smrg{ 1971b8e80941Smrg return s.def->parent_instr->type == nir_instr_type_alu; 1972b8e80941Smrg} 1973b8e80941Smrg 1974b8e80941Smrgstatic inline nir_op 1975b8e80941Smrgnir_ssa_scalar_alu_op(nir_ssa_scalar s) 1976b8e80941Smrg{ 1977b8e80941Smrg return nir_instr_as_alu(s.def->parent_instr)->op; 1978b8e80941Smrg} 1979b8e80941Smrg 1980b8e80941Smrgstatic inline nir_ssa_scalar 1981b8e80941Smrgnir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 1982b8e80941Smrg{ 1983b8e80941Smrg nir_ssa_scalar out = { NULL, 0 }; 1984b8e80941Smrg 1985b8e80941Smrg nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 1986b8e80941Smrg assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 1987b8e80941Smrg 1988b8e80941Smrg /* Our component must be written */ 1989b8e80941Smrg assert(s.comp < s.def->num_components); 1990b8e80941Smrg assert(alu->dest.write_mask & (1u << s.comp)); 1991b8e80941Smrg 1992b8e80941Smrg assert(alu->src[alu_src_idx].src.is_ssa); 1993b8e80941Smrg out.def = alu->src[alu_src_idx].src.ssa; 1994b8e80941Smrg 1995b8e80941Smrg if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 1996b8e80941Smrg /* The ALU src is unsized so the source component follows the 1997b8e80941Smrg * destination component. 1998b8e80941Smrg */ 1999b8e80941Smrg out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2000b8e80941Smrg } else { 2001b8e80941Smrg /* This is a sized source so all source components work together to 2002b8e80941Smrg * produce all the destination components. Since we need to return a 2003b8e80941Smrg * scalar, this only works if the source is a scalar. 2004b8e80941Smrg */ 2005b8e80941Smrg assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2006b8e80941Smrg out.comp = alu->src[alu_src_idx].swizzle[0]; 2007b8e80941Smrg } 2008b8e80941Smrg assert(out.comp < out.def->num_components); 2009b8e80941Smrg 2010b8e80941Smrg return out; 2011b8e80941Smrg} 2012b8e80941Smrg 2013b8e80941Smrg/* 2014b8e80941Smrg * Control flow 2015b8e80941Smrg * 2016b8e80941Smrg * Control flow consists of a tree of control flow nodes, which include 2017b8e80941Smrg * if-statements and loops. The leaves of the tree are basic blocks, lists of 2018b8e80941Smrg * instructions that always run start-to-finish. Each basic block also keeps 2019b8e80941Smrg * track of its successors (blocks which may run immediately after the current 2020b8e80941Smrg * block) and predecessors (blocks which could have run immediately before the 2021b8e80941Smrg * current block). Each function also has a start block and an end block which 2022b8e80941Smrg * all return statements point to (which is always empty). Together, all the 2023b8e80941Smrg * blocks with their predecessors and successors make up the control flow 2024b8e80941Smrg * graph (CFG) of the function. There are helpers that modify the tree of 2025b8e80941Smrg * control flow nodes while modifying the CFG appropriately; these should be 2026b8e80941Smrg * used instead of modifying the tree directly. 2027b8e80941Smrg */ 2028b8e80941Smrg 2029b8e80941Smrgtypedef enum { 2030b8e80941Smrg nir_cf_node_block, 2031b8e80941Smrg nir_cf_node_if, 2032b8e80941Smrg nir_cf_node_loop, 2033b8e80941Smrg nir_cf_node_function 2034b8e80941Smrg} nir_cf_node_type; 2035b8e80941Smrg 2036b8e80941Smrgtypedef struct nir_cf_node { 2037b8e80941Smrg struct exec_node node; 2038b8e80941Smrg nir_cf_node_type type; 2039b8e80941Smrg struct nir_cf_node *parent; 2040b8e80941Smrg} nir_cf_node; 2041b8e80941Smrg 2042b8e80941Smrgtypedef struct nir_block { 2043b8e80941Smrg nir_cf_node cf_node; 2044b8e80941Smrg 2045b8e80941Smrg struct exec_list instr_list; /** < list of nir_instr */ 2046b8e80941Smrg 2047b8e80941Smrg /** generic block index; generated by nir_index_blocks */ 2048b8e80941Smrg unsigned index; 2049b8e80941Smrg 2050b8e80941Smrg /* 2051b8e80941Smrg * Each block can only have up to 2 successors, so we put them in a simple 2052b8e80941Smrg * array - no need for anything more complicated. 2053b8e80941Smrg */ 2054b8e80941Smrg struct nir_block *successors[2]; 2055b8e80941Smrg 2056b8e80941Smrg /* Set of nir_block predecessors in the CFG */ 2057b8e80941Smrg struct set *predecessors; 2058b8e80941Smrg 2059b8e80941Smrg /* 2060b8e80941Smrg * this node's immediate dominator in the dominance tree - set to NULL for 2061b8e80941Smrg * the start block. 2062b8e80941Smrg */ 2063b8e80941Smrg struct nir_block *imm_dom; 2064b8e80941Smrg 2065b8e80941Smrg /* This node's children in the dominance tree */ 2066b8e80941Smrg unsigned num_dom_children; 2067b8e80941Smrg struct nir_block **dom_children; 2068b8e80941Smrg 2069b8e80941Smrg /* Set of nir_blocks on the dominance frontier of this block */ 2070b8e80941Smrg struct set *dom_frontier; 2071b8e80941Smrg 2072b8e80941Smrg /* 2073b8e80941Smrg * These two indices have the property that dom_{pre,post}_index for each 2074b8e80941Smrg * child of this block in the dominance tree will always be between 2075b8e80941Smrg * dom_pre_index and dom_post_index for this block, which makes testing if 2076b8e80941Smrg * a given block is dominated by another block an O(1) operation. 2077b8e80941Smrg */ 2078b8e80941Smrg unsigned dom_pre_index, dom_post_index; 2079b8e80941Smrg 2080b8e80941Smrg /* live in and out for this block; used for liveness analysis */ 2081b8e80941Smrg BITSET_WORD *live_in; 2082b8e80941Smrg BITSET_WORD *live_out; 2083b8e80941Smrg} nir_block; 2084b8e80941Smrg 2085b8e80941Smrgstatic inline nir_instr * 2086b8e80941Smrgnir_block_first_instr(nir_block *block) 2087b8e80941Smrg{ 2088b8e80941Smrg struct exec_node *head = exec_list_get_head(&block->instr_list); 2089b8e80941Smrg return exec_node_data(nir_instr, head, node); 2090b8e80941Smrg} 2091b8e80941Smrg 2092b8e80941Smrgstatic inline nir_instr * 2093b8e80941Smrgnir_block_last_instr(nir_block *block) 2094b8e80941Smrg{ 2095b8e80941Smrg struct exec_node *tail = exec_list_get_tail(&block->instr_list); 2096b8e80941Smrg return exec_node_data(nir_instr, tail, node); 2097b8e80941Smrg} 2098b8e80941Smrg 2099b8e80941Smrgstatic inline bool 2100b8e80941Smrgnir_block_ends_in_jump(nir_block *block) 2101b8e80941Smrg{ 2102b8e80941Smrg return !exec_list_is_empty(&block->instr_list) && 2103b8e80941Smrg nir_block_last_instr(block)->type == nir_instr_type_jump; 2104b8e80941Smrg} 2105b8e80941Smrg 2106b8e80941Smrg#define nir_foreach_instr(instr, block) \ 2107b8e80941Smrg foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 2108b8e80941Smrg#define nir_foreach_instr_reverse(instr, block) \ 2109b8e80941Smrg foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 2110b8e80941Smrg#define nir_foreach_instr_safe(instr, block) \ 2111b8e80941Smrg foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 2112b8e80941Smrg#define nir_foreach_instr_reverse_safe(instr, block) \ 2113b8e80941Smrg foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 2114b8e80941Smrg 2115b8e80941Smrgtypedef enum { 2116b8e80941Smrg nir_selection_control_none = 0x0, 2117b8e80941Smrg nir_selection_control_flatten = 0x1, 2118b8e80941Smrg nir_selection_control_dont_flatten = 0x2, 2119b8e80941Smrg} nir_selection_control; 2120b8e80941Smrg 2121b8e80941Smrgtypedef struct nir_if { 2122b8e80941Smrg nir_cf_node cf_node; 2123b8e80941Smrg nir_src condition; 2124b8e80941Smrg nir_selection_control control; 2125b8e80941Smrg 2126b8e80941Smrg struct exec_list then_list; /** < list of nir_cf_node */ 2127b8e80941Smrg struct exec_list else_list; /** < list of nir_cf_node */ 2128b8e80941Smrg} nir_if; 2129b8e80941Smrg 2130b8e80941Smrgtypedef struct { 2131b8e80941Smrg nir_if *nif; 2132b8e80941Smrg 2133b8e80941Smrg /** Instruction that generates nif::condition. */ 2134b8e80941Smrg nir_instr *conditional_instr; 2135b8e80941Smrg 2136b8e80941Smrg /** Block within ::nif that has the break instruction. */ 2137b8e80941Smrg nir_block *break_block; 2138b8e80941Smrg 2139b8e80941Smrg /** Last block for the then- or else-path that does not contain the break. */ 2140b8e80941Smrg nir_block *continue_from_block; 2141b8e80941Smrg 2142b8e80941Smrg /** True when ::break_block is in the else-path of ::nif. */ 2143b8e80941Smrg bool continue_from_then; 2144b8e80941Smrg bool induction_rhs; 2145b8e80941Smrg 2146b8e80941Smrg /* This is true if the terminators exact trip count is unknown. For 2147b8e80941Smrg * example: 2148b8e80941Smrg * 2149b8e80941Smrg * for (int i = 0; i < imin(x, 4); i++) 2150b8e80941Smrg * ... 2151b8e80941Smrg * 2152b8e80941Smrg * Here loop analysis would have set a max_trip_count of 4 however we dont 2153b8e80941Smrg * know for sure that this is the exact trip count. 2154b8e80941Smrg */ 2155b8e80941Smrg bool exact_trip_count_unknown; 2156b8e80941Smrg 2157b8e80941Smrg struct list_head loop_terminator_link; 2158b8e80941Smrg} nir_loop_terminator; 2159b8e80941Smrg 2160b8e80941Smrgtypedef struct { 2161b8e80941Smrg /* Estimated cost (in number of instructions) of the loop */ 2162b8e80941Smrg unsigned instr_cost; 2163b8e80941Smrg 2164b8e80941Smrg /* Guessed trip count based on array indexing */ 2165b8e80941Smrg unsigned guessed_trip_count; 2166b8e80941Smrg 2167b8e80941Smrg /* Maximum number of times the loop is run (if known) */ 2168b8e80941Smrg unsigned max_trip_count; 2169b8e80941Smrg 2170b8e80941Smrg /* Do we know the exact number of times the loop will be run */ 2171b8e80941Smrg bool exact_trip_count_known; 2172b8e80941Smrg 2173b8e80941Smrg /* Unroll the loop regardless of its size */ 2174b8e80941Smrg bool force_unroll; 2175b8e80941Smrg 2176b8e80941Smrg /* Does the loop contain complex loop terminators, continues or other 2177b8e80941Smrg * complex behaviours? If this is true we can't rely on 2178b8e80941Smrg * loop_terminator_list to be complete or accurate. 2179b8e80941Smrg */ 2180b8e80941Smrg bool complex_loop; 2181b8e80941Smrg 2182b8e80941Smrg nir_loop_terminator *limiting_terminator; 2183b8e80941Smrg 2184b8e80941Smrg /* A list of loop_terminators terminating this loop. */ 2185b8e80941Smrg struct list_head loop_terminator_list; 2186b8e80941Smrg} nir_loop_info; 2187b8e80941Smrg 2188b8e80941Smrgtypedef enum { 2189b8e80941Smrg nir_loop_control_none = 0x0, 2190b8e80941Smrg nir_loop_control_unroll = 0x1, 2191b8e80941Smrg nir_loop_control_dont_unroll = 0x2, 2192b8e80941Smrg} nir_loop_control; 2193b8e80941Smrg 2194b8e80941Smrgtypedef struct { 2195b8e80941Smrg nir_cf_node cf_node; 2196b8e80941Smrg 2197b8e80941Smrg struct exec_list body; /** < list of nir_cf_node */ 2198b8e80941Smrg 2199b8e80941Smrg nir_loop_info *info; 2200b8e80941Smrg nir_loop_control control; 2201b8e80941Smrg bool partially_unrolled; 2202b8e80941Smrg} nir_loop; 2203b8e80941Smrg 2204b8e80941Smrg/** 2205b8e80941Smrg * Various bits of metadata that can may be created or required by 2206b8e80941Smrg * optimization and analysis passes 2207b8e80941Smrg */ 2208b8e80941Smrgtypedef enum { 2209b8e80941Smrg nir_metadata_none = 0x0, 2210b8e80941Smrg nir_metadata_block_index = 0x1, 2211b8e80941Smrg nir_metadata_dominance = 0x2, 2212b8e80941Smrg nir_metadata_live_ssa_defs = 0x4, 2213b8e80941Smrg nir_metadata_not_properly_reset = 0x8, 2214b8e80941Smrg nir_metadata_loop_analysis = 0x10, 2215b8e80941Smrg} nir_metadata; 2216b8e80941Smrg 2217b8e80941Smrgtypedef struct { 2218b8e80941Smrg nir_cf_node cf_node; 2219b8e80941Smrg 2220b8e80941Smrg /** pointer to the function of which this is an implementation */ 2221b8e80941Smrg struct nir_function *function; 2222b8e80941Smrg 2223b8e80941Smrg struct exec_list body; /** < list of nir_cf_node */ 2224b8e80941Smrg 2225b8e80941Smrg nir_block *end_block; 2226b8e80941Smrg 2227b8e80941Smrg /** list for all local variables in the function */ 2228b8e80941Smrg struct exec_list locals; 2229b8e80941Smrg 2230b8e80941Smrg /** list of local registers in the function */ 2231b8e80941Smrg struct exec_list registers; 2232b8e80941Smrg 2233b8e80941Smrg /** next available local register index */ 2234b8e80941Smrg unsigned reg_alloc; 2235b8e80941Smrg 2236b8e80941Smrg /** next available SSA value index */ 2237b8e80941Smrg unsigned ssa_alloc; 2238b8e80941Smrg 2239b8e80941Smrg /* total number of basic blocks, only valid when block_index_dirty = false */ 2240b8e80941Smrg unsigned num_blocks; 2241b8e80941Smrg 2242b8e80941Smrg nir_metadata valid_metadata; 2243b8e80941Smrg} nir_function_impl; 2244b8e80941Smrg 2245b8e80941SmrgATTRIBUTE_RETURNS_NONNULL static inline nir_block * 2246b8e80941Smrgnir_start_block(nir_function_impl *impl) 2247b8e80941Smrg{ 2248b8e80941Smrg return (nir_block *) impl->body.head_sentinel.next; 2249b8e80941Smrg} 2250b8e80941Smrg 2251b8e80941SmrgATTRIBUTE_RETURNS_NONNULL static inline nir_block * 2252b8e80941Smrgnir_impl_last_block(nir_function_impl *impl) 2253b8e80941Smrg{ 2254b8e80941Smrg return (nir_block *) impl->body.tail_sentinel.prev; 2255b8e80941Smrg} 2256b8e80941Smrg 2257b8e80941Smrgstatic inline nir_cf_node * 2258b8e80941Smrgnir_cf_node_next(nir_cf_node *node) 2259b8e80941Smrg{ 2260b8e80941Smrg struct exec_node *next = exec_node_get_next(&node->node); 2261b8e80941Smrg if (exec_node_is_tail_sentinel(next)) 2262b8e80941Smrg return NULL; 2263b8e80941Smrg else 2264b8e80941Smrg return exec_node_data(nir_cf_node, next, node); 2265b8e80941Smrg} 2266b8e80941Smrg 2267b8e80941Smrgstatic inline nir_cf_node * 2268b8e80941Smrgnir_cf_node_prev(nir_cf_node *node) 2269b8e80941Smrg{ 2270b8e80941Smrg struct exec_node *prev = exec_node_get_prev(&node->node); 2271b8e80941Smrg if (exec_node_is_head_sentinel(prev)) 2272b8e80941Smrg return NULL; 2273b8e80941Smrg else 2274b8e80941Smrg return exec_node_data(nir_cf_node, prev, node); 2275b8e80941Smrg} 2276b8e80941Smrg 2277b8e80941Smrgstatic inline bool 2278b8e80941Smrgnir_cf_node_is_first(const nir_cf_node *node) 2279b8e80941Smrg{ 2280b8e80941Smrg return exec_node_is_head_sentinel(node->node.prev); 2281b8e80941Smrg} 2282b8e80941Smrg 2283b8e80941Smrgstatic inline bool 2284b8e80941Smrgnir_cf_node_is_last(const nir_cf_node *node) 2285b8e80941Smrg{ 2286b8e80941Smrg return exec_node_is_tail_sentinel(node->node.next); 2287b8e80941Smrg} 2288b8e80941Smrg 2289b8e80941SmrgNIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 2290b8e80941Smrg type, nir_cf_node_block) 2291b8e80941SmrgNIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 2292b8e80941Smrg type, nir_cf_node_if) 2293b8e80941SmrgNIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 2294b8e80941Smrg type, nir_cf_node_loop) 2295b8e80941SmrgNIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 2296b8e80941Smrg nir_function_impl, cf_node, type, nir_cf_node_function) 2297b8e80941Smrg 2298b8e80941Smrgstatic inline nir_block * 2299b8e80941Smrgnir_if_first_then_block(nir_if *if_stmt) 2300b8e80941Smrg{ 2301b8e80941Smrg struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 2302b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2303b8e80941Smrg} 2304b8e80941Smrg 2305b8e80941Smrgstatic inline nir_block * 2306b8e80941Smrgnir_if_last_then_block(nir_if *if_stmt) 2307b8e80941Smrg{ 2308b8e80941Smrg struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 2309b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2310b8e80941Smrg} 2311b8e80941Smrg 2312b8e80941Smrgstatic inline nir_block * 2313b8e80941Smrgnir_if_first_else_block(nir_if *if_stmt) 2314b8e80941Smrg{ 2315b8e80941Smrg struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 2316b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2317b8e80941Smrg} 2318b8e80941Smrg 2319b8e80941Smrgstatic inline nir_block * 2320b8e80941Smrgnir_if_last_else_block(nir_if *if_stmt) 2321b8e80941Smrg{ 2322b8e80941Smrg struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 2323b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2324b8e80941Smrg} 2325b8e80941Smrg 2326b8e80941Smrgstatic inline nir_block * 2327b8e80941Smrgnir_loop_first_block(nir_loop *loop) 2328b8e80941Smrg{ 2329b8e80941Smrg struct exec_node *head = exec_list_get_head(&loop->body); 2330b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2331b8e80941Smrg} 2332b8e80941Smrg 2333b8e80941Smrgstatic inline nir_block * 2334b8e80941Smrgnir_loop_last_block(nir_loop *loop) 2335b8e80941Smrg{ 2336b8e80941Smrg struct exec_node *tail = exec_list_get_tail(&loop->body); 2337b8e80941Smrg return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2338b8e80941Smrg} 2339b8e80941Smrg 2340b8e80941Smrg/** 2341b8e80941Smrg * Return true if this list of cf_nodes contains a single empty block. 2342b8e80941Smrg */ 2343b8e80941Smrgstatic inline bool 2344b8e80941Smrgnir_cf_list_is_empty_block(struct exec_list *cf_list) 2345b8e80941Smrg{ 2346b8e80941Smrg if (exec_list_is_singular(cf_list)) { 2347b8e80941Smrg struct exec_node *head = exec_list_get_head(cf_list); 2348b8e80941Smrg nir_block *block = 2349b8e80941Smrg nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2350b8e80941Smrg return exec_list_is_empty(&block->instr_list); 2351b8e80941Smrg } 2352b8e80941Smrg return false; 2353b8e80941Smrg} 2354b8e80941Smrg 2355b8e80941Smrgtypedef struct { 2356b8e80941Smrg uint8_t num_components; 2357b8e80941Smrg uint8_t bit_size; 2358b8e80941Smrg} nir_parameter; 2359b8e80941Smrg 2360b8e80941Smrgtypedef struct nir_function { 2361b8e80941Smrg struct exec_node node; 2362b8e80941Smrg 2363b8e80941Smrg const char *name; 2364b8e80941Smrg struct nir_shader *shader; 2365b8e80941Smrg 2366b8e80941Smrg unsigned num_params; 2367b8e80941Smrg nir_parameter *params; 2368b8e80941Smrg 2369b8e80941Smrg /** The implementation of this function. 2370b8e80941Smrg * 2371b8e80941Smrg * If the function is only declared and not implemented, this is NULL. 2372b8e80941Smrg */ 2373b8e80941Smrg nir_function_impl *impl; 2374b8e80941Smrg 2375b8e80941Smrg bool is_entrypoint; 2376b8e80941Smrg} nir_function; 2377b8e80941Smrg 2378b8e80941Smrgtypedef enum { 2379b8e80941Smrg nir_lower_imul64 = (1 << 0), 2380b8e80941Smrg nir_lower_isign64 = (1 << 1), 2381b8e80941Smrg /** Lower all int64 modulus and division opcodes */ 2382b8e80941Smrg nir_lower_divmod64 = (1 << 2), 2383b8e80941Smrg /** Lower all 64-bit umul_high and imul_high opcodes */ 2384b8e80941Smrg nir_lower_imul_high64 = (1 << 3), 2385b8e80941Smrg nir_lower_mov64 = (1 << 4), 2386b8e80941Smrg nir_lower_icmp64 = (1 << 5), 2387b8e80941Smrg nir_lower_iadd64 = (1 << 6), 2388b8e80941Smrg nir_lower_iabs64 = (1 << 7), 2389b8e80941Smrg nir_lower_ineg64 = (1 << 8), 2390b8e80941Smrg nir_lower_logic64 = (1 << 9), 2391b8e80941Smrg nir_lower_minmax64 = (1 << 10), 2392b8e80941Smrg nir_lower_shift64 = (1 << 11), 2393b8e80941Smrg nir_lower_imul_2x32_64 = (1 << 12), 2394b8e80941Smrg nir_lower_extract64 = (1 << 13), 2395b8e80941Smrg} nir_lower_int64_options; 2396b8e80941Smrg 2397b8e80941Smrgtypedef enum { 2398b8e80941Smrg nir_lower_drcp = (1 << 0), 2399b8e80941Smrg nir_lower_dsqrt = (1 << 1), 2400b8e80941Smrg nir_lower_drsq = (1 << 2), 2401b8e80941Smrg nir_lower_dtrunc = (1 << 3), 2402b8e80941Smrg nir_lower_dfloor = (1 << 4), 2403b8e80941Smrg nir_lower_dceil = (1 << 5), 2404b8e80941Smrg nir_lower_dfract = (1 << 6), 2405b8e80941Smrg nir_lower_dround_even = (1 << 7), 2406b8e80941Smrg nir_lower_dmod = (1 << 8), 2407b8e80941Smrg nir_lower_fp64_full_software = (1 << 9), 2408b8e80941Smrg} nir_lower_doubles_options; 2409b8e80941Smrg 2410b8e80941Smrgtypedef struct nir_shader_compiler_options { 2411b8e80941Smrg bool lower_fdiv; 2412b8e80941Smrg bool lower_ffma; 2413b8e80941Smrg bool fuse_ffma; 2414b8e80941Smrg bool lower_flrp16; 2415b8e80941Smrg bool lower_flrp32; 2416b8e80941Smrg /** Lowers flrp when it does not support doubles */ 2417b8e80941Smrg bool lower_flrp64; 2418b8e80941Smrg bool lower_fpow; 2419b8e80941Smrg bool lower_fsat; 2420b8e80941Smrg bool lower_fsqrt; 2421b8e80941Smrg bool lower_fmod16; 2422b8e80941Smrg bool lower_fmod32; 2423b8e80941Smrg bool lower_fmod64; 2424b8e80941Smrg /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */ 2425b8e80941Smrg bool lower_bitfield_extract; 2426b8e80941Smrg /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */ 2427b8e80941Smrg bool lower_bitfield_extract_to_shifts; 2428b8e80941Smrg /** Lowers bitfield_insert to bfi/bfm */ 2429b8e80941Smrg bool lower_bitfield_insert; 2430b8e80941Smrg /** Lowers bitfield_insert to bfm, compares, and shifts. */ 2431b8e80941Smrg bool lower_bitfield_insert_to_shifts; 2432b8e80941Smrg /** Lowers bitfield_reverse to shifts. */ 2433b8e80941Smrg bool lower_bitfield_reverse; 2434b8e80941Smrg /** Lowers bit_count to shifts. */ 2435b8e80941Smrg bool lower_bit_count; 2436b8e80941Smrg /** Lowers bfm to shifts and subtracts. */ 2437b8e80941Smrg bool lower_bfm; 2438b8e80941Smrg /** Lowers ifind_msb to compare and ufind_msb */ 2439b8e80941Smrg bool lower_ifind_msb; 2440b8e80941Smrg /** Lowers find_lsb to ufind_msb and logic ops */ 2441b8e80941Smrg bool lower_find_lsb; 2442b8e80941Smrg bool lower_uadd_carry; 2443b8e80941Smrg bool lower_usub_borrow; 2444b8e80941Smrg /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 2445b8e80941Smrg bool lower_mul_high; 2446b8e80941Smrg /** lowers fneg and ineg to fsub and isub. */ 2447b8e80941Smrg bool lower_negate; 2448b8e80941Smrg /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ 2449b8e80941Smrg bool lower_sub; 2450b8e80941Smrg 2451b8e80941Smrg /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ 2452b8e80941Smrg bool lower_scmp; 2453b8e80941Smrg 2454b8e80941Smrg /** enables rules to lower idiv by power-of-two: */ 2455b8e80941Smrg bool lower_idiv; 2456b8e80941Smrg 2457b8e80941Smrg /** enables rules to lower isign to imin+imax */ 2458b8e80941Smrg bool lower_isign; 2459b8e80941Smrg 2460b8e80941Smrg /** enables rules to lower fsign to fsub and flt */ 2461b8e80941Smrg bool lower_fsign; 2462b8e80941Smrg 2463b8e80941Smrg /* Does the native fdot instruction replicate its result for four 2464b8e80941Smrg * components? If so, then opt_algebraic_late will turn all fdotN 2465b8e80941Smrg * instructions into fdot_replicatedN instructions. 2466b8e80941Smrg */ 2467b8e80941Smrg bool fdot_replicates; 2468b8e80941Smrg 2469b8e80941Smrg /** lowers ffloor to fsub+ffract: */ 2470b8e80941Smrg bool lower_ffloor; 2471b8e80941Smrg 2472b8e80941Smrg /** lowers ffract to fsub+ffloor: */ 2473b8e80941Smrg bool lower_ffract; 2474b8e80941Smrg 2475b8e80941Smrg /** lowers fceil to fneg+ffloor+fneg: */ 2476b8e80941Smrg bool lower_fceil; 2477b8e80941Smrg 2478b8e80941Smrg bool lower_ftrunc; 2479b8e80941Smrg 2480b8e80941Smrg bool lower_ldexp; 2481b8e80941Smrg 2482b8e80941Smrg bool lower_pack_half_2x16; 2483b8e80941Smrg bool lower_pack_unorm_2x16; 2484b8e80941Smrg bool lower_pack_snorm_2x16; 2485b8e80941Smrg bool lower_pack_unorm_4x8; 2486b8e80941Smrg bool lower_pack_snorm_4x8; 2487b8e80941Smrg bool lower_unpack_half_2x16; 2488b8e80941Smrg bool lower_unpack_unorm_2x16; 2489b8e80941Smrg bool lower_unpack_snorm_2x16; 2490b8e80941Smrg bool lower_unpack_unorm_4x8; 2491b8e80941Smrg bool lower_unpack_snorm_4x8; 2492b8e80941Smrg 2493b8e80941Smrg bool lower_extract_byte; 2494b8e80941Smrg bool lower_extract_word; 2495b8e80941Smrg 2496b8e80941Smrg bool lower_all_io_to_temps; 2497b8e80941Smrg bool lower_all_io_to_elements; 2498b8e80941Smrg 2499b8e80941Smrg /** 2500b8e80941Smrg * Does the driver support real 32-bit integers? (Otherwise, integers 2501b8e80941Smrg * are simulated by floats.) 2502b8e80941Smrg */ 2503b8e80941Smrg bool native_integers; 2504b8e80941Smrg 2505b8e80941Smrg /* Indicates that the driver only has zero-based vertex id */ 2506b8e80941Smrg bool vertex_id_zero_based; 2507b8e80941Smrg 2508b8e80941Smrg /** 2509b8e80941Smrg * If enabled, gl_BaseVertex will be lowered as: 2510b8e80941Smrg * is_indexed_draw (~0/0) & firstvertex 2511b8e80941Smrg */ 2512b8e80941Smrg bool lower_base_vertex; 2513b8e80941Smrg 2514b8e80941Smrg /** 2515b8e80941Smrg * If enabled, gl_HelperInvocation will be lowered as: 2516b8e80941Smrg * 2517b8e80941Smrg * !((1 << sample_id) & sample_mask_in)) 2518b8e80941Smrg * 2519b8e80941Smrg * This depends on some possibly hw implementation details, which may 2520b8e80941Smrg * not be true for all hw. In particular that the FS is only executed 2521b8e80941Smrg * for covered samples or for helper invocations. So, do not blindly 2522b8e80941Smrg * enable this option. 2523b8e80941Smrg * 2524b8e80941Smrg * Note: See also issue #22 in ARB_shader_image_load_store 2525b8e80941Smrg */ 2526b8e80941Smrg bool lower_helper_invocation; 2527b8e80941Smrg 2528b8e80941Smrg /** 2529b8e80941Smrg * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 2530b8e80941Smrg * 2531b8e80941Smrg * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 2532b8e80941Smrg * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 2533b8e80941Smrg */ 2534b8e80941Smrg bool optimize_sample_mask_in; 2535b8e80941Smrg 2536b8e80941Smrg bool lower_cs_local_index_from_id; 2537b8e80941Smrg bool lower_cs_local_id_from_index; 2538b8e80941Smrg 2539b8e80941Smrg bool lower_device_index_to_zero; 2540b8e80941Smrg 2541b8e80941Smrg /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ 2542b8e80941Smrg bool lower_wpos_pntc; 2543b8e80941Smrg 2544b8e80941Smrg bool lower_hadd; 2545b8e80941Smrg bool lower_add_sat; 2546b8e80941Smrg 2547b8e80941Smrg /** 2548b8e80941Smrg * Should nir_lower_io() create load_interpolated_input intrinsics? 2549b8e80941Smrg * 2550b8e80941Smrg * If not, it generates regular load_input intrinsics and interpolation 2551b8e80941Smrg * information must be inferred from the list of input nir_variables. 2552b8e80941Smrg */ 2553b8e80941Smrg bool use_interpolated_input_intrinsics; 2554b8e80941Smrg 2555b8e80941Smrg /* Lowers when 32x32->64 bit multiplication is not supported */ 2556b8e80941Smrg bool lower_mul_2x32_64; 2557b8e80941Smrg 2558b8e80941Smrg unsigned max_unroll_iterations; 2559b8e80941Smrg 2560b8e80941Smrg nir_lower_int64_options lower_int64_options; 2561b8e80941Smrg nir_lower_doubles_options lower_doubles_options; 2562b8e80941Smrg} nir_shader_compiler_options; 2563b8e80941Smrg 2564b8e80941Smrgtypedef struct nir_shader { 2565b8e80941Smrg /** list of uniforms (nir_variable) */ 2566b8e80941Smrg struct exec_list uniforms; 2567b8e80941Smrg 2568b8e80941Smrg /** list of inputs (nir_variable) */ 2569b8e80941Smrg struct exec_list inputs; 2570b8e80941Smrg 2571b8e80941Smrg /** list of outputs (nir_variable) */ 2572b8e80941Smrg struct exec_list outputs; 2573b8e80941Smrg 2574b8e80941Smrg /** list of shared compute variables (nir_variable) */ 2575b8e80941Smrg struct exec_list shared; 2576b8e80941Smrg 2577b8e80941Smrg /** Set of driver-specific options for the shader. 2578b8e80941Smrg * 2579b8e80941Smrg * The memory for the options is expected to be kept in a single static 2580b8e80941Smrg * copy by the driver. 2581b8e80941Smrg */ 2582b8e80941Smrg const struct nir_shader_compiler_options *options; 2583b8e80941Smrg 2584b8e80941Smrg /** Various bits of compile-time information about a given shader */ 2585b8e80941Smrg struct shader_info info; 2586b8e80941Smrg 2587b8e80941Smrg /** list of global variables in the shader (nir_variable) */ 2588b8e80941Smrg struct exec_list globals; 2589b8e80941Smrg 2590b8e80941Smrg /** list of system value variables in the shader (nir_variable) */ 2591b8e80941Smrg struct exec_list system_values; 2592b8e80941Smrg 2593b8e80941Smrg struct exec_list functions; /** < list of nir_function */ 2594b8e80941Smrg 2595b8e80941Smrg /** 2596b8e80941Smrg * the highest index a load_input_*, load_uniform_*, etc. intrinsic can 2597b8e80941Smrg * access plus one 2598b8e80941Smrg */ 2599b8e80941Smrg unsigned num_inputs, num_uniforms, num_outputs, num_shared; 2600b8e80941Smrg 2601b8e80941Smrg /** Size in bytes of required scratch space */ 2602b8e80941Smrg unsigned scratch_size; 2603b8e80941Smrg 2604b8e80941Smrg /** Constant data associated with this shader. 2605b8e80941Smrg * 2606b8e80941Smrg * Constant data is loaded through load_constant intrinsics. See also 2607b8e80941Smrg * nir_opt_large_constants. 2608b8e80941Smrg */ 2609b8e80941Smrg void *constant_data; 2610b8e80941Smrg unsigned constant_data_size; 2611b8e80941Smrg} nir_shader; 2612b8e80941Smrg 2613b8e80941Smrg#define nir_foreach_function(func, shader) \ 2614b8e80941Smrg foreach_list_typed(nir_function, func, node, &(shader)->functions) 2615b8e80941Smrg 2616b8e80941Smrgstatic inline nir_function_impl * 2617b8e80941Smrgnir_shader_get_entrypoint(nir_shader *shader) 2618b8e80941Smrg{ 2619b8e80941Smrg nir_function *func = NULL; 2620b8e80941Smrg 2621b8e80941Smrg nir_foreach_function(function, shader) { 2622b8e80941Smrg assert(func == NULL); 2623b8e80941Smrg if (function->is_entrypoint) { 2624b8e80941Smrg func = function; 2625b8e80941Smrg#ifndef NDEBUG 2626b8e80941Smrg break; 2627b8e80941Smrg#endif 2628b8e80941Smrg } 2629b8e80941Smrg } 2630b8e80941Smrg 2631b8e80941Smrg if (!func) 2632b8e80941Smrg return NULL; 2633b8e80941Smrg 2634b8e80941Smrg assert(func->num_params == 0); 2635b8e80941Smrg assert(func->impl); 2636b8e80941Smrg return func->impl; 2637b8e80941Smrg} 2638b8e80941Smrg 2639b8e80941Smrgnir_shader *nir_shader_create(void *mem_ctx, 2640b8e80941Smrg gl_shader_stage stage, 2641b8e80941Smrg const nir_shader_compiler_options *options, 2642b8e80941Smrg shader_info *si); 2643b8e80941Smrg 2644b8e80941Smrgnir_register *nir_local_reg_create(nir_function_impl *impl); 2645b8e80941Smrg 2646b8e80941Smrgvoid nir_reg_remove(nir_register *reg); 2647b8e80941Smrg 2648b8e80941Smrg/** Adds a variable to the appropriate list in nir_shader */ 2649b8e80941Smrgvoid nir_shader_add_variable(nir_shader *shader, nir_variable *var); 2650b8e80941Smrg 2651b8e80941Smrgstatic inline void 2652b8e80941Smrgnir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 2653b8e80941Smrg{ 2654b8e80941Smrg assert(var->data.mode == nir_var_function_temp); 2655b8e80941Smrg exec_list_push_tail(&impl->locals, &var->node); 2656b8e80941Smrg} 2657b8e80941Smrg 2658b8e80941Smrg/** creates a variable, sets a few defaults, and adds it to the list */ 2659b8e80941Smrgnir_variable *nir_variable_create(nir_shader *shader, 2660b8e80941Smrg nir_variable_mode mode, 2661b8e80941Smrg const struct glsl_type *type, 2662b8e80941Smrg const char *name); 2663b8e80941Smrg/** creates a local variable and adds it to the list */ 2664b8e80941Smrgnir_variable *nir_local_variable_create(nir_function_impl *impl, 2665b8e80941Smrg const struct glsl_type *type, 2666b8e80941Smrg const char *name); 2667b8e80941Smrg 2668b8e80941Smrg/** creates a function and adds it to the shader's list of functions */ 2669b8e80941Smrgnir_function *nir_function_create(nir_shader *shader, const char *name); 2670b8e80941Smrg 2671b8e80941Smrgnir_function_impl *nir_function_impl_create(nir_function *func); 2672b8e80941Smrg/** creates a function_impl that isn't tied to any particular function */ 2673b8e80941Smrgnir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 2674b8e80941Smrg 2675b8e80941Smrgnir_block *nir_block_create(nir_shader *shader); 2676b8e80941Smrgnir_if *nir_if_create(nir_shader *shader); 2677b8e80941Smrgnir_loop *nir_loop_create(nir_shader *shader); 2678b8e80941Smrg 2679b8e80941Smrgnir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 2680b8e80941Smrg 2681b8e80941Smrg/** requests that the given pieces of metadata be generated */ 2682b8e80941Smrgvoid nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 2683b8e80941Smrg/** dirties all but the preserved metadata */ 2684b8e80941Smrgvoid nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 2685b8e80941Smrg 2686b8e80941Smrg/** creates an instruction with default swizzle/writemask/etc. with NULL registers */ 2687b8e80941Smrgnir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 2688b8e80941Smrg 2689b8e80941Smrgnir_deref_instr *nir_deref_instr_create(nir_shader *shader, 2690b8e80941Smrg nir_deref_type deref_type); 2691b8e80941Smrg 2692b8e80941Smrgnir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 2693b8e80941Smrg 2694b8e80941Smrgnir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 2695b8e80941Smrg unsigned num_components, 2696b8e80941Smrg unsigned bit_size); 2697b8e80941Smrg 2698b8e80941Smrgnir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 2699b8e80941Smrg nir_intrinsic_op op); 2700b8e80941Smrg 2701b8e80941Smrgnir_call_instr *nir_call_instr_create(nir_shader *shader, 2702b8e80941Smrg nir_function *callee); 2703b8e80941Smrg 2704b8e80941Smrgnir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 2705b8e80941Smrg 2706b8e80941Smrgnir_phi_instr *nir_phi_instr_create(nir_shader *shader); 2707b8e80941Smrg 2708b8e80941Smrgnir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 2709b8e80941Smrg 2710b8e80941Smrgnir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 2711b8e80941Smrg unsigned num_components, 2712b8e80941Smrg unsigned bit_size); 2713b8e80941Smrg 2714b8e80941Smrgnir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 2715b8e80941Smrg 2716b8e80941Smrg/** 2717b8e80941Smrg * NIR Cursors and Instruction Insertion API 2718b8e80941Smrg * @{ 2719b8e80941Smrg * 2720b8e80941Smrg * A tiny struct representing a point to insert/extract instructions or 2721b8e80941Smrg * control flow nodes. Helps reduce the combinatorial explosion of possible 2722b8e80941Smrg * points to insert/extract. 2723b8e80941Smrg * 2724b8e80941Smrg * \sa nir_control_flow.h 2725b8e80941Smrg */ 2726b8e80941Smrgtypedef enum { 2727b8e80941Smrg nir_cursor_before_block, 2728b8e80941Smrg nir_cursor_after_block, 2729b8e80941Smrg nir_cursor_before_instr, 2730b8e80941Smrg nir_cursor_after_instr, 2731b8e80941Smrg} nir_cursor_option; 2732b8e80941Smrg 2733b8e80941Smrgtypedef struct { 2734b8e80941Smrg nir_cursor_option option; 2735b8e80941Smrg union { 2736b8e80941Smrg nir_block *block; 2737b8e80941Smrg nir_instr *instr; 2738b8e80941Smrg }; 2739b8e80941Smrg} nir_cursor; 2740b8e80941Smrg 2741b8e80941Smrgstatic inline nir_block * 2742b8e80941Smrgnir_cursor_current_block(nir_cursor cursor) 2743b8e80941Smrg{ 2744b8e80941Smrg if (cursor.option == nir_cursor_before_instr || 2745b8e80941Smrg cursor.option == nir_cursor_after_instr) { 2746b8e80941Smrg return cursor.instr->block; 2747b8e80941Smrg } else { 2748b8e80941Smrg return cursor.block; 2749b8e80941Smrg } 2750b8e80941Smrg} 2751b8e80941Smrg 2752b8e80941Smrgbool nir_cursors_equal(nir_cursor a, nir_cursor b); 2753b8e80941Smrg 2754b8e80941Smrgstatic inline nir_cursor 2755b8e80941Smrgnir_before_block(nir_block *block) 2756b8e80941Smrg{ 2757b8e80941Smrg nir_cursor cursor; 2758b8e80941Smrg cursor.option = nir_cursor_before_block; 2759b8e80941Smrg cursor.block = block; 2760b8e80941Smrg return cursor; 2761b8e80941Smrg} 2762b8e80941Smrg 2763b8e80941Smrgstatic inline nir_cursor 2764b8e80941Smrgnir_after_block(nir_block *block) 2765b8e80941Smrg{ 2766b8e80941Smrg nir_cursor cursor; 2767b8e80941Smrg cursor.option = nir_cursor_after_block; 2768b8e80941Smrg cursor.block = block; 2769b8e80941Smrg return cursor; 2770b8e80941Smrg} 2771b8e80941Smrg 2772b8e80941Smrgstatic inline nir_cursor 2773b8e80941Smrgnir_before_instr(nir_instr *instr) 2774b8e80941Smrg{ 2775b8e80941Smrg nir_cursor cursor; 2776b8e80941Smrg cursor.option = nir_cursor_before_instr; 2777b8e80941Smrg cursor.instr = instr; 2778b8e80941Smrg return cursor; 2779b8e80941Smrg} 2780b8e80941Smrg 2781b8e80941Smrgstatic inline nir_cursor 2782b8e80941Smrgnir_after_instr(nir_instr *instr) 2783b8e80941Smrg{ 2784b8e80941Smrg nir_cursor cursor; 2785b8e80941Smrg cursor.option = nir_cursor_after_instr; 2786b8e80941Smrg cursor.instr = instr; 2787b8e80941Smrg return cursor; 2788b8e80941Smrg} 2789b8e80941Smrg 2790b8e80941Smrgstatic inline nir_cursor 2791b8e80941Smrgnir_after_block_before_jump(nir_block *block) 2792b8e80941Smrg{ 2793b8e80941Smrg nir_instr *last_instr = nir_block_last_instr(block); 2794b8e80941Smrg if (last_instr && last_instr->type == nir_instr_type_jump) { 2795b8e80941Smrg return nir_before_instr(last_instr); 2796b8e80941Smrg } else { 2797b8e80941Smrg return nir_after_block(block); 2798b8e80941Smrg } 2799b8e80941Smrg} 2800b8e80941Smrg 2801b8e80941Smrgstatic inline nir_cursor 2802b8e80941Smrgnir_before_src(nir_src *src, bool is_if_condition) 2803b8e80941Smrg{ 2804b8e80941Smrg if (is_if_condition) { 2805b8e80941Smrg nir_block *prev_block = 2806b8e80941Smrg nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node)); 2807b8e80941Smrg assert(!nir_block_ends_in_jump(prev_block)); 2808b8e80941Smrg return nir_after_block(prev_block); 2809b8e80941Smrg } else if (src->parent_instr->type == nir_instr_type_phi) { 2810b8e80941Smrg#ifndef NDEBUG 2811b8e80941Smrg nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr); 2812b8e80941Smrg bool found = false; 2813b8e80941Smrg nir_foreach_phi_src(phi_src, cond_phi) { 2814b8e80941Smrg if (phi_src->src.ssa == src->ssa) { 2815b8e80941Smrg found = true; 2816b8e80941Smrg break; 2817b8e80941Smrg } 2818b8e80941Smrg } 2819b8e80941Smrg assert(found); 2820b8e80941Smrg#endif 2821b8e80941Smrg /* The LIST_ENTRY macro is a generic container-of macro, it just happens 2822b8e80941Smrg * to have a more specific name. 2823b8e80941Smrg */ 2824b8e80941Smrg nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src); 2825b8e80941Smrg return nir_after_block_before_jump(phi_src->pred); 2826b8e80941Smrg } else { 2827b8e80941Smrg return nir_before_instr(src->parent_instr); 2828b8e80941Smrg } 2829b8e80941Smrg} 2830b8e80941Smrg 2831b8e80941Smrgstatic inline nir_cursor 2832b8e80941Smrgnir_before_cf_node(nir_cf_node *node) 2833b8e80941Smrg{ 2834b8e80941Smrg if (node->type == nir_cf_node_block) 2835b8e80941Smrg return nir_before_block(nir_cf_node_as_block(node)); 2836b8e80941Smrg 2837b8e80941Smrg return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 2838b8e80941Smrg} 2839b8e80941Smrg 2840b8e80941Smrgstatic inline nir_cursor 2841b8e80941Smrgnir_after_cf_node(nir_cf_node *node) 2842b8e80941Smrg{ 2843b8e80941Smrg if (node->type == nir_cf_node_block) 2844b8e80941Smrg return nir_after_block(nir_cf_node_as_block(node)); 2845b8e80941Smrg 2846b8e80941Smrg return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 2847b8e80941Smrg} 2848b8e80941Smrg 2849b8e80941Smrgstatic inline nir_cursor 2850b8e80941Smrgnir_after_phis(nir_block *block) 2851b8e80941Smrg{ 2852b8e80941Smrg nir_foreach_instr(instr, block) { 2853b8e80941Smrg if (instr->type != nir_instr_type_phi) 2854b8e80941Smrg return nir_before_instr(instr); 2855b8e80941Smrg } 2856b8e80941Smrg return nir_after_block(block); 2857b8e80941Smrg} 2858b8e80941Smrg 2859b8e80941Smrgstatic inline nir_cursor 2860b8e80941Smrgnir_after_cf_node_and_phis(nir_cf_node *node) 2861b8e80941Smrg{ 2862b8e80941Smrg if (node->type == nir_cf_node_block) 2863b8e80941Smrg return nir_after_block(nir_cf_node_as_block(node)); 2864b8e80941Smrg 2865b8e80941Smrg nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 2866b8e80941Smrg 2867b8e80941Smrg return nir_after_phis(block); 2868b8e80941Smrg} 2869b8e80941Smrg 2870b8e80941Smrgstatic inline nir_cursor 2871b8e80941Smrgnir_before_cf_list(struct exec_list *cf_list) 2872b8e80941Smrg{ 2873b8e80941Smrg nir_cf_node *first_node = exec_node_data(nir_cf_node, 2874b8e80941Smrg exec_list_get_head(cf_list), node); 2875b8e80941Smrg return nir_before_cf_node(first_node); 2876b8e80941Smrg} 2877b8e80941Smrg 2878b8e80941Smrgstatic inline nir_cursor 2879b8e80941Smrgnir_after_cf_list(struct exec_list *cf_list) 2880b8e80941Smrg{ 2881b8e80941Smrg nir_cf_node *last_node = exec_node_data(nir_cf_node, 2882b8e80941Smrg exec_list_get_tail(cf_list), node); 2883b8e80941Smrg return nir_after_cf_node(last_node); 2884b8e80941Smrg} 2885b8e80941Smrg 2886b8e80941Smrg/** 2887b8e80941Smrg * Insert a NIR instruction at the given cursor. 2888b8e80941Smrg * 2889b8e80941Smrg * Note: This does not update the cursor. 2890b8e80941Smrg */ 2891b8e80941Smrgvoid nir_instr_insert(nir_cursor cursor, nir_instr *instr); 2892b8e80941Smrg 2893b8e80941Smrgstatic inline void 2894b8e80941Smrgnir_instr_insert_before(nir_instr *instr, nir_instr *before) 2895b8e80941Smrg{ 2896b8e80941Smrg nir_instr_insert(nir_before_instr(instr), before); 2897b8e80941Smrg} 2898b8e80941Smrg 2899b8e80941Smrgstatic inline void 2900b8e80941Smrgnir_instr_insert_after(nir_instr *instr, nir_instr *after) 2901b8e80941Smrg{ 2902b8e80941Smrg nir_instr_insert(nir_after_instr(instr), after); 2903b8e80941Smrg} 2904b8e80941Smrg 2905b8e80941Smrgstatic inline void 2906b8e80941Smrgnir_instr_insert_before_block(nir_block *block, nir_instr *before) 2907b8e80941Smrg{ 2908b8e80941Smrg nir_instr_insert(nir_before_block(block), before); 2909b8e80941Smrg} 2910b8e80941Smrg 2911b8e80941Smrgstatic inline void 2912b8e80941Smrgnir_instr_insert_after_block(nir_block *block, nir_instr *after) 2913b8e80941Smrg{ 2914b8e80941Smrg nir_instr_insert(nir_after_block(block), after); 2915b8e80941Smrg} 2916b8e80941Smrg 2917b8e80941Smrgstatic inline void 2918b8e80941Smrgnir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 2919b8e80941Smrg{ 2920b8e80941Smrg nir_instr_insert(nir_before_cf_node(node), before); 2921b8e80941Smrg} 2922b8e80941Smrg 2923b8e80941Smrgstatic inline void 2924b8e80941Smrgnir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 2925b8e80941Smrg{ 2926b8e80941Smrg nir_instr_insert(nir_after_cf_node(node), after); 2927b8e80941Smrg} 2928b8e80941Smrg 2929b8e80941Smrgstatic inline void 2930b8e80941Smrgnir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 2931b8e80941Smrg{ 2932b8e80941Smrg nir_instr_insert(nir_before_cf_list(list), before); 2933b8e80941Smrg} 2934b8e80941Smrg 2935b8e80941Smrgstatic inline void 2936b8e80941Smrgnir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 2937b8e80941Smrg{ 2938b8e80941Smrg nir_instr_insert(nir_after_cf_list(list), after); 2939b8e80941Smrg} 2940b8e80941Smrg 2941b8e80941Smrgvoid nir_instr_remove_v(nir_instr *instr); 2942b8e80941Smrg 2943b8e80941Smrgstatic inline nir_cursor 2944b8e80941Smrgnir_instr_remove(nir_instr *instr) 2945b8e80941Smrg{ 2946b8e80941Smrg nir_cursor cursor; 2947b8e80941Smrg nir_instr *prev = nir_instr_prev(instr); 2948b8e80941Smrg if (prev) { 2949b8e80941Smrg cursor = nir_after_instr(prev); 2950b8e80941Smrg } else { 2951b8e80941Smrg cursor = nir_before_block(instr->block); 2952b8e80941Smrg } 2953b8e80941Smrg nir_instr_remove_v(instr); 2954b8e80941Smrg return cursor; 2955b8e80941Smrg} 2956b8e80941Smrg 2957b8e80941Smrg/** @} */ 2958b8e80941Smrg 2959b8e80941Smrgtypedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); 2960b8e80941Smrgtypedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); 2961b8e80941Smrgtypedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 2962b8e80941Smrgbool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, 2963b8e80941Smrg void *state); 2964b8e80941Smrgbool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); 2965b8e80941Smrgbool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 2966b8e80941Smrg 2967b8e80941Smrgnir_const_value *nir_src_as_const_value(nir_src src); 2968b8e80941Smrg 2969b8e80941Smrg#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \ 2970b8e80941Smrgstatic inline c_type * \ 2971b8e80941Smrgnir_src_as_ ## name (nir_src src) \ 2972b8e80941Smrg{ \ 2973b8e80941Smrg return src.is_ssa && src.ssa->parent_instr->type == type_enum \ 2974b8e80941Smrg ? cast_macro(src.ssa->parent_instr) : NULL; \ 2975b8e80941Smrg} 2976b8e80941Smrg 2977b8e80941SmrgNIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 2978b8e80941SmrgNIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 2979b8e80941Smrg nir_instr_type_intrinsic, nir_instr_as_intrinsic) 2980b8e80941SmrgNIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 2981b8e80941Smrg 2982b8e80941Smrgbool nir_src_is_dynamically_uniform(nir_src src); 2983b8e80941Smrgbool nir_srcs_equal(nir_src src1, nir_src src2); 2984b8e80941Smrgbool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 2985b8e80941Smrgvoid nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 2986b8e80941Smrgvoid nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 2987b8e80941Smrgvoid nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 2988b8e80941Smrgvoid nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 2989b8e80941Smrg nir_dest new_dest); 2990b8e80941Smrg 2991b8e80941Smrgvoid nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 2992b8e80941Smrg unsigned num_components, unsigned bit_size, 2993b8e80941Smrg const char *name); 2994b8e80941Smrgvoid nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 2995b8e80941Smrg unsigned num_components, unsigned bit_size, 2996b8e80941Smrg const char *name); 2997b8e80941Smrgstatic inline void 2998b8e80941Smrgnir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 2999b8e80941Smrg const struct glsl_type *type, 3000b8e80941Smrg const char *name) 3001b8e80941Smrg{ 3002b8e80941Smrg assert(glsl_type_is_vector_or_scalar(type)); 3003b8e80941Smrg nir_ssa_dest_init(instr, dest, glsl_get_components(type), 3004b8e80941Smrg glsl_get_bit_size(type), name); 3005b8e80941Smrg} 3006b8e80941Smrgvoid nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); 3007b8e80941Smrgvoid nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, 3008b8e80941Smrg nir_instr *after_me); 3009b8e80941Smrg 3010b8e80941Smrgnir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 3011b8e80941Smrg 3012b8e80941Smrg/* 3013b8e80941Smrg * finds the next basic block in source-code order, returns NULL if there is 3014b8e80941Smrg * none 3015b8e80941Smrg */ 3016b8e80941Smrg 3017b8e80941Smrgnir_block *nir_block_cf_tree_next(nir_block *block); 3018b8e80941Smrg 3019b8e80941Smrg/* Performs the opposite of nir_block_cf_tree_next() */ 3020b8e80941Smrg 3021b8e80941Smrgnir_block *nir_block_cf_tree_prev(nir_block *block); 3022b8e80941Smrg 3023b8e80941Smrg/* Gets the first block in a CF node in source-code order */ 3024b8e80941Smrg 3025b8e80941Smrgnir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 3026b8e80941Smrg 3027b8e80941Smrg/* Gets the last block in a CF node in source-code order */ 3028b8e80941Smrg 3029b8e80941Smrgnir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 3030b8e80941Smrg 3031b8e80941Smrg/* Gets the next block after a CF node in source-code order */ 3032b8e80941Smrg 3033b8e80941Smrgnir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 3034b8e80941Smrg 3035b8e80941Smrg/* Macros for loops that visit blocks in source-code order */ 3036b8e80941Smrg 3037b8e80941Smrg#define nir_foreach_block(block, impl) \ 3038b8e80941Smrg for (nir_block *block = nir_start_block(impl); block != NULL; \ 3039b8e80941Smrg block = nir_block_cf_tree_next(block)) 3040b8e80941Smrg 3041b8e80941Smrg#define nir_foreach_block_safe(block, impl) \ 3042b8e80941Smrg for (nir_block *block = nir_start_block(impl), \ 3043b8e80941Smrg *next = nir_block_cf_tree_next(block); \ 3044b8e80941Smrg block != NULL; \ 3045b8e80941Smrg block = next, next = nir_block_cf_tree_next(block)) 3046b8e80941Smrg 3047b8e80941Smrg#define nir_foreach_block_reverse(block, impl) \ 3048b8e80941Smrg for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 3049b8e80941Smrg block = nir_block_cf_tree_prev(block)) 3050b8e80941Smrg 3051b8e80941Smrg#define nir_foreach_block_reverse_safe(block, impl) \ 3052b8e80941Smrg for (nir_block *block = nir_impl_last_block(impl), \ 3053b8e80941Smrg *prev = nir_block_cf_tree_prev(block); \ 3054b8e80941Smrg block != NULL; \ 3055b8e80941Smrg block = prev, prev = nir_block_cf_tree_prev(block)) 3056b8e80941Smrg 3057b8e80941Smrg#define nir_foreach_block_in_cf_node(block, node) \ 3058b8e80941Smrg for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 3059b8e80941Smrg block != nir_cf_node_cf_tree_next(node); \ 3060b8e80941Smrg block = nir_block_cf_tree_next(block)) 3061b8e80941Smrg 3062b8e80941Smrg/* If the following CF node is an if, this function returns that if. 3063b8e80941Smrg * Otherwise, it returns NULL. 3064b8e80941Smrg */ 3065b8e80941Smrgnir_if *nir_block_get_following_if(nir_block *block); 3066b8e80941Smrg 3067b8e80941Smrgnir_loop *nir_block_get_following_loop(nir_block *block); 3068b8e80941Smrg 3069b8e80941Smrgvoid nir_index_local_regs(nir_function_impl *impl); 3070b8e80941Smrgvoid nir_index_ssa_defs(nir_function_impl *impl); 3071b8e80941Smrgunsigned nir_index_instrs(nir_function_impl *impl); 3072b8e80941Smrg 3073b8e80941Smrgvoid nir_index_blocks(nir_function_impl *impl); 3074b8e80941Smrg 3075b8e80941Smrgvoid nir_print_shader(nir_shader *shader, FILE *fp); 3076b8e80941Smrgvoid nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 3077b8e80941Smrgvoid nir_print_instr(const nir_instr *instr, FILE *fp); 3078b8e80941Smrgvoid nir_print_deref(const nir_deref_instr *deref, FILE *fp); 3079b8e80941Smrg 3080b8e80941Smrgnir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 3081b8e80941Smrgnir_function_impl *nir_function_impl_clone(nir_shader *shader, 3082b8e80941Smrg const nir_function_impl *fi); 3083b8e80941Smrgnir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 3084b8e80941Smrgnir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 3085b8e80941Smrg 3086b8e80941Smrgnir_shader *nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s); 3087b8e80941Smrg 3088b8e80941Smrg#ifndef NDEBUG 3089b8e80941Smrgvoid nir_validate_shader(nir_shader *shader, const char *when); 3090b8e80941Smrgvoid nir_metadata_set_validation_flag(nir_shader *shader); 3091b8e80941Smrgvoid nir_metadata_check_validation_flag(nir_shader *shader); 3092b8e80941Smrg 3093b8e80941Smrgstatic inline bool 3094b8e80941Smrgshould_skip_nir(const char *name) 3095b8e80941Smrg{ 3096b8e80941Smrg static const char *list = NULL; 3097b8e80941Smrg if (!list) { 3098b8e80941Smrg /* Comma separated list of names to skip. */ 3099b8e80941Smrg list = getenv("NIR_SKIP"); 3100b8e80941Smrg if (!list) 3101b8e80941Smrg list = ""; 3102b8e80941Smrg } 3103b8e80941Smrg 3104b8e80941Smrg if (!list[0]) 3105b8e80941Smrg return false; 3106b8e80941Smrg 3107b8e80941Smrg return comma_separated_list_contains(list, name); 3108b8e80941Smrg} 3109b8e80941Smrg 3110b8e80941Smrgstatic inline bool 3111b8e80941Smrgshould_clone_nir(void) 3112b8e80941Smrg{ 3113b8e80941Smrg static int should_clone = -1; 3114b8e80941Smrg if (should_clone < 0) 3115b8e80941Smrg should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 3116b8e80941Smrg 3117b8e80941Smrg return should_clone; 3118b8e80941Smrg} 3119b8e80941Smrg 3120b8e80941Smrgstatic inline bool 3121b8e80941Smrgshould_serialize_deserialize_nir(void) 3122b8e80941Smrg{ 3123b8e80941Smrg static int test_serialize = -1; 3124b8e80941Smrg if (test_serialize < 0) 3125b8e80941Smrg test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false); 3126b8e80941Smrg 3127b8e80941Smrg return test_serialize; 3128b8e80941Smrg} 3129b8e80941Smrg 3130b8e80941Smrgstatic inline bool 3131b8e80941Smrgshould_print_nir(void) 3132b8e80941Smrg{ 3133b8e80941Smrg static int should_print = -1; 3134b8e80941Smrg if (should_print < 0) 3135b8e80941Smrg should_print = env_var_as_boolean("NIR_PRINT", false); 3136b8e80941Smrg 3137b8e80941Smrg return should_print; 3138b8e80941Smrg} 3139b8e80941Smrg#else 3140b8e80941Smrgstatic inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } 3141b8e80941Smrgstatic inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 3142b8e80941Smrgstatic inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 3143b8e80941Smrgstatic inline bool should_skip_nir(UNUSED const char *pass_name) { return false; } 3144b8e80941Smrgstatic inline bool should_clone_nir(void) { return false; } 3145b8e80941Smrgstatic inline bool should_serialize_deserialize_nir(void) { return false; } 3146b8e80941Smrgstatic inline bool should_print_nir(void) { return false; } 3147b8e80941Smrg#endif /* NDEBUG */ 3148b8e80941Smrg 3149b8e80941Smrg#define _PASS(pass, nir, do_pass) do { \ 3150b8e80941Smrg if (should_skip_nir(#pass)) { \ 3151b8e80941Smrg printf("skipping %s\n", #pass); \ 3152b8e80941Smrg break; \ 3153b8e80941Smrg } \ 3154b8e80941Smrg do_pass \ 3155b8e80941Smrg nir_validate_shader(nir, "after " #pass); \ 3156b8e80941Smrg if (should_clone_nir()) { \ 3157b8e80941Smrg nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 3158b8e80941Smrg ralloc_free(nir); \ 3159b8e80941Smrg nir = clone; \ 3160b8e80941Smrg } \ 3161b8e80941Smrg if (should_serialize_deserialize_nir()) { \ 3162b8e80941Smrg void *mem_ctx = ralloc_parent(nir); \ 3163b8e80941Smrg nir = nir_shader_serialize_deserialize(mem_ctx, nir); \ 3164b8e80941Smrg } \ 3165b8e80941Smrg} while (0) 3166b8e80941Smrg 3167b8e80941Smrg#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \ 3168b8e80941Smrg nir_metadata_set_validation_flag(nir); \ 3169b8e80941Smrg if (should_print_nir()) \ 3170b8e80941Smrg printf("%s\n", #pass); \ 3171b8e80941Smrg if (pass(nir, ##__VA_ARGS__)) { \ 3172b8e80941Smrg progress = true; \ 3173b8e80941Smrg if (should_print_nir()) \ 3174b8e80941Smrg nir_print_shader(nir, stdout); \ 3175b8e80941Smrg nir_metadata_check_validation_flag(nir); \ 3176b8e80941Smrg } \ 3177b8e80941Smrg) 3178b8e80941Smrg 3179b8e80941Smrg#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \ 3180b8e80941Smrg if (should_print_nir()) \ 3181b8e80941Smrg printf("%s\n", #pass); \ 3182b8e80941Smrg pass(nir, ##__VA_ARGS__); \ 3183b8e80941Smrg if (should_print_nir()) \ 3184b8e80941Smrg nir_print_shader(nir, stdout); \ 3185b8e80941Smrg) 3186b8e80941Smrg 3187b8e80941Smrg#define NIR_SKIP(name) should_skip_nir(#name) 3188b8e80941Smrg 3189b8e80941Smrgvoid nir_calc_dominance_impl(nir_function_impl *impl); 3190b8e80941Smrgvoid nir_calc_dominance(nir_shader *shader); 3191b8e80941Smrg 3192b8e80941Smrgnir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 3193b8e80941Smrgbool nir_block_dominates(nir_block *parent, nir_block *child); 3194b8e80941Smrgbool nir_block_is_unreachable(nir_block *block); 3195b8e80941Smrg 3196b8e80941Smrgvoid nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 3197b8e80941Smrgvoid nir_dump_dom_tree(nir_shader *shader, FILE *fp); 3198b8e80941Smrg 3199b8e80941Smrgvoid nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 3200b8e80941Smrgvoid nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 3201b8e80941Smrg 3202b8e80941Smrgvoid nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 3203b8e80941Smrgvoid nir_dump_cfg(nir_shader *shader, FILE *fp); 3204b8e80941Smrg 3205b8e80941Smrgint nir_gs_count_vertices(const nir_shader *shader); 3206b8e80941Smrg 3207b8e80941Smrgbool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 3208b8e80941Smrgbool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 3209b8e80941Smrgbool nir_split_var_copies(nir_shader *shader); 3210b8e80941Smrgbool nir_split_per_member_structs(nir_shader *shader); 3211b8e80941Smrgbool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 3212b8e80941Smrg 3213b8e80941Smrgbool nir_lower_returns_impl(nir_function_impl *impl); 3214b8e80941Smrgbool nir_lower_returns(nir_shader *shader); 3215b8e80941Smrg 3216b8e80941Smrgvoid nir_inline_function_impl(struct nir_builder *b, 3217b8e80941Smrg const nir_function_impl *impl, 3218b8e80941Smrg nir_ssa_def **params); 3219b8e80941Smrgbool nir_inline_functions(nir_shader *shader); 3220b8e80941Smrg 3221b8e80941Smrgbool nir_propagate_invariant(nir_shader *shader); 3222b8e80941Smrg 3223b8e80941Smrgvoid nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 3224b8e80941Smrgvoid nir_lower_deref_copy_instr(struct nir_builder *b, 3225b8e80941Smrg nir_intrinsic_instr *copy); 3226b8e80941Smrgbool nir_lower_var_copies(nir_shader *shader); 3227b8e80941Smrg 3228b8e80941Smrgvoid nir_fixup_deref_modes(nir_shader *shader); 3229b8e80941Smrg 3230b8e80941Smrgbool nir_lower_global_vars_to_local(nir_shader *shader); 3231b8e80941Smrg 3232b8e80941Smrgtypedef enum { 3233b8e80941Smrg nir_lower_direct_array_deref_of_vec_load = (1 << 0), 3234b8e80941Smrg nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 3235b8e80941Smrg nir_lower_direct_array_deref_of_vec_store = (1 << 2), 3236b8e80941Smrg nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 3237b8e80941Smrg} nir_lower_array_deref_of_vec_options; 3238b8e80941Smrg 3239b8e80941Smrgbool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 3240b8e80941Smrg nir_lower_array_deref_of_vec_options options); 3241b8e80941Smrg 3242b8e80941Smrgbool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes); 3243b8e80941Smrg 3244b8e80941Smrgbool nir_lower_locals_to_regs(nir_shader *shader); 3245b8e80941Smrg 3246b8e80941Smrgvoid nir_lower_io_to_temporaries(nir_shader *shader, 3247b8e80941Smrg nir_function_impl *entrypoint, 3248b8e80941Smrg bool outputs, bool inputs); 3249b8e80941Smrg 3250b8e80941Smrgbool nir_lower_vars_to_scratch(nir_shader *shader, 3251b8e80941Smrg nir_variable_mode modes, 3252b8e80941Smrg int size_threshold, 3253b8e80941Smrg glsl_type_size_align_func size_align); 3254b8e80941Smrg 3255b8e80941Smrgvoid nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 3256b8e80941Smrg 3257b8e80941Smrgvoid nir_gather_ssa_types(nir_function_impl *impl, 3258b8e80941Smrg BITSET_WORD *float_types, 3259b8e80941Smrg BITSET_WORD *int_types); 3260b8e80941Smrg 3261b8e80941Smrgvoid nir_assign_var_locations(struct exec_list *var_list, unsigned *size, 3262b8e80941Smrg int (*type_size)(const struct glsl_type *, bool)); 3263b8e80941Smrg 3264b8e80941Smrg/* Some helpers to do very simple linking */ 3265b8e80941Smrgbool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 3266b8e80941Smrgbool nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list, 3267b8e80941Smrg uint64_t *used_by_other_stage, 3268b8e80941Smrg uint64_t *used_by_other_stage_patches); 3269b8e80941Smrgvoid nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 3270b8e80941Smrg bool default_to_smooth_interp); 3271b8e80941Smrgvoid nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 3272b8e80941Smrgbool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 3273b8e80941Smrg 3274b8e80941Smrgtypedef enum { 3275b8e80941Smrg /* If set, this forces all non-flat fragment shader inputs to be 3276b8e80941Smrg * interpolated as if with the "sample" qualifier. This requires 3277b8e80941Smrg * nir_shader_compiler_options::use_interpolated_input_intrinsics. 3278b8e80941Smrg */ 3279b8e80941Smrg nir_lower_io_force_sample_interpolation = (1 << 1), 3280b8e80941Smrg} nir_lower_io_options; 3281b8e80941Smrgbool nir_lower_io(nir_shader *shader, 3282b8e80941Smrg nir_variable_mode modes, 3283b8e80941Smrg int (*type_size)(const struct glsl_type *, bool), 3284b8e80941Smrg nir_lower_io_options); 3285b8e80941Smrg 3286b8e80941Smrgtypedef enum { 3287b8e80941Smrg /** 3288b8e80941Smrg * An address format which is a simple 32-bit global GPU address. 3289b8e80941Smrg */ 3290b8e80941Smrg nir_address_format_32bit_global, 3291b8e80941Smrg 3292b8e80941Smrg /** 3293b8e80941Smrg * An address format which is a simple 64-bit global GPU address. 3294b8e80941Smrg */ 3295b8e80941Smrg nir_address_format_64bit_global, 3296b8e80941Smrg 3297b8e80941Smrg /** 3298b8e80941Smrg * An address format which is a bounds-checked 64-bit global GPU address. 3299b8e80941Smrg * 3300b8e80941Smrg * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 3301b8e80941Smrg * address stored with the low bits in .x and high bits in .y, .z is a 3302b8e80941Smrg * size, and .w is an offset. When the final I/O operation is lowered, .w 3303b8e80941Smrg * is checked against .z and the operation is predicated on the result. 3304b8e80941Smrg */ 3305b8e80941Smrg nir_address_format_64bit_bounded_global, 3306b8e80941Smrg 3307b8e80941Smrg /** 3308b8e80941Smrg * An address format which is comprised of a vec2 where the first 3309b8e80941Smrg * component is a buffer index and the second is an offset. 3310b8e80941Smrg */ 3311b8e80941Smrg nir_address_format_32bit_index_offset, 3312b8e80941Smrg} nir_address_format; 3313b8e80941Smrg 3314b8e80941Smrgstatic inline unsigned 3315b8e80941Smrgnir_address_format_bit_size(nir_address_format addr_format) 3316b8e80941Smrg{ 3317b8e80941Smrg switch (addr_format) { 3318b8e80941Smrg case nir_address_format_32bit_global: return 32; 3319b8e80941Smrg case nir_address_format_64bit_global: return 64; 3320b8e80941Smrg case nir_address_format_64bit_bounded_global: return 32; 3321b8e80941Smrg case nir_address_format_32bit_index_offset: return 32; 3322b8e80941Smrg } 3323b8e80941Smrg unreachable("Invalid address format"); 3324b8e80941Smrg} 3325b8e80941Smrg 3326b8e80941Smrgstatic inline unsigned 3327b8e80941Smrgnir_address_format_num_components(nir_address_format addr_format) 3328b8e80941Smrg{ 3329b8e80941Smrg switch (addr_format) { 3330b8e80941Smrg case nir_address_format_32bit_global: return 1; 3331b8e80941Smrg case nir_address_format_64bit_global: return 1; 3332b8e80941Smrg case nir_address_format_64bit_bounded_global: return 4; 3333b8e80941Smrg case nir_address_format_32bit_index_offset: return 2; 3334b8e80941Smrg } 3335b8e80941Smrg unreachable("Invalid address format"); 3336b8e80941Smrg} 3337b8e80941Smrg 3338b8e80941Smrgstatic inline const struct glsl_type * 3339b8e80941Smrgnir_address_format_to_glsl_type(nir_address_format addr_format) 3340b8e80941Smrg{ 3341b8e80941Smrg unsigned bit_size = nir_address_format_bit_size(addr_format); 3342b8e80941Smrg assert(bit_size == 32 || bit_size == 64); 3343b8e80941Smrg return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 3344b8e80941Smrg nir_address_format_num_components(addr_format)); 3345b8e80941Smrg} 3346b8e80941Smrg 3347b8e80941Smrgnir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 3348b8e80941Smrg nir_deref_instr *deref, 3349b8e80941Smrg nir_ssa_def *base_addr, 3350b8e80941Smrg nir_address_format addr_format); 3351b8e80941Smrgvoid nir_lower_explicit_io_instr(struct nir_builder *b, 3352b8e80941Smrg nir_intrinsic_instr *io_instr, 3353b8e80941Smrg nir_ssa_def *addr, 3354b8e80941Smrg nir_address_format addr_format); 3355b8e80941Smrg 3356b8e80941Smrgbool nir_lower_explicit_io(nir_shader *shader, 3357b8e80941Smrg nir_variable_mode modes, 3358b8e80941Smrg nir_address_format); 3359b8e80941Smrg 3360b8e80941Smrgnir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 3361b8e80941Smrgnir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 3362b8e80941Smrg 3363b8e80941Smrgbool nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage); 3364b8e80941Smrg 3365b8e80941Smrgbool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 3366b8e80941Smrgbool nir_lower_regs_to_ssa(nir_shader *shader); 3367b8e80941Smrgbool nir_lower_vars_to_ssa(nir_shader *shader); 3368b8e80941Smrg 3369b8e80941Smrgbool nir_remove_dead_derefs(nir_shader *shader); 3370b8e80941Smrgbool nir_remove_dead_derefs_impl(nir_function_impl *impl); 3371b8e80941Smrgbool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); 3372b8e80941Smrgbool nir_lower_constant_initializers(nir_shader *shader, 3373b8e80941Smrg nir_variable_mode modes); 3374b8e80941Smrg 3375b8e80941Smrgbool nir_move_load_const(nir_shader *shader); 3376b8e80941Smrgbool nir_move_vec_src_uses_to_dest(nir_shader *shader); 3377b8e80941Smrgbool nir_lower_vec_to_movs(nir_shader *shader); 3378b8e80941Smrgvoid nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 3379b8e80941Smrg bool alpha_to_one); 3380b8e80941Smrgbool nir_lower_alu(nir_shader *shader); 3381b8e80941Smrgbool nir_lower_alu_to_scalar(nir_shader *shader); 3382b8e80941Smrgbool nir_lower_bool_to_float(nir_shader *shader); 3383b8e80941Smrgbool nir_lower_bool_to_int32(nir_shader *shader); 3384b8e80941Smrgbool nir_lower_load_const_to_scalar(nir_shader *shader); 3385b8e80941Smrgbool nir_lower_read_invocation_to_scalar(nir_shader *shader); 3386b8e80941Smrgbool nir_lower_phis_to_scalar(nir_shader *shader); 3387b8e80941Smrgvoid nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 3388b8e80941Smrgvoid nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 3389b8e80941Smrg bool outputs_only); 3390b8e80941Smrgvoid nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 3391b8e80941Smrgvoid nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 3392b8e80941Smrgbool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); 3393b8e80941Smrg 3394b8e80941Smrgvoid nir_lower_fragcoord_wtrans(nir_shader *shader); 3395b8e80941Smrgvoid nir_lower_viewport_transform(nir_shader *shader); 3396b8e80941Smrgbool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier); 3397b8e80941Smrg 3398b8e80941Smrgtypedef struct nir_lower_subgroups_options { 3399b8e80941Smrg uint8_t subgroup_size; 3400b8e80941Smrg uint8_t ballot_bit_size; 3401b8e80941Smrg bool lower_to_scalar:1; 3402b8e80941Smrg bool lower_vote_trivial:1; 3403b8e80941Smrg bool lower_vote_eq_to_ballot:1; 3404b8e80941Smrg bool lower_subgroup_masks:1; 3405b8e80941Smrg bool lower_shuffle:1; 3406b8e80941Smrg bool lower_shuffle_to_32bit:1; 3407b8e80941Smrg bool lower_quad:1; 3408b8e80941Smrg} nir_lower_subgroups_options; 3409b8e80941Smrg 3410b8e80941Smrgbool nir_lower_subgroups(nir_shader *shader, 3411b8e80941Smrg const nir_lower_subgroups_options *options); 3412b8e80941Smrg 3413b8e80941Smrgbool nir_lower_system_values(nir_shader *shader); 3414b8e80941Smrg 3415b8e80941Smrgenum PACKED nir_lower_tex_packing { 3416b8e80941Smrg nir_lower_tex_packing_none = 0, 3417b8e80941Smrg /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed 3418b8e80941Smrg * or unsigned ints based on the sampler type 3419b8e80941Smrg */ 3420b8e80941Smrg nir_lower_tex_packing_16, 3421b8e80941Smrg /* The sampler returns 1 32-bit word of 4x8 unorm */ 3422b8e80941Smrg nir_lower_tex_packing_8, 3423b8e80941Smrg}; 3424b8e80941Smrg 3425b8e80941Smrgtypedef struct nir_lower_tex_options { 3426b8e80941Smrg /** 3427b8e80941Smrg * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 3428b8e80941Smrg * sampler types a texture projector is lowered. 3429b8e80941Smrg */ 3430b8e80941Smrg unsigned lower_txp; 3431b8e80941Smrg 3432b8e80941Smrg /** 3433b8e80941Smrg * If true, lower away nir_tex_src_offset for all texelfetch instructions. 3434b8e80941Smrg */ 3435b8e80941Smrg bool lower_txf_offset; 3436b8e80941Smrg 3437b8e80941Smrg /** 3438b8e80941Smrg * If true, lower away nir_tex_src_offset for all rect textures. 3439b8e80941Smrg */ 3440b8e80941Smrg bool lower_rect_offset; 3441b8e80941Smrg 3442b8e80941Smrg /** 3443b8e80941Smrg * If true, lower rect textures to 2D, using txs to fetch the 3444b8e80941Smrg * texture dimensions and dividing the texture coords by the 3445b8e80941Smrg * texture dims to normalize. 3446b8e80941Smrg */ 3447b8e80941Smrg bool lower_rect; 3448b8e80941Smrg 3449b8e80941Smrg /** 3450b8e80941Smrg * If true, convert yuv to rgb. 3451b8e80941Smrg */ 3452b8e80941Smrg unsigned lower_y_uv_external; 3453b8e80941Smrg unsigned lower_y_u_v_external; 3454b8e80941Smrg unsigned lower_yx_xuxv_external; 3455b8e80941Smrg unsigned lower_xy_uxvx_external; 3456b8e80941Smrg unsigned lower_ayuv_external; 3457b8e80941Smrg unsigned lower_xyuv_external; 3458b8e80941Smrg 3459b8e80941Smrg /** 3460b8e80941Smrg * To emulate certain texture wrap modes, this can be used 3461b8e80941Smrg * to saturate the specified tex coord to [0.0, 1.0]. The 3462b8e80941Smrg * bits are according to sampler #, ie. if, for example: 3463b8e80941Smrg * 3464b8e80941Smrg * (conf->saturate_s & (1 << n)) 3465b8e80941Smrg * 3466b8e80941Smrg * is true, then the s coord for sampler n is saturated. 3467b8e80941Smrg * 3468b8e80941Smrg * Note that clamping must happen *after* projector lowering 3469b8e80941Smrg * so any projected texture sample instruction with a clamped 3470b8e80941Smrg * coordinate gets automatically lowered, regardless of the 3471b8e80941Smrg * 'lower_txp' setting. 3472b8e80941Smrg */ 3473b8e80941Smrg unsigned saturate_s; 3474b8e80941Smrg unsigned saturate_t; 3475b8e80941Smrg unsigned saturate_r; 3476b8e80941Smrg 3477b8e80941Smrg /* Bitmask of textures that need swizzling. 3478b8e80941Smrg * 3479b8e80941Smrg * If (swizzle_result & (1 << texture_index)), then the swizzle in 3480b8e80941Smrg * swizzles[texture_index] is applied to the result of the texturing 3481b8e80941Smrg * operation. 3482b8e80941Smrg */ 3483b8e80941Smrg unsigned swizzle_result; 3484b8e80941Smrg 3485b8e80941Smrg /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 3486b8e80941Smrg * while 4 and 5 represent 0 and 1 respectively. 3487b8e80941Smrg */ 3488b8e80941Smrg uint8_t swizzles[32][4]; 3489b8e80941Smrg 3490b8e80941Smrg /* Can be used to scale sampled values in range required by the format. */ 3491b8e80941Smrg float scale_factors[32]; 3492b8e80941Smrg 3493b8e80941Smrg /** 3494b8e80941Smrg * Bitmap of textures that need srgb to linear conversion. If 3495b8e80941Smrg * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 3496b8e80941Smrg * of the texture are lowered to linear. 3497b8e80941Smrg */ 3498b8e80941Smrg unsigned lower_srgb; 3499b8e80941Smrg 3500b8e80941Smrg /** 3501b8e80941Smrg * If true, lower nir_texop_tex on shaders that doesn't support implicit 3502b8e80941Smrg * LODs to nir_texop_txl. 3503b8e80941Smrg */ 3504b8e80941Smrg bool lower_tex_without_implicit_lod; 3505b8e80941Smrg 3506b8e80941Smrg /** 3507b8e80941Smrg * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 3508b8e80941Smrg */ 3509b8e80941Smrg bool lower_txd_cube_map; 3510b8e80941Smrg 3511b8e80941Smrg /** 3512b8e80941Smrg * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 3513b8e80941Smrg */ 3514b8e80941Smrg bool lower_txd_3d; 3515b8e80941Smrg 3516b8e80941Smrg /** 3517b8e80941Smrg * If true, lower nir_texop_txd on shadow samplers (except cube maps) 3518b8e80941Smrg * with nir_texop_txl. Notice that cube map shadow samplers are lowered 3519b8e80941Smrg * with lower_txd_cube_map. 3520b8e80941Smrg */ 3521b8e80941Smrg bool lower_txd_shadow; 3522b8e80941Smrg 3523b8e80941Smrg /** 3524b8e80941Smrg * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 3525b8e80941Smrg * Implies lower_txd_cube_map and lower_txd_shadow. 3526b8e80941Smrg */ 3527b8e80941Smrg bool lower_txd; 3528b8e80941Smrg 3529b8e80941Smrg /** 3530b8e80941Smrg * If true, lower nir_texop_txb that try to use shadow compare and min_lod 3531b8e80941Smrg * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 3532b8e80941Smrg */ 3533b8e80941Smrg bool lower_txb_shadow_clamp; 3534b8e80941Smrg 3535b8e80941Smrg /** 3536b8e80941Smrg * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 3537b8e80941Smrg * with nir_texop_txl. This includes cube maps. 3538b8e80941Smrg */ 3539b8e80941Smrg bool lower_txd_shadow_clamp; 3540b8e80941Smrg 3541b8e80941Smrg /** 3542b8e80941Smrg * If true, lower nir_texop_txd on when it uses both offset and min_lod 3543b8e80941Smrg * with nir_texop_txl. This includes cube maps. 3544b8e80941Smrg */ 3545b8e80941Smrg bool lower_txd_offset_clamp; 3546b8e80941Smrg 3547b8e80941Smrg /** 3548b8e80941Smrg * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 3549b8e80941Smrg * sampler is bindless. 3550b8e80941Smrg */ 3551b8e80941Smrg bool lower_txd_clamp_bindless_sampler; 3552b8e80941Smrg 3553b8e80941Smrg /** 3554b8e80941Smrg * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 3555b8e80941Smrg * sampler index is not statically determinable to be less than 16. 3556b8e80941Smrg */ 3557b8e80941Smrg bool lower_txd_clamp_if_sampler_index_not_lt_16; 3558b8e80941Smrg 3559b8e80941Smrg /** 3560b8e80941Smrg * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 3561b8e80941Smrg * mixed-up tg4 locations. 3562b8e80941Smrg */ 3563b8e80941Smrg bool lower_tg4_broadcom_swizzle; 3564b8e80941Smrg 3565b8e80941Smrg /** 3566b8e80941Smrg * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 3567b8e80941Smrg */ 3568b8e80941Smrg bool lower_tg4_offsets; 3569b8e80941Smrg 3570b8e80941Smrg enum nir_lower_tex_packing lower_tex_packing[32]; 3571b8e80941Smrg} nir_lower_tex_options; 3572b8e80941Smrg 3573b8e80941Smrgbool nir_lower_tex(nir_shader *shader, 3574b8e80941Smrg const nir_lower_tex_options *options); 3575b8e80941Smrg 3576b8e80941Smrgenum nir_lower_non_uniform_access_type { 3577b8e80941Smrg nir_lower_non_uniform_ubo_access = (1 << 0), 3578b8e80941Smrg nir_lower_non_uniform_ssbo_access = (1 << 1), 3579b8e80941Smrg nir_lower_non_uniform_texture_access = (1 << 2), 3580b8e80941Smrg nir_lower_non_uniform_image_access = (1 << 3), 3581b8e80941Smrg}; 3582b8e80941Smrg 3583b8e80941Smrgbool nir_lower_non_uniform_access(nir_shader *shader, 3584b8e80941Smrg enum nir_lower_non_uniform_access_type); 3585b8e80941Smrg 3586b8e80941Smrgbool nir_lower_idiv(nir_shader *shader); 3587b8e80941Smrg 3588b8e80941Smrgbool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars); 3589b8e80941Smrgbool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); 3590b8e80941Smrgbool nir_lower_clip_cull_distance_arrays(nir_shader *nir); 3591b8e80941Smrg 3592b8e80941Smrgbool nir_lower_frexp(nir_shader *nir); 3593b8e80941Smrg 3594b8e80941Smrgvoid nir_lower_two_sided_color(nir_shader *shader); 3595b8e80941Smrg 3596b8e80941Smrgbool nir_lower_clamp_color_outputs(nir_shader *shader); 3597b8e80941Smrg 3598b8e80941Smrgvoid nir_lower_passthrough_edgeflags(nir_shader *shader); 3599b8e80941Smrgbool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, 3600b8e80941Smrg const gl_state_index16 *uniform_state_tokens); 3601b8e80941Smrg 3602b8e80941Smrgtypedef struct nir_lower_wpos_ytransform_options { 3603b8e80941Smrg gl_state_index16 state_tokens[STATE_LENGTH]; 3604b8e80941Smrg bool fs_coord_origin_upper_left :1; 3605b8e80941Smrg bool fs_coord_origin_lower_left :1; 3606b8e80941Smrg bool fs_coord_pixel_center_integer :1; 3607b8e80941Smrg bool fs_coord_pixel_center_half_integer :1; 3608b8e80941Smrg} nir_lower_wpos_ytransform_options; 3609b8e80941Smrg 3610b8e80941Smrgbool nir_lower_wpos_ytransform(nir_shader *shader, 3611b8e80941Smrg const nir_lower_wpos_ytransform_options *options); 3612b8e80941Smrgbool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); 3613b8e80941Smrg 3614b8e80941Smrgbool nir_lower_fb_read(nir_shader *shader); 3615b8e80941Smrg 3616b8e80941Smrgtypedef struct nir_lower_drawpixels_options { 3617b8e80941Smrg gl_state_index16 texcoord_state_tokens[STATE_LENGTH]; 3618b8e80941Smrg gl_state_index16 scale_state_tokens[STATE_LENGTH]; 3619b8e80941Smrg gl_state_index16 bias_state_tokens[STATE_LENGTH]; 3620b8e80941Smrg unsigned drawpix_sampler; 3621b8e80941Smrg unsigned pixelmap_sampler; 3622b8e80941Smrg bool pixel_maps :1; 3623b8e80941Smrg bool scale_and_bias :1; 3624b8e80941Smrg} nir_lower_drawpixels_options; 3625b8e80941Smrg 3626b8e80941Smrgvoid nir_lower_drawpixels(nir_shader *shader, 3627b8e80941Smrg const nir_lower_drawpixels_options *options); 3628b8e80941Smrg 3629b8e80941Smrgtypedef struct nir_lower_bitmap_options { 3630b8e80941Smrg unsigned sampler; 3631b8e80941Smrg bool swizzle_xxxx; 3632b8e80941Smrg} nir_lower_bitmap_options; 3633b8e80941Smrg 3634b8e80941Smrgvoid nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); 3635b8e80941Smrg 3636b8e80941Smrgbool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); 3637b8e80941Smrg 3638b8e80941Smrgtypedef enum { 3639b8e80941Smrg nir_lower_int_source_mods = 1 << 0, 3640b8e80941Smrg nir_lower_float_source_mods = 1 << 1, 3641b8e80941Smrg nir_lower_triop_abs = 1 << 2, 3642b8e80941Smrg nir_lower_all_source_mods = (1 << 3) - 1 3643b8e80941Smrg} nir_lower_to_source_mods_flags; 3644b8e80941Smrg 3645b8e80941Smrg 3646b8e80941Smrgbool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); 3647b8e80941Smrg 3648b8e80941Smrgbool nir_lower_gs_intrinsics(nir_shader *shader); 3649b8e80941Smrg 3650b8e80941Smrgtypedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); 3651b8e80941Smrg 3652b8e80941Smrgbool nir_lower_bit_size(nir_shader *shader, 3653b8e80941Smrg nir_lower_bit_size_callback callback, 3654b8e80941Smrg void *callback_data); 3655b8e80941Smrg 3656b8e80941Smrgnir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode); 3657b8e80941Smrgbool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options); 3658b8e80941Smrg 3659b8e80941Smrgnir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode); 3660b8e80941Smrgbool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, 3661b8e80941Smrg nir_lower_doubles_options options); 3662b8e80941Smrgbool nir_lower_pack(nir_shader *shader); 3663b8e80941Smrg 3664b8e80941Smrgbool nir_normalize_cubemap_coords(nir_shader *shader); 3665b8e80941Smrg 3666b8e80941Smrgvoid nir_live_ssa_defs_impl(nir_function_impl *impl); 3667b8e80941Smrg 3668b8e80941Smrgvoid nir_loop_analyze_impl(nir_function_impl *impl, 3669b8e80941Smrg nir_variable_mode indirect_mask); 3670b8e80941Smrg 3671b8e80941Smrgbool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); 3672b8e80941Smrg 3673b8e80941Smrgbool nir_repair_ssa_impl(nir_function_impl *impl); 3674b8e80941Smrgbool nir_repair_ssa(nir_shader *shader); 3675b8e80941Smrg 3676b8e80941Smrgvoid nir_convert_loop_to_lcssa(nir_loop *loop); 3677b8e80941Smrg 3678b8e80941Smrg/* If phi_webs_only is true, only convert SSA values involved in phi nodes to 3679b8e80941Smrg * registers. If false, convert all values (even those not involved in a phi 3680b8e80941Smrg * node) to registers. 3681b8e80941Smrg */ 3682b8e80941Smrgbool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); 3683b8e80941Smrg 3684b8e80941Smrgbool nir_lower_phis_to_regs_block(nir_block *block); 3685b8e80941Smrgbool nir_lower_ssa_defs_to_regs_block(nir_block *block); 3686b8e80941Smrgbool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); 3687b8e80941Smrg 3688b8e80941Smrg/* This is here for unit tests. */ 3689b8e80941Smrgbool nir_opt_comparison_pre_impl(nir_function_impl *impl); 3690b8e80941Smrg 3691b8e80941Smrgbool nir_opt_comparison_pre(nir_shader *shader); 3692b8e80941Smrg 3693b8e80941Smrgbool nir_opt_algebraic(nir_shader *shader); 3694b8e80941Smrgbool nir_opt_algebraic_before_ffma(nir_shader *shader); 3695b8e80941Smrgbool nir_opt_algebraic_late(nir_shader *shader); 3696b8e80941Smrgbool nir_opt_constant_folding(nir_shader *shader); 3697b8e80941Smrg 3698b8e80941Smrgbool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); 3699b8e80941Smrg 3700b8e80941Smrgbool nir_copy_prop(nir_shader *shader); 3701b8e80941Smrg 3702b8e80941Smrgbool nir_opt_copy_prop_vars(nir_shader *shader); 3703b8e80941Smrg 3704b8e80941Smrgbool nir_opt_cse(nir_shader *shader); 3705b8e80941Smrg 3706b8e80941Smrgbool nir_opt_dce(nir_shader *shader); 3707b8e80941Smrg 3708b8e80941Smrgbool nir_opt_dead_cf(nir_shader *shader); 3709b8e80941Smrg 3710b8e80941Smrgbool nir_opt_dead_write_vars(nir_shader *shader); 3711b8e80941Smrg 3712b8e80941Smrgbool nir_opt_deref_impl(nir_function_impl *impl); 3713b8e80941Smrgbool nir_opt_deref(nir_shader *shader); 3714b8e80941Smrg 3715b8e80941Smrgbool nir_opt_find_array_copies(nir_shader *shader); 3716b8e80941Smrg 3717b8e80941Smrgbool nir_opt_gcm(nir_shader *shader, bool value_number); 3718b8e80941Smrg 3719b8e80941Smrgbool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); 3720b8e80941Smrg 3721b8e80941Smrgbool nir_opt_if(nir_shader *shader, bool aggressive_last_continue); 3722b8e80941Smrg 3723b8e80941Smrgbool nir_opt_intrinsics(nir_shader *shader); 3724b8e80941Smrg 3725b8e80941Smrgbool nir_opt_large_constants(nir_shader *shader, 3726b8e80941Smrg glsl_type_size_align_func size_align, 3727b8e80941Smrg unsigned threshold); 3728b8e80941Smrg 3729b8e80941Smrgbool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); 3730b8e80941Smrg 3731b8e80941Smrgbool nir_opt_move_comparisons(nir_shader *shader); 3732b8e80941Smrg 3733b8e80941Smrgbool nir_opt_move_load_ubo(nir_shader *shader); 3734b8e80941Smrg 3735b8e80941Smrgbool nir_opt_peephole_select(nir_shader *shader, unsigned limit, 3736b8e80941Smrg bool indirect_load_ok, bool expensive_alu_ok); 3737b8e80941Smrg 3738b8e80941Smrgbool nir_opt_remove_phis(nir_shader *shader); 3739b8e80941Smrgbool nir_opt_remove_phis_block(nir_block *block); 3740b8e80941Smrg 3741b8e80941Smrgbool nir_opt_shrink_load(nir_shader *shader); 3742b8e80941Smrg 3743b8e80941Smrgbool nir_opt_trivial_continues(nir_shader *shader); 3744b8e80941Smrg 3745b8e80941Smrgbool nir_opt_undef(nir_shader *shader); 3746b8e80941Smrg 3747b8e80941Smrgbool nir_opt_conditional_discard(nir_shader *shader); 3748b8e80941Smrg 3749b8e80941Smrgvoid nir_strip(nir_shader *shader); 3750b8e80941Smrg 3751b8e80941Smrgvoid nir_sweep(nir_shader *shader); 3752b8e80941Smrg 3753b8e80941Smrgvoid nir_remap_dual_slot_attributes(nir_shader *shader, 3754b8e80941Smrg uint64_t *dual_slot_inputs); 3755b8e80941Smrguint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); 3756b8e80941Smrg 3757b8e80941Smrgnir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 3758b8e80941Smrggl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 3759b8e80941Smrg 3760b8e80941Smrg#ifdef __cplusplus 3761b8e80941Smrg} /* extern "C" */ 3762b8e80941Smrg#endif 3763b8e80941Smrg 3764b8e80941Smrg#endif /* NIR_H */ 3765