1/* 2 * Copyright © 2014 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Connor Abbott (cwabbott0@gmail.com) 25 * 26 */ 27 28#ifndef NIR_H 29#define NIR_H 30 31#include "util/hash_table.h" 32#include "compiler/glsl/list.h" 33#include "GL/gl.h" /* GLenum */ 34#include "util/list.h" 35#include "util/log.h" 36#include "util/ralloc.h" 37#include "util/set.h" 38#include "util/bitscan.h" 39#include "util/bitset.h" 40#include "util/compiler.h" 41#include "util/enum_operators.h" 42#include "util/macros.h" 43#include "util/format/u_format.h" 44#include "compiler/nir_types.h" 45#include "compiler/shader_enums.h" 46#include "compiler/shader_info.h" 47#define XXH_INLINE_ALL 48#include "util/xxhash.h" 49#include <stdio.h> 50 51#ifndef NDEBUG 52#include "util/debug.h" 53#endif /* NDEBUG */ 54 55#include "nir_opcodes.h" 56 57#if defined(_WIN32) && !defined(snprintf) 58#define snprintf _snprintf 59#endif 60 61#ifdef __cplusplus 62extern "C" { 63#endif 64 65#define NIR_FALSE 0u 66#define NIR_TRUE (~0u) 67#define NIR_MAX_VEC_COMPONENTS 16 68#define NIR_MAX_MATRIX_COLUMNS 4 69#define NIR_STREAM_PACKED (1 << 8) 70typedef uint16_t nir_component_mask_t; 71 72static inline bool 73nir_num_components_valid(unsigned num_components) 74{ 75 return (num_components >= 1 && 76 num_components <= 5) || 77 num_components == 8 || 78 num_components == 16; 79} 80 81bool nir_component_mask_can_reinterpret(nir_component_mask_t mask, 82 unsigned old_bit_size, 83 unsigned new_bit_size); 84nir_component_mask_t 85nir_component_mask_reinterpret(nir_component_mask_t mask, 86 unsigned old_bit_size, 87 unsigned new_bit_size); 88 89/** Defines a cast function 90 * 91 * This macro defines a cast function from in_type to out_type where 92 * out_type is some structure type that contains a field of type out_type. 93 * 94 * Note that you have to be a bit careful as the generated cast function 95 * destroys constness. 
96 */ 97#define NIR_DEFINE_CAST(name, in_type, out_type, field, \ 98 type_field, type_value) \ 99static inline out_type * \ 100name(const in_type *parent) \ 101{ \ 102 assert(parent && parent->type_field == type_value); \ 103 return exec_node_data(out_type, parent, field); \ 104} 105 106struct nir_function; 107struct nir_shader; 108struct nir_instr; 109struct nir_builder; 110 111 112/** 113 * Description of built-in state associated with a uniform 114 * 115 * \sa nir_variable::state_slots 116 */ 117typedef struct { 118 gl_state_index16 tokens[STATE_LENGTH]; 119 uint16_t swizzle; 120} nir_state_slot; 121 122typedef enum { 123 nir_var_shader_in = (1 << 0), 124 nir_var_shader_out = (1 << 1), 125 nir_var_shader_temp = (1 << 2), 126 nir_var_function_temp = (1 << 3), 127 nir_var_uniform = (1 << 4), 128 nir_var_mem_ubo = (1 << 5), 129 nir_var_system_value = (1 << 6), 130 nir_var_mem_ssbo = (1 << 7), 131 nir_var_mem_shared = (1 << 8), 132 nir_var_mem_global = (1 << 9), 133 nir_var_mem_generic = (nir_var_shader_temp | 134 nir_var_function_temp | 135 nir_var_mem_shared | 136 nir_var_mem_global), 137 nir_var_mem_push_const = (1 << 10), /* not actually used for variables */ 138 nir_var_mem_constant = (1 << 11), 139 /** Incoming call or ray payload data for ray-tracing shaders */ 140 nir_var_shader_call_data = (1 << 12), 141 /** Ray hit attributes */ 142 nir_var_ray_hit_attrib = (1 << 13), 143 nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform | 144 nir_var_system_value | nir_var_mem_constant | 145 nir_var_mem_ubo, 146 /** Modes where vector derefs can be indexed as arrays */ 147 nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo | 148 nir_var_mem_shared | nir_var_mem_global | 149 nir_var_mem_push_const, 150 nir_num_variable_modes = 14, 151 nir_var_all = (1 << nir_num_variable_modes) - 1, 152} nir_variable_mode; 153MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode) 154 155/** 156 * Rounding modes. 
157 */ 158typedef enum { 159 nir_rounding_mode_undef = 0, 160 nir_rounding_mode_rtne = 1, /* round to nearest even */ 161 nir_rounding_mode_ru = 2, /* round up */ 162 nir_rounding_mode_rd = 3, /* round down */ 163 nir_rounding_mode_rtz = 4, /* round towards zero */ 164} nir_rounding_mode; 165 166typedef union { 167 bool b; 168 float f32; 169 double f64; 170 int8_t i8; 171 uint8_t u8; 172 int16_t i16; 173 uint16_t u16; 174 int32_t i32; 175 uint32_t u32; 176 int64_t i64; 177 uint64_t u64; 178} nir_const_value; 179 180#define nir_const_value_to_array(arr, c, components, m) \ 181{ \ 182 for (unsigned i = 0; i < components; ++i) \ 183 arr[i] = c[i].m; \ 184} while (false) 185 186static inline nir_const_value 187nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) 188{ 189 nir_const_value v; 190 memset(&v, 0, sizeof(v)); 191 192 switch (bit_size) { 193 case 1: v.b = x; break; 194 case 8: v.u8 = x; break; 195 case 16: v.u16 = x; break; 196 case 32: v.u32 = x; break; 197 case 64: v.u64 = x; break; 198 default: 199 unreachable("Invalid bit size"); 200 } 201 202 return v; 203} 204 205static inline nir_const_value 206nir_const_value_for_int(int64_t i, unsigned bit_size) 207{ 208 nir_const_value v; 209 memset(&v, 0, sizeof(v)); 210 211 assert(bit_size <= 64); 212 if (bit_size < 64) { 213 assert(i >= (-(1ll << (bit_size - 1)))); 214 assert(i < (1ll << (bit_size - 1))); 215 } 216 217 return nir_const_value_for_raw_uint(i, bit_size); 218} 219 220static inline nir_const_value 221nir_const_value_for_uint(uint64_t u, unsigned bit_size) 222{ 223 nir_const_value v; 224 memset(&v, 0, sizeof(v)); 225 226 assert(bit_size <= 64); 227 if (bit_size < 64) 228 assert(u < (1ull << bit_size)); 229 230 return nir_const_value_for_raw_uint(u, bit_size); 231} 232 233static inline nir_const_value 234nir_const_value_for_bool(bool b, unsigned bit_size) 235{ 236 /* Booleans use a 0/-1 convention */ 237 return nir_const_value_for_int(-(int)b, bit_size); 238} 239 240/* This one isn't inline 
because it requires half-float conversion */ 241nir_const_value nir_const_value_for_float(double b, unsigned bit_size); 242 243static inline int64_t 244nir_const_value_as_int(nir_const_value value, unsigned bit_size) 245{ 246 switch (bit_size) { 247 /* int1_t uses 0/-1 convention */ 248 case 1: return -(int)value.b; 249 case 8: return value.i8; 250 case 16: return value.i16; 251 case 32: return value.i32; 252 case 64: return value.i64; 253 default: 254 unreachable("Invalid bit size"); 255 } 256} 257 258static inline uint64_t 259nir_const_value_as_uint(nir_const_value value, unsigned bit_size) 260{ 261 switch (bit_size) { 262 case 1: return value.b; 263 case 8: return value.u8; 264 case 16: return value.u16; 265 case 32: return value.u32; 266 case 64: return value.u64; 267 default: 268 unreachable("Invalid bit size"); 269 } 270} 271 272static inline bool 273nir_const_value_as_bool(nir_const_value value, unsigned bit_size) 274{ 275 int64_t i = nir_const_value_as_int(value, bit_size); 276 277 /* Booleans of any size use 0/-1 convention */ 278 assert(i == 0 || i == -1); 279 280 return i; 281} 282 283/* This one isn't inline because it requires half-float conversion */ 284double nir_const_value_as_float(nir_const_value value, unsigned bit_size); 285 286typedef struct nir_constant { 287 /** 288 * Value of the constant. 289 * 290 * The field used to back the values supplied by the constant is determined 291 * by the type associated with the \c nir_variable. Constants may be 292 * scalars, vectors, or matrices. 293 */ 294 nir_const_value values[NIR_MAX_VEC_COMPONENTS]; 295 296 /* we could get this from the var->type but makes clone *much* easier to 297 * not have to care about the type. 298 */ 299 unsigned num_elements; 300 301 /* Array elements / Structure Fields */ 302 struct nir_constant **elements; 303} nir_constant; 304 305/** 306 * \brief Layout qualifiers for gl_FragDepth. 
307 * 308 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 309 * with a layout qualifier. 310 */ 311typedef enum { 312 nir_depth_layout_none, /**< No depth layout is specified. */ 313 nir_depth_layout_any, 314 nir_depth_layout_greater, 315 nir_depth_layout_less, 316 nir_depth_layout_unchanged 317} nir_depth_layout; 318 319/** 320 * Enum keeping track of how a variable was declared. 321 */ 322typedef enum { 323 /** 324 * Normal declaration. 325 */ 326 nir_var_declared_normally = 0, 327 328 /** 329 * Variable is implicitly generated by the compiler and should not be 330 * visible via the API. 331 */ 332 nir_var_hidden, 333} nir_var_declaration_type; 334 335/** 336 * Either a uniform, global variable, shader input, or shader output. Based on 337 * ir_variable - it should be easy to translate between the two. 338 */ 339 340typedef struct nir_variable { 341 struct exec_node node; 342 343 /** 344 * Declared type of the variable 345 */ 346 const struct glsl_type *type; 347 348 /** 349 * Declared name of the variable 350 */ 351 char *name; 352 353 struct nir_variable_data { 354 /** 355 * Storage class of the variable. 356 * 357 * \sa nir_variable_mode 358 */ 359 unsigned mode:14; 360 361 /** 362 * Is the variable read-only? 363 * 364 * This is set for variables declared as \c const, shader inputs, 365 * and uniforms. 366 */ 367 unsigned read_only:1; 368 unsigned centroid:1; 369 unsigned sample:1; 370 unsigned patch:1; 371 unsigned invariant:1; 372 373 /** 374 * Precision qualifier. 375 * 376 * In desktop GLSL we do not care about precision qualifiers at all, in 377 * fact, the spec says that precision qualifiers are ignored. 378 * 379 * To make things easy, we make it so that this field is always 380 * GLSL_PRECISION_NONE on desktop shaders. This way all the variables 381 * have the same precision value and the checks we add in the compiler 382 * for this field will never break a desktop shader compile. 
383 */ 384 unsigned precision:2; 385 386 /** 387 * Can this variable be coalesced with another? 388 * 389 * This is set by nir_lower_io_to_temporaries to say that any 390 * copies involving this variable should stay put. Propagating it can 391 * duplicate the resulting load/store, which is not wanted, and may 392 * result in a load/store of the variable with an indirect offset which 393 * the backend may not be able to handle. 394 */ 395 unsigned cannot_coalesce:1; 396 397 /** 398 * When separate shader programs are enabled, only input/outputs between 399 * the stages of a multi-stage separate program can be safely removed 400 * from the shader interface. Other input/outputs must remains active. 401 * 402 * This is also used to make sure xfb varyings that are unused by the 403 * fragment shader are not removed. 404 */ 405 unsigned always_active_io:1; 406 407 /** 408 * Interpolation mode for shader inputs / outputs 409 * 410 * \sa glsl_interp_mode 411 */ 412 unsigned interpolation:3; 413 414 /** 415 * If non-zero, then this variable may be packed along with other variables 416 * into a single varying slot, so this offset should be applied when 417 * accessing components. For example, an offset of 1 means that the x 418 * component of this variable is actually stored in component y of the 419 * location specified by \c location. 420 */ 421 unsigned location_frac:2; 422 423 /** 424 * If true, this variable represents an array of scalars that should 425 * be tightly packed. In other words, consecutive array elements 426 * should be stored one component apart, rather than one slot apart. 427 */ 428 unsigned compact:1; 429 430 /** 431 * Whether this is a fragment shader output implicitly initialized with 432 * the previous contents of the specified render target at the 433 * framebuffer location corresponding to this shader invocation. 
434 */ 435 unsigned fb_fetch_output:1; 436 437 /** 438 * Non-zero if this variable is considered bindless as defined by 439 * ARB_bindless_texture. 440 */ 441 unsigned bindless:1; 442 443 /** 444 * Was an explicit binding set in the shader? 445 */ 446 unsigned explicit_binding:1; 447 448 /** 449 * Was the location explicitly set in the shader? 450 * 451 * If the location is explicitly set in the shader, it \b cannot be changed 452 * by the linker or by the API (e.g., calls to \c glBindAttribLocation have 453 * no effect). 454 */ 455 unsigned explicit_location:1; 456 457 /** 458 * Was a transfer feedback buffer set in the shader? 459 */ 460 unsigned explicit_xfb_buffer:1; 461 462 /** 463 * Was a transfer feedback stride set in the shader? 464 */ 465 unsigned explicit_xfb_stride:1; 466 467 /** 468 * Was an explicit offset set in the shader? 469 */ 470 unsigned explicit_offset:1; 471 472 /** 473 * Layout of the matrix. Uses glsl_matrix_layout values. 474 */ 475 unsigned matrix_layout:2; 476 477 /** 478 * Non-zero if this variable was created by lowering a named interface 479 * block. 480 */ 481 unsigned from_named_ifc_block:1; 482 483 /** 484 * How the variable was declared. See nir_var_declaration_type. 485 * 486 * This is used to detect variables generated by the compiler, so should 487 * not be visible via the API. 488 */ 489 unsigned how_declared:2; 490 491 /** 492 * Is this variable per-view? If so, we know it must be an array with 493 * size corresponding to the number of views. 494 */ 495 unsigned per_view:1; 496 497 /** 498 * Whether the variable is per-primitive. 499 * Can be use by Mesh Shader outputs and corresponding Fragment Shader inputs. 500 */ 501 unsigned per_primitive:1; 502 503 /** 504 * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout. 505 * 506 * This is not equal to \c ir_depth_layout_none if and only if this 507 * variable is \c gl_FragDepth and a layout qualifier is specified. 
508 */ 509 unsigned depth_layout:3; 510 511 /** 512 * Vertex stream output identifier. 513 * 514 * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i] 515 * indicate the stream of the i-th component. 516 */ 517 unsigned stream:9; 518 519 /** 520 * See gl_access_qualifier. 521 * 522 * Access flags for memory variables (SSBO/global), image uniforms, and 523 * bindless images in uniforms/inputs/outputs. 524 */ 525 unsigned access:8; 526 527 /** 528 * Descriptor set binding for sampler or UBO. 529 */ 530 unsigned descriptor_set:5; 531 532 /** 533 * output index for dual source blending. 534 */ 535 unsigned index; 536 537 /** 538 * Initial binding point for a sampler or UBO. 539 * 540 * For array types, this represents the binding point for the first element. 541 */ 542 unsigned binding; 543 544 /** 545 * Storage location of the base of this variable 546 * 547 * The precise meaning of this field depends on the nature of the variable. 548 * 549 * - Vertex shader input: one of the values from \c gl_vert_attrib. 550 * - Vertex shader output: one of the values from \c gl_varying_slot. 551 * - Geometry shader input: one of the values from \c gl_varying_slot. 552 * - Geometry shader output: one of the values from \c gl_varying_slot. 553 * - Fragment shader input: one of the values from \c gl_varying_slot. 554 * - Fragment shader output: one of the values from \c gl_frag_result. 555 * - Task shader output: one of the values from \c gl_varying_slot. 556 * - Mesh shader input: one of the values from \c gl_varying_slot. 557 * - Mesh shader output: one of the values from \c gl_varying_slot. 558 * - Uniforms: Per-stage uniform slot number for default uniform block. 559 * - Uniforms: Index within the uniform block definition for UBO members. 560 * - Non-UBO Uniforms: uniform slot number. 561 * - Other: This field is not currently used. 
562 * 563 * If the variable is a uniform, shader input, or shader output, and the 564 * slot has not been assigned, the value will be -1. 565 */ 566 int location; 567 568 /** 569 * The actual location of the variable in the IR. Only valid for inputs, 570 * outputs, uniforms (including samplers and images), and for UBO and SSBO 571 * variables in GLSL. 572 */ 573 unsigned driver_location; 574 575 /** 576 * Location an atomic counter or transform feedback is stored at. 577 */ 578 unsigned offset; 579 580 union { 581 struct { 582 /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */ 583 enum pipe_format format; 584 } image; 585 586 struct { 587 /** 588 * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode 589 */ 590 unsigned is_inline_sampler : 1; 591 unsigned addressing_mode : 3; 592 unsigned normalized_coordinates : 1; 593 unsigned filter_mode : 1; 594 } sampler; 595 596 struct { 597 /** 598 * Transform feedback buffer. 599 */ 600 uint16_t buffer:2; 601 602 /** 603 * Transform feedback stride. 604 */ 605 uint16_t stride; 606 } xfb; 607 }; 608 } data; 609 610 /** 611 * Identifier for this variable generated by nir_index_vars() that is unique 612 * among other variables in the same exec_list. 613 */ 614 unsigned index; 615 616 /* Number of nir_variable_data members */ 617 uint16_t num_members; 618 619 /** 620 * Built-in state that backs this uniform 621 * 622 * Once set at variable creation, \c state_slots must remain invariant. 623 * This is because, ideally, this array would be shared by all clones of 624 * this variable in the IR tree. In other words, we'd really like for it 625 * to be a fly-weight. 626 * 627 * If the variable is not a uniform, \c num_state_slots will be zero and 628 * \c state_slots will be \c NULL. 629 */ 630 /*@{*/ 631 uint16_t num_state_slots; /**< Number of state slots used */ 632 nir_state_slot *state_slots; /**< State descriptors. 
*/ 633 /*@}*/ 634 635 /** 636 * Constant expression assigned in the initializer of the variable 637 * 638 * This field should only be used temporarily by creators of NIR shaders 639 * and then nir_lower_variable_initializers can be used to get rid of them. 640 * Most of the rest of NIR ignores this field or asserts that it's NULL. 641 */ 642 nir_constant *constant_initializer; 643 644 /** 645 * Global variable assigned in the initializer of the variable 646 * This field should only be used temporarily by creators of NIR shaders 647 * and then nir_lower_variable_initializers can be used to get rid of them. 648 * Most of the rest of NIR ignores this field or asserts that it's NULL. 649 */ 650 struct nir_variable *pointer_initializer; 651 652 /** 653 * For variables that are in an interface block or are an instance of an 654 * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 655 * 656 * \sa ir_variable::location 657 */ 658 const struct glsl_type *interface_type; 659 660 /** 661 * Description of per-member data for per-member struct variables 662 * 663 * This is used for variables which are actually an amalgamation of 664 * multiple entities such as a struct of built-in values or a struct of 665 * inputs each with their own layout specifier. This is only allowed on 666 * variables with a struct or array of array of struct type. 
667 */ 668 struct nir_variable_data *members; 669} nir_variable; 670 671static inline bool 672_nir_shader_variable_has_mode(nir_variable *var, unsigned modes) 673{ 674 /* This isn't a shader variable */ 675 assert(!(modes & nir_var_function_temp)); 676 return var->data.mode & modes; 677} 678 679#define nir_foreach_variable_in_list(var, var_list) \ 680 foreach_list_typed(nir_variable, var, node, var_list) 681 682#define nir_foreach_variable_in_list_safe(var, var_list) \ 683 foreach_list_typed_safe(nir_variable, var, node, var_list) 684 685#define nir_foreach_variable_in_shader(var, shader) \ 686 nir_foreach_variable_in_list(var, &(shader)->variables) 687 688#define nir_foreach_variable_in_shader_safe(var, shader) \ 689 nir_foreach_variable_in_list_safe(var, &(shader)->variables) 690 691#define nir_foreach_variable_with_modes(var, shader, modes) \ 692 nir_foreach_variable_in_shader(var, shader) \ 693 if (_nir_shader_variable_has_mode(var, modes)) 694 695#define nir_foreach_variable_with_modes_safe(var, shader, modes) \ 696 nir_foreach_variable_in_shader_safe(var, shader) \ 697 if (_nir_shader_variable_has_mode(var, modes)) 698 699#define nir_foreach_shader_in_variable(var, shader) \ 700 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in) 701 702#define nir_foreach_shader_in_variable_safe(var, shader) \ 703 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in) 704 705#define nir_foreach_shader_out_variable(var, shader) \ 706 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) 707 708#define nir_foreach_shader_out_variable_safe(var, shader) \ 709 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out) 710 711#define nir_foreach_uniform_variable(var, shader) \ 712 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) 713 714#define nir_foreach_uniform_variable_safe(var, shader) \ 715 nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform) 716 717static inline bool 718nir_variable_is_global(const 
nir_variable *var) 719{ 720 return var->data.mode != nir_var_function_temp; 721} 722 723typedef struct nir_register { 724 struct exec_node node; 725 726 unsigned num_components; /** < number of vector components */ 727 unsigned num_array_elems; /** < size of array (0 for no array) */ 728 729 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 730 uint8_t bit_size; 731 732 /** 733 * True if this register may have different values in different SIMD 734 * invocations of the shader. 735 */ 736 bool divergent; 737 738 /** generic register index. */ 739 unsigned index; 740 741 /** set of nir_srcs where this register is used (read from) */ 742 struct list_head uses; 743 744 /** set of nir_dests where this register is defined (written to) */ 745 struct list_head defs; 746 747 /** set of nir_ifs where this register is used as a condition */ 748 struct list_head if_uses; 749} nir_register; 750 751#define nir_foreach_register(reg, reg_list) \ 752 foreach_list_typed(nir_register, reg, node, reg_list) 753#define nir_foreach_register_safe(reg, reg_list) \ 754 foreach_list_typed_safe(nir_register, reg, node, reg_list) 755 756typedef enum PACKED { 757 nir_instr_type_alu, 758 nir_instr_type_deref, 759 nir_instr_type_call, 760 nir_instr_type_tex, 761 nir_instr_type_intrinsic, 762 nir_instr_type_load_const, 763 nir_instr_type_jump, 764 nir_instr_type_ssa_undef, 765 nir_instr_type_phi, 766 nir_instr_type_parallel_copy, 767} nir_instr_type; 768 769typedef struct nir_instr { 770 struct exec_node node; 771 struct list_head gc_node; 772 struct nir_block *block; 773 nir_instr_type type; 774 775 /* A temporary for optimization and analysis passes to use for storing 776 * flags. For instance, DCE uses this to store the "dead/live" info. 777 */ 778 uint8_t pass_flags; 779 780 /** generic instruction index. 
*/ 781 uint32_t index; 782} nir_instr; 783 784static inline nir_instr * 785nir_instr_next(nir_instr *instr) 786{ 787 struct exec_node *next = exec_node_get_next(&instr->node); 788 if (exec_node_is_tail_sentinel(next)) 789 return NULL; 790 else 791 return exec_node_data(nir_instr, next, node); 792} 793 794static inline nir_instr * 795nir_instr_prev(nir_instr *instr) 796{ 797 struct exec_node *prev = exec_node_get_prev(&instr->node); 798 if (exec_node_is_head_sentinel(prev)) 799 return NULL; 800 else 801 return exec_node_data(nir_instr, prev, node); 802} 803 804static inline bool 805nir_instr_is_first(const nir_instr *instr) 806{ 807 return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node)); 808} 809 810static inline bool 811nir_instr_is_last(const nir_instr *instr) 812{ 813 return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); 814} 815 816typedef struct nir_ssa_def { 817 /** Instruction which produces this SSA value. */ 818 nir_instr *parent_instr; 819 820 /** set of nir_instrs where this register is used (read from) */ 821 struct list_head uses; 822 823 /** set of nir_ifs where this register is used as a condition */ 824 struct list_head if_uses; 825 826 /** generic SSA definition index. */ 827 unsigned index; 828 829 uint8_t num_components; 830 831 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 832 uint8_t bit_size; 833 834 /** 835 * True if this SSA value may have different values in different SIMD 836 * invocations of the shader. This is set by nir_divergence_analysis. 
837 */ 838 bool divergent; 839} nir_ssa_def; 840 841struct nir_src; 842 843typedef struct { 844 nir_register *reg; 845 struct nir_src *indirect; /** < NULL for no indirect offset */ 846 unsigned base_offset; 847 848 /* TODO use-def chain goes here */ 849} nir_reg_src; 850 851typedef struct { 852 nir_instr *parent_instr; 853 struct list_head def_link; 854 855 nir_register *reg; 856 struct nir_src *indirect; /** < NULL for no indirect offset */ 857 unsigned base_offset; 858 859 /* TODO def-use chain goes here */ 860} nir_reg_dest; 861 862struct nir_if; 863 864typedef struct nir_src { 865 union { 866 /** Instruction that consumes this value as a source. */ 867 nir_instr *parent_instr; 868 struct nir_if *parent_if; 869 }; 870 871 struct list_head use_link; 872 873 union { 874 nir_reg_src reg; 875 nir_ssa_def *ssa; 876 }; 877 878 bool is_ssa; 879} nir_src; 880 881static inline nir_src 882nir_src_init(void) 883{ 884 nir_src src = { { NULL } }; 885 return src; 886} 887 888#define NIR_SRC_INIT nir_src_init() 889 890#define nir_foreach_use(src, reg_or_ssa_def) \ 891 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 892 893#define nir_foreach_use_safe(src, reg_or_ssa_def) \ 894 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 895 896#define nir_foreach_if_use(src, reg_or_ssa_def) \ 897 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 898 899#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 900 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 901 902typedef struct { 903 union { 904 nir_reg_dest reg; 905 nir_ssa_def ssa; 906 }; 907 908 bool is_ssa; 909} nir_dest; 910 911static inline nir_dest 912nir_dest_init(void) 913{ 914 nir_dest dest = { { { NULL } } }; 915 return dest; 916} 917 918#define NIR_DEST_INIT nir_dest_init() 919 920#define nir_foreach_def(dest, reg) \ 921 list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 922 923#define nir_foreach_def_safe(dest, reg) 
\ 924 list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 925 926static inline nir_src 927nir_src_for_ssa(nir_ssa_def *def) 928{ 929 nir_src src = NIR_SRC_INIT; 930 931 src.is_ssa = true; 932 src.ssa = def; 933 934 return src; 935} 936 937static inline nir_src 938nir_src_for_reg(nir_register *reg) 939{ 940 nir_src src = NIR_SRC_INIT; 941 942 src.is_ssa = false; 943 src.reg.reg = reg; 944 src.reg.indirect = NULL; 945 src.reg.base_offset = 0; 946 947 return src; 948} 949 950static inline nir_dest 951nir_dest_for_reg(nir_register *reg) 952{ 953 nir_dest dest = NIR_DEST_INIT; 954 955 dest.reg.reg = reg; 956 957 return dest; 958} 959 960static inline unsigned 961nir_src_bit_size(nir_src src) 962{ 963 return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 964} 965 966static inline unsigned 967nir_src_num_components(nir_src src) 968{ 969 return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components; 970} 971 972static inline bool 973nir_src_is_const(nir_src src) 974{ 975 return src.is_ssa && 976 src.ssa->parent_instr->type == nir_instr_type_load_const; 977} 978 979static inline bool 980nir_src_is_undef(nir_src src) 981{ 982 return src.is_ssa && 983 src.ssa->parent_instr->type == nir_instr_type_ssa_undef; 984} 985 986static inline bool 987nir_src_is_divergent(nir_src src) 988{ 989 return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent; 990} 991 992static inline unsigned 993nir_dest_bit_size(nir_dest dest) 994{ 995 return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size; 996} 997 998static inline unsigned 999nir_dest_num_components(nir_dest dest) 1000{ 1001 return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; 1002} 1003 1004static inline bool 1005nir_dest_is_divergent(nir_dest dest) 1006{ 1007 return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent; 1008} 1009 1010/* Are all components the same, ie. 
.xxxx */ 1011static inline bool 1012nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 1013{ 1014 for (unsigned i = 1; i < nr_comp; i++) 1015 if (swiz[i] != swiz[0]) 1016 return false; 1017 return true; 1018} 1019 1020/* Are all components sequential, ie. .yzw */ 1021static inline bool 1022nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) 1023{ 1024 for (unsigned i = 1; i < nr_comp; i++) 1025 if (swiz[i] != (swiz[0] + i)) 1026 return false; 1027 return true; 1028} 1029 1030void nir_src_copy(nir_src *dest, const nir_src *src); 1031void nir_dest_copy(nir_dest *dest, const nir_dest *src); 1032 1033typedef struct { 1034 /** Base source */ 1035 nir_src src; 1036 1037 /** 1038 * \name input modifiers 1039 */ 1040 /*@{*/ 1041 /** 1042 * For inputs interpreted as floating point, flips the sign bit. For 1043 * inputs interpreted as integers, performs the two's complement negation. 1044 */ 1045 bool negate; 1046 1047 /** 1048 * Clears the sign bit for floating point values, and computes the integer 1049 * absolute value for integers. Note that the negate modifier acts after 1050 * the absolute value modifier, therefore if both are set then all inputs 1051 * will become negative. 1052 */ 1053 bool abs; 1054 /*@}*/ 1055 1056 /** 1057 * For each input component, says which component of the register it is 1058 * chosen from. 1059 * 1060 * Note that which elements of the swizzle are used and which are ignored 1061 * are based on the write mask for most opcodes - for example, a statement 1062 * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle 1063 * of {2, 1, x, 0} where x means "don't care." 1064 */ 1065 uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; 1066} nir_alu_src; 1067 1068typedef struct { 1069 /** Base destination */ 1070 nir_dest dest; 1071 1072 /** 1073 * Saturate output modifier 1074 * 1075 * Only valid for opcodes that output floating-point numbers. Clamps the 1076 * output to between 0.0 and 1.0 inclusive. 
1077 */ 1078 bool saturate; 1079 1080 /** 1081 * Write-mask 1082 * 1083 * Ignored if dest.is_ssa is true 1084 */ 1085 unsigned write_mask : NIR_MAX_VEC_COMPONENTS; 1086} nir_alu_dest; 1087 1088/** NIR sized and unsized types 1089 * 1090 * The values in this enum are carefully chosen so that the sized type is 1091 * just the unsized type OR the number of bits. 1092 */ 1093typedef enum PACKED { 1094 nir_type_invalid = 0, /* Not a valid type */ 1095 nir_type_int = 2, 1096 nir_type_uint = 4, 1097 nir_type_bool = 6, 1098 nir_type_float = 128, 1099 nir_type_bool1 = 1 | nir_type_bool, 1100 nir_type_bool8 = 8 | nir_type_bool, 1101 nir_type_bool16 = 16 | nir_type_bool, 1102 nir_type_bool32 = 32 | nir_type_bool, 1103 nir_type_int1 = 1 | nir_type_int, 1104 nir_type_int8 = 8 | nir_type_int, 1105 nir_type_int16 = 16 | nir_type_int, 1106 nir_type_int32 = 32 | nir_type_int, 1107 nir_type_int64 = 64 | nir_type_int, 1108 nir_type_uint1 = 1 | nir_type_uint, 1109 nir_type_uint8 = 8 | nir_type_uint, 1110 nir_type_uint16 = 16 | nir_type_uint, 1111 nir_type_uint32 = 32 | nir_type_uint, 1112 nir_type_uint64 = 64 | nir_type_uint, 1113 nir_type_float16 = 16 | nir_type_float, 1114 nir_type_float32 = 32 | nir_type_float, 1115 nir_type_float64 = 64 | nir_type_float, 1116} nir_alu_type; 1117 1118#define NIR_ALU_TYPE_SIZE_MASK 0x79 1119#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86 1120 1121static inline unsigned 1122nir_alu_type_get_type_size(nir_alu_type type) 1123{ 1124 return type & NIR_ALU_TYPE_SIZE_MASK; 1125} 1126 1127static inline nir_alu_type 1128nir_alu_type_get_base_type(nir_alu_type type) 1129{ 1130 return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); 1131} 1132 1133static inline nir_alu_type 1134nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) 1135{ 1136 switch (base_type) { 1137 case GLSL_TYPE_BOOL: 1138 return nir_type_bool1; 1139 break; 1140 case GLSL_TYPE_UINT: 1141 return nir_type_uint32; 1142 break; 1143 case GLSL_TYPE_INT: 1144 return nir_type_int32; 1145 
break; 1146 case GLSL_TYPE_UINT16: 1147 return nir_type_uint16; 1148 break; 1149 case GLSL_TYPE_INT16: 1150 return nir_type_int16; 1151 break; 1152 case GLSL_TYPE_UINT8: 1153 return nir_type_uint8; 1154 case GLSL_TYPE_INT8: 1155 return nir_type_int8; 1156 case GLSL_TYPE_UINT64: 1157 return nir_type_uint64; 1158 break; 1159 case GLSL_TYPE_INT64: 1160 return nir_type_int64; 1161 break; 1162 case GLSL_TYPE_FLOAT: 1163 return nir_type_float32; 1164 break; 1165 case GLSL_TYPE_FLOAT16: 1166 return nir_type_float16; 1167 break; 1168 case GLSL_TYPE_DOUBLE: 1169 return nir_type_float64; 1170 break; 1171 1172 case GLSL_TYPE_SAMPLER: 1173 case GLSL_TYPE_IMAGE: 1174 case GLSL_TYPE_ATOMIC_UINT: 1175 case GLSL_TYPE_STRUCT: 1176 case GLSL_TYPE_INTERFACE: 1177 case GLSL_TYPE_ARRAY: 1178 case GLSL_TYPE_VOID: 1179 case GLSL_TYPE_SUBROUTINE: 1180 case GLSL_TYPE_FUNCTION: 1181 case GLSL_TYPE_ERROR: 1182 return nir_type_invalid; 1183 } 1184 1185 unreachable("unknown type"); 1186} 1187 1188static inline nir_alu_type 1189nir_get_nir_type_for_glsl_type(const struct glsl_type *type) 1190{ 1191 return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type)); 1192} 1193 1194static inline enum glsl_base_type 1195nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type) 1196{ 1197 switch (base_type) { 1198 case nir_type_bool1: 1199 return GLSL_TYPE_BOOL; 1200 case nir_type_uint32: 1201 return GLSL_TYPE_UINT; 1202 case nir_type_int32: 1203 return GLSL_TYPE_INT; 1204 case nir_type_uint16: 1205 return GLSL_TYPE_UINT16; 1206 case nir_type_int16: 1207 return GLSL_TYPE_INT16; 1208 case nir_type_uint8: 1209 return GLSL_TYPE_UINT8; 1210 case nir_type_int8: 1211 return GLSL_TYPE_INT8; 1212 case nir_type_uint64: 1213 return GLSL_TYPE_UINT64; 1214 case nir_type_int64: 1215 return GLSL_TYPE_INT64; 1216 case nir_type_float32: 1217 return GLSL_TYPE_FLOAT; 1218 case nir_type_float16: 1219 return GLSL_TYPE_FLOAT16; 1220 case nir_type_float64: 1221 return GLSL_TYPE_DOUBLE; 1222 1223 default: 
      unreachable("Not a sized nir_alu_type");
   }
}

nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                              nir_rounding_mode rnd);

/** Returns the vecN opcode that builds a vector of the given width
 *
 * A single component is just a mov.
 */
static inline nir_op
nir_op_vec(unsigned components)
{
   switch (components) {
   case 1: return nir_op_mov;
   case 2: return nir_op_vec2;
   case 3: return nir_op_vec3;
   case 4: return nir_op_vec4;
   case 5: return nir_op_vec5;
   case 8: return nir_op_vec8;
   case 16: return nir_op_vec16;
   default: unreachable("bad component count");
   }
}

/** Returns true if the opcode is a mov or one of the vecN opcodes */
static inline bool
nir_op_is_vec(nir_op op)
{
   switch (op) {
   case nir_op_mov:
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec5:
   case nir_op_vec8:
   case nir_op_vec16:
      return true;
   default:
      return false;
   }
}

/* The helpers below test the SPIR-V float-controls execution-mode bits for a
 * particular float bit size.
 */
static inline bool
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
}

static inline bool
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
}

static inline bool
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline bool
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

/** True if RTZ rounding is requested for any float bit size */
static inline bool
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

/** True if RTE rounding is requested for any float bit size */
static inline bool
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

/** Picks the rounding mode the execution mode requests for a float type
 *
 * Returns nir_rounding_mode_undef for non-float types or when no rounding
 * mode is requested for the type's bit size.
 */
static inline nir_rounding_mode
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
                                          nir_alu_type type)
{
   if (nir_alu_type_get_base_type(type) != nir_type_float)
      return nir_rounding_mode_undef;

   unsigned bit_size = nir_alu_type_get_type_size(type);

   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
      return nir_rounding_mode_rtz;
   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
      return nir_rounding_mode_rtne;
   return nir_rounding_mode_undef;
}

static inline bool
nir_has_any_rounding_mode_enabled(unsigned
execution_mode)
{
   bool result =
      nir_has_any_rounding_mode_rtne(execution_mode) ||
      nir_has_any_rounding_mode_rtz(execution_mode);
   return result;
}

typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this just mathematical commutativity. Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),

   /**
    * Operation is associative
    */
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
} nir_op_algebraic_property;

/* vec16 is the widest ALU op in NIR, making the max number of input of ALU
 * instructions to be the same as NIR_MAX_VEC_COMPONENTS.
 */
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS

typedef struct nir_op_info {
   /** Name of the NIR ALU opcode */
   const char *name;

   /** Number of inputs (sources) */
   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs. Note that the
    * saturate modifier is only allowed on outputs with the float type.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    *
    * See nir_op_infos::output_size for more detail about the relationship
    * between input and output sizes.
    */
   uint8_t input_sizes[NIR_ALU_MAX_INPUTS];

   /**
    * The type of vector that each input takes. Note that negate and
    * absolute value are only allowed on inputs with int or float type and
    * behave differently on the two.
    */
   nir_alu_type input_types[NIR_ALU_MAX_INPUTS];

   /** Algebraic properties of this opcode */
   nir_op_algebraic_property algebraic_properties;

   /** Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;

/** Metadata for each nir_op, indexed by opcode */
extern const nir_op_info nir_op_infos[nir_num_opcodes];

typedef struct nir_alu_instr {
   /** Base instruction */
   nir_instr instr;

   /** Opcode */
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either. This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact:1;

   /**
    * Indicates that this instruction does not cause signed integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_signed_wrap:1;

   /**
    * Indicates that this instruction does not cause unsigned integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_unsigned_wrap:1;

   /** Destination */
   nir_alu_dest dest;

   /** Sources
    *
    * The size of the array is given by nir_op_info::num_inputs.
 */
   nir_alu_src src[];
} nir_alu_instr;

void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src);

bool nir_alu_instr_is_copy(nir_alu_instr *instr);

/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
                           unsigned channel)
{
   /* Fixed-size inputs: every channel up to the declared size is used,
    * regardless of the write mask.
    */
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return channel < nir_op_infos[instr->op].input_sizes[src];

   /* Per-component inputs: channel use follows the destination write mask. */
   return (instr->dest.write_mask >> channel) & 1;
}

/** Returns the mask of register components read by a source
 *
 * Accounts for the swizzle and for which channels are actually used.
 */
static inline nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
{
   nir_component_mask_t read_mask = 0;
   for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
      if (!nir_alu_instr_channel_used(instr, src, c))
         continue;

      read_mask |= (1 << instr->src[src].swizzle[c]);
   }
   return read_mask;
}

/**
 * Get the number of channels used for a source
 */
static inline unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
{
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return nir_op_infos[instr->op].input_sizes[src];

   return nir_dest_num_components(instr->dest.dest);
}

/** Returns true for opcodes treated as boolean-producing comparisons */
static inline bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
{
   switch (instr->op) {
   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fneu:
   case nir_op_ilt:
   case nir_op_ult:
   case nir_op_ige:
   case nir_op_uge:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_i2b1:
   case nir_op_f2b1:
   case nir_op_inot:
      return true;
   default:
      return false;
   }
}

bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
                                    nir_alu_type full_type);

bool nir_alu_srcs_equal(const
nir_alu_instr *alu1, const nir_alu_instr *alu2,
                        unsigned src1, unsigned src2);

bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                                 const nir_alu_instr *alu2,
                                 unsigned src1, unsigned src2);

bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);

/** The kind of dereference an individual nir_deref_instr performs */
typedef enum {
   nir_deref_type_var,
   nir_deref_type_array,
   nir_deref_type_array_wildcard,
   nir_deref_type_ptr_as_array,
   nir_deref_type_struct,
   nir_deref_type_cast,
} nir_deref_type;

typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** Bitmask what modes the underlying variable might be
    *
    * For OpenCL-style generic pointers, we may not know exactly what mode it
    * is at any given point in time in the compile process.  This bitfield
    * contains the set of modes which it MAY be.
    *
    * Generally, this field should not be accessed directly.  Use one of the
    * nir_deref_mode_ helpers instead.
    */
   nir_variable_mode modes;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      struct {
         nir_src index;
      } arr;

      struct {
         unsigned index;
      } strct;

      struct {
         unsigned ptr_stride;
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_dest dest;
} nir_deref_instr;

/** Returns true if deref might have one of the given modes
 *
 * For multi-mode derefs, this returns true if any of the possible modes of
 * the deref match any of the specified modes.
 This function returning true
 * does NOT mean that the deref definitely has one of those modes.  It simply
 * means that, with the best information we have at the time, it might.
 */
static inline bool
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return deref->modes & modes;
}

/** Returns true if deref must have one of the given modes
 *
 * For multi-mode derefs, this returns true if NIR can prove that the given
 * deref has one of the specified modes.  This function returning false does
 * NOT mean that deref doesn't have one of the given mode.  It very well may
 * have one of those modes, we just don't have enough information to prove
 * that it does for sure.
 */
static inline bool
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return !(deref->modes & ~modes);
}

/** Returns true if deref has the given mode
 *
 * This returns true if the deref has exactly the mode specified.  If the
 * deref may have that mode but may also have a different mode (i.e. modes has
 * multiple bits set), this will assert-fail.
 *
 * If you're confused about which nir_deref_mode_ helper to use, use this one
 * or nir_deref_mode_is_one_of below.
 */
static inline bool
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
{
   assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
   assert(deref->modes != 0);

   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, mode)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(deref->modes == mode);
   }

   return deref->modes == mode;
}

/** Returns true if deref has one of the given modes
 *
 * This returns true if the deref has exactly one possible mode and that mode
 * is one of the modes specified.  If the deref may have one of those modes
 * but may also have a different mode (i.e. modes has multiple bits set), this
 * will assert-fail.
 */
static inline bool
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, modes)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(nir_deref_mode_must_be(deref, modes));
   }

   return nir_deref_mode_may_be(deref, modes);
}

/** Returns true if deref's possible modes lie in the given set of modes
 *
 * This returns true if the deref's modes lie in the given set of modes.  If
 * the deref's modes overlap with the specified modes but aren't entirely
 * contained in the specified set of modes, this will assert-fail.  In
 * particular, if this is used in a generic pointers scenario, the specified
 * modes has to contain all or none of the possible generic pointer modes.
 *
 * This is intended mostly for mass-lowering of derefs which might have
 * generic pointers.
 */
static inline bool
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
{
   if (nir_deref_mode_may_be(deref, modes))
      assert(nir_deref_mode_must_be(deref, modes));

   return nir_deref_mode_may_be(deref, modes);
}

static inline nir_deref_instr *nir_src_as_deref(nir_src src);

/** Returns the parent deref, or NULL for a deref_var (the chain root) */
static inline nir_deref_instr *
nir_deref_instr_parent(const nir_deref_instr *instr)
{
   if (instr->deref_type == nir_deref_type_var)
      return NULL;
   else
      return nir_src_as_deref(instr->parent);
}

/** Walks up the deref chain to the underlying variable
 *
 * Returns NULL if a cast is encountered on the way, since a cast breaks the
 * connection to a specific variable.
 */
static inline nir_variable *
nir_deref_instr_get_variable(const nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      if (instr->deref_type == nir_deref_type_cast)
         return NULL;

      instr = nir_deref_instr_parent(instr);
   }

   return instr->var;
}

bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);

bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);

unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);

typedef struct {
   nir_instr instr;

   struct nir_function *callee;

   unsigned num_params;
   nir_src params[];
} nir_call_instr;

#include "nir_intrinsics.h"

#define NIR_INTRINSIC_MAX_CONST_INDEX 5

/** Represents an intrinsic
 *
 * An intrinsic is an instruction type for handling things that are
 * more-or-less regular operations but don't just consume and produce SSA
 * values like ALU operations do.  Intrinsics are not for things that have
 * special semantic meaning such as phi nodes and parallel copies.
 * Examples of intrinsics include variable load/store operations, system
 * value loads, and the like.  Even though texturing more-or-less falls
 * under this category, texturing is its own instruction type because
 * trying to represent texturing with intrinsics would lead to a
 * combinatorial explosion of intrinsic opcodes.
 *
 * By having a single instruction type for handling a lot of different
 * cases, optimization passes can look for intrinsics and, for the most
 * part, completely ignore them.  Each intrinsic type also has a few
 * possible flags that govern whether or not they can be reordered or
 * eliminated.  That way passes like dead code elimination can still work
 * on intrinsics without understanding the meaning of each.
 *
 * Each intrinsic has some number of constant indices, some number of
 * variables, and some number of sources.  What these sources, variables,
 * and indices mean depends on the intrinsic and is documented with the
 * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
 * instructions are the only types of instruction that can operate on
 * variables.
 */
typedef struct {
   nir_instr instr;

   nir_intrinsic_op intrinsic;

   nir_dest dest;

   /** number of components if this is a vectorized intrinsic
    *
    * Similarly to ALU operations, some intrinsics are vectorized.
    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
    * For vectorized intrinsics, the num_components field specifies the
    * number of destination components and the number of source components
    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
 */
   uint8_t num_components;

   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];

   nir_src src[];
} nir_intrinsic_instr;

/** Returns the variable behind deref source i of an intrinsic */
static inline nir_variable *
nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
{
   return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
}

typedef enum {
   /* Memory ordering. */
   NIR_MEMORY_ACQUIRE = 1 << 0,
   NIR_MEMORY_RELEASE = 1 << 1,
   NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,

   /* Memory visibility operations. */
   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
   NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
} nir_memory_semantics;

typedef enum {
   NIR_SCOPE_NONE,
   NIR_SCOPE_INVOCATION,
   NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_SHADER_CALL,
   NIR_SCOPE_WORKGROUP,
   NIR_SCOPE_QUEUE_FAMILY,
   NIR_SCOPE_DEVICE,
} nir_scope;

/**
 * \name NIR intrinsics semantic flags
 *
 * information about what the compiler can do with the intrinsics.
 *
 * \sa nir_intrinsic_info::flags
 */
typedef enum {
   /**
    * Whether the intrinsic can be safely eliminated if none of its output
    * values are being used.
    */
   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),

   /**
    * Whether the intrinsic can be reordered with respect to any other
    * intrinsic, i.e. whether the only reordering dependencies of the
    * intrinsic are due to the register reads/writes.
    */
   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
} nir_intrinsic_semantic_flag;

/**
 * Maximum valid value for a nir align_mul value (in intrinsics or derefs).
 *
 * Offsets can be signed, so this is the largest power of two in int32_t.
 */
#define NIR_ALIGN_MUL_MAX 0x40000000

typedef struct nir_io_semantics {
   unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
   unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
   unsigned dual_source_blend_index:1;
   unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
   unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
   unsigned medium_precision:1; /* GLSL mediump qualifier */
   unsigned per_view:1;
   unsigned high_16bits:1; /* whether accessing low or high half of the slot */
   unsigned _pad:6;
} nir_io_semantics;

#define NIR_INTRINSIC_MAX_INPUTS 11

typedef struct {
   const char *name;

   uint8_t num_srcs; /**< number of register/SSA inputs */

   /** number of components of each input register
    *
    * If this value is 0, the number of components is given by the
    * num_components field of nir_intrinsic_instr.  If this value is -1, the
    * intrinsic consumes however many components are provided and it is not
    * validated at all.
    */
   int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];

   bool has_dest;

   /** number of components of the output register
    *
    * If this value is 0, the number of components is given by the
    * num_components field of nir_intrinsic_instr.
    */
   uint8_t dest_components;

   /** bitfield of legal bit sizes */
   uint8_t dest_bit_sizes;

   /** source which the destination bit size must match
    *
    * Some intrinsics, such as subgroup intrinsics, are data manipulation
    * intrinsics and they have similar bit-size rules to ALU ops. This enables
    * validation to validate a bit more and enables auto-generated builder code
    * to properly determine destination bit sizes automatically.
    */
   int8_t bit_size_src;

   /** the number of constant indices used by the intrinsic */
   uint8_t num_indices;

   /** list of indices */
   uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];

   /** indicates the usage of intr->const_index[n] */
   uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];

   /** semantic flags for calls to this intrinsic */
   nir_intrinsic_semantic_flag flags;
} nir_intrinsic_info;

extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];

/** Returns the number of components read from intrinsic source srcn */
static inline unsigned
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   assert(srcn < info->num_srcs);
   if (info->src_components[srcn] > 0)
      return info->src_components[srcn];
   else if (info->src_components[srcn] == 0)
      return intr->num_components;
   else
      return nir_src_num_components(intr->src[srcn]);
}

/** Returns the number of destination components (0 if there is no dest) */
static inline unsigned
nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   if (!info->has_dest)
      return 0;
   else if (info->dest_components)
      return info->dest_components;
   else
      return intr->num_components;
}

/**
 * Helper to copy const_index[] from src to dst, without assuming they
 * match in order.
 */
static inline void
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
{
   /* Same opcode: the index layout is identical, just copy it wholesale. */
   if (src->intrinsic == dst->intrinsic) {
      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
      return;
   }

   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];

   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
      if (src_info->index_map[i] == 0)
         continue;

      /* require that dst instruction also uses the same const_index[]: */
      assert(dst_info->index_map[i] > 0);

      dst->const_index[dst_info->index_map[i] - 1] =
         src->const_index[src_info->index_map[i] - 1];
   }
}

#include "nir_intrinsics_indices.h"

/** Sets both ALIGN_MUL and ALIGN_OFFSET on a load/store intrinsic */
static inline void
nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
                        unsigned align_mul, unsigned align_offset)
{
   assert(util_is_power_of_two_nonzero(align_mul));
   assert(align_offset < align_mul);
   nir_intrinsic_set_align_mul(intrin, align_mul);
   nir_intrinsic_set_align_offset(intrin, align_offset);
}

/** Returns a simple alignment for a load/store intrinsic offset
 *
 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
 * and ALIGN_OFFSET parameters, this helper takes both into account and
 * provides a single simple alignment parameter.  The offset X is guaranteed
 * to satisfy X % align == 0.
 */
static inline unsigned
nir_intrinsic_align(const nir_intrinsic_instr *intrin)
{
   const unsigned align_mul = nir_intrinsic_align_mul(intrin);
   const unsigned align_offset = nir_intrinsic_align_offset(intrin);
   assert(align_offset < align_mul);
   /* A non-zero offset reduces the alignment to its lowest set bit. */
   return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
}

static inline bool
nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
{
   return nir_intrinsic_has_align_mul(intrin) &&
          nir_intrinsic_has_align_offset(intrin);
}

unsigned
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);

/* Converts an image_deref_* intrinsic into an image_* one */
void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
                                 nir_ssa_def *handle, bool bindless);

/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
static inline bool
nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
{
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
      return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) ||
             (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER);
   } else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
              instr->intrinsic == nir_intrinsic_bindless_image_load ||
              instr->intrinsic == nir_intrinsic_image_deref_load ||
              instr->intrinsic == nir_intrinsic_image_load) {
      return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
   } else {
      const nir_intrinsic_info *info =
         &nir_intrinsic_infos[instr->intrinsic];
      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
             (info->flags & NIR_INTRINSIC_CAN_REORDER);
   }
}

bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);

/** Texture instruction source type */
typedef enum {
   /** Texture coordinate
    *
    * Must have nir_tex_instr::coord_components components.
    */
   nir_tex_src_coord,

   /** Projector
    *
    * The texture coordinate (except for the array component, if any) is
    * divided by this value before LOD computation and sampling.
    *
    * Must be a float scalar.
 */
   nir_tex_src_projector,

   /** Shadow comparator
    *
    * For shadow sampling, the fetched texel values are compared against the
    * shadow comparator using the compare op specified by the sampler object
    * and converted to 1.0 if the comparison succeeds and 0.0 if it fails.
    * Interpolation happens after this conversion so the actual result may be
    * anywhere in the range [0.0, 1.0].
    *
    * Only valid if nir_tex_instr::is_shadow and must be a float scalar.
    */
   nir_tex_src_comparator,

   /** Coordinate offset
    *
    * An integer value that is added to the texel address before sampling.
    * This is only allowed with operations that take an explicit LOD as it is
    * applied in integer texel space after LOD selection and not normalized
    * coordinate space.
    */
   nir_tex_src_offset,

   /** LOD bias
    *
    * This value is added to the computed LOD before mip-mapping.
    */
   nir_tex_src_bias,

   /** Explicit LOD */
   nir_tex_src_lod,

   /** Min LOD
    *
    * The computed LOD is clamped to be at least as large as min_lod before
    * mip-mapping.
    */
   nir_tex_src_min_lod,

   /** MSAA sample index */
   nir_tex_src_ms_index,

   /** Intel-specific MSAA compression data */
   nir_tex_src_ms_mcs_intel,

   /** Explicit horizontal (X-major) coordinate derivative */
   nir_tex_src_ddx,

   /** Explicit vertical (Y-major) coordinate derivative */
   nir_tex_src_ddy,

   /** Texture variable dereference */
   nir_tex_src_texture_deref,

   /** Sampler variable dereference */
   nir_tex_src_sampler_deref,

   /** Texture index offset
    *
    * This is added to nir_tex_instr::texture_index.  Unless
    * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be
    * dynamically uniform.
    */
   nir_tex_src_texture_offset,

   /** Dynamically uniform sampler index offset
    *
    * This is added to nir_tex_instr::sampler_index.  Unless
    * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be
    * dynamically uniform.
    */
   nir_tex_src_sampler_offset,

   /** Bindless texture handle
    *
    * This is, unfortunately, a bit overloaded at the moment.  There are
    * generally two types of bindless handles:
    *
    *  1. For GL_ARB_bindless bindless handles.  These are part of the
    *     GL/Gallium-level API and are always a 64-bit integer.
    *
    *  2. HW-specific handles.  GL_ARB_bindless handles may be lowered to
    *     these.  Also, these are used by many Vulkan drivers to implement
    *     descriptor sets, especially for UPDATE_AFTER_BIND descriptors.
    *     The details of hardware handles (bit size, format, etc.) is
    *     HW-specific.
    *
    * Because of this overloading and the resulting ambiguity, we currently
    * don't validate anything for these.
    */
   nir_tex_src_texture_handle,

   /** Bindless sampler handle
    *
    * See nir_tex_src_texture_handle,
    */
   nir_tex_src_sampler_handle,

   /** Plane index for multi-plane YCbCr textures */
   nir_tex_src_plane,

   /**
    * Backend-specific vec4 tex src argument.
    *
    * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs)
    * apply to the packing of the tex srcs.  This lowering must only happen
    * after nir_lower_tex().
    *
    * The nir_tex_instr_src_type() of this argument is float, so no lowering
    * will happen if nir_lower_int_to_float is used.
    */
   nir_tex_src_backend1,

   /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1.
*/ 2151 nir_tex_src_backend2, 2152 2153 nir_num_tex_src_types 2154} nir_tex_src_type; 2155 2156/** A texture instruction source */ 2157typedef struct { 2158 /** Base source */ 2159 nir_src src; 2160 2161 /** Type of this source */ 2162 nir_tex_src_type src_type; 2163} nir_tex_src; 2164 2165/** Texture instruction opcode */ 2166typedef enum { 2167 nir_texop_tex, /**< Regular texture look-up */ 2168 nir_texop_txb, /**< Texture look-up with LOD bias */ 2169 nir_texop_txl, /**< Texture look-up with explicit LOD */ 2170 nir_texop_txd, /**< Texture look-up with partial derivatives */ 2171 nir_texop_txf, /**< Texel fetch with explicit LOD */ 2172 nir_texop_txf_ms, /**< Multisample texture fetch */ 2173 nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 2174 nir_texop_txf_ms_mcs_intel, /**< Multisample compression value fetch */ 2175 nir_texop_txs, /**< Texture size */ 2176 nir_texop_lod, /**< Texture lod query */ 2177 nir_texop_tg4, /**< Texture gather */ 2178 nir_texop_query_levels, /**< Texture levels query */ 2179 nir_texop_texture_samples, /**< Texture samples query */ 2180 nir_texop_samples_identical, /**< Query whether all samples are definitely 2181 * identical. 2182 */ 2183 nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ 2184 nir_texop_fragment_fetch_amd, /**< Multisample fragment color texture fetch */ 2185 nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */ 2186} nir_texop; 2187 2188/** Represents a texture instruction */ 2189typedef struct { 2190 /** Base instruction */ 2191 nir_instr instr; 2192 2193 /** Dimensionality of the texture operation 2194 * 2195 * This will typically match the dimensionality of the texture deref type 2196 * if a nir_tex_src_texture_deref is present. However, it may not if 2197 * texture lowering has occurred. 
2198 */ 2199 enum glsl_sampler_dim sampler_dim; 2200 2201 /** ALU type of the destination 2202 * 2203 * This is the canonical sampled type for this texture operation and may 2204 * not exactly match the sampled type of the deref type when a 2205 * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of 2206 * the texture deref will be GLSL_TYPE_VOID and this is allowed to be 2207 * anything. With SPIR-V, the signedness of integer types is allowed to 2208 * differ. For all APIs, the bit size may differ if the driver has done 2209 * any sort of mediump or similar lowering since texture types always have 2210 * 32-bit sampled types. 2211 */ 2212 nir_alu_type dest_type; 2213 2214 /** Texture opcode */ 2215 nir_texop op; 2216 2217 /** Destination */ 2218 nir_dest dest; 2219 2220 /** Array of sources 2221 * 2222 * This array has nir_tex_instr::num_srcs elements 2223 */ 2224 nir_tex_src *src; 2225 2226 /** Number of sources */ 2227 unsigned num_srcs; 2228 2229 /** Number of components in the coordinate, if any */ 2230 unsigned coord_components; 2231 2232 /** True if the texture instruction acts on an array texture */ 2233 bool is_array; 2234 2235 /** True if the texture instruction performs a shadow comparison 2236 * 2237 * If this is true, the texture instruction must have a 2238 * nir_tex_src_comparator. 2239 */ 2240 bool is_shadow; 2241 2242 /** 2243 * If is_shadow is true, whether this is the old-style shadow that outputs 2244 * 4 components or the new-style shadow that outputs 1 component. 2245 */ 2246 bool is_new_style_shadow; 2247 2248 /** 2249 * True if this texture instruction should return a sparse residency code. 2250 * The code is in the last component of the result. 2251 */ 2252 bool is_sparse; 2253 2254 /** nir_texop_tg4 component selector 2255 * 2256 * This determines which RGBA component is gathered. 
2257 */ 2258 unsigned component : 2; 2259 2260 /** Validation needs to know this for gradient component count */ 2261 unsigned array_is_lowered_cube : 1; 2262 2263 /** Gather offsets */ 2264 int8_t tg4_offsets[4][2]; 2265 2266 /** True if the texture index or handle is not dynamically uniform */ 2267 bool texture_non_uniform; 2268 2269 /** True if the sampler index or handle is not dynamically uniform */ 2270 bool sampler_non_uniform; 2271 2272 /** The texture index 2273 * 2274 * If this texture instruction has a nir_tex_src_texture_offset source, 2275 * then the texture index is given by texture_index + texture_offset. 2276 */ 2277 unsigned texture_index; 2278 2279 /** The sampler index 2280 * 2281 * The following operations do not require a sampler and, as such, this 2282 * field should be ignored: 2283 * - nir_texop_txf 2284 * - nir_texop_txf_ms 2285 * - nir_texop_txs 2286 * - nir_texop_query_levels 2287 * - nir_texop_texture_samples 2288 * - nir_texop_samples_identical 2289 * 2290 * If this texture instruction has a nir_tex_src_sampler_offset source, 2291 * then the sampler index is given by sampler_index + sampler_offset. 2292 */ 2293 unsigned sampler_index; 2294} nir_tex_instr; 2295 2296/** 2297 * Returns true if the texture operation requires a sampler as a general rule 2298 * 2299 * Note that the specific hw/driver backend could require to a sampler 2300 * object/configuration packet in any case, for some other reason. 2301 * 2302 * @see nir_tex_instr::sampler_index. 
 */
static inline bool
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
{
   switch (instr->op) {
   /* Fetch and query ops address the texture directly; no sampler state is
    * consulted.
    */
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
      return false;
   default:
      return true;
   }
}

/** Returns the number of components returned by this nir_tex_instr
 *
 * Useful for code building texture instructions when you don't want to think
 * about how many components a particular texture op returns.  This does not
 * include the sparse residency code.
 */
static inline unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      /* Size query: one component per dimension, plus one for the layer
       * count on array textures.
       */
      unsigned ret;
      switch (instr->sampler_dim) {
      case GLSL_SAMPLER_DIM_1D:
      case GLSL_SAMPLER_DIM_BUF:
         ret = 1;
         break;
      case GLSL_SAMPLER_DIM_2D:
      case GLSL_SAMPLER_DIM_CUBE:
      case GLSL_SAMPLER_DIM_MS:
      case GLSL_SAMPLER_DIM_RECT:
      case GLSL_SAMPLER_DIM_EXTERNAL:
      case GLSL_SAMPLER_DIM_SUBPASS:
         ret = 2;
         break;
      case GLSL_SAMPLER_DIM_3D:
         ret = 3;
         break;
      default:
         unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      /* LOD query returns (relative LOD, absolute LOD). */
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
      return 1;

   default:
      /* New-style shadow compares resolve to a single component. */
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}

/**
 * Returns the destination size of this nir_tex_instr including the sparse
 * residency code, if any.
 */
static inline unsigned
nir_tex_instr_dest_size(const nir_tex_instr *instr)
{
   /* One more component is needed for the residency code.
*/ 2381 return nir_tex_instr_result_size(instr) + instr->is_sparse; 2382} 2383 2384/** 2385 * Returns true if this texture operation queries something about the texture 2386 * rather than actually sampling it. 2387 */ 2388static inline bool 2389nir_tex_instr_is_query(const nir_tex_instr *instr) 2390{ 2391 switch (instr->op) { 2392 case nir_texop_txs: 2393 case nir_texop_lod: 2394 case nir_texop_texture_samples: 2395 case nir_texop_query_levels: 2396 return true; 2397 case nir_texop_tex: 2398 case nir_texop_txb: 2399 case nir_texop_txl: 2400 case nir_texop_txd: 2401 case nir_texop_txf: 2402 case nir_texop_txf_ms: 2403 case nir_texop_txf_ms_fb: 2404 case nir_texop_txf_ms_mcs_intel: 2405 case nir_texop_tg4: 2406 return false; 2407 default: 2408 unreachable("Invalid texture opcode"); 2409 } 2410} 2411 2412/** Returns true if this texture instruction does implicit derivatives 2413 * 2414 * This is important as there are extra control-flow rules around derivatives 2415 * and texture instructions which perform them implicitly. 
 */
static inline bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   /* tex and txb compute the LOD from implicit derivatives; lod queries the
    * LOD that such a computation would produce.
    */
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   default:
      return false;
   }
}

/** Returns the ALU type of the given texture instruction source */
static inline nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      /* Fetch ops take integer texel coordinates; sampling ops take
       * normalized float coordinates.
       */
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      /* Fetch and size ops take an integer mip level; otherwise the LOD is a
       * float.
       */
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

/**
 * Returns the number of components required by the given texture instruction
 * source
 */
static inline unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      /* Derivatives are taken in coordinate space, which excludes the array
       * index (unless this is a lowered cube, where the layer participates).
       */
      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
    * the offset, since a cube maps to a single face.
    */
   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         return 2;
      else if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   /* Backend-specific srcs carry whatever the backend packed into them. */
   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* Everything else (LOD, bias, comparator, indices, ...) is a scalar. */
   return 1;
}

/**
 * Returns the index of the texture instruction source with the given
 * nir_tex_src_type or -1 if no such source exists.
 */
static inline int
nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
{
   for (unsigned i = 0; i < instr->num_srcs; i++)
      if (instr->src[i].src_type == type)
         return (int) i;

   return -1;
}

/** Adds a source to a texture instruction */
void nir_tex_instr_add_src(nir_tex_instr *tex,
                           nir_tex_src_type src_type,
                           nir_src src);

/** Removes a source from a texture instruction */
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);

bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);

/** An SSA-only instruction holding a compile-time constant value */
typedef struct {
   nir_instr instr;

   nir_ssa_def def;

   /* Flexible array: one nir_const_value per component of ::def. */
   nir_const_value value[];
} nir_load_const_instr;

typedef enum {
   /** Return from a function
    *
    * This instruction is a classic function return.  It jumps to
    * nir_function_impl::end_block.  No return value is provided in this
    * instruction.  Instead, the function is expected to write any return
    * data to a deref passed in from the caller.
    */
   nir_jump_return,

   /** Immediately exit the current shader
    *
    * This instruction is roughly the equivalent of C's "exit()" in that it
    * immediately terminates the current shader invocation.  From a CFG
    * perspective, it looks like a jump to nir_function_impl::end_block but
    * it actually jumps to the end block of the shader entrypoint.  A halt
    * instruction in the shader entrypoint itself is semantically identical
    * to a return.
    *
    * For shaders with built-in I/O, any outputs written prior to a halt
    * instruction remain written and any outputs not written prior to the
    * halt have undefined values.  It does NOT cause an implicit discard of
    * written results.  If one wants to discard results in a fragment shader,
    * for instance, a discard or demote intrinsic is required.
    */
   nir_jump_halt,

   /** Break out of the inner-most loop
    *
    * This has the same semantics as C's "break" statement.
    */
   nir_jump_break,

   /** Jump back to the top of the inner-most loop
    *
    * This has the same semantics as C's "continue" statement assuming that a
    * NIR loop is implemented as "while (1) { body }".
    */
   nir_jump_continue,

   /** Jumps for unstructured CFG.
    *
    * As within an unstructured CFG we can't rely on block ordering we need to
    * place explicit jumps at the end of every block.
    */
   nir_jump_goto,
   nir_jump_goto_if,
} nir_jump_type;

/** A jump instruction terminating a basic block */
typedef struct {
   nir_instr instr;
   nir_jump_type type;
   /* Branch condition; presumably only meaningful for goto_if -- verify
    * against the builders/validator.
    */
   nir_src condition;
   /* Explicit targets for the unstructured goto/goto_if jumps. */
   struct nir_block *target;
   struct nir_block *else_target;
} nir_jump_instr;

/* creates a new SSA variable in an undefined state */

typedef struct {
   nir_instr instr;
   nir_ssa_def def;
} nir_ssa_undef_instr;

/** One incoming value of a phi, tied to a predecessor block */
typedef struct {
   struct exec_node node;

   /* The predecessor block corresponding to this source */
   struct nir_block *pred;

   nir_src src;
} nir_phi_src;

#define nir_foreach_phi_src(phi_src, phi) \
   foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
#define nir_foreach_phi_src_safe(phi_src, phi) \
   foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)

typedef struct {
   nir_instr instr;

   struct exec_list srcs; /** < list of nir_phi_src */

   nir_dest dest;
} nir_phi_instr;

/** Returns the phi source corresponding to the given predecessor block
 *
 * Asserts (debug builds) if \p block is not a predecessor of \p phi.
 */
static inline nir_phi_src *
nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
{
   nir_foreach_phi_src(src, phi) {
      if (src->pred == block)
         return src;
   }

   assert(!"Block is not a predecessor of phi.");
   return NULL;
}

/** One src -> dest copy within a parallel-copy instruction */
typedef struct {
   struct exec_node node;
   nir_src src;
   nir_dest dest;
} nir_parallel_copy_entry;

#define nir_foreach_parallel_copy_entry(entry, pcopy) \
   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)

typedef struct {
   nir_instr instr;

   /* A list of nir_parallel_copy_entrys.  The sources of all of the
    * entries are copied to the corresponding destinations "in parallel".
    * In other words, if we have two entries: a -> b and b -> a, the values
    * get swapped.
    */
   struct exec_list entries;
} nir_parallel_copy_instr;

/* Checked downcasts from nir_instr to each concrete instruction type. */
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
                type, nir_instr_type_alu)
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
                type, nir_instr_type_deref)
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
                type, nir_instr_type_call)
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
                type, nir_instr_type_jump)
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
                type, nir_instr_type_tex)
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
                type, nir_instr_type_intrinsic)
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
                type, nir_instr_type_load_const)
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
                type, nir_instr_type_ssa_undef)
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
                type, nir_instr_type_phi)
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                nir_parallel_copy_instr, instr,
                type, nir_instr_type_parallel_copy)


/* Defines nir_src_as_<suffix>() and nir_src_comp_as_<suffix>() accessors
 * that read a constant src as the given C type (asserts the src is const).
 */
#define NIR_DEFINE_SRC_AS_CONST(type, suffix)               \
static inline type                                          \
nir_src_comp_as_##suffix(nir_src src, unsigned comp)        \
{                                                           \
   assert(nir_src_is_const(src));                           \
   nir_load_const_instr *load =                             \
      nir_instr_as_load_const(src.ssa->parent_instr);       \
   assert(comp < load->def.num_components);                 \
   return nir_const_value_as_##suffix(load->value[comp],    \
                                      load->def.bit_size);  \
}                                                           \
                                                            \
static inline type                                          \
nir_src_as_##suffix(nir_src src)                            \
{                                                           \
   assert(nir_src_num_components(src) == 1);                \
   return nir_src_comp_as_##suffix(src, 0);                 \
}

NIR_DEFINE_SRC_AS_CONST(int64_t,    int)
NIR_DEFINE_SRC_AS_CONST(uint64_t,   uint)
NIR_DEFINE_SRC_AS_CONST(bool,       bool)
NIR_DEFINE_SRC_AS_CONST(double,     float)

#undef NIR_DEFINE_SRC_AS_CONST


/** A single component (channel) of an SSA def */
typedef struct {
   nir_ssa_def *def;
   unsigned comp;
} nir_ssa_scalar;

/** Returns true if the scalar comes from a load_const instruction */
static inline bool
nir_ssa_scalar_is_const(nir_ssa_scalar s)
{
   return s.def->parent_instr->type == nir_instr_type_load_const;
}

/** Returns the constant value of the scalar (must be const) */
static inline nir_const_value
nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
{
   assert(s.comp < s.def->num_components);
   nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
   return load->value[s.comp];
}

/* Defines nir_ssa_scalar_as_<suffix>() for reading a constant scalar as the
 * given C type.
 */
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix)            \
static inline type                                          \
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s)                \
{                                                           \
   return nir_const_value_as_##suffix(                      \
      nir_ssa_scalar_as_const_value(s), s.def->bit_size);   \
}

NIR_DEFINE_SCALAR_AS_CONST(int64_t,    int)
NIR_DEFINE_SCALAR_AS_CONST(uint64_t,   uint)
NIR_DEFINE_SCALAR_AS_CONST(bool,       bool)
NIR_DEFINE_SCALAR_AS_CONST(double,     float)

#undef NIR_DEFINE_SCALAR_AS_CONST

/** Returns true if the scalar comes from an ALU instruction */
static inline bool
nir_ssa_scalar_is_alu(nir_ssa_scalar s)
{
   return s.def->parent_instr->type == nir_instr_type_alu;
}

/** Returns the opcode of the ALU instruction producing the scalar */
static inline nir_op
nir_ssa_scalar_alu_op(nir_ssa_scalar s)
{
   return nir_instr_as_alu(s.def->parent_instr)->op;
}

/** Chases the scalar through the given source of its producing ALU instr
 *
 * Returns the scalar (def + component) that feeds component s.comp of the
 * ALU result via source alu_src_idx, honoring the source swizzle.
 */
static inline nir_ssa_scalar
nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
{
   nir_ssa_scalar out = { NULL, 0 };

   nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
   assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);

   /* Our component must be written */
   assert(s.comp < s.def->num_components);
   assert(alu->dest.write_mask & (1u << s.comp));

   assert(alu->src[alu_src_idx].src.is_ssa);
   out.def = alu->src[alu_src_idx].src.ssa;

   if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
      /* The ALU src is unsized so the source component follows the
       * destination component.
       */
      out.comp = alu->src[alu_src_idx].swizzle[s.comp];
   } else {
      /* This is a sized source so all source components work together to
       * produce all the destination components.  Since we need to return a
       * scalar, this only works if the source is a scalar.
       */
      assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
      out.comp = alu->src[alu_src_idx].swizzle[0];
   }
   assert(out.comp < out.def->num_components);

   return out;
}

nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);

/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
static inline nir_ssa_scalar
nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
{
   nir_ssa_scalar s = { def, channel };
   return nir_ssa_scalar_chase_movs(s);
}


/** Result of nir_chase_binding(): the resource a src ultimately refers to */
typedef struct {
   /* False if the binding could not be determined. */
   bool success;

   nir_variable *var;
   unsigned desc_set;
   unsigned binding;
   unsigned num_indices;
   nir_src indices[4];
   bool read_first_invocation;
} nir_binding;

nir_binding nir_chase_binding(nir_src rsrc);
nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);


/*
 * Control flow
 *
 * Control flow consists of a tree of control flow nodes, which include
 * if-statements and loops.  The leaves of the tree are basic blocks, lists of
 * instructions that always run start-to-finish.
 * Each basic block also keeps
 * track of its successors (blocks which may run immediately after the current
 * block) and predecessors (blocks which could have run immediately before the
 * current block).  Each function also has a start block and an end block which
 * all return statements point to (which is always empty).  Together, all the
 * blocks with their predecessors and successors make up the control flow
 * graph (CFG) of the function.  There are helpers that modify the tree of
 * control flow nodes while modifying the CFG appropriately; these should be
 * used instead of modifying the tree directly.
 */

typedef enum {
   nir_cf_node_block,
   nir_cf_node_if,
   nir_cf_node_loop,
   nir_cf_node_function
} nir_cf_node_type;

/** Base type for all nodes in the control-flow tree */
typedef struct nir_cf_node {
   struct exec_node node;
   nir_cf_node_type type;
   struct nir_cf_node *parent;
} nir_cf_node;

typedef struct nir_block {
   nir_cf_node cf_node;

   struct exec_list instr_list; /** < list of nir_instr */

   /** generic block index; generated by nir_index_blocks */
   unsigned index;

   /*
    * Each block can only have up to 2 successors, so we put them in a simple
    * array - no need for anything more complicated.
    */
   struct nir_block *successors[2];

   /* Set of nir_block predecessors in the CFG */
   struct set *predecessors;

   /*
    * this node's immediate dominator in the dominance tree - set to NULL for
    * the start block.
    */
   struct nir_block *imm_dom;

   /* This node's children in the dominance tree */
   unsigned num_dom_children;
   struct nir_block **dom_children;

   /* Set of nir_blocks on the dominance frontier of this block */
   struct set *dom_frontier;

   /*
    * These two indices have the property that dom_{pre,post}_index for each
    * child of this block in the dominance tree will always be between
    * dom_pre_index and dom_post_index for this block, which makes testing if
    * a given block is dominated by another block an O(1) operation.
    */
   uint32_t dom_pre_index, dom_post_index;

   /**
    * Value just before the first nir_instr->index in the block, but after
    * the end_ip of any predecessor block.
    */
   uint32_t start_ip;
   /**
    * Value just after the last nir_instr->index in the block, but before the
    * start_ip of any successor block.
    */
   uint32_t end_ip;

   /* SSA def live in and out for this block; used for liveness analysis.
    * Indexed by ssa_def->index
    */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;
} nir_block;

static inline bool
nir_block_is_reachable(nir_block *b)
{
   /* See also nir_block_dominates */
   return b->dom_post_index != 0;
}

static inline nir_instr *
nir_block_first_instr(nir_block *block)
{
   struct exec_node *head = exec_list_get_head(&block->instr_list);
   return exec_node_data(nir_instr, head, node);
}

static inline nir_instr *
nir_block_last_instr(nir_block *block)
{
   struct exec_node *tail = exec_list_get_tail(&block->instr_list);
   return exec_node_data(nir_instr, tail, node);
}

/** Returns true if the block's last instruction is any kind of jump */
static inline bool
nir_block_ends_in_jump(nir_block *block)
{
   return !exec_list_is_empty(&block->instr_list) &&
          nir_block_last_instr(block)->type == nir_instr_type_jump;
}

/** Returns true if the block ends in a return or halt jump */
static inline bool
nir_block_ends_in_return_or_halt(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   if (instr->type != nir_instr_type_jump)
      return false;

   nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
   return jump_instr->type == nir_jump_return ||
          jump_instr->type == nir_jump_halt;
}

/** Returns true if the block ends in a break jump */
static inline bool
nir_block_ends_in_break(nir_block *block)
{
   if (exec_list_is_empty(&block->instr_list))
      return false;

   nir_instr *instr = nir_block_last_instr(block);
   return instr->type == nir_instr_type_jump &&
          nir_instr_as_jump(instr)->type == nir_jump_break;
}

#define nir_foreach_instr(instr, block) \
   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_reverse(instr, block) \
   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_safe(instr, block) \
   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_reverse_safe(instr, block) \
   foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)

/** Returns the last phi in the block's leading run of phis, or NULL
 *
 * Relies on phis being grouped at the top of the block: iteration stops at
 * the first non-phi instruction.
 */
static inline nir_phi_instr *
nir_block_last_phi_instr(nir_block *block)
{
   nir_phi_instr *last_phi = NULL;
   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         last_phi = nir_instr_as_phi(instr);
      else
         return last_phi;
   }
   return last_phi;
}

typedef enum {
   nir_selection_control_none = 0x0,
   nir_selection_control_flatten = 0x1,
   nir_selection_control_dont_flatten = 0x2,
} nir_selection_control;

typedef struct nir_if {
   nir_cf_node cf_node;
   nir_src condition;
   nir_selection_control control;

   struct exec_list then_list; /** < list of nir_cf_node */
   struct exec_list else_list; /** < list of nir_cf_node */
} nir_if;

/** Describes one exit (break) path of a loop, as found by loop analysis */
typedef struct {
   nir_if *nif;

   /** Instruction that generates nif::condition. */
   nir_instr *conditional_instr;

   /** Block within ::nif that has the break instruction. */
   nir_block *break_block;

   /** Last block for the then- or else-path that does not contain the break. */
   nir_block *continue_from_block;

   /** True when ::break_block is in the else-path of ::nif. */
   bool continue_from_then;
   bool induction_rhs;

   /* This is true if the terminator's exact trip count is unknown.  For
    * example:
    *
    *    for (int i = 0; i < imin(x, 4); i++)
    *       ...
    *
    * Here loop analysis would have set a max_trip_count of 4 however we don't
    * know for sure that this is the exact trip count.
    */
   bool exact_trip_count_unknown;

   struct list_head loop_terminator_link;
} nir_loop_terminator;

typedef struct {
   /* Induction variable. */
   nir_ssa_def *def;

   /* Init statement with only uniform.
    */
   nir_src *init_src;

   /* Update statement with only uniform. */
   nir_alu_src *update_src;
} nir_loop_induction_variable;

typedef struct {
   /* Estimated cost (in number of instructions) of the loop */
   unsigned instr_cost;

   /* Guessed trip count based on array indexing */
   unsigned guessed_trip_count;

   /* Maximum number of times the loop is run (if known) */
   unsigned max_trip_count;

   /* Do we know the exact number of times the loop will be run */
   bool exact_trip_count_known;

   /* Unroll the loop regardless of its size */
   bool force_unroll;

   /* Does the loop contain complex loop terminators, continues or other
    * complex behaviours?  If this is true we can't rely on
    * loop_terminator_list to be complete or accurate.
    */
   bool complex_loop;

   nir_loop_terminator *limiting_terminator;

   /* A list of loop_terminators terminating this loop. */
   struct list_head loop_terminator_list;

   /* array of induction variables for this loop */
   nir_loop_induction_variable *induction_vars;
   unsigned num_induction_vars;
} nir_loop_info;

typedef enum {
   nir_loop_control_none = 0x0,
   nir_loop_control_unroll = 0x1,
   nir_loop_control_dont_unroll = 0x2,
} nir_loop_control;

typedef struct {
   nir_cf_node cf_node;

   struct exec_list body; /** < list of nir_cf_node */

   nir_loop_info *info;
   nir_loop_control control;
   bool partially_unrolled;
   bool divergent;
} nir_loop;

/**
 * Various bits of metadata that may be created or required by
 * optimization and analysis passes
 */
typedef enum {
   nir_metadata_none = 0x0,

   /** Indicates that nir_block::index values are valid.
    *
    * The start block has index 0 and they increase through a natural walk of
    * the CFG.  nir_function_impl::num_blocks is the number of blocks and
    * every block index is in the range [0, nir_function_impl::num_blocks].
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_block_index = 0x1,

   /** Indicates that block dominance information is valid
    *
    * This includes:
    *
    *    - nir_block::num_dom_children
    *    - nir_block::dom_children
    *    - nir_block::dom_frontier
    *    - nir_block::dom_pre_index
    *    - nir_block::dom_post_index
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_dominance = 0x2,

   /** Indicates that SSA def data-flow liveness information is valid
    *
    * This includes:
    *
    *    - nir_block::live_in
    *    - nir_block::live_out
    *
    * A pass can preserve this metadata type if it never adds or removes any
    * SSA defs or uses of SSA defs (most passes shouldn't preserve this
    * metadata type).
    */
   nir_metadata_live_ssa_defs = 0x4,

   /** A dummy metadata value to track when a pass forgot to call
    * nir_metadata_preserve.
    *
    * A pass should always clear this value even if it doesn't make any
    * progress to indicate that it thought about preserving metadata.
    */
   nir_metadata_not_properly_reset = 0x8,

   /** Indicates that loop analysis information is valid.
    *
    * This includes everything pointed to by nir_loop::info.
    *
    * A pass can preserve this metadata type if it is guaranteed to not affect
    * any loop metadata.  However, since loop metadata includes things like
    * loop counts which depend on arithmetic in the loop, this is very hard to
    * determine.  Most passes shouldn't preserve this metadata type.
    */
   nir_metadata_loop_analysis = 0x10,

   /** Indicates that nir_instr::index values are valid.
    *
    * The start instruction has index 0 and they increase through a natural
    * walk of instructions in blocks in the CFG.  The indices may have holes
    * after passes such as DCE.
    *
    * A pass can preserve this metadata type if it never adds or moves any
    * instructions (most passes shouldn't preserve this metadata type), but
    * can preserve it if it only removes instructions.
    */
   nir_metadata_instr_index = 0x20,

   /** All metadata
    *
    * This includes all nir_metadata flags except not_properly_reset.  Passes
    * which do not change the shader in any way should call
    *
    *    nir_metadata_preserve(impl, nir_metadata_all);
    */
   nir_metadata_all = ~nir_metadata_not_properly_reset,
} nir_metadata;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)

typedef struct {
   nir_cf_node cf_node;

   /** pointer to the function of which this is an implementation */
   struct nir_function *function;

   struct exec_list body; /** < list of nir_cf_node */

   nir_block *end_block;

   /** list for all local variables in the function */
   struct exec_list locals;

   /** list of local registers in the function */
   struct exec_list registers;

   /** next available local register index */
   unsigned reg_alloc;

   /** next available SSA value index */
   unsigned ssa_alloc;

   /* total number of basic blocks, only valid when block_index_dirty = false */
   unsigned num_blocks;

   /** True if this nir_function_impl uses structured control-flow
    *
    * Structured nir_function_impls have different validation rules.
    */
   bool structured;

   nir_metadata valid_metadata;
} nir_function_impl;

#define nir_foreach_function_temp_variable(var, impl) \
   foreach_list_typed(nir_variable, var, node, &(impl)->locals)

#define nir_foreach_function_temp_variable_safe(var, impl) \
   foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)

/** Returns the first block of the function body */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_start_block(nir_function_impl *impl)
{
   return (nir_block *) impl->body.head_sentinel.next;
}

/** Returns the last block of the function body (not ::end_block) */
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
nir_impl_last_block(nir_function_impl *impl)
{
   return (nir_block *) impl->body.tail_sentinel.prev;
}

/** Returns the next sibling CF node, or NULL at the end of the list */
static inline nir_cf_node *
nir_cf_node_next(nir_cf_node *node)
{
   struct exec_node *next = exec_node_get_next(&node->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_cf_node, next, node);
}

/** Returns the previous sibling CF node, or NULL at the start of the list */
static inline nir_cf_node *
nir_cf_node_prev(nir_cf_node *node)
{
   struct exec_node *prev = exec_node_get_prev(&node->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_cf_node, prev, node);
}

static inline bool
nir_cf_node_is_first(const nir_cf_node *node)
{
   return exec_node_is_head_sentinel(node->node.prev);
}

static inline bool
nir_cf_node_is_last(const nir_cf_node *node)
{
   return exec_node_is_tail_sentinel(node->node.next);
}

/* Checked downcasts from nir_cf_node to each concrete CF node type. */
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
                type, nir_cf_node_block)
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
                type, nir_cf_node_if)
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
                type, nir_cf_node_loop)
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
                nir_function_impl, cf_node, type, nir_cf_node_function)
3282static inline nir_block * 3283nir_if_first_then_block(nir_if *if_stmt) 3284{ 3285 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 3286 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3287} 3288 3289static inline nir_block * 3290nir_if_last_then_block(nir_if *if_stmt) 3291{ 3292 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 3293 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3294} 3295 3296static inline nir_block * 3297nir_if_first_else_block(nir_if *if_stmt) 3298{ 3299 struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 3300 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3301} 3302 3303static inline nir_block * 3304nir_if_last_else_block(nir_if *if_stmt) 3305{ 3306 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 3307 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3308} 3309 3310static inline nir_block * 3311nir_loop_first_block(nir_loop *loop) 3312{ 3313 struct exec_node *head = exec_list_get_head(&loop->body); 3314 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3315} 3316 3317static inline nir_block * 3318nir_loop_last_block(nir_loop *loop) 3319{ 3320 struct exec_node *tail = exec_list_get_tail(&loop->body); 3321 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3322} 3323 3324/** 3325 * Return true if this list of cf_nodes contains a single empty block. 
 */
static inline bool
nir_cf_list_is_empty_block(struct exec_list *cf_list)
{
   if (exec_list_is_singular(cf_list)) {
      struct exec_node *head = exec_list_get_head(cf_list);
      nir_block *block =
         nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
      /* Exactly one cf_node, and it is a block with no instructions */
      return exec_list_is_empty(&block->instr_list);
   }
   return false;
}

/** Shape of one function parameter (only the value's layout is recorded) */
typedef struct {
   uint8_t num_components; /* vector width of the parameter */
   uint8_t bit_size;       /* bit size of each component */
} nir_parameter;

/** Format strings and argument layout for printf calls recorded in a shader */
typedef struct nir_printf_info {
   unsigned num_args;    /* number of entries in arg_sizes */
   unsigned *arg_sizes;  /* per-argument sizes -- presumably bytes; TODO confirm units */
   unsigned string_size; /* total size of the strings blob */
   char *strings;        /* concatenated string data */
} nir_printf_info;

/** A function: a named signature plus an optional implementation */
typedef struct nir_function {
   struct exec_node node; /* link in nir_shader::functions */

   const char *name;
   struct nir_shader *shader; /* the shader this function belongs to */

   unsigned num_params;
   nir_parameter *params; /* array of num_params parameter descriptions */

   /** The implementation of this function.
    *
    * If the function is only declared and not implemented, this is NULL.
    */
   nir_function_impl *impl;

   bool is_entrypoint; /* true for the shader's entry point */
} nir_function;

/** Bitmask selecting which 64-bit integer operations should be lowered.
 *
 * NOTE(review): presumably consumed by the int64 lowering pass; the pass
 * itself is not visible in this part of the header.
 */
typedef enum {
   nir_lower_imul64 = (1 << 0),
   nir_lower_isign64 = (1 << 1),
   /** Lower all int64 modulus and division opcodes */
   nir_lower_divmod64 = (1 << 2),
   /** Lower all 64-bit umul_high and imul_high opcodes */
   nir_lower_imul_high64 = (1 << 3),
   nir_lower_mov64 = (1 << 4),
   nir_lower_icmp64 = (1 << 5),
   nir_lower_iadd64 = (1 << 6),
   nir_lower_iabs64 = (1 << 7),
   nir_lower_ineg64 = (1 << 8),
   nir_lower_logic64 = (1 << 9),
   nir_lower_minmax64 = (1 << 10),
   nir_lower_shift64 = (1 << 11),
   nir_lower_imul_2x32_64 = (1 << 12),
   nir_lower_extract64 = (1 << 13),
   nir_lower_ufind_msb64 = (1 << 14),
   nir_lower_bit_count64 = (1 << 15),
   nir_lower_subgroup_shuffle64 = (1 << 16),
   nir_lower_scan_reduce_bitwise64 = (1 << 17),
   nir_lower_scan_reduce_iadd64 = (1 << 18),
   nir_lower_vote_ieq64 = (1 << 19),
} nir_lower_int64_options;

/** Bitmask selecting which double-precision operations should be lowered */
typedef enum
{
   nir_lower_drcp = (1 << 0),
   nir_lower_dsqrt = (1 << 1),
   nir_lower_drsq = (1 << 2),
   nir_lower_dtrunc = (1 << 3),
   nir_lower_dfloor = (1 << 4),
   nir_lower_dceil = (1 << 5),
   nir_lower_dfract = (1 << 6),
   nir_lower_dround_even = (1 << 7),
   nir_lower_dmod = (1 << 8),
   nir_lower_dsub = (1 << 9),
   nir_lower_ddiv = (1 << 10),
   /* Emulate all fp64 entirely in software */
   nir_lower_fp64_full_software = (1 << 11),
} nir_lower_doubles_options;

/** Uniformity guarantees the driver can promise to divergence analysis */
typedef enum {
   nir_divergence_single_prim_per_subgroup = (1 << 0),
   nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
   nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
   nir_divergence_view_index_uniform = (1 << 3),
   nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
   nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
} nir_divergence_options;

/** Interpolation modes/locations allowed to share a packed varying slot.
 *
 * Mode flags occupy the low bits; location flags start at bit 16.
 */
typedef enum {
   nir_pack_varying_interp_mode_none = (1 << 0),
   nir_pack_varying_interp_mode_smooth = (1 << 1),
   nir_pack_varying_interp_mode_flat = (1 << 2),
   nir_pack_varying_interp_mode_noperspective = (1 << 3),
   nir_pack_varying_interp_loc_sample = (1 << 16),
   nir_pack_varying_interp_loc_centroid = (1 << 17),
   nir_pack_varying_interp_loc_center = (1 << 18),
} nir_pack_varying_options;

/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
3431 */ 3432typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); 3433 3434typedef struct nir_shader_compiler_options { 3435 bool lower_fdiv; 3436 bool lower_ffma16; 3437 bool lower_ffma32; 3438 bool lower_ffma64; 3439 bool fuse_ffma16; 3440 bool fuse_ffma32; 3441 bool fuse_ffma64; 3442 bool lower_flrp16; 3443 bool lower_flrp32; 3444 /** Lowers flrp when it does not support doubles */ 3445 bool lower_flrp64; 3446 bool lower_fpow; 3447 bool lower_fsat; 3448 bool lower_fsqrt; 3449 bool lower_sincos; 3450 bool lower_fmod; 3451 /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */ 3452 bool lower_bitfield_extract; 3453 /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ 3454 bool lower_bitfield_extract_to_shifts; 3455 /** Lowers bitfield_insert to bfi/bfm */ 3456 bool lower_bitfield_insert; 3457 /** Lowers bitfield_insert to compares, and shifts. */ 3458 bool lower_bitfield_insert_to_shifts; 3459 /** Lowers bitfield_insert to bfm/bitfield_select. */ 3460 bool lower_bitfield_insert_to_bitfield_select; 3461 /** Lowers bitfield_reverse to shifts. */ 3462 bool lower_bitfield_reverse; 3463 /** Lowers bit_count to shifts. */ 3464 bool lower_bit_count; 3465 /** Lowers ifind_msb to compare and ufind_msb */ 3466 bool lower_ifind_msb; 3467 /** Lowers ifind_msb and ufind_msb to reverse variants */ 3468 bool lower_find_msb_to_reverse; 3469 /** Lowers find_lsb to ufind_msb and logic ops */ 3470 bool lower_find_lsb; 3471 bool lower_uadd_carry; 3472 bool lower_usub_borrow; 3473 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 3474 bool lower_mul_high; 3475 /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */ 3476 bool lower_fneg; 3477 /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */ 3478 bool lower_ineg; 3479 /** lowers fisnormal to alu ops. 
*/ 3480 bool lower_fisnormal; 3481 3482 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ 3483 bool lower_scmp; 3484 3485 /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ 3486 bool lower_vector_cmp; 3487 3488 /** enable rules to avoid bit ops */ 3489 bool lower_bitops; 3490 3491 /** enables rules to lower isign to imin+imax */ 3492 bool lower_isign; 3493 3494 /** enables rules to lower fsign to fsub and flt */ 3495 bool lower_fsign; 3496 3497 /** enables rules to lower iabs to ineg+imax */ 3498 bool lower_iabs; 3499 3500 /** enable rules that avoid generating umax from signed integer ops */ 3501 bool lower_umax; 3502 3503 /** enable rules that avoid generating umin from signed integer ops */ 3504 bool lower_umin; 3505 3506 /* lower fdph to fdot4 */ 3507 bool lower_fdph; 3508 3509 /** lower fdot to fmul and fsum/fadd. */ 3510 bool lower_fdot; 3511 3512 /* Does the native fdot instruction replicate its result for four 3513 * components? If so, then opt_algebraic_late will turn all fdotN 3514 * instructions into fdotN_replicated instructions. 
3515 */ 3516 bool fdot_replicates; 3517 3518 /** lowers ffloor to fsub+ffract: */ 3519 bool lower_ffloor; 3520 3521 /** lowers ffract to fsub+ffloor: */ 3522 bool lower_ffract; 3523 3524 /** lowers fceil to fneg+ffloor+fneg: */ 3525 bool lower_fceil; 3526 3527 bool lower_ftrunc; 3528 3529 bool lower_ldexp; 3530 3531 bool lower_pack_half_2x16; 3532 bool lower_pack_unorm_2x16; 3533 bool lower_pack_snorm_2x16; 3534 bool lower_pack_unorm_4x8; 3535 bool lower_pack_snorm_4x8; 3536 bool lower_pack_64_2x32; 3537 bool lower_pack_64_4x16; 3538 bool lower_pack_32_2x16; 3539 bool lower_pack_64_2x32_split; 3540 bool lower_pack_32_2x16_split; 3541 bool lower_unpack_half_2x16; 3542 bool lower_unpack_unorm_2x16; 3543 bool lower_unpack_snorm_2x16; 3544 bool lower_unpack_unorm_4x8; 3545 bool lower_unpack_snorm_4x8; 3546 bool lower_unpack_64_2x32_split; 3547 bool lower_unpack_32_2x16_split; 3548 3549 bool lower_pack_split; 3550 3551 bool lower_extract_byte; 3552 bool lower_extract_word; 3553 bool lower_insert_byte; 3554 bool lower_insert_word; 3555 3556 bool lower_all_io_to_temps; 3557 bool lower_all_io_to_elements; 3558 3559 /* Indicates that the driver only has zero-based vertex id */ 3560 bool vertex_id_zero_based; 3561 3562 /** 3563 * If enabled, gl_BaseVertex will be lowered as: 3564 * is_indexed_draw (~0/0) & firstvertex 3565 */ 3566 bool lower_base_vertex; 3567 3568 /** 3569 * If enabled, gl_HelperInvocation will be lowered as: 3570 * 3571 * !((1 << sample_id) & sample_mask_in)) 3572 * 3573 * This depends on some possibly hw implementation details, which may 3574 * not be true for all hw. In particular that the FS is only executed 3575 * for covered samples or for helper invocations. So, do not blindly 3576 * enable this option. 
3577 * 3578 * Note: See also issue #22 in ARB_shader_image_load_store 3579 */ 3580 bool lower_helper_invocation; 3581 3582 /** 3583 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 3584 * 3585 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 3586 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 3587 */ 3588 bool optimize_sample_mask_in; 3589 3590 bool lower_cs_local_index_from_id; 3591 bool lower_cs_local_id_from_index; 3592 3593 /* Prevents lowering global_invocation_id to be in terms of workgroup_id */ 3594 bool has_cs_global_id; 3595 3596 bool lower_device_index_to_zero; 3597 3598 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord. 3599 * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN 3600 * is GL_LOWER_LEFT. 3601 */ 3602 bool lower_wpos_pntc; 3603 3604 /** 3605 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be 3606 * lowered to simple arithmetic. 3607 * 3608 * If this flag is set, the lowering will be applied to all bit-sizes of 3609 * these instructions. 3610 * 3611 * \sa ::lower_hadd64 3612 */ 3613 bool lower_hadd; 3614 3615 /** 3616 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions 3617 * should be lowered to simple arithmetic. 3618 * 3619 * If this flag is set, the lowering will be applied to only 64-bit 3620 * versions of these instructions. 3621 * 3622 * \sa ::lower_hadd 3623 */ 3624 bool lower_hadd64; 3625 3626 /** 3627 * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple 3628 * arithmetic. 3629 * 3630 * If this flag is set, the lowering will be applied to all bit-sizes of 3631 * these instructions. 3632 * 3633 * \sa ::lower_usub_sat64 3634 */ 3635 bool lower_uadd_sat; 3636 3637 /** 3638 * Set if only 64-bit nir_op_usub_sat should be lowered to simple 3639 * arithmetic. 3640 * 3641 * \sa ::lower_add_sat 3642 */ 3643 bool lower_usub_sat64; 3644 3645 /** 3646 * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple 3647 * arithmetic. 
3648 * 3649 * If this flag is set, the lowering will be applied to all bit-sizes of 3650 * these instructions. 3651 */ 3652 bool lower_iadd_sat; 3653 3654 /** 3655 * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's 3656 * for IO purposes and would prefer loads/stores be vectorized. 3657 */ 3658 bool vectorize_io; 3659 bool lower_to_scalar; 3660 nir_instr_filter_cb lower_to_scalar_filter; 3661 3662 /** 3663 * Whether nir_opt_vectorize should only create 16-bit 2D vectors. 3664 */ 3665 bool vectorize_vec2_16bit; 3666 3667 /** 3668 * Should the linker unify inputs_read/outputs_written between adjacent 3669 * shader stages which are linked into a single program? 3670 */ 3671 bool unify_interfaces; 3672 3673 /** 3674 * Should nir_lower_io() create load_interpolated_input intrinsics? 3675 * 3676 * If not, it generates regular load_input intrinsics and interpolation 3677 * information must be inferred from the list of input nir_variables. 3678 */ 3679 bool use_interpolated_input_intrinsics; 3680 3681 3682 /** 3683 * Whether nir_lower_io() will lower interpolateAt functions to 3684 * load_interpolated_input intrinsics. 3685 * 3686 * Unlike use_interpolated_input_intrinsics this will only lower these 3687 * functions and leave input load intrinsics untouched. 3688 */ 3689 bool lower_interpolate_at; 3690 3691 /* Lowers when 32x32->64 bit multiplication is not supported */ 3692 bool lower_mul_2x32_64; 3693 3694 /* Lowers when rotate instruction is not supported */ 3695 bool lower_rotate; 3696 3697 /** Backend supports ternary addition */ 3698 bool has_iadd3; 3699 3700 /** 3701 * Backend supports imul24, and would like to use it (when possible) 3702 * for address/offset calculation. If true, driver should call 3703 * nir_lower_amul(). (If not set, amul will automatically be lowered 3704 * to imul.) 
3705 */ 3706 bool has_imul24; 3707 3708 /** Backend supports umul24, if not set umul24 will automatically be lowered 3709 * to imul with masked inputs */ 3710 bool has_umul24; 3711 3712 /** Backend supports umad24, if not set umad24 will automatically be lowered 3713 * to imul with masked inputs and iadd */ 3714 bool has_umad24; 3715 3716 /* Backend supports fused comapre against zero and csel */ 3717 bool has_fused_comp_and_csel; 3718 3719 /** Backend supports fsub, if not set fsub will automatically be lowered to 3720 * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3721 bool has_fsub; 3722 3723 /** Backend supports isub, if not set isub will automatically be lowered to 3724 * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */ 3725 bool has_isub; 3726 3727 /** Backend supports pack_32_4x8 or pack_32_4x8_split. */ 3728 bool has_pack_32_4x8; 3729 3730 /** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants 3731 * for rect texture lowering. */ 3732 bool has_txs; 3733 3734 /** Backend supports sdot_4x8 and udot_4x8 opcodes. */ 3735 bool has_dot_4x8; 3736 3737 /** Backend supports sudot_4x8 opcodes. */ 3738 bool has_sudot_4x8; 3739 3740 /** Backend supports sdot_2x16 and udot_2x16 opcodes. */ 3741 bool has_dot_2x16; 3742 3743 /* Whether to generate only scoped_barrier intrinsics instead of the set of 3744 * memory and control barrier intrinsics based on GLSL. 3745 */ 3746 bool use_scoped_barrier; 3747 3748 /** 3749 * Is this the Intel vec4 backend? 3750 * 3751 * Used to inhibit algebraic optimizations that are known to be harmful on 3752 * the Intel vec4 backend. This is generally applicable to any 3753 * optimization that might cause more immediate values to be used in 3754 * 3-source (e.g., ffma and flrp) instructions. 
3755 */ 3756 bool intel_vec4; 3757 3758 /** 3759 * For most Intel GPUs, all ternary operations such as FMA and BFE cannot 3760 * have immediates, so two to three instructions may eventually be needed. 3761 */ 3762 bool avoid_ternary_with_two_constants; 3763 3764 /** Whether 8-bit ALU is supported. */ 3765 bool support_8bit_alu; 3766 3767 /** Whether 16-bit ALU is supported. */ 3768 bool support_16bit_alu; 3769 3770 unsigned max_unroll_iterations; 3771 unsigned max_unroll_iterations_aggressive; 3772 3773 bool lower_uniforms_to_ubo; 3774 3775 /* If the precision is ignored, backends that don't handle 3776 * different precisions when passing data between stages and use 3777 * vectorized IO can pack more varyings when linking. */ 3778 bool linker_ignore_precision; 3779 3780 /** 3781 * Specifies which type of indirectly accessed variables should force 3782 * loop unrolling. 3783 */ 3784 nir_variable_mode force_indirect_unrolling; 3785 3786 nir_lower_int64_options lower_int64_options; 3787 nir_lower_doubles_options lower_doubles_options; 3788 nir_divergence_options divergence_analysis_options; 3789 3790 /** 3791 * Support pack varyings with different interpolation location 3792 * (center, centroid, sample) and mode (flat, noperspective, smooth) 3793 * into same slot. 3794 */ 3795 nir_pack_varying_options pack_varying_options; 3796} nir_shader_compiler_options; 3797 3798typedef struct nir_shader { 3799 /** list of uniforms (nir_variable) */ 3800 struct exec_list variables; 3801 3802 /** Set of driver-specific options for the shader. 3803 * 3804 * The memory for the options is expected to be kept in a single static 3805 * copy by the driver. 
3806 */ 3807 const struct nir_shader_compiler_options *options; 3808 3809 /** Various bits of compile-time information about a given shader */ 3810 struct shader_info info; 3811 3812 struct exec_list functions; /** < list of nir_function */ 3813 3814 struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. */ 3815 3816 /** 3817 * The size of the variable space for load_input_*, load_uniform_*, etc. 3818 * intrinsics. This is in back-end specific units which is likely one of 3819 * bytes, dwords, or vec4s depending on context and back-end. 3820 */ 3821 unsigned num_inputs, num_uniforms, num_outputs; 3822 3823 /** Size in bytes of required scratch space */ 3824 unsigned scratch_size; 3825 3826 /** Constant data associated with this shader. 3827 * 3828 * Constant data is loaded through load_constant intrinsics (as compared to 3829 * the NIR load_const instructions which have the constant value inlined 3830 * into them). This is usually generated by nir_opt_large_constants (so 3831 * shaders don't have to load_const into a temporary array when they want 3832 * to indirect on a const array). 
3833 */ 3834 void *constant_data; 3835 /** Size of the constant data associated with the shader, in bytes */ 3836 unsigned constant_data_size; 3837 3838 unsigned printf_info_count; 3839 nir_printf_info *printf_info; 3840} nir_shader; 3841 3842#define nir_foreach_function(func, shader) \ 3843 foreach_list_typed(nir_function, func, node, &(shader)->functions) 3844 3845static inline nir_function_impl * 3846nir_shader_get_entrypoint(nir_shader *shader) 3847{ 3848 nir_function *func = NULL; 3849 3850 nir_foreach_function(function, shader) { 3851 assert(func == NULL); 3852 if (function->is_entrypoint) { 3853 func = function; 3854#ifndef NDEBUG 3855 break; 3856#endif 3857 } 3858 } 3859 3860 if (!func) 3861 return NULL; 3862 3863 assert(func->num_params == 0); 3864 assert(func->impl); 3865 return func->impl; 3866} 3867 3868typedef struct nir_liveness_bounds { 3869 uint32_t start; 3870 uint32_t end; 3871} nir_liveness_bounds; 3872 3873typedef struct nir_instr_liveness { 3874 /** 3875 * nir_instr->index for the start and end of a single live interval for SSA 3876 * defs. ssa values last used by a nir_if condition will have an interval 3877 * ending at the first instruction after the last one before the if 3878 * condition. 3879 * 3880 * Indexed by def->index (impl->ssa_alloc elements). 
 */
   struct nir_liveness_bounds *defs;
} nir_instr_liveness;

nir_instr_liveness *
nir_live_ssa_defs_per_instr(nir_function_impl *impl);

/*
 * Constructors for shaders, registers, variables, and functions.
 */

nir_shader *nir_shader_create(void *mem_ctx,
                              gl_shader_stage stage,
                              const nir_shader_compiler_options *options,
                              shader_info *si);

nir_register *nir_local_reg_create(nir_function_impl *impl);

void nir_reg_remove(nir_register *reg);

/** Adds a variable to the appropriate list in nir_shader */
void nir_shader_add_variable(nir_shader *shader, nir_variable *var);

/** Appends a function-temporary variable to impl's locals list */
static inline void
nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
{
   /* Only nir_var_function_temp variables may live on a function_impl */
   assert(var->data.mode == nir_var_function_temp);
   exec_list_push_tail(&impl->locals, &var->node);
}

/** creates a variable, sets a few defaults, and adds it to the list */
nir_variable *nir_variable_create(nir_shader *shader,
                                  nir_variable_mode mode,
                                  const struct glsl_type *type,
                                  const char *name);
/** creates a local variable and adds it to the list */
nir_variable *nir_local_variable_create(nir_function_impl *impl,
                                        const struct glsl_type *type,
                                        const char *name);

nir_variable *nir_find_variable_with_location(nir_shader *shader,
                                              nir_variable_mode mode,
                                              unsigned location);

nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
                                                     nir_variable_mode mode,
                                                     unsigned location);

void nir_sort_variables_with_modes(nir_shader *shader,
                                   int (*compar)(const nir_variable *,
                                                 const nir_variable *),
                                   nir_variable_mode modes);

/** creates a function and adds it to the shader's list of functions */
nir_function *nir_function_create(nir_shader *shader, const char *name);

nir_function_impl *nir_function_impl_create(nir_function *func);
/** creates a function_impl that isn't tied to any particular function */
nir_function_impl
*nir_function_impl_create_bare(nir_shader *shader);

nir_block *nir_block_create(nir_shader *shader);
nir_if *nir_if_create(nir_shader *shader);
nir_loop *nir_loop_create(nir_shader *shader);

nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);

/** requests that the given pieces of metadata be generated */
void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
/** dirties all but the preserved metadata */
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
/** Preserves all metadata for the given shader */
void nir_shader_preserve_all_metadata(nir_shader *shader);

/*
 * Constructors for the individual instruction types.
 */

/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);

nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
                                        nir_deref_type deref_type);

nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);

nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
                                                  unsigned num_components,
                                                  unsigned bit_size);

nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
                                                nir_intrinsic_op op);

nir_call_instr *nir_call_instr_create(nir_shader *shader,
                                      nir_function *callee);

/** Creates a NIR texture instruction */
nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);

nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src);

nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);

nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
                                                unsigned num_components,
                                                unsigned bit_size);

nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);

/**
 * NIR Cursors and
Instruction Insertion API
 * @{
 *
 * A tiny struct representing a point to insert/extract instructions or
 * control flow nodes.  Helps reduce the combinatorial explosion of possible
 * points to insert/extract.
 *
 * \sa nir_control_flow.h
 */
typedef enum {
   nir_cursor_before_block,
   nir_cursor_after_block,
   nir_cursor_before_instr,
   nir_cursor_after_instr,
} nir_cursor_option;

/** A position in the IR, anchored to either a block or an instruction.
 * The valid union member is selected by option.
 */
typedef struct {
   nir_cursor_option option;
   union {
      nir_block *block; /* valid for the *_block options */
      nir_instr *instr; /* valid for the *_instr options */
   };
} nir_cursor;

/** Returns the block the cursor currently lies in */
static inline nir_block *
nir_cursor_current_block(nir_cursor cursor)
{
   if (cursor.option == nir_cursor_before_instr ||
       cursor.option == nir_cursor_after_instr) {
      return cursor.instr->block;
   } else {
      return cursor.block;
   }
}

bool nir_cursors_equal(nir_cursor a, nir_cursor b);

/** Cursor at the very beginning of the given block */
static inline nir_cursor
nir_before_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_block;
   cursor.block = block;
   return cursor;
}

/** Cursor at the very end of the given block */
static inline nir_cursor
nir_after_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_block;
   cursor.block = block;
   return cursor;
}

/** Cursor immediately before the given instruction */
static inline nir_cursor
nir_before_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_instr;
   cursor.instr = instr;
   return cursor;
}

/** Cursor immediately after the given instruction */
static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_instr;
   cursor.instr = instr;
   return cursor;
}

/** Cursor after the block's last phi, or at the block start if it has none */
static inline nir_cursor
nir_before_block_after_phis(nir_block *block)
{
   nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
   if (last_phi)
      return nir_after_instr(&last_phi->instr);
   else
      return nir_before_block(block);
}
/** Cursor at the end of the block, but before a trailing jump if present */
static inline nir_cursor
nir_after_block_before_jump(nir_block *block)
{
   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr && last_instr->type == nir_instr_type_jump) {
      return nir_before_instr(last_instr);
   } else {
      return nir_after_block(block);
   }
}

/** Cursor at the latest point where code feeding the given source may be
 * inserted.
 *
 * - For an if condition (is_if_condition), that is the end of the block
 *   preceding the if.
 * - For a phi source, that is the end (before any jump) of the predecessor
 *   block the source comes in from.
 * - Otherwise, immediately before the instruction that owns the source.
 */
static inline nir_cursor
nir_before_src(nir_src *src, bool is_if_condition)
{
   if (is_if_condition) {
      nir_block *prev_block =
         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
      assert(!nir_block_ends_in_jump(prev_block));
      return nir_after_block(prev_block);
   } else if (src->parent_instr->type == nir_instr_type_phi) {
#ifndef NDEBUG
      /* Debug-only sanity check: src must actually be one of the phi's
       * sources.
       */
      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
      bool found = false;
      nir_foreach_phi_src(phi_src, cond_phi) {
         if (phi_src->src.ssa == src->ssa) {
            found = true;
            break;
         }
      }
      assert(found);
#endif
      /* The LIST_ENTRY macro is a generic container-of macro, it just happens
       * to have a more specific name.
       */
      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
      return nir_after_block_before_jump(phi_src->pred);
   } else {
      return nir_before_instr(src->parent_instr);
   }
}

/** Cursor just before the given cf_node (end of the preceding block) */
static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_before_block(nir_cf_node_as_block(node));

   return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
}

/** Cursor just after the given cf_node (start of the following block) */
static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
}

/** Cursor before the block's first non-phi instruction, or at its end if
 * the block contains only phis.
 */
static inline nir_cursor
nir_after_phis(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         return nir_before_instr(instr);
   }
   return nir_after_block(block);
}

/** Like nir_after_instr(), but skips past the phi group if instr is a phi */
static inline nir_cursor
nir_after_instr_and_phis(nir_instr *instr)
{
   if (instr->type == nir_instr_type_phi)
      return nir_after_phis(instr->block);
   else
      return nir_after_instr(instr);
}

/** Cursor after the given cf_node, past the following block's phis */
static inline nir_cursor
nir_after_cf_node_and_phis(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));

   return nir_after_phis(block);
}

/** Cursor before the first cf_node of a cf_node list */
static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *first_node = exec_node_data(nir_cf_node,
                                            exec_list_get_head(cf_list), node);
   return nir_before_cf_node(first_node);
}

/** Cursor after the last cf_node of a cf_node list */
static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *last_node = exec_node_data(nir_cf_node,
                                           exec_list_get_tail(cf_list), node);
   return nir_after_cf_node(last_node);
}
/**
 * Insert a NIR instruction at the given cursor.
 *
 * Note: This does not update the cursor.
 */
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);

bool nir_instr_move(nir_cursor cursor, nir_instr *instr);

/* Convenience wrappers around nir_instr_insert() for the common anchors. */

static inline void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
{
   nir_instr_insert(nir_before_instr(instr), before);
}

static inline void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
   nir_instr_insert(nir_after_instr(instr), after);
}

static inline void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
   nir_instr_insert(nir_before_block(block), before);
}

static inline void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
   nir_instr_insert(nir_after_block(block), after);
}

static inline void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_node(node), before);
}

static inline void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_node(node), after);
}

static inline void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_list(list), before);
}

static inline void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_list(list), after);
}

void nir_instr_remove_v(nir_instr *instr);
void nir_instr_free(nir_instr *instr);
void nir_instr_free_list(struct exec_list *list);

/** Removes instr from its block and returns a cursor at its old position
 * (after the previous instruction, or at the block start if it was first).
 *
 * NOTE(review): removal and freeing are separate here -- see nir_instr_free;
 * confirm ownership before assuming the instruction memory is reclaimed.
 */
static inline nir_cursor
nir_instr_remove(nir_instr *instr)
{
   nir_cursor cursor;
   nir_instr *prev = nir_instr_prev(instr);
   if (prev) {
      cursor = nir_after_instr(prev);
   } else {
      cursor = nir_before_block(instr->block);
   }
   nir_instr_remove_v(instr);
   return cursor;
}

nir_cursor nir_instr_free_and_dce(nir_instr *instr);

/** @} */

nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);

/* Iteration callbacks: return false to stop iterating early. */
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
                         void *state);
static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
                                       nir_foreach_src_cb cb,
                                       void *state);

nir_const_value *nir_src_as_const_value(nir_src src);

/* Defines nir_src_as_<name>(): returns the source's parent instruction cast
 * to c_type when it is SSA and of the given instruction type, NULL otherwise.
 */
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro)                \
static inline c_type *                                                  \
nir_src_as_ ## name (nir_src src)                                       \
{                                                                       \
    return src.is_ssa && src.ssa->parent_instr->type == type_enum       \
           ? 
cast_macro(src.ssa->parent_instr) : NULL; \ 4271} 4272 4273NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 4274NIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 4275 nir_instr_type_intrinsic, nir_instr_as_intrinsic) 4276NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 4277 4278bool nir_src_is_dynamically_uniform(nir_src src); 4279bool nir_srcs_equal(nir_src src1, nir_src src2); 4280bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 4281 4282static inline void 4283nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr, 4284 nir_src *src, nir_ssa_def *new_ssa) 4285{ 4286 assert(src->parent_instr == instr); 4287 assert(src->is_ssa && src->ssa); 4288 list_del(&src->use_link); 4289 src->ssa = new_ssa; 4290 list_addtail(&src->use_link, &new_ssa->uses); 4291} 4292 4293void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 4294void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 4295 4296static inline void 4297nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt, 4298 nir_src *src, nir_ssa_def *new_ssa) 4299{ 4300 assert(src->parent_if == if_stmt); 4301 assert(src->is_ssa && src->ssa); 4302 list_del(&src->use_link); 4303 src->ssa = new_ssa; 4304 list_addtail(&src->use_link, &new_ssa->if_uses); 4305} 4306 4307void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 4308void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 4309 nir_dest new_dest); 4310 4311void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 4312 unsigned num_components, unsigned bit_size, 4313 const char *name); 4314void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 4315 unsigned num_components, unsigned bit_size); 4316static inline void 4317nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 4318 const struct glsl_type *type, 4319 const char *name) 4320{ 4321 assert(glsl_type_is_vector_or_scalar(type)); 4322 nir_ssa_dest_init(instr, dest, 
glsl_get_components(type), 4323 glsl_get_bit_size(type), name); 4324} 4325void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa); 4326void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src); 4327void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa, 4328 nir_instr *after_me); 4329 4330nir_component_mask_t nir_src_components_read(const nir_src *src); 4331nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 4332 4333static inline bool 4334nir_ssa_def_is_unused(nir_ssa_def *ssa) 4335{ 4336 return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses); 4337} 4338 4339 4340/** Returns the next block, disregarding structure 4341 * 4342 * The ordering is deterministic but has no guarantees beyond that. In 4343 * particular, it is not guaranteed to be dominance-preserving. 4344 */ 4345nir_block *nir_block_unstructured_next(nir_block *block); 4346nir_block *nir_unstructured_start_block(nir_function_impl *impl); 4347 4348#define nir_foreach_block_unstructured(block, impl) \ 4349 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ 4350 block = nir_block_unstructured_next(block)) 4351 4352#define nir_foreach_block_unstructured_safe(block, impl) \ 4353 for (nir_block *block = nir_unstructured_start_block(impl), \ 4354 *next = nir_block_unstructured_next(block); \ 4355 block != NULL; \ 4356 block = next, next = nir_block_unstructured_next(block)) 4357 4358/* 4359 * finds the next basic block in source-code order, returns NULL if there is 4360 * none 4361 */ 4362 4363nir_block *nir_block_cf_tree_next(nir_block *block); 4364 4365/* Performs the opposite of nir_block_cf_tree_next() */ 4366 4367nir_block *nir_block_cf_tree_prev(nir_block *block); 4368 4369/* Gets the first block in a CF node in source-code order */ 4370 4371nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 4372 4373/* Gets the last block in a CF node in source-code order */ 4374 4375nir_block 
*nir_cf_node_cf_tree_last(nir_cf_node *node); 4376 4377/* Gets the next block after a CF node in source-code order */ 4378 4379nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 4380 4381/* Macros for loops that visit blocks in source-code order */ 4382 4383#define nir_foreach_block(block, impl) \ 4384 for (nir_block *block = nir_start_block(impl); block != NULL; \ 4385 block = nir_block_cf_tree_next(block)) 4386 4387#define nir_foreach_block_safe(block, impl) \ 4388 for (nir_block *block = nir_start_block(impl), \ 4389 *next = nir_block_cf_tree_next(block); \ 4390 block != NULL; \ 4391 block = next, next = nir_block_cf_tree_next(block)) 4392 4393#define nir_foreach_block_reverse(block, impl) \ 4394 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 4395 block = nir_block_cf_tree_prev(block)) 4396 4397#define nir_foreach_block_reverse_safe(block, impl) \ 4398 for (nir_block *block = nir_impl_last_block(impl), \ 4399 *prev = nir_block_cf_tree_prev(block); \ 4400 block != NULL; \ 4401 block = prev, prev = nir_block_cf_tree_prev(block)) 4402 4403#define nir_foreach_block_in_cf_node(block, node) \ 4404 for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 4405 block != nir_cf_node_cf_tree_next(node); \ 4406 block = nir_block_cf_tree_next(block)) 4407 4408/* If the following CF node is an if, this function returns that if. 4409 * Otherwise, it returns NULL. 
4410 */ 4411nir_if *nir_block_get_following_if(nir_block *block); 4412 4413nir_loop *nir_block_get_following_loop(nir_block *block); 4414 4415nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx); 4416 4417void nir_index_local_regs(nir_function_impl *impl); 4418void nir_index_ssa_defs(nir_function_impl *impl); 4419unsigned nir_index_instrs(nir_function_impl *impl); 4420 4421void nir_index_blocks(nir_function_impl *impl); 4422 4423unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); 4424unsigned nir_function_impl_index_vars(nir_function_impl *impl); 4425 4426void nir_print_shader(nir_shader *shader, FILE *fp); 4427void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 4428void nir_print_instr(const nir_instr *instr, FILE *fp); 4429void nir_print_deref(const nir_deref_instr *deref, FILE *fp); 4430void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations); 4431#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL) 4432#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL) 4433#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL) 4434#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations) 4435 4436char *nir_shader_as_str(nir_shader *nir, void *mem_ctx); 4437char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx); 4438 4439/** Shallow clone of a single instruction. */ 4440nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 4441 4442/** Shallow clone of a single ALU instruction. 
*/ 4443nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig); 4444 4445nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 4446nir_function_impl *nir_function_impl_clone(nir_shader *shader, 4447 const nir_function_impl *fi); 4448nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 4449nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 4450 4451void nir_shader_replace(nir_shader *dest, nir_shader *src); 4452 4453void nir_shader_serialize_deserialize(nir_shader *s); 4454 4455#ifndef NDEBUG 4456void nir_validate_shader(nir_shader *shader, const char *when); 4457void nir_validate_ssa_dominance(nir_shader *shader, const char *when); 4458void nir_metadata_set_validation_flag(nir_shader *shader); 4459void nir_metadata_check_validation_flag(nir_shader *shader); 4460 4461static inline bool 4462should_skip_nir(const char *name) 4463{ 4464 static const char *list = NULL; 4465 if (!list) { 4466 /* Comma separated list of names to skip. 
*/ 4467 list = getenv("NIR_SKIP"); 4468 if (!list) 4469 list = ""; 4470 } 4471 4472 if (!list[0]) 4473 return false; 4474 4475 return comma_separated_list_contains(list, name); 4476} 4477 4478static inline bool 4479should_clone_nir(void) 4480{ 4481 static int should_clone = -1; 4482 if (should_clone < 0) 4483 should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 4484 4485 return should_clone; 4486} 4487 4488static inline bool 4489should_serialize_deserialize_nir(void) 4490{ 4491 static int test_serialize = -1; 4492 if (test_serialize < 0) 4493 test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false); 4494 4495 return test_serialize; 4496} 4497 4498static inline bool 4499should_print_nir(nir_shader *shader) 4500{ 4501 static int should_print = -1; 4502 if (should_print < 0) 4503 should_print = env_var_as_unsigned("NIR_PRINT", 0); 4504 4505 if (should_print == 1) 4506 return !shader->info.internal; 4507 4508 return should_print; 4509} 4510#else 4511static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4512static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; } 4513static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 4514static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 4515static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; } 4516static inline bool should_clone_nir(void) { return false; } 4517static inline bool should_serialize_deserialize_nir(void) { return false; } 4518static inline bool should_print_nir(nir_shader *shader) { return false; } 4519#endif /* NDEBUG */ 4520 4521#define _PASS(pass, nir, do_pass) do { \ 4522 if (should_skip_nir(#pass)) { \ 4523 printf("skipping %s\n", #pass); \ 4524 break; \ 4525 } \ 4526 do_pass \ 4527 if (should_clone_nir()) { \ 4528 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 4529 
nir_shader_replace(nir, clone); \ 4530 } \ 4531 if (should_serialize_deserialize_nir()) { \ 4532 nir_shader_serialize_deserialize(nir); \ 4533 } \ 4534} while (0) 4535 4536#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \ 4537 nir_metadata_set_validation_flag(nir); \ 4538 if (should_print_nir(nir)) \ 4539 printf("%s\n", #pass); \ 4540 if (pass(nir, ##__VA_ARGS__)) { \ 4541 nir_validate_shader(nir, "after " #pass); \ 4542 progress = true; \ 4543 if (should_print_nir(nir)) \ 4544 nir_print_shader(nir, stdout); \ 4545 nir_metadata_check_validation_flag(nir); \ 4546 } \ 4547) 4548 4549#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \ 4550 if (should_print_nir(nir)) \ 4551 printf("%s\n", #pass); \ 4552 pass(nir, ##__VA_ARGS__); \ 4553 nir_validate_shader(nir, "after " #pass); \ 4554 if (should_print_nir(nir)) \ 4555 nir_print_shader(nir, stdout); \ 4556) 4557 4558#define NIR_SKIP(name) should_skip_nir(#name) 4559 4560/** An instruction filtering callback with writemask 4561 * 4562 * Returns true if the instruction should be processed with the associated 4563 * writemask and false otherwise. 4564 */ 4565typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *, 4566 unsigned writemask, const void *); 4567 4568/** A simple instruction lowering callback 4569 * 4570 * Many instruction lowering passes can be written as a simple function which 4571 * takes an instruction as its input and returns a sequence of instructions 4572 * that implement the consumed instruction. This function type represents 4573 * such a lowering function. When called, a function with this prototype 4574 * should either return NULL indicating that no lowering needs to be done or 4575 * emit a sequence of instructions using the provided builder (whose cursor 4576 * will already be placed after the instruction to be lowered) and return the 4577 * resulting nir_ssa_def. 
4578 */ 4579typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *, 4580 nir_instr *, void *); 4581 4582/** 4583 * Special return value for nir_lower_instr_cb when some progress occurred 4584 * (like changing an input to the instr) that didn't result in a replacement 4585 * SSA def being generated. 4586 */ 4587#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1) 4588 4589/** 4590 * Special return value for nir_lower_instr_cb when some progress occurred 4591 * that should remove the current instruction that doesn't create an output 4592 * (like a store) 4593 */ 4594 4595#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2) 4596 4597/** Iterate over all the instructions in a nir_function_impl and lower them 4598 * using the provided callbacks 4599 * 4600 * This function implements the guts of a standard lowering pass for you. It 4601 * iterates over all of the instructions in a nir_function_impl and calls the 4602 * filter callback on each one. If the filter callback returns true, it then 4603 * calls the lowering call back on the instruction. (Splitting it this way 4604 * allows us to avoid some save/restore work for instructions we know won't be 4605 * lowered.) If the instruction is dead after the lowering is complete, it 4606 * will be removed. If new instructions are added, the lowering callback will 4607 * also be called on them in case multiple lowerings are required. 4608 * 4609 * If the callback indicates that the original instruction is replaced (either 4610 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the 4611 * instruction is removed along with any now-dead SSA defs it used. 4612 * 4613 * The metadata for the nir_function_impl will also be updated. If any blocks 4614 * are added (they cannot be removed), dominance and block indices will be 4615 * invalidated. 
4616 */ 4617bool nir_function_impl_lower_instructions(nir_function_impl *impl, 4618 nir_instr_filter_cb filter, 4619 nir_lower_instr_cb lower, 4620 void *cb_data); 4621bool nir_shader_lower_instructions(nir_shader *shader, 4622 nir_instr_filter_cb filter, 4623 nir_lower_instr_cb lower, 4624 void *cb_data); 4625 4626void nir_calc_dominance_impl(nir_function_impl *impl); 4627void nir_calc_dominance(nir_shader *shader); 4628 4629nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 4630bool nir_block_dominates(nir_block *parent, nir_block *child); 4631bool nir_block_is_unreachable(nir_block *block); 4632 4633void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 4634void nir_dump_dom_tree(nir_shader *shader, FILE *fp); 4635 4636void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 4637void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 4638 4639void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 4640void nir_dump_cfg(nir_shader *shader, FILE *fp); 4641 4642void nir_gs_count_vertices_and_primitives(const nir_shader *shader, 4643 int *out_vtxcnt, 4644 int *out_prmcnt, 4645 unsigned num_streams); 4646 4647bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 4648bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 4649bool nir_split_var_copies(nir_shader *shader); 4650bool nir_split_per_member_structs(nir_shader *shader); 4651bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 4652 4653bool nir_lower_returns_impl(nir_function_impl *impl); 4654bool nir_lower_returns(nir_shader *shader); 4655 4656void nir_inline_function_impl(struct nir_builder *b, 4657 const nir_function_impl *impl, 4658 nir_ssa_def **params, 4659 struct hash_table *shader_var_remap); 4660bool nir_inline_functions(nir_shader *shader); 4661 4662void nir_find_inlinable_uniforms(nir_shader *shader); 4663void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms, 4664 const uint32_t 
*uniform_values, 4665 const uint16_t *uniform_dw_offsets); 4666 4667bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim); 4668 4669void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 4670void nir_lower_deref_copy_instr(struct nir_builder *b, 4671 nir_intrinsic_instr *copy); 4672bool nir_lower_var_copies(nir_shader *shader); 4673 4674bool nir_opt_memcpy(nir_shader *shader); 4675bool nir_lower_memcpy(nir_shader *shader); 4676 4677void nir_fixup_deref_modes(nir_shader *shader); 4678 4679bool nir_lower_global_vars_to_local(nir_shader *shader); 4680 4681typedef enum { 4682 nir_lower_direct_array_deref_of_vec_load = (1 << 0), 4683 nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 4684 nir_lower_direct_array_deref_of_vec_store = (1 << 2), 4685 nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 4686} nir_lower_array_deref_of_vec_options; 4687 4688bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 4689 nir_lower_array_deref_of_vec_options options); 4690 4691bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes, 4692 uint32_t max_lower_array_len); 4693 4694bool nir_lower_indirect_builtin_uniform_derefs(nir_shader *shader); 4695 4696bool nir_lower_locals_to_regs(nir_shader *shader); 4697 4698void nir_lower_io_to_temporaries(nir_shader *shader, 4699 nir_function_impl *entrypoint, 4700 bool outputs, bool inputs); 4701 4702bool nir_lower_vars_to_scratch(nir_shader *shader, 4703 nir_variable_mode modes, 4704 int size_threshold, 4705 glsl_type_size_align_func size_align); 4706 4707void nir_lower_clip_halfz(nir_shader *shader); 4708 4709void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 4710 4711void nir_gather_ssa_types(nir_function_impl *impl, 4712 BITSET_WORD *float_types, 4713 BITSET_WORD *int_types); 4714 4715void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 4716 unsigned *size, 4717 int (*type_size)(const struct glsl_type 
*, bool)); 4718 4719/* Some helpers to do very simple linking */ 4720bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 4721bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, 4722 uint64_t *used_by_other_stage, 4723 uint64_t *used_by_other_stage_patches); 4724void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 4725 bool default_to_smooth_interp); 4726void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 4727bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 4728void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer); 4729 4730bool nir_lower_amul(nir_shader *shader, 4731 int (*type_size)(const struct glsl_type *, bool)); 4732 4733bool nir_lower_ubo_vec4(nir_shader *shader); 4734 4735void nir_assign_io_var_locations(nir_shader *shader, 4736 nir_variable_mode mode, 4737 unsigned *size, 4738 gl_shader_stage stage); 4739 4740typedef struct { 4741 uint8_t num_linked_io_vars; 4742 uint8_t num_linked_patch_io_vars; 4743} nir_linked_io_var_info; 4744 4745nir_linked_io_var_info 4746nir_assign_linked_io_var_locations(nir_shader *producer, 4747 nir_shader *consumer); 4748 4749typedef enum { 4750 /* If set, this causes all 64-bit IO operations to be lowered on-the-fly 4751 * to 32-bit operations. This is only valid for nir_var_shader_in/out 4752 * modes. 4753 */ 4754 nir_lower_io_lower_64bit_to_32 = (1 << 0), 4755 4756 /* If set, this forces all non-flat fragment shader inputs to be 4757 * interpolated as if with the "sample" qualifier. This requires 4758 * nir_shader_compiler_options::use_interpolated_input_intrinsics. 
4759 */ 4760 nir_lower_io_force_sample_interpolation = (1 << 1), 4761} nir_lower_io_options; 4762bool nir_lower_io(nir_shader *shader, 4763 nir_variable_mode modes, 4764 int (*type_size)(const struct glsl_type *, bool), 4765 nir_lower_io_options); 4766 4767bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes); 4768 4769bool 4770nir_lower_vars_to_explicit_types(nir_shader *shader, 4771 nir_variable_mode modes, 4772 glsl_type_size_align_func type_info); 4773void 4774nir_gather_explicit_io_initializers(nir_shader *shader, 4775 void *dst, size_t dst_size, 4776 nir_variable_mode mode); 4777 4778bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes); 4779 4780typedef enum { 4781 /** 4782 * An address format which is a simple 32-bit global GPU address. 4783 */ 4784 nir_address_format_32bit_global, 4785 4786 /** 4787 * An address format which is a simple 64-bit global GPU address. 4788 */ 4789 nir_address_format_64bit_global, 4790 4791 /** 4792 * An address format which is a 64-bit global base address and a 32-bit 4793 * offset. 4794 * 4795 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4796 * address stored with the low bits in .x and high bits in .y, .z is 4797 * undefined, and .w is an offset. This is intended to match 4798 * 64bit_bounded_global but without the bounds checking. 4799 */ 4800 nir_address_format_64bit_global_32bit_offset, 4801 4802 /** 4803 * An address format which is a bounds-checked 64-bit global GPU address. 4804 * 4805 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4806 * address stored with the low bits in .x and high bits in .y, .z is a 4807 * size, and .w is an offset. When the final I/O operation is lowered, .w 4808 * is checked against .z and the operation is predicated on the result. 
4809 */ 4810 nir_address_format_64bit_bounded_global, 4811 4812 /** 4813 * An address format which is comprised of a vec2 where the first 4814 * component is a buffer index and the second is an offset. 4815 */ 4816 nir_address_format_32bit_index_offset, 4817 4818 /** 4819 * An address format which is a 64-bit value, where the high 32 bits 4820 * are a buffer index, and the low 32 bits are an offset. 4821 */ 4822 nir_address_format_32bit_index_offset_pack64, 4823 4824 /** 4825 * An address format which is comprised of a vec3 where the first two 4826 * components specify the buffer and the third is an offset. 4827 */ 4828 nir_address_format_vec2_index_32bit_offset, 4829 4830 /** 4831 * An address format which represents generic pointers with a 62-bit 4832 * pointer and a 2-bit enum in the top two bits. The top two bits have 4833 * the following meanings: 4834 * 4835 * - 0x0: Global memory 4836 * - 0x1: Shared memory 4837 * - 0x2: Scratch memory 4838 * - 0x3: Global memory 4839 * 4840 * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of 4841 * addresses. Valid global memory addresses may naturally have either 0 or 4842 * ~0 as their high bits. 4843 * 4844 * Shared and scratch pointers are represented as 32-bit offsets with the 4845 * top 32 bits only being used for the enum. This allows us to avoid 4846 * 64-bit address calculations in a bunch of cases. 4847 */ 4848 nir_address_format_62bit_generic, 4849 4850 /** 4851 * An address format which is a simple 32-bit offset. 4852 */ 4853 nir_address_format_32bit_offset, 4854 4855 /** 4856 * An address format which is a simple 32-bit offset cast to 64-bit. 4857 */ 4858 nir_address_format_32bit_offset_as_64bit, 4859 4860 /** 4861 * An address format representing a purely logical addressing model. In 4862 * this model, all deref chains must be complete from the dereference 4863 * operation to the variable. Cast derefs are not allowed. 
These 4864 * addresses will be 32-bit scalars but the format is immaterial because 4865 * you can always chase the chain. 4866 */ 4867 nir_address_format_logical, 4868} nir_address_format; 4869 4870static inline unsigned 4871nir_address_format_bit_size(nir_address_format addr_format) 4872{ 4873 switch (addr_format) { 4874 case nir_address_format_32bit_global: return 32; 4875 case nir_address_format_64bit_global: return 64; 4876 case nir_address_format_64bit_global_32bit_offset: return 32; 4877 case nir_address_format_64bit_bounded_global: return 32; 4878 case nir_address_format_32bit_index_offset: return 32; 4879 case nir_address_format_32bit_index_offset_pack64: return 64; 4880 case nir_address_format_vec2_index_32bit_offset: return 32; 4881 case nir_address_format_62bit_generic: return 64; 4882 case nir_address_format_32bit_offset: return 32; 4883 case nir_address_format_32bit_offset_as_64bit: return 64; 4884 case nir_address_format_logical: return 32; 4885 } 4886 unreachable("Invalid address format"); 4887} 4888 4889static inline unsigned 4890nir_address_format_num_components(nir_address_format addr_format) 4891{ 4892 switch (addr_format) { 4893 case nir_address_format_32bit_global: return 1; 4894 case nir_address_format_64bit_global: return 1; 4895 case nir_address_format_64bit_global_32bit_offset: return 4; 4896 case nir_address_format_64bit_bounded_global: return 4; 4897 case nir_address_format_32bit_index_offset: return 2; 4898 case nir_address_format_32bit_index_offset_pack64: return 1; 4899 case nir_address_format_vec2_index_32bit_offset: return 3; 4900 case nir_address_format_62bit_generic: return 1; 4901 case nir_address_format_32bit_offset: return 1; 4902 case nir_address_format_32bit_offset_as_64bit: return 1; 4903 case nir_address_format_logical: return 1; 4904 } 4905 unreachable("Invalid address format"); 4906} 4907 4908static inline const struct glsl_type * 4909nir_address_format_to_glsl_type(nir_address_format addr_format) 4910{ 4911 unsigned 
bit_size = nir_address_format_bit_size(addr_format); 4912 assert(bit_size == 32 || bit_size == 64); 4913 return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 4914 nir_address_format_num_components(addr_format)); 4915} 4916 4917const nir_const_value *nir_address_format_null_value(nir_address_format addr_format); 4918 4919nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4920 nir_address_format addr_format); 4921 4922nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4923 nir_address_format addr_format); 4924 4925nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 4926 nir_deref_instr *deref, 4927 nir_ssa_def *base_addr, 4928 nir_address_format addr_format); 4929 4930bool nir_get_explicit_deref_align(nir_deref_instr *deref, 4931 bool default_to_type_align, 4932 uint32_t *align_mul, 4933 uint32_t *align_offset); 4934 4935void nir_lower_explicit_io_instr(struct nir_builder *b, 4936 nir_intrinsic_instr *io_instr, 4937 nir_ssa_def *addr, 4938 nir_address_format addr_format); 4939 4940bool nir_lower_explicit_io(nir_shader *shader, 4941 nir_variable_mode modes, 4942 nir_address_format); 4943 4944bool 4945nir_lower_shader_calls(nir_shader *shader, 4946 nir_address_format address_format, 4947 unsigned stack_alignment, 4948 nir_shader ***resume_shaders_out, 4949 uint32_t *num_resume_shaders_out, 4950 void *mem_ctx); 4951 4952nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 4953nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 4954nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); 4955 4956bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage); 4957 4958bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 4959bool nir_lower_regs_to_ssa(nir_shader *shader); 4960bool nir_lower_vars_to_ssa(nir_shader *shader); 4961 4962bool nir_remove_dead_derefs(nir_shader *shader); 4963bool 
nir_remove_dead_derefs_impl(nir_function_impl *impl); 4964 4965typedef struct nir_remove_dead_variables_options { 4966 bool (*can_remove_var)(nir_variable *var, void *data); 4967 void *can_remove_var_data; 4968} nir_remove_dead_variables_options; 4969 4970bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, 4971 const nir_remove_dead_variables_options *options); 4972 4973bool nir_lower_variable_initializers(nir_shader *shader, 4974 nir_variable_mode modes); 4975bool nir_zero_initialize_shared_memory(nir_shader *shader, 4976 const unsigned shared_size, 4977 const unsigned chunk_size); 4978 4979bool nir_move_vec_src_uses_to_dest(nir_shader *shader); 4980bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb, 4981 const void *_data); 4982void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 4983 bool alpha_to_one, 4984 const gl_state_index16 *alpha_ref_state_tokens); 4985bool nir_lower_alu(nir_shader *shader); 4986 4987bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, 4988 bool always_precise); 4989 4990bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 4991bool nir_lower_bool_to_bitsize(nir_shader *shader); 4992bool nir_lower_bool_to_float(nir_shader *shader); 4993bool nir_lower_bool_to_int32(nir_shader *shader); 4994bool nir_opt_simplify_convert_alu_types(nir_shader *shader); 4995bool nir_lower_convert_alu_types(nir_shader *shader, 4996 bool (*should_lower)(nir_intrinsic_instr *)); 4997bool nir_lower_constant_convert_alu_types(nir_shader *shader); 4998bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); 4999bool nir_lower_int_to_float(nir_shader *shader); 5000bool nir_lower_load_const_to_scalar(nir_shader *shader); 5001bool nir_lower_read_invocation_to_scalar(nir_shader *shader); 5002bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all); 5003void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 
void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
                                                  bool outputs_only);
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
bool nir_vectorize_tess_levels(nir_shader *shader);

/* Fragment-output and coordinate-related lowering passes. */
bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
void nir_lower_viewport_transform(nir_shader *shader);
bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);

bool nir_lower_is_helper_invocation(nir_shader *shader);

/** Options for nir_lower_subgroups(); each flag enables one lowering. */
typedef struct nir_lower_subgroups_options {
   uint8_t subgroup_size;
   uint8_t ballot_bit_size;
   uint8_t ballot_components;
   bool lower_to_scalar:1;
   bool lower_vote_trivial:1;
   bool lower_vote_eq:1;
   bool lower_subgroup_masks:1;
   bool lower_shuffle:1;
   bool lower_shuffle_to_32bit:1;
   bool lower_shuffle_to_swizzle_amd:1;
   bool lower_quad:1;
   bool lower_quad_broadcast_dynamic:1;
   bool lower_quad_broadcast_dynamic_to_const:1;
   bool lower_elect:1;
   bool lower_read_invocation_to_cond:1;
} nir_lower_subgroups_options;

bool nir_lower_subgroups(nir_shader *shader,
                         const nir_lower_subgroups_options *options);

bool nir_lower_system_values(nir_shader *shader);

/** Options for nir_lower_compute_system_values(). */
typedef struct nir_lower_compute_system_values_options {
   bool has_base_global_invocation_id:1;
   bool has_base_workgroup_id:1;
   bool shuffle_local_ids_for_quad_derivatives:1;
   bool lower_local_invocation_index:1;
} nir_lower_compute_system_values_options;

bool nir_lower_compute_system_values(nir_shader *shader,
                                     const nir_lower_compute_system_values_options *options);

/* Selects which fragment system values nir_lower_sysvals_to_varyings()
 * rewrites as varyings.
 */
struct nir_lower_sysvals_to_varyings_options {
   bool frag_coord:1;
   bool front_face:1;
   bool point_coord:1;
};

bool
nir_lower_sysvals_to_varyings(nir_shader *shader,
                              const struct nir_lower_sysvals_to_varyings_options *options);

enum PACKED nir_lower_tex_packing {
   /** No packing */
   nir_lower_tex_packing_none = 0,
   /**
    * The sampler returns up to 2 32-bit words of half floats or 16-bit signed
    * or unsigned ints based on the sampler type
    */
   nir_lower_tex_packing_16,
   /** The sampler returns 1 32-bit word of 4x8 unorm */
   nir_lower_tex_packing_8,
};

/** Options controlling nir_lower_tex(). */
typedef struct nir_lower_tex_options {
   /**
    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
    * sampler types a texture projector is lowered.
    */
   unsigned lower_txp;

   /**
    * If true, lower away nir_tex_src_offset for all texelfetch instructions.
    */
   bool lower_txf_offset;

   /**
    * If true, lower away nir_tex_src_offset for all rect textures.
    */
   bool lower_rect_offset;

   /**
    * If true, lower rect textures to 2D, using txs to fetch the
    * texture dimensions and dividing the texture coords by the
    * texture dims to normalize.
    */
   bool lower_rect;

   /**
    * If true, convert yuv to rgb.  One bitmask per external-texture layout.
    */
   unsigned lower_y_uv_external;
   unsigned lower_y_u_v_external;
   unsigned lower_yx_xuxv_external;
   unsigned lower_xy_uxvx_external;
   unsigned lower_ayuv_external;
   unsigned lower_xyuv_external;
   unsigned lower_yuv_external;
   unsigned lower_yu_yv_external;
   unsigned lower_y41x_external;
   unsigned bt709_external;
   unsigned bt2020_external;

   /**
    * To emulate certain texture wrap modes, this can be used
    * to saturate the specified tex coord to [0.0, 1.0]. The
    * bits are according to sampler #, ie. if, for example:
    *
    *   (conf->saturate_s & (1 << n))
    *
    * is true, then the s coord for sampler n is saturated.
    *
    * Note that clamping must happen *after* projector lowering
    * so any projected texture sample instruction with a clamped
    * coordinate gets automatically lowered, regardless of the
    * 'lower_txp' setting.
    */
   unsigned saturate_s;
   unsigned saturate_t;
   unsigned saturate_r;

   /* Bitmask of textures that need swizzling.
    *
    * If (swizzle_result & (1 << texture_index)), then the swizzle in
    * swizzles[texture_index] is applied to the result of the texturing
    * operation.
    */
   unsigned swizzle_result;

   /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles
    * while 4 and 5 represent 0 and 1 respectively.
    *
    * Indexed by texture-id.
    */
   uint8_t swizzles[32][4];

   /* Can be used to scale sampled values in the range required by the
    * format.
    *
    * Indexed by texture-id.
    */
   float scale_factors[32];

   /**
    * Bitmap of textures that need srgb to linear conversion. If
    * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
    * of the texture are lowered to linear.
    */
   unsigned lower_srgb;

   /**
    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
    */
   bool lower_txd_cube_map;

   /**
    * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
    */
   bool lower_txd_3d;

   /**
    * If true, lower nir_texop_txd on shadow samplers (except cube maps)
    * with nir_texop_txl. Notice that cube map shadow samplers are lowered
    * with lower_txd_cube_map.
    */
   bool lower_txd_shadow;

   /**
    * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
    * Implies lower_txd_cube_map and lower_txd_shadow.
    */
   bool lower_txd;

   /**
    * If true, lower nir_texop_txb that tries to use shadow compare and
    * min_lod at the same time to a nir_texop_lod, some math, and
    * nir_texop_tex.
    */
   bool lower_txb_shadow_clamp;

   /**
    * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
    * with nir_texop_txl. This includes cube maps.
    */
   bool lower_txd_shadow_clamp;

   /**
    * If true, lower nir_texop_txd when it uses both offset and min_lod
    * with nir_texop_txl. This includes cube maps.
    */
   bool lower_txd_offset_clamp;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler is bindless.
    */
   bool lower_txd_clamp_bindless_sampler;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler index is not statically determinable to be less than 16.
    */
   bool lower_txd_clamp_if_sampler_index_not_lt_16;

   /**
    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
    * 0-lod followed by a nir_ishr.
    */
   bool lower_txs_lod;

   /**
    * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
    * 2D array type followed by a nir_idiv by 6.
    */
   bool lower_txs_cube_array;

   /**
    * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
    * mixed-up tg4 locations.
    */
   bool lower_tg4_broadcom_swizzle;

   /**
    * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
    */
   bool lower_tg4_offsets;

   /**
    * Lower txf_ms to fragment_mask_fetch and fragment_fetch and samples_identical to
    * fragment_mask_fetch.
    */
   bool lower_to_fragment_fetch_amd;

   /**
    * To lower packed sampler return formats.
    *
    * Indexed by sampler-id.
    */
   enum nir_lower_tex_packing lower_tex_packing[32];
} nir_lower_tex_options;

/** Lowers complex texture instructions to simpler ones */
bool nir_lower_tex(nir_shader *shader,
                   const nir_lower_tex_options *options);

/** Options for nir_lower_image(). */
typedef struct nir_lower_image_options {
   /**
    * If true, lower cube size operations.
    */
   bool lower_cube_size;
} nir_lower_image_options;

bool nir_lower_image(nir_shader *nir,
                     const nir_lower_image_options *options);

bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);

/* Bitmask of access kinds handled by nir_lower_non_uniform_access(). */
enum nir_lower_non_uniform_access_type {
   nir_lower_non_uniform_ubo_access = (1 << 0),
   nir_lower_non_uniform_ssbo_access = (1 << 1),
   nir_lower_non_uniform_texture_access = (1 << 2),
   nir_lower_non_uniform_image_access = (1 << 3),
};

/* Given the nir_src used for the resource, return the channels which might be non-uniform. */
typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);

/** Options for nir_lower_non_uniform_access(). */
typedef struct nir_lower_non_uniform_access_options {
   enum nir_lower_non_uniform_access_type types;
   nir_lower_non_uniform_access_callback callback;
   void *callback_data;
} nir_lower_non_uniform_access_options;

bool nir_lower_non_uniform_access(nir_shader *shader,
                                  const nir_lower_non_uniform_access_options *options);

/** Options for nir_lower_idiv(). */
typedef struct {
   /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
    * is used. It is the faster of the two but it is not exact in some cases
    * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
    *
    * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
    * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
    * instructions than the nv50 path and many of them are integer
    * multiplications, so it is probably slower. It should always return the
    * correct result, though.
    */
   bool imprecise_32bit_lowering;

   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
    * division lowering
    */
   bool allow_fp16;
} nir_lower_idiv_options;

bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);

/** Options for nir_lower_input_attachments(). */
typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   bool use_view_id_for_layer;
} nir_input_attachment_options;

bool nir_lower_input_attachments(nir_shader *shader,
                                 const nir_input_attachment_options *options);

/* User-clip-plane lowering for the VS / GS / FS stages. */
bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
                       bool use_vars,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array);
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);

void nir_lower_point_size_mov(nir_shader *shader,
                              const gl_state_index16 *pointsize_state_tokens);

bool nir_lower_frexp(nir_shader *nir);

void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);

bool nir_lower_clamp_color_outputs(nir_shader *shader);

bool nir_lower_flatshade(nir_shader *shader);

void nir_lower_passthrough_edgeflags(nir_shader *shader);
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                              const gl_state_index16 *uniform_state_tokens);

/** Options for nir_lower_wpos_ytransform(). */
typedef struct nir_lower_wpos_ytransform_options {
   gl_state_index16 state_tokens[STATE_LENGTH];
   bool fs_coord_origin_upper_left :1;
   bool
fs_coord_origin_lower_left :1;
   bool fs_coord_pixel_center_integer :1;
   bool fs_coord_pixel_center_half_integer :1;
} nir_lower_wpos_ytransform_options;

bool nir_lower_wpos_ytransform(nir_shader *shader,
                               const nir_lower_wpos_ytransform_options *options);
bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);

bool nir_lower_pntc_ytransform(nir_shader *shader,
                               const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);

bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);

bool nir_lower_fb_read(nir_shader *shader);

/** Options for nir_lower_drawpixels(). */
typedef struct nir_lower_drawpixels_options {
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps :1;
   bool scale_and_bias :1;
} nir_lower_drawpixels_options;

void nir_lower_drawpixels(nir_shader *shader,
                          const nir_lower_drawpixels_options *options);

/** Options for nir_lower_bitmap(). */
typedef struct nir_lower_bitmap_options {
   unsigned sampler;
   bool swizzle_xxxx;
} nir_lower_bitmap_options;

void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);

bool nir_lower_atomics_to_ssbo(nir_shader *shader);

/* Flag bits selecting which source modifiers nir_lower_to_source_mods()
 * handles.
 */
typedef enum {
   nir_lower_int_source_mods = 1 << 0,
   nir_lower_float_source_mods = 1 << 1,
   nir_lower_64bit_source_mods = 1 << 2,
   nir_lower_triop_abs = 1 << 3,
   nir_lower_all_source_mods = (1 << 4) - 1
} nir_lower_to_source_mods_flags;

bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);

/* Flag bits for nir_lower_gs_intrinsics(). */
typedef enum {
   nir_lower_gs_intrinsics_per_stream = 1 << 0,
   nir_lower_gs_intrinsics_count_primitives = 1 << 1,
   nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
   nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
} nir_lower_gs_intrinsics_flags;

bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);

/* Callback returning the bit size an instruction should be lowered to. */
typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);

bool nir_lower_bit_size(nir_shader *shader,
                        nir_lower_bit_size_callback callback,
                        void *callback_data);
bool nir_lower_64bit_phis(nir_shader *shader);

/* 64-bit integer and double lowering. */
nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
bool nir_lower_int64(nir_shader *shader);

nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                       nir_lower_doubles_options options);
bool nir_lower_pack(nir_shader *shader);

/* Mediump / 16-bit I/O handling. */
bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          uint64_t varying_mask, bool use_16bit_slots);
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          nir_alu_type types);
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
                                        unsigned tex_src_types);

/* Per-tex-source bit-size constraint, one slot per nir_tex_src_type. */
typedef struct {
   bool legalize_type; /* whether this src should be legalized */
   uint8_t bit_size; /* bit_size to enforce */
   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];

bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
                                     nir_tex_src_type_constraints constraints);

bool nir_lower_point_size(nir_shader *shader, float min, float max);

void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
                                bool point_coord_is_sysval, bool yinvert);

/* Flag bits for nir_lower_interpolation(). */
typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid = (1 << 3),
   nir_lower_interpolation_pixel = (1 << 4),
   nir_lower_interpolation_sample = (1 << 5),
} nir_lower_interpolation_options;

bool nir_lower_interpolation(nir_shader *shader,
                             nir_lower_interpolation_options options);

bool nir_lower_discard_or_demote(nir_shader *shader,
                                 bool force_correct_quad_ops_after_discard);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);

/* Multiview support queries and lowering. */
bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);

bool nir_lower_fp16_casts(nir_shader *shader);
bool nir_normalize_cubemap_coords(nir_shader *shader);

bool nir_shader_supports_implicit_lod(nir_shader *shader);

/* SSA liveness / loop / divergence analyses. */
void nir_live_ssa_defs_impl(nir_function_impl *impl);

const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask);

bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis(nir_shader *shader);
bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers. If false, convert all values (even those not involved in a phi
 * node) to registers.
 */
bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_ssbo(nir_shader *shader);

/** Options for nir_lower_printf(). */
typedef struct nir_lower_printf_options {
   bool treat_doubles_as_floats : 1;
   unsigned max_buffer_size;
} nir_lower_printf_options;

bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

/** Options for nir_opt_access(). */
typedef struct nir_opt_access_options {
   bool is_vulkan;
   bool infer_non_readable;
} nir_opt_access_options;

bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);

/* Algebraic optimization variants (generated passes). */
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);

/* Try to combine a and b into a. Return true if combination was possible,
 * which will result in b being removed by the pass. Return false if
 * combination wasn't possible.
 */
typedef bool (*nir_combine_memory_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_memory_barriers(nir_shader *shader,
                                     nir_combine_memory_barrier_cb combine_cb,
                                     void *data);

bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

bool nir_copy_prop_impl(nir_function_impl *impl);
bool nir_copy_prop(nir_shader *shader);

bool nir_opt_copy_prop_vars(nir_shader *shader);

bool nir_opt_cse(nir_shader *shader);

bool nir_opt_dce(nir_shader *shader);

bool nir_opt_dead_cf(nir_shader *shader);

bool nir_opt_dead_write_vars(nir_shader *shader);

bool nir_opt_deref_impl(nir_function_impl *impl);
bool nir_opt_deref(nir_shader *shader);

bool nir_opt_find_array_copies(nir_shader *shader);

bool nir_opt_fragdepth(nir_shader *shader);

bool nir_opt_gcm(nir_shader *shader, bool value_number);

bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);

bool nir_opt_intrinsics(nir_shader *shader);

bool nir_opt_large_constants(nir_shader *shader,
                             glsl_type_size_align_func size_align,
                             unsigned threshold);

bool nir_opt_loop_unroll(nir_shader *shader);

/* Instruction classes that nir_opt_sink()/nir_opt_move() may relocate. */
typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo = (1 << 1),
   nir_move_load_input = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies = (1 << 4),
   nir_move_load_ssbo = (1 << 5),
} nir_move_options;

bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

bool nir_opt_sink(nir_shader *shader, nir_move_options options);

bool nir_opt_move(nir_shader *shader, nir_move_options options);

bool nir_opt_offsets(nir_shader *shader);

bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok); 5588 5589bool nir_opt_rematerialize_compares(nir_shader *shader); 5590 5591bool nir_opt_remove_phis(nir_shader *shader); 5592bool nir_opt_remove_phis_block(nir_block *block); 5593 5594bool nir_opt_phi_precision(nir_shader *shader); 5595 5596bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store); 5597 5598bool nir_opt_trivial_continues(nir_shader *shader); 5599 5600bool nir_opt_undef(nir_shader *shader); 5601 5602bool nir_lower_undef_to_zero(nir_shader *shader); 5603 5604bool nir_opt_uniform_atomics(nir_shader *shader); 5605 5606typedef bool (*nir_opt_vectorize_cb)(const nir_instr *instr, void *data); 5607 5608bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter, 5609 void *data); 5610 5611bool nir_opt_conditional_discard(nir_shader *shader); 5612bool nir_opt_move_discards_to_top(nir_shader *shader); 5613 5614typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, 5615 unsigned align_offset, 5616 unsigned bit_size, 5617 unsigned num_components, 5618 nir_intrinsic_instr *low, nir_intrinsic_instr *high, 5619 void *data); 5620 5621typedef struct { 5622 nir_should_vectorize_mem_func callback; 5623 nir_variable_mode modes; 5624 nir_variable_mode robust_modes; 5625 void *cb_data; 5626} nir_load_store_vectorize_options; 5627 5628bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options); 5629 5630void nir_sweep(nir_shader *shader); 5631 5632void nir_remap_dual_slot_attributes(nir_shader *shader, 5633 uint64_t *dual_slot_inputs); 5634uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); 5635 5636nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 5637gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 5638 5639static inline bool 5640nir_variable_is_in_ubo(const nir_variable *var) 5641{ 5642 return (var->data.mode == nir_var_mem_ubo && 5643 var->interface_type != NULL); 
5644} 5645 5646static inline bool 5647nir_variable_is_in_ssbo(const nir_variable *var) 5648{ 5649 return (var->data.mode == nir_var_mem_ssbo && 5650 var->interface_type != NULL); 5651} 5652 5653static inline bool 5654nir_variable_is_in_block(const nir_variable *var) 5655{ 5656 return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); 5657} 5658 5659typedef struct nir_unsigned_upper_bound_config { 5660 unsigned min_subgroup_size; 5661 unsigned max_subgroup_size; 5662 unsigned max_workgroup_invocations; 5663 unsigned max_workgroup_count[3]; 5664 unsigned max_workgroup_size[3]; 5665 5666 uint32_t vertex_attrib_max[32]; 5667} nir_unsigned_upper_bound_config; 5668 5669uint32_t 5670nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, 5671 nir_ssa_scalar scalar, 5672 const nir_unsigned_upper_bound_config *config); 5673 5674bool 5675nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, 5676 nir_ssa_scalar ssa, unsigned const_val, 5677 const nir_unsigned_upper_bound_config *config); 5678 5679#include "nir_inline_helpers.h" 5680 5681#ifdef __cplusplus 5682} /* extern "C" */ 5683#endif 5684 5685#endif /* NIR_H */ 5686