1/* 2 * Copyright © 2014 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
*
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "GL/gl.h" /* GLenum */
#include "util/list.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/macros.h"
#include "compiler/nir_types.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#include <stdio.h>

#ifndef NDEBUG
#include "util/debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

/* Map snprintf onto _snprintf on Windows unless something already defined it. */
#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* All-zero / all-ones 32-bit patterns used as NIR's false / true values. */
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 4
#define NIR_MAX_MATRIX_COLUMNS 4
typedef uint8_t nir_component_mask_t;

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * The generated function is named \c name.  It asserts that \c parent is
 * non-NULL and that \c parent->type_field equals \c type_value, then returns
 * the result of exec_node_data(out_type, parent, field).
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness: it takes a const in_type pointer and returns a
 * non-const out_type pointer.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;


/**
 * Description of built-in state associated with a uniform
 *
 * \sa nir_variable::state_slots
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
   int swizzle;
} nir_state_slot;

/**
 * Variable modes.
 *
 * Each mode occupies its own bit so that several modes can be OR-ed together
 * into a mask; nir_var_all has every bit set.
 */
typedef enum {
   nir_var_shader_in       = (1 << 0),
   nir_var_shader_out      = (1 << 1),
   nir_var_shader_temp     = (1 << 2),
   nir_var_function_temp   = (1 << 3),
   nir_var_uniform         = (1 << 4),
   nir_var_mem_ubo         = (1 << 5),
   nir_var_system_value    = (1 << 6),
   nir_var_mem_ssbo        = (1 << 7),
   nir_var_mem_shared      = (1 << 8),
   nir_var_mem_global      = (1 << 9),
   nir_var_all             = ~0,
} nir_variable_mode;

/**
 * Rounding modes.
114 */ 115typedef enum { 116 nir_rounding_mode_undef = 0, 117 nir_rounding_mode_rtne = 1, /* round to nearest even */ 118 nir_rounding_mode_ru = 2, /* round up */ 119 nir_rounding_mode_rd = 3, /* round down */ 120 nir_rounding_mode_rtz = 4, /* round towards zero */ 121} nir_rounding_mode; 122 123typedef union { 124 bool b; 125 float f32; 126 double f64; 127 int8_t i8; 128 uint8_t u8; 129 int16_t i16; 130 uint16_t u16; 131 int32_t i32; 132 uint32_t u32; 133 int64_t i64; 134 uint64_t u64; 135} nir_const_value; 136 137#define nir_const_value_to_array(arr, c, components, m) \ 138{ \ 139 for (unsigned i = 0; i < components; ++i) \ 140 arr[i] = c[i].m; \ 141} while (false) 142 143static inline nir_const_value 144nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) 145{ 146 nir_const_value v; 147 memset(&v, 0, sizeof(v)); 148 149 switch (bit_size) { 150 case 1: v.b = x; break; 151 case 8: v.u8 = x; break; 152 case 16: v.u16 = x; break; 153 case 32: v.u32 = x; break; 154 case 64: v.u64 = x; break; 155 default: 156 unreachable("Invalid bit size"); 157 } 158 159 return v; 160} 161 162static inline nir_const_value 163nir_const_value_for_int(int64_t i, unsigned bit_size) 164{ 165 nir_const_value v; 166 memset(&v, 0, sizeof(v)); 167 168 assert(bit_size <= 64); 169 if (bit_size < 64) { 170 assert(i >= (-(1ll << (bit_size - 1)))); 171 assert(i < (1ll << (bit_size - 1))); 172 } 173 174 return nir_const_value_for_raw_uint(i, bit_size); 175} 176 177static inline nir_const_value 178nir_const_value_for_uint(uint64_t u, unsigned bit_size) 179{ 180 nir_const_value v; 181 memset(&v, 0, sizeof(v)); 182 183 assert(bit_size <= 64); 184 if (bit_size < 64) 185 assert(u < (1ull << bit_size)); 186 187 return nir_const_value_for_raw_uint(u, bit_size); 188} 189 190static inline nir_const_value 191nir_const_value_for_bool(bool b, unsigned bit_size) 192{ 193 /* Booleans use a 0/-1 convention */ 194 return nir_const_value_for_int(-(int)b, bit_size); 195} 196 197/* This one isn't inline 
because it requires half-float conversion */ 198nir_const_value nir_const_value_for_float(double b, unsigned bit_size); 199 200static inline int64_t 201nir_const_value_as_int(nir_const_value value, unsigned bit_size) 202{ 203 switch (bit_size) { 204 /* int1_t uses 0/-1 convention */ 205 case 1: return -(int)value.b; 206 case 8: return value.i8; 207 case 16: return value.i16; 208 case 32: return value.i32; 209 case 64: return value.i64; 210 default: 211 unreachable("Invalid bit size"); 212 } 213} 214 215static inline int64_t 216nir_const_value_as_uint(nir_const_value value, unsigned bit_size) 217{ 218 switch (bit_size) { 219 case 1: return value.b; 220 case 8: return value.u8; 221 case 16: return value.u16; 222 case 32: return value.u32; 223 case 64: return value.u64; 224 default: 225 unreachable("Invalid bit size"); 226 } 227} 228 229static inline bool 230nir_const_value_as_bool(nir_const_value value, unsigned bit_size) 231{ 232 int64_t i = nir_const_value_as_int(value, bit_size); 233 234 /* Booleans of any size use 0/-1 convention */ 235 assert(i == 0 || i == -1); 236 237 return i; 238} 239 240/* This one isn't inline because it requires half-float conversion */ 241double nir_const_value_as_float(nir_const_value value, unsigned bit_size); 242 243typedef struct nir_constant { 244 /** 245 * Value of the constant. 246 * 247 * The field used to back the values supplied by the constant is determined 248 * by the type associated with the \c nir_variable. Constants may be 249 * scalars, vectors, or matrices. 250 */ 251 nir_const_value values[NIR_MAX_MATRIX_COLUMNS][NIR_MAX_VEC_COMPONENTS]; 252 253 /* we could get this from the var->type but makes clone *much* easier to 254 * not have to care about the type. 255 */ 256 unsigned num_elements; 257 258 /* Array elements / Structure Fields */ 259 struct nir_constant **elements; 260} nir_constant; 261 262/** 263 * \brief Layout qualifiers for gl_FragDepth. 
264 * 265 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 266 * with a layout qualifier. 267 */ 268typedef enum { 269 nir_depth_layout_none, /**< No depth layout is specified. */ 270 nir_depth_layout_any, 271 nir_depth_layout_greater, 272 nir_depth_layout_less, 273 nir_depth_layout_unchanged 274} nir_depth_layout; 275 276/** 277 * Enum keeping track of how a variable was declared. 278 */ 279typedef enum { 280 /** 281 * Normal declaration. 282 */ 283 nir_var_declared_normally = 0, 284 285 /** 286 * Variable is implicitly generated by the compiler and should not be 287 * visible via the API. 288 */ 289 nir_var_hidden, 290} nir_var_declaration_type; 291 292/** 293 * Either a uniform, global variable, shader input, or shader output. Based on 294 * ir_variable - it should be easy to translate between the two. 295 */ 296 297typedef struct nir_variable { 298 struct exec_node node; 299 300 /** 301 * Declared type of the variable 302 */ 303 const struct glsl_type *type; 304 305 /** 306 * Declared name of the variable 307 */ 308 char *name; 309 310 struct nir_variable_data { 311 /** 312 * Storage class of the variable. 313 * 314 * \sa nir_variable_mode 315 */ 316 nir_variable_mode mode; 317 318 /** 319 * Is the variable read-only? 320 * 321 * This is set for variables declared as \c const, shader inputs, 322 * and uniforms. 323 */ 324 unsigned read_only:1; 325 unsigned centroid:1; 326 unsigned sample:1; 327 unsigned patch:1; 328 unsigned invariant:1; 329 330 /** 331 * When separate shader programs are enabled, only input/outputs between 332 * the stages of a multi-stage separate program can be safely removed 333 * from the shader interface. Other input/outputs must remains active. 334 * 335 * This is also used to make sure xfb varyings that are unused by the 336 * fragment shader are not removed. 
337 */ 338 unsigned always_active_io:1; 339 340 /** 341 * Interpolation mode for shader inputs / outputs 342 * 343 * \sa glsl_interp_mode 344 */ 345 unsigned interpolation:2; 346 347 /** 348 * If non-zero, then this variable may be packed along with other variables 349 * into a single varying slot, so this offset should be applied when 350 * accessing components. For example, an offset of 1 means that the x 351 * component of this variable is actually stored in component y of the 352 * location specified by \c location. 353 */ 354 unsigned location_frac:2; 355 356 /** 357 * If true, this variable represents an array of scalars that should 358 * be tightly packed. In other words, consecutive array elements 359 * should be stored one component apart, rather than one slot apart. 360 */ 361 unsigned compact:1; 362 363 /** 364 * Whether this is a fragment shader output implicitly initialized with 365 * the previous contents of the specified render target at the 366 * framebuffer location corresponding to this shader invocation. 367 */ 368 unsigned fb_fetch_output:1; 369 370 /** 371 * Non-zero if this variable is considered bindless as defined by 372 * ARB_bindless_texture. 373 */ 374 unsigned bindless:1; 375 376 /** 377 * Was an explicit binding set in the shader? 378 */ 379 unsigned explicit_binding:1; 380 381 /** 382 * Was a transfer feedback buffer set in the shader? 383 */ 384 unsigned explicit_xfb_buffer:1; 385 386 /** 387 * Was a transfer feedback stride set in the shader? 388 */ 389 unsigned explicit_xfb_stride:1; 390 391 /** 392 * Was an explicit offset set in the shader? 393 */ 394 unsigned explicit_offset:1; 395 396 /** 397 * \brief Layout qualifier for gl_FragDepth. 398 * 399 * This is not equal to \c ir_depth_layout_none if and only if this 400 * variable is \c gl_FragDepth and a layout qualifier is specified. 
401 */ 402 nir_depth_layout depth_layout; 403 404 /** 405 * Storage location of the base of this variable 406 * 407 * The precise meaning of this field depends on the nature of the variable. 408 * 409 * - Vertex shader input: one of the values from \c gl_vert_attrib. 410 * - Vertex shader output: one of the values from \c gl_varying_slot. 411 * - Geometry shader input: one of the values from \c gl_varying_slot. 412 * - Geometry shader output: one of the values from \c gl_varying_slot. 413 * - Fragment shader input: one of the values from \c gl_varying_slot. 414 * - Fragment shader output: one of the values from \c gl_frag_result. 415 * - Uniforms: Per-stage uniform slot number for default uniform block. 416 * - Uniforms: Index within the uniform block definition for UBO members. 417 * - Non-UBO Uniforms: uniform slot number. 418 * - Other: This field is not currently used. 419 * 420 * If the variable is a uniform, shader input, or shader output, and the 421 * slot has not been assigned, the value will be -1. 422 */ 423 int location; 424 425 /** 426 * The actual location of the variable in the IR. Only valid for inputs 427 * and outputs. 428 */ 429 unsigned int driver_location; 430 431 /** 432 * Vertex stream output identifier. 433 * 434 * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the 435 * stream of the i-th component. 436 */ 437 unsigned stream; 438 439 /** 440 * output index for dual source blending. 441 */ 442 int index; 443 444 /** 445 * Descriptor set binding for sampler or UBO. 446 */ 447 int descriptor_set; 448 449 /** 450 * Initial binding point for a sampler or UBO. 451 * 452 * For array types, this represents the binding point for the first element. 453 */ 454 int binding; 455 456 /** 457 * Location an atomic counter or transform feedback is stored at. 458 */ 459 unsigned offset; 460 461 /** 462 * Transform feedback buffer. 463 */ 464 unsigned xfb_buffer; 465 466 /** 467 * Transform feedback stride. 
468 */ 469 unsigned xfb_stride; 470 471 /** 472 * How the variable was declared. See nir_var_declaration_type. 473 * 474 * This is used to detect variables generated by the compiler, so should 475 * not be visible via the API. 476 */ 477 unsigned how_declared:2; 478 479 /** 480 * ARB_shader_image_load_store qualifiers. 481 */ 482 struct { 483 enum gl_access_qualifier access; 484 485 /** Image internal format if specified explicitly, otherwise GL_NONE. */ 486 GLenum format; 487 } image; 488 } data; 489 490 /** 491 * Built-in state that backs this uniform 492 * 493 * Once set at variable creation, \c state_slots must remain invariant. 494 * This is because, ideally, this array would be shared by all clones of 495 * this variable in the IR tree. In other words, we'd really like for it 496 * to be a fly-weight. 497 * 498 * If the variable is not a uniform, \c num_state_slots will be zero and 499 * \c state_slots will be \c NULL. 500 */ 501 /*@{*/ 502 unsigned num_state_slots; /**< Number of state slots used */ 503 nir_state_slot *state_slots; /**< State descriptors. */ 504 /*@}*/ 505 506 /** 507 * Constant expression assigned in the initializer of the variable 508 * 509 * This field should only be used temporarily by creators of NIR shaders 510 * and then lower_constant_initializers can be used to get rid of them. 511 * Most of the rest of NIR ignores this field or asserts that it's NULL. 512 */ 513 nir_constant *constant_initializer; 514 515 /** 516 * For variables that are in an interface block or are an instance of an 517 * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 518 * 519 * \sa ir_variable::location 520 */ 521 const struct glsl_type *interface_type; 522 523 /** 524 * Description of per-member data for per-member struct variables 525 * 526 * This is used for variables which are actually an amalgamation of 527 * multiple entities such as a struct of built-in values or a struct of 528 * inputs each with their own layout specifier. 
This is only allowed on 529 * variables with a struct or array of array of struct type. 530 */ 531 unsigned num_members; 532 struct nir_variable_data *members; 533} nir_variable; 534 535#define nir_foreach_variable(var, var_list) \ 536 foreach_list_typed(nir_variable, var, node, var_list) 537 538#define nir_foreach_variable_safe(var, var_list) \ 539 foreach_list_typed_safe(nir_variable, var, node, var_list) 540 541static inline bool 542nir_variable_is_global(const nir_variable *var) 543{ 544 return var->data.mode != nir_var_function_temp; 545} 546 547typedef struct nir_register { 548 struct exec_node node; 549 550 unsigned num_components; /** < number of vector components */ 551 unsigned num_array_elems; /** < size of array (0 for no array) */ 552 553 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 554 uint8_t bit_size; 555 556 /** generic register index. */ 557 unsigned index; 558 559 /** only for debug purposes, can be NULL */ 560 const char *name; 561 562 /** set of nir_srcs where this register is used (read from) */ 563 struct list_head uses; 564 565 /** set of nir_dests where this register is defined (written to) */ 566 struct list_head defs; 567 568 /** set of nir_ifs where this register is used as a condition */ 569 struct list_head if_uses; 570} nir_register; 571 572#define nir_foreach_register(reg, reg_list) \ 573 foreach_list_typed(nir_register, reg, node, reg_list) 574#define nir_foreach_register_safe(reg, reg_list) \ 575 foreach_list_typed_safe(nir_register, reg, node, reg_list) 576 577typedef enum PACKED { 578 nir_instr_type_alu, 579 nir_instr_type_deref, 580 nir_instr_type_call, 581 nir_instr_type_tex, 582 nir_instr_type_intrinsic, 583 nir_instr_type_load_const, 584 nir_instr_type_jump, 585 nir_instr_type_ssa_undef, 586 nir_instr_type_phi, 587 nir_instr_type_parallel_copy, 588} nir_instr_type; 589 590typedef struct nir_instr { 591 struct exec_node node; 592 struct nir_block *block; 593 nir_instr_type type; 594 595 /* A temporary 
for optimization and analysis passes to use for storing 596 * flags. For instance, DCE uses this to store the "dead/live" info. 597 */ 598 uint8_t pass_flags; 599 600 /** generic instruction index. */ 601 unsigned index; 602} nir_instr; 603 604static inline nir_instr * 605nir_instr_next(nir_instr *instr) 606{ 607 struct exec_node *next = exec_node_get_next(&instr->node); 608 if (exec_node_is_tail_sentinel(next)) 609 return NULL; 610 else 611 return exec_node_data(nir_instr, next, node); 612} 613 614static inline nir_instr * 615nir_instr_prev(nir_instr *instr) 616{ 617 struct exec_node *prev = exec_node_get_prev(&instr->node); 618 if (exec_node_is_head_sentinel(prev)) 619 return NULL; 620 else 621 return exec_node_data(nir_instr, prev, node); 622} 623 624static inline bool 625nir_instr_is_first(const nir_instr *instr) 626{ 627 return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node)); 628} 629 630static inline bool 631nir_instr_is_last(const nir_instr *instr) 632{ 633 return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); 634} 635 636typedef struct nir_ssa_def { 637 /** for debugging only, can be NULL */ 638 const char* name; 639 640 /** generic SSA definition index. */ 641 unsigned index; 642 643 /** Index into the live_in and live_out bitfields */ 644 unsigned live_index; 645 646 /** Instruction which produces this SSA value. 
*/ 647 nir_instr *parent_instr; 648 649 /** set of nir_instrs where this register is used (read from) */ 650 struct list_head uses; 651 652 /** set of nir_ifs where this register is used as a condition */ 653 struct list_head if_uses; 654 655 uint8_t num_components; 656 657 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 658 uint8_t bit_size; 659} nir_ssa_def; 660 661struct nir_src; 662 663typedef struct { 664 nir_register *reg; 665 struct nir_src *indirect; /** < NULL for no indirect offset */ 666 unsigned base_offset; 667 668 /* TODO use-def chain goes here */ 669} nir_reg_src; 670 671typedef struct { 672 nir_instr *parent_instr; 673 struct list_head def_link; 674 675 nir_register *reg; 676 struct nir_src *indirect; /** < NULL for no indirect offset */ 677 unsigned base_offset; 678 679 /* TODO def-use chain goes here */ 680} nir_reg_dest; 681 682struct nir_if; 683 684typedef struct nir_src { 685 union { 686 /** Instruction that consumes this value as a source. */ 687 nir_instr *parent_instr; 688 struct nir_if *parent_if; 689 }; 690 691 struct list_head use_link; 692 693 union { 694 nir_reg_src reg; 695 nir_ssa_def *ssa; 696 }; 697 698 bool is_ssa; 699} nir_src; 700 701static inline nir_src 702nir_src_init(void) 703{ 704 nir_src src = { { NULL } }; 705 return src; 706} 707 708#define NIR_SRC_INIT nir_src_init() 709 710#define nir_foreach_use(src, reg_or_ssa_def) \ 711 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 712 713#define nir_foreach_use_safe(src, reg_or_ssa_def) \ 714 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 715 716#define nir_foreach_if_use(src, reg_or_ssa_def) \ 717 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 718 719#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 720 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 721 722typedef struct { 723 union { 724 nir_reg_dest reg; 725 nir_ssa_def ssa; 726 }; 727 728 bool 
is_ssa; 729} nir_dest; 730 731static inline nir_dest 732nir_dest_init(void) 733{ 734 nir_dest dest = { { { NULL } } }; 735 return dest; 736} 737 738#define NIR_DEST_INIT nir_dest_init() 739 740#define nir_foreach_def(dest, reg) \ 741 list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 742 743#define nir_foreach_def_safe(dest, reg) \ 744 list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 745 746static inline nir_src 747nir_src_for_ssa(nir_ssa_def *def) 748{ 749 nir_src src = NIR_SRC_INIT; 750 751 src.is_ssa = true; 752 src.ssa = def; 753 754 return src; 755} 756 757static inline nir_src 758nir_src_for_reg(nir_register *reg) 759{ 760 nir_src src = NIR_SRC_INIT; 761 762 src.is_ssa = false; 763 src.reg.reg = reg; 764 src.reg.indirect = NULL; 765 src.reg.base_offset = 0; 766 767 return src; 768} 769 770static inline nir_dest 771nir_dest_for_reg(nir_register *reg) 772{ 773 nir_dest dest = NIR_DEST_INIT; 774 775 dest.reg.reg = reg; 776 777 return dest; 778} 779 780static inline unsigned 781nir_src_bit_size(nir_src src) 782{ 783 return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 784} 785 786static inline unsigned 787nir_src_num_components(nir_src src) 788{ 789 return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components; 790} 791 792static inline bool 793nir_src_is_const(nir_src src) 794{ 795 return src.is_ssa && 796 src.ssa->parent_instr->type == nir_instr_type_load_const; 797} 798 799int64_t nir_src_as_int(nir_src src); 800uint64_t nir_src_as_uint(nir_src src); 801bool nir_src_as_bool(nir_src src); 802double nir_src_as_float(nir_src src); 803int64_t nir_src_comp_as_int(nir_src src, unsigned component); 804uint64_t nir_src_comp_as_uint(nir_src src, unsigned component); 805bool nir_src_comp_as_bool(nir_src src, unsigned component); 806double nir_src_comp_as_float(nir_src src, unsigned component); 807 808static inline unsigned 809nir_dest_bit_size(nir_dest dest) 810{ 811 return dest.is_ssa ? 
dest.ssa.bit_size : dest.reg.reg->bit_size; 812} 813 814static inline unsigned 815nir_dest_num_components(nir_dest dest) 816{ 817 return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; 818} 819 820void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); 821void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); 822 823typedef struct { 824 nir_src src; 825 826 /** 827 * \name input modifiers 828 */ 829 /*@{*/ 830 /** 831 * For inputs interpreted as floating point, flips the sign bit. For 832 * inputs interpreted as integers, performs the two's complement negation. 833 */ 834 bool negate; 835 836 /** 837 * Clears the sign bit for floating point values, and computes the integer 838 * absolute value for integers. Note that the negate modifier acts after 839 * the absolute value modifier, therefore if both are set then all inputs 840 * will become negative. 841 */ 842 bool abs; 843 /*@}*/ 844 845 /** 846 * For each input component, says which component of the register it is 847 * chosen from. Note that which elements of the swizzle are used and which 848 * are ignored are based on the write mask for most opcodes - for example, 849 * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and 850 * a swizzle of {2, x, 1, 0} where x means "don't care." 851 */ 852 uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; 853} nir_alu_src; 854 855typedef struct { 856 nir_dest dest; 857 858 /** 859 * \name saturate output modifier 860 * 861 * Only valid for opcodes that output floating-point numbers. Clamps the 862 * output to between 0.0 and 1.0 inclusive. 863 */ 864 865 bool saturate; 866 867 unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */ 868} nir_alu_dest; 869 870/** NIR sized and unsized types 871 * 872 * The values in this enum are carefully chosen so that the sized type is 873 * just the unsized type OR the number of bits. 
874 */ 875typedef enum { 876 nir_type_invalid = 0, /* Not a valid type */ 877 nir_type_int = 2, 878 nir_type_uint = 4, 879 nir_type_bool = 6, 880 nir_type_float = 128, 881 nir_type_bool1 = 1 | nir_type_bool, 882 nir_type_bool32 = 32 | nir_type_bool, 883 nir_type_int1 = 1 | nir_type_int, 884 nir_type_int8 = 8 | nir_type_int, 885 nir_type_int16 = 16 | nir_type_int, 886 nir_type_int32 = 32 | nir_type_int, 887 nir_type_int64 = 64 | nir_type_int, 888 nir_type_uint1 = 1 | nir_type_uint, 889 nir_type_uint8 = 8 | nir_type_uint, 890 nir_type_uint16 = 16 | nir_type_uint, 891 nir_type_uint32 = 32 | nir_type_uint, 892 nir_type_uint64 = 64 | nir_type_uint, 893 nir_type_float16 = 16 | nir_type_float, 894 nir_type_float32 = 32 | nir_type_float, 895 nir_type_float64 = 64 | nir_type_float, 896} nir_alu_type; 897 898#define NIR_ALU_TYPE_SIZE_MASK 0x79 899#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86 900 901static inline unsigned 902nir_alu_type_get_type_size(nir_alu_type type) 903{ 904 return type & NIR_ALU_TYPE_SIZE_MASK; 905} 906 907static inline unsigned 908nir_alu_type_get_base_type(nir_alu_type type) 909{ 910 return type & NIR_ALU_TYPE_BASE_TYPE_MASK; 911} 912 913static inline nir_alu_type 914nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) 915{ 916 switch (base_type) { 917 case GLSL_TYPE_BOOL: 918 return nir_type_bool1; 919 break; 920 case GLSL_TYPE_UINT: 921 return nir_type_uint32; 922 break; 923 case GLSL_TYPE_INT: 924 return nir_type_int32; 925 break; 926 case GLSL_TYPE_UINT16: 927 return nir_type_uint16; 928 break; 929 case GLSL_TYPE_INT16: 930 return nir_type_int16; 931 break; 932 case GLSL_TYPE_UINT8: 933 return nir_type_uint8; 934 case GLSL_TYPE_INT8: 935 return nir_type_int8; 936 case GLSL_TYPE_UINT64: 937 return nir_type_uint64; 938 break; 939 case GLSL_TYPE_INT64: 940 return nir_type_int64; 941 break; 942 case GLSL_TYPE_FLOAT: 943 return nir_type_float32; 944 break; 945 case GLSL_TYPE_FLOAT16: 946 return nir_type_float16; 947 break; 948 case 
GLSL_TYPE_DOUBLE: 949 return nir_type_float64; 950 break; 951 default: 952 unreachable("unknown type"); 953 } 954} 955 956static inline nir_alu_type 957nir_get_nir_type_for_glsl_type(const struct glsl_type *type) 958{ 959 return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type)); 960} 961 962nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, 963 nir_rounding_mode rnd); 964 965static inline nir_op 966nir_op_vec(unsigned components) 967{ 968 switch (components) { 969 case 1: return nir_op_imov; 970 case 2: return nir_op_vec2; 971 case 3: return nir_op_vec3; 972 case 4: return nir_op_vec4; 973 default: unreachable("bad component count"); 974 } 975} 976 977typedef enum { 978 NIR_OP_IS_COMMUTATIVE = (1 << 0), 979 NIR_OP_IS_ASSOCIATIVE = (1 << 1), 980} nir_op_algebraic_property; 981 982typedef struct { 983 const char *name; 984 985 unsigned num_inputs; 986 987 /** 988 * The number of components in the output 989 * 990 * If non-zero, this is the size of the output and input sizes are 991 * explicitly given; swizzle and writemask are still in effect, but if 992 * the output component is masked out, then the input component may 993 * still be in use. 994 * 995 * If zero, the opcode acts in the standard, per-component manner; the 996 * operation is performed on each component (except the ones that are 997 * masked out) with the input being taken from the input swizzle for 998 * that component. 999 * 1000 * The size of some of the inputs may be given (i.e. non-zero) even 1001 * though output_size is zero; in that case, the inputs with a zero 1002 * size act per-component, while the inputs with non-zero size don't. 1003 */ 1004 unsigned output_size; 1005 1006 /** 1007 * The type of vector that the instruction outputs. Note that the 1008 * staurate modifier is only allowed on outputs with the float type. 
1009 */ 1010 1011 nir_alu_type output_type; 1012 1013 /** 1014 * The number of components in each input 1015 */ 1016 unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; 1017 1018 /** 1019 * The type of vector that each input takes. Note that negate and 1020 * absolute value are only allowed on inputs with int or float type and 1021 * behave differently on the two. 1022 */ 1023 nir_alu_type input_types[NIR_MAX_VEC_COMPONENTS]; 1024 1025 nir_op_algebraic_property algebraic_properties; 1026 1027 /* Whether this represents a numeric conversion opcode */ 1028 bool is_conversion; 1029} nir_op_info; 1030 1031extern const nir_op_info nir_op_infos[nir_num_opcodes]; 1032 1033typedef struct nir_alu_instr { 1034 nir_instr instr; 1035 nir_op op; 1036 1037 /** Indicates that this ALU instruction generates an exact value 1038 * 1039 * This is kind of a mixture of GLSL "precise" and "invariant" and not 1040 * really equivalent to either. This indicates that the value generated by 1041 * this operation is high-precision and any code transformations that touch 1042 * it must ensure that the resulting value is bit-for-bit identical to the 1043 * original. 1044 */ 1045 bool exact; 1046 1047 nir_alu_dest dest; 1048 nir_alu_src src[]; 1049} nir_alu_instr; 1050 1051void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, 1052 nir_alu_instr *instr); 1053void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, 1054 nir_alu_instr *instr); 1055 1056/* is this source channel used? 
*/ 1057static inline bool 1058nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src, 1059 unsigned channel) 1060{ 1061 if (nir_op_infos[instr->op].input_sizes[src] > 0) 1062 return channel < nir_op_infos[instr->op].input_sizes[src]; 1063 1064 return (instr->dest.write_mask >> channel) & 1; 1065} 1066 1067static inline nir_component_mask_t 1068nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src) 1069{ 1070 nir_component_mask_t read_mask = 0; 1071 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) { 1072 if (!nir_alu_instr_channel_used(instr, src, c)) 1073 continue; 1074 1075 read_mask |= (1 << instr->src[src].swizzle[c]); 1076 } 1077 return read_mask; 1078} 1079 1080/* 1081 * For instructions whose destinations are SSA, get the number of channels 1082 * used for a source 1083 */ 1084static inline unsigned 1085nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) 1086{ 1087 assert(instr->dest.dest.is_ssa); 1088 1089 if (nir_op_infos[instr->op].input_sizes[src] > 0) 1090 return nir_op_infos[instr->op].input_sizes[src]; 1091 1092 return instr->dest.dest.ssa.num_components; 1093} 1094 1095bool nir_const_value_negative_equal(const nir_const_value *c1, 1096 const nir_const_value *c2, 1097 unsigned components, 1098 nir_alu_type base_type, 1099 unsigned bits); 1100 1101bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, 1102 unsigned src1, unsigned src2); 1103 1104bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, 1105 const nir_alu_instr *alu2, 1106 unsigned src1, unsigned src2); 1107 1108typedef enum { 1109 nir_deref_type_var, 1110 nir_deref_type_array, 1111 nir_deref_type_array_wildcard, 1112 nir_deref_type_ptr_as_array, 1113 nir_deref_type_struct, 1114 nir_deref_type_cast, 1115} nir_deref_type; 1116 1117typedef struct { 1118 nir_instr instr; 1119 1120 /** The type of this deref instruction */ 1121 nir_deref_type deref_type; 1122 1123 /** The mode of the underlying variable */ 1124 
nir_variable_mode mode; 1125 1126 /** The dereferenced type of the resulting pointer value */ 1127 const struct glsl_type *type; 1128 1129 union { 1130 /** Variable being dereferenced if deref_type is a deref_var */ 1131 nir_variable *var; 1132 1133 /** Parent deref if deref_type is not deref_var */ 1134 nir_src parent; 1135 }; 1136 1137 /** Additional deref parameters */ 1138 union { 1139 struct { 1140 nir_src index; 1141 } arr; 1142 1143 struct { 1144 unsigned index; 1145 } strct; 1146 1147 struct { 1148 unsigned ptr_stride; 1149 } cast; 1150 }; 1151 1152 /** Destination to store the resulting "pointer" */ 1153 nir_dest dest; 1154} nir_deref_instr; 1155 1156static inline nir_deref_instr *nir_src_as_deref(nir_src src); 1157 1158static inline nir_deref_instr * 1159nir_deref_instr_parent(const nir_deref_instr *instr) 1160{ 1161 if (instr->deref_type == nir_deref_type_var) 1162 return NULL; 1163 else 1164 return nir_src_as_deref(instr->parent); 1165} 1166 1167static inline nir_variable * 1168nir_deref_instr_get_variable(const nir_deref_instr *instr) 1169{ 1170 while (instr->deref_type != nir_deref_type_var) { 1171 if (instr->deref_type == nir_deref_type_cast) 1172 return NULL; 1173 1174 instr = nir_deref_instr_parent(instr); 1175 } 1176 1177 return instr->var; 1178} 1179 1180bool nir_deref_instr_has_indirect(nir_deref_instr *instr); 1181 1182bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); 1183 1184unsigned nir_deref_instr_ptr_as_array_stride(nir_deref_instr *instr); 1185 1186typedef struct { 1187 nir_instr instr; 1188 1189 struct nir_function *callee; 1190 1191 unsigned num_params; 1192 nir_src params[]; 1193} nir_call_instr; 1194 1195#include "nir_intrinsics.h" 1196 1197#define NIR_INTRINSIC_MAX_CONST_INDEX 4 1198 1199/** Represents an intrinsic 1200 * 1201 * An intrinsic is an instruction type for handling things that are 1202 * more-or-less regular operations but don't just consume and produce SSA 1203 * values like ALU operations do. 
Intrinsics are not for things that have 1204 * special semantic meaning such as phi nodes and parallel copies. 1205 * Examples of intrinsics include variable load/store operations, system 1206 * value loads, and the like. Even though texturing more-or-less falls 1207 * under this category, texturing is its own instruction type because 1208 * trying to represent texturing with intrinsics would lead to a 1209 * combinatorial explosion of intrinsic opcodes. 1210 * 1211 * By having a single instruction type for handling a lot of different 1212 * cases, optimization passes can look for intrinsics and, for the most 1213 * part, completely ignore them. Each intrinsic type also has a few 1214 * possible flags that govern whether or not they can be reordered or 1215 * eliminated. That way passes like dead code elimination can still work 1216 * on intrisics without understanding the meaning of each. 1217 * 1218 * Each intrinsic has some number of constant indices, some number of 1219 * variables, and some number of sources. What these sources, variables, 1220 * and indices mean depends on the intrinsic and is documented with the 1221 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture 1222 * instructions are the only types of instruction that can operate on 1223 * variables. 1224 */ 1225typedef struct { 1226 nir_instr instr; 1227 1228 nir_intrinsic_op intrinsic; 1229 1230 nir_dest dest; 1231 1232 /** number of components if this is a vectorized intrinsic 1233 * 1234 * Similarly to ALU operations, some intrinsics are vectorized. 1235 * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. 1236 * For vectorized intrinsics, the num_components field specifies the 1237 * number of destination components and the number of source components 1238 * for all sources with nir_intrinsic_infos.src_components[i] == 0. 
1239 */ 1240 uint8_t num_components; 1241 1242 int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; 1243 1244 nir_src src[]; 1245} nir_intrinsic_instr; 1246 1247static inline nir_variable * 1248nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) 1249{ 1250 return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); 1251} 1252 1253/** 1254 * \name NIR intrinsics semantic flags 1255 * 1256 * information about what the compiler can do with the intrinsics. 1257 * 1258 * \sa nir_intrinsic_info::flags 1259 */ 1260typedef enum { 1261 /** 1262 * whether the intrinsic can be safely eliminated if none of its output 1263 * value is not being used. 1264 */ 1265 NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), 1266 1267 /** 1268 * Whether the intrinsic can be reordered with respect to any other 1269 * intrinsic, i.e. whether the only reordering dependencies of the 1270 * intrinsic are due to the register reads/writes. 1271 */ 1272 NIR_INTRINSIC_CAN_REORDER = (1 << 1), 1273} nir_intrinsic_semantic_flag; 1274 1275/** 1276 * \name NIR intrinsics const-index flag 1277 * 1278 * Indicates the usage of a const_index slot. 1279 * 1280 * \sa nir_intrinsic_info::index_map 1281 */ 1282typedef enum { 1283 /** 1284 * Generally instructions that take a offset src argument, can encode 1285 * a constant 'base' value which is added to the offset. 1286 */ 1287 NIR_INTRINSIC_BASE = 1, 1288 1289 /** 1290 * For store instructions, a writemask for the store. 1291 */ 1292 NIR_INTRINSIC_WRMASK = 2, 1293 1294 /** 1295 * The stream-id for GS emit_vertex/end_primitive intrinsics. 1296 */ 1297 NIR_INTRINSIC_STREAM_ID = 3, 1298 1299 /** 1300 * The clip-plane id for load_user_clip_plane intrinsic. 1301 */ 1302 NIR_INTRINSIC_UCP_ID = 4, 1303 1304 /** 1305 * The amount of data, starting from BASE, that this instruction may 1306 * access. This is used to provide bounds if the offset is not constant. 
1307 */ 1308 NIR_INTRINSIC_RANGE = 5, 1309 1310 /** 1311 * The Vulkan descriptor set for vulkan_resource_index intrinsic. 1312 */ 1313 NIR_INTRINSIC_DESC_SET = 6, 1314 1315 /** 1316 * The Vulkan descriptor set binding for vulkan_resource_index intrinsic. 1317 */ 1318 NIR_INTRINSIC_BINDING = 7, 1319 1320 /** 1321 * Component offset. 1322 */ 1323 NIR_INTRINSIC_COMPONENT = 8, 1324 1325 /** 1326 * Interpolation mode (only meaningful for FS inputs). 1327 */ 1328 NIR_INTRINSIC_INTERP_MODE = 9, 1329 1330 /** 1331 * A binary nir_op to use when performing a reduction or scan operation 1332 */ 1333 NIR_INTRINSIC_REDUCTION_OP = 10, 1334 1335 /** 1336 * Cluster size for reduction operations 1337 */ 1338 NIR_INTRINSIC_CLUSTER_SIZE = 11, 1339 1340 /** 1341 * Parameter index for a load_param intrinsic 1342 */ 1343 NIR_INTRINSIC_PARAM_IDX = 12, 1344 1345 /** 1346 * Image dimensionality for image intrinsics 1347 * 1348 * One of GLSL_SAMPLER_DIM_* 1349 */ 1350 NIR_INTRINSIC_IMAGE_DIM = 13, 1351 1352 /** 1353 * Non-zero if we are accessing an array image 1354 */ 1355 NIR_INTRINSIC_IMAGE_ARRAY = 14, 1356 1357 /** 1358 * Image format for image intrinsics 1359 */ 1360 NIR_INTRINSIC_FORMAT = 15, 1361 1362 /** 1363 * Access qualifiers for image and memory access intrinsics 1364 */ 1365 NIR_INTRINSIC_ACCESS = 16, 1366 1367 /** 1368 * Alignment for offsets and addresses 1369 * 1370 * These two parameters, specify an alignment in terms of a multiplier and 1371 * an offset. The offset or address parameter X of the intrinsic is 1372 * guaranteed to satisfy the following: 1373 * 1374 * (X - align_offset) % align_mul == 0 1375 */ 1376 NIR_INTRINSIC_ALIGN_MUL = 17, 1377 NIR_INTRINSIC_ALIGN_OFFSET = 18, 1378 1379 /** 1380 * The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. 
1381 */ 1382 NIR_INTRINSIC_DESC_TYPE = 19, 1383 1384 /* Separate source/dest access flags for copies */ 1385 NIR_INTRINSIC_SRC_ACCESS, 1386 NIR_INTRINSIC_DST_ACCESS, 1387 1388 NIR_INTRINSIC_NUM_INDEX_FLAGS, 1389 1390} nir_intrinsic_index_flag; 1391 1392#define NIR_INTRINSIC_MAX_INPUTS 5 1393 1394typedef struct { 1395 const char *name; 1396 1397 unsigned num_srcs; /** < number of register/SSA inputs */ 1398 1399 /** number of components of each input register 1400 * 1401 * If this value is 0, the number of components is given by the 1402 * num_components field of nir_intrinsic_instr. If this value is -1, the 1403 * intrinsic consumes however many components are provided and it is not 1404 * validated at all. 1405 */ 1406 int src_components[NIR_INTRINSIC_MAX_INPUTS]; 1407 1408 bool has_dest; 1409 1410 /** number of components of the output register 1411 * 1412 * If this value is 0, the number of components is given by the 1413 * num_components field of nir_intrinsic_instr. 1414 */ 1415 unsigned dest_components; 1416 1417 /** bitfield of legal bit sizes */ 1418 unsigned dest_bit_sizes; 1419 1420 /** the number of constant indices used by the intrinsic */ 1421 unsigned num_indices; 1422 1423 /** indicates the usage of intr->const_index[n] */ 1424 unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1425 1426 /** semantic flags for calls to this intrinsic */ 1427 nir_intrinsic_semantic_flag flags; 1428} nir_intrinsic_info; 1429 1430extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1431 1432static inline unsigned 1433nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) 1434{ 1435 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1436 assert(srcn < info->num_srcs); 1437 if (info->src_components[srcn] > 0) 1438 return info->src_components[srcn]; 1439 else if (info->src_components[srcn] == 0) 1440 return intr->num_components; 1441 else 1442 return nir_src_num_components(intr->src[srcn]); 1443} 1444 1445static 
inline unsigned 1446nir_intrinsic_dest_components(nir_intrinsic_instr *intr) 1447{ 1448 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1449 if (!info->has_dest) 1450 return 0; 1451 else if (info->dest_components) 1452 return info->dest_components; 1453 else 1454 return intr->num_components; 1455} 1456 1457#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ 1458static inline type \ 1459nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ 1460{ \ 1461 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1462 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1463 return (type)instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \ 1464} \ 1465static inline void \ 1466nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ 1467{ \ 1468 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1469 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1470 instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ 1471} 1472 1473INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) 1474INTRINSIC_IDX_ACCESSORS(base, BASE, int) 1475INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) 1476INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) 1477INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) 1478INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) 1479INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) 1480INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) 1481INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned) 1482INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned) 1483INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned) 1484INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) 1485INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) 1486INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) 1487INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) 1488INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum 
gl_access_qualifier) 1489INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) 1490INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) 1491INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) 1492INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) 1493INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) 1494 1495static inline void 1496nir_intrinsic_set_align(nir_intrinsic_instr *intrin, 1497 unsigned align_mul, unsigned align_offset) 1498{ 1499 assert(util_is_power_of_two_nonzero(align_mul)); 1500 assert(align_offset < align_mul); 1501 nir_intrinsic_set_align_mul(intrin, align_mul); 1502 nir_intrinsic_set_align_offset(intrin, align_offset); 1503} 1504 1505/** Returns a simple alignment for a load/store intrinsic offset 1506 * 1507 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 1508 * and ALIGN_OFFSET parameters, this helper takes both into account and 1509 * provides a single simple alignment parameter. The offset X is guaranteed 1510 * to satisfy X % align == 0. 1511 */ 1512static inline unsigned 1513nir_intrinsic_align(const nir_intrinsic_instr *intrin) 1514{ 1515 const unsigned align_mul = nir_intrinsic_align_mul(intrin); 1516 const unsigned align_offset = nir_intrinsic_align_offset(intrin); 1517 assert(align_offset < align_mul); 1518 return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; 1519} 1520 1521/* Converts a image_deref_* intrinsic into a image_* one */ 1522void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 1523 nir_ssa_def *handle, bool bindless); 1524 1525/* Determine if an intrinsic can be arbitrarily reordered and eliminated. 
*/ 1526static inline bool 1527nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 1528{ 1529 const nir_intrinsic_info *info = 1530 &nir_intrinsic_infos[instr->intrinsic]; 1531 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 1532 (info->flags & NIR_INTRINSIC_CAN_REORDER); 1533} 1534 1535/** 1536 * \group texture information 1537 * 1538 * This gives semantic information about textures which is useful to the 1539 * frontend, the backend, and lowering passes, but not the optimizer. 1540 */ 1541 1542typedef enum { 1543 nir_tex_src_coord, 1544 nir_tex_src_projector, 1545 nir_tex_src_comparator, /* shadow comparator */ 1546 nir_tex_src_offset, 1547 nir_tex_src_bias, 1548 nir_tex_src_lod, 1549 nir_tex_src_min_lod, 1550 nir_tex_src_ms_index, /* MSAA sample index */ 1551 nir_tex_src_ms_mcs, /* MSAA compression value */ 1552 nir_tex_src_ddx, 1553 nir_tex_src_ddy, 1554 nir_tex_src_texture_deref, /* < deref pointing to the texture */ 1555 nir_tex_src_sampler_deref, /* < deref pointing to the sampler */ 1556 nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ 1557 nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ 1558 nir_tex_src_texture_handle, /* < bindless texture handle */ 1559 nir_tex_src_sampler_handle, /* < bindless sampler handle */ 1560 nir_tex_src_plane, /* < selects plane for planar textures */ 1561 nir_num_tex_src_types 1562} nir_tex_src_type; 1563 1564typedef struct { 1565 nir_src src; 1566 nir_tex_src_type src_type; 1567} nir_tex_src; 1568 1569typedef enum { 1570 nir_texop_tex, /**< Regular texture look-up */ 1571 nir_texop_txb, /**< Texture look-up with LOD bias */ 1572 nir_texop_txl, /**< Texture look-up with explicit LOD */ 1573 nir_texop_txd, /**< Texture look-up with partial derivatives */ 1574 nir_texop_txf, /**< Texel fetch with explicit LOD */ 1575 nir_texop_txf_ms, /**< Multisample texture fetch */ 1576 nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 1577 nir_texop_txf_ms_mcs, /**< 
Multisample compression value fetch */ 1578 nir_texop_txs, /**< Texture size */ 1579 nir_texop_lod, /**< Texture lod query */ 1580 nir_texop_tg4, /**< Texture gather */ 1581 nir_texop_query_levels, /**< Texture levels query */ 1582 nir_texop_texture_samples, /**< Texture samples query */ 1583 nir_texop_samples_identical, /**< Query whether all samples are definitely 1584 * identical. 1585 */ 1586} nir_texop; 1587 1588typedef struct { 1589 nir_instr instr; 1590 1591 enum glsl_sampler_dim sampler_dim; 1592 nir_alu_type dest_type; 1593 1594 nir_texop op; 1595 nir_dest dest; 1596 nir_tex_src *src; 1597 unsigned num_srcs, coord_components; 1598 bool is_array, is_shadow; 1599 1600 /** 1601 * If is_shadow is true, whether this is the old-style shadow that outputs 4 1602 * components or the new-style shadow that outputs 1 component. 1603 */ 1604 bool is_new_style_shadow; 1605 1606 /* gather component selector */ 1607 unsigned component : 2; 1608 1609 /* gather offsets */ 1610 int8_t tg4_offsets[4][2]; 1611 1612 /* True if the texture index or handle is not dynamically uniform */ 1613 bool texture_non_uniform; 1614 1615 /* True if the sampler index or handle is not dynamically uniform */ 1616 bool sampler_non_uniform; 1617 1618 /** The texture index 1619 * 1620 * If this texture instruction has a nir_tex_src_texture_offset source, 1621 * then the texture index is given by texture_index + texture_offset. 
1622 */ 1623 unsigned texture_index; 1624 1625 /** The size of the texture array or 0 if it's not an array */ 1626 unsigned texture_array_size; 1627 1628 /** The sampler index 1629 * 1630 * The following operations do not require a sampler and, as such, this 1631 * field should be ignored: 1632 * - nir_texop_txf 1633 * - nir_texop_txf_ms 1634 * - nir_texop_txs 1635 * - nir_texop_lod 1636 * - nir_texop_query_levels 1637 * - nir_texop_texture_samples 1638 * - nir_texop_samples_identical 1639 * 1640 * If this texture instruction has a nir_tex_src_sampler_offset source, 1641 * then the sampler index is given by sampler_index + sampler_offset. 1642 */ 1643 unsigned sampler_index; 1644} nir_tex_instr; 1645 1646static inline unsigned 1647nir_tex_instr_dest_size(const nir_tex_instr *instr) 1648{ 1649 switch (instr->op) { 1650 case nir_texop_txs: { 1651 unsigned ret; 1652 switch (instr->sampler_dim) { 1653 case GLSL_SAMPLER_DIM_1D: 1654 case GLSL_SAMPLER_DIM_BUF: 1655 ret = 1; 1656 break; 1657 case GLSL_SAMPLER_DIM_2D: 1658 case GLSL_SAMPLER_DIM_CUBE: 1659 case GLSL_SAMPLER_DIM_MS: 1660 case GLSL_SAMPLER_DIM_RECT: 1661 case GLSL_SAMPLER_DIM_EXTERNAL: 1662 case GLSL_SAMPLER_DIM_SUBPASS: 1663 ret = 2; 1664 break; 1665 case GLSL_SAMPLER_DIM_3D: 1666 ret = 3; 1667 break; 1668 default: 1669 unreachable("not reached"); 1670 } 1671 if (instr->is_array) 1672 ret++; 1673 return ret; 1674 } 1675 1676 case nir_texop_lod: 1677 return 2; 1678 1679 case nir_texop_texture_samples: 1680 case nir_texop_query_levels: 1681 case nir_texop_samples_identical: 1682 return 1; 1683 1684 default: 1685 if (instr->is_shadow && instr->is_new_style_shadow) 1686 return 1; 1687 1688 return 4; 1689 } 1690} 1691 1692/* Returns true if this texture operation queries something about the texture 1693 * rather than actually sampling it. 
1694 */ 1695static inline bool 1696nir_tex_instr_is_query(const nir_tex_instr *instr) 1697{ 1698 switch (instr->op) { 1699 case nir_texop_txs: 1700 case nir_texop_lod: 1701 case nir_texop_texture_samples: 1702 case nir_texop_query_levels: 1703 case nir_texop_txf_ms_mcs: 1704 return true; 1705 case nir_texop_tex: 1706 case nir_texop_txb: 1707 case nir_texop_txl: 1708 case nir_texop_txd: 1709 case nir_texop_txf: 1710 case nir_texop_txf_ms: 1711 case nir_texop_txf_ms_fb: 1712 case nir_texop_tg4: 1713 return false; 1714 default: 1715 unreachable("Invalid texture opcode"); 1716 } 1717} 1718 1719static inline bool 1720nir_alu_instr_is_comparison(const nir_alu_instr *instr) 1721{ 1722 switch (instr->op) { 1723 case nir_op_flt: 1724 case nir_op_fge: 1725 case nir_op_feq: 1726 case nir_op_fne: 1727 case nir_op_ilt: 1728 case nir_op_ult: 1729 case nir_op_ige: 1730 case nir_op_uge: 1731 case nir_op_ieq: 1732 case nir_op_ine: 1733 case nir_op_i2b1: 1734 case nir_op_f2b1: 1735 case nir_op_inot: 1736 case nir_op_fnot: 1737 return true; 1738 default: 1739 return false; 1740 } 1741} 1742 1743static inline nir_alu_type 1744nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) 1745{ 1746 switch (instr->src[src].src_type) { 1747 case nir_tex_src_coord: 1748 switch (instr->op) { 1749 case nir_texop_txf: 1750 case nir_texop_txf_ms: 1751 case nir_texop_txf_ms_fb: 1752 case nir_texop_txf_ms_mcs: 1753 case nir_texop_samples_identical: 1754 return nir_type_int; 1755 1756 default: 1757 return nir_type_float; 1758 } 1759 1760 case nir_tex_src_lod: 1761 switch (instr->op) { 1762 case nir_texop_txs: 1763 case nir_texop_txf: 1764 return nir_type_int; 1765 1766 default: 1767 return nir_type_float; 1768 } 1769 1770 case nir_tex_src_projector: 1771 case nir_tex_src_comparator: 1772 case nir_tex_src_bias: 1773 case nir_tex_src_ddx: 1774 case nir_tex_src_ddy: 1775 return nir_type_float; 1776 1777 case nir_tex_src_offset: 1778 case nir_tex_src_ms_index: 1779 case 
nir_tex_src_texture_offset: 1780 case nir_tex_src_sampler_offset: 1781 return nir_type_int; 1782 1783 default: 1784 unreachable("Invalid texture source type"); 1785 } 1786} 1787 1788static inline unsigned 1789nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) 1790{ 1791 if (instr->src[src].src_type == nir_tex_src_coord) 1792 return instr->coord_components; 1793 1794 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */ 1795 if (instr->src[src].src_type == nir_tex_src_ms_mcs) 1796 return 4; 1797 1798 if (instr->src[src].src_type == nir_tex_src_ddx || 1799 instr->src[src].src_type == nir_tex_src_ddy) { 1800 if (instr->is_array) 1801 return instr->coord_components - 1; 1802 else 1803 return instr->coord_components; 1804 } 1805 1806 /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for 1807 * the offset, since a cube maps to a single face. 1808 */ 1809 if (instr->src[src].src_type == nir_tex_src_offset) { 1810 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) 1811 return 2; 1812 else if (instr->is_array) 1813 return instr->coord_components - 1; 1814 else 1815 return instr->coord_components; 1816 } 1817 1818 return 1; 1819} 1820 1821static inline int 1822nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type) 1823{ 1824 for (unsigned i = 0; i < instr->num_srcs; i++) 1825 if (instr->src[i].src_type == type) 1826 return (int) i; 1827 1828 return -1; 1829} 1830 1831void nir_tex_instr_add_src(nir_tex_instr *tex, 1832 nir_tex_src_type src_type, 1833 nir_src src); 1834 1835void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 1836 1837bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex); 1838 1839typedef struct { 1840 nir_instr instr; 1841 1842 nir_ssa_def def; 1843 1844 nir_const_value value[]; 1845} nir_load_const_instr; 1846 1847#define nir_const_load_to_arr(arr, l, m) \ 1848{ \ 1849 nir_const_value_to_array(arr, l->value, l->def.num_components, m); \ 1850} while (false); 1851 
1852typedef enum { 1853 nir_jump_return, 1854 nir_jump_break, 1855 nir_jump_continue, 1856} nir_jump_type; 1857 1858typedef struct { 1859 nir_instr instr; 1860 nir_jump_type type; 1861} nir_jump_instr; 1862 1863/* creates a new SSA variable in an undefined state */ 1864 1865typedef struct { 1866 nir_instr instr; 1867 nir_ssa_def def; 1868} nir_ssa_undef_instr; 1869 1870typedef struct { 1871 struct exec_node node; 1872 1873 /* The predecessor block corresponding to this source */ 1874 struct nir_block *pred; 1875 1876 nir_src src; 1877} nir_phi_src; 1878 1879#define nir_foreach_phi_src(phi_src, phi) \ 1880 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 1881#define nir_foreach_phi_src_safe(phi_src, phi) \ 1882 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 1883 1884typedef struct { 1885 nir_instr instr; 1886 1887 struct exec_list srcs; /** < list of nir_phi_src */ 1888 1889 nir_dest dest; 1890} nir_phi_instr; 1891 1892typedef struct { 1893 struct exec_node node; 1894 nir_src src; 1895 nir_dest dest; 1896} nir_parallel_copy_entry; 1897 1898#define nir_foreach_parallel_copy_entry(entry, pcopy) \ 1899 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 1900 1901typedef struct { 1902 nir_instr instr; 1903 1904 /* A list of nir_parallel_copy_entrys. The sources of all of the 1905 * entries are copied to the corresponding destinations "in parallel". 1906 * In other words, if we have two entries: a -> b and b -> a, the values 1907 * get swapped. 
1908 */ 1909 struct exec_list entries; 1910} nir_parallel_copy_instr; 1911 1912NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 1913 type, nir_instr_type_alu) 1914NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 1915 type, nir_instr_type_deref) 1916NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 1917 type, nir_instr_type_call) 1918NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 1919 type, nir_instr_type_jump) 1920NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 1921 type, nir_instr_type_tex) 1922NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 1923 type, nir_instr_type_intrinsic) 1924NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 1925 type, nir_instr_type_load_const) 1926NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 1927 type, nir_instr_type_ssa_undef) 1928NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 1929 type, nir_instr_type_phi) 1930NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 1931 nir_parallel_copy_instr, instr, 1932 type, nir_instr_type_parallel_copy) 1933 1934typedef struct { 1935 nir_ssa_def *def; 1936 unsigned comp; 1937} nir_ssa_scalar; 1938 1939static inline bool 1940nir_ssa_scalar_is_const(nir_ssa_scalar s) 1941{ 1942 return s.def->parent_instr->type == nir_instr_type_load_const; 1943} 1944 1945static inline nir_const_value 1946nir_ssa_scalar_as_const_value(nir_ssa_scalar s) 1947{ 1948 assert(s.comp < s.def->num_components); 1949 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 1950 return load->value[s.comp]; 1951} 1952 1953#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 1954static inline type \ 1955nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ 1956{ \ 1957 return nir_const_value_as_##suffix( \ 1958 nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ 1959} 1960 
1961NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 1962NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 1963NIR_DEFINE_SCALAR_AS_CONST(bool, bool) 1964NIR_DEFINE_SCALAR_AS_CONST(double, float) 1965 1966#undef NIR_DEFINE_SCALAR_AS_CONST 1967 1968static inline bool 1969nir_ssa_scalar_is_alu(nir_ssa_scalar s) 1970{ 1971 return s.def->parent_instr->type == nir_instr_type_alu; 1972} 1973 1974static inline nir_op 1975nir_ssa_scalar_alu_op(nir_ssa_scalar s) 1976{ 1977 return nir_instr_as_alu(s.def->parent_instr)->op; 1978} 1979 1980static inline nir_ssa_scalar 1981nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 1982{ 1983 nir_ssa_scalar out = { NULL, 0 }; 1984 1985 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 1986 assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 1987 1988 /* Our component must be written */ 1989 assert(s.comp < s.def->num_components); 1990 assert(alu->dest.write_mask & (1u << s.comp)); 1991 1992 assert(alu->src[alu_src_idx].src.is_ssa); 1993 out.def = alu->src[alu_src_idx].src.ssa; 1994 1995 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 1996 /* The ALU src is unsized so the source component follows the 1997 * destination component. 1998 */ 1999 out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2000 } else { 2001 /* This is a sized source so all source components work together to 2002 * produce all the destination components. Since we need to return a 2003 * scalar, this only works if the source is a scalar. 2004 */ 2005 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2006 out.comp = alu->src[alu_src_idx].swizzle[0]; 2007 } 2008 assert(out.comp < out.def->num_components); 2009 2010 return out; 2011} 2012 2013/* 2014 * Control flow 2015 * 2016 * Control flow consists of a tree of control flow nodes, which include 2017 * if-statements and loops. The leaves of the tree are basic blocks, lists of 2018 * instructions that always run start-to-finish. 
Each basic block also keeps 2019 * track of its successors (blocks which may run immediately after the current 2020 * block) and predecessors (blocks which could have run immediately before the 2021 * current block). Each function also has a start block and an end block which 2022 * all return statements point to (which is always empty). Together, all the 2023 * blocks with their predecessors and successors make up the control flow 2024 * graph (CFG) of the function. There are helpers that modify the tree of 2025 * control flow nodes while modifying the CFG appropriately; these should be 2026 * used instead of modifying the tree directly. 2027 */ 2028 2029typedef enum { 2030 nir_cf_node_block, 2031 nir_cf_node_if, 2032 nir_cf_node_loop, 2033 nir_cf_node_function 2034} nir_cf_node_type; 2035 2036typedef struct nir_cf_node { 2037 struct exec_node node; 2038 nir_cf_node_type type; 2039 struct nir_cf_node *parent; 2040} nir_cf_node; 2041 2042typedef struct nir_block { 2043 nir_cf_node cf_node; 2044 2045 struct exec_list instr_list; /** < list of nir_instr */ 2046 2047 /** generic block index; generated by nir_index_blocks */ 2048 unsigned index; 2049 2050 /* 2051 * Each block can only have up to 2 successors, so we put them in a simple 2052 * array - no need for anything more complicated. 2053 */ 2054 struct nir_block *successors[2]; 2055 2056 /* Set of nir_block predecessors in the CFG */ 2057 struct set *predecessors; 2058 2059 /* 2060 * this node's immediate dominator in the dominance tree - set to NULL for 2061 * the start block. 
2062 */ 2063 struct nir_block *imm_dom; 2064 2065 /* This node's children in the dominance tree */ 2066 unsigned num_dom_children; 2067 struct nir_block **dom_children; 2068 2069 /* Set of nir_blocks on the dominance frontier of this block */ 2070 struct set *dom_frontier; 2071 2072 /* 2073 * These two indices have the property that dom_{pre,post}_index for each 2074 * child of this block in the dominance tree will always be between 2075 * dom_pre_index and dom_post_index for this block, which makes testing if 2076 * a given block is dominated by another block an O(1) operation. 2077 */ 2078 unsigned dom_pre_index, dom_post_index; 2079 2080 /* live in and out for this block; used for liveness analysis */ 2081 BITSET_WORD *live_in; 2082 BITSET_WORD *live_out; 2083} nir_block; 2084 2085static inline nir_instr * 2086nir_block_first_instr(nir_block *block) 2087{ 2088 struct exec_node *head = exec_list_get_head(&block->instr_list); 2089 return exec_node_data(nir_instr, head, node); 2090} 2091 2092static inline nir_instr * 2093nir_block_last_instr(nir_block *block) 2094{ 2095 struct exec_node *tail = exec_list_get_tail(&block->instr_list); 2096 return exec_node_data(nir_instr, tail, node); 2097} 2098 2099static inline bool 2100nir_block_ends_in_jump(nir_block *block) 2101{ 2102 return !exec_list_is_empty(&block->instr_list) && 2103 nir_block_last_instr(block)->type == nir_instr_type_jump; 2104} 2105 2106#define nir_foreach_instr(instr, block) \ 2107 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 2108#define nir_foreach_instr_reverse(instr, block) \ 2109 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 2110#define nir_foreach_instr_safe(instr, block) \ 2111 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 2112#define nir_foreach_instr_reverse_safe(instr, block) \ 2113 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 2114 2115typedef enum { 2116 nir_selection_control_none = 0x0, 
2117 nir_selection_control_flatten = 0x1, 2118 nir_selection_control_dont_flatten = 0x2, 2119} nir_selection_control; 2120 2121typedef struct nir_if { 2122 nir_cf_node cf_node; 2123 nir_src condition; 2124 nir_selection_control control; 2125 2126 struct exec_list then_list; /** < list of nir_cf_node */ 2127 struct exec_list else_list; /** < list of nir_cf_node */ 2128} nir_if; 2129 2130typedef struct { 2131 nir_if *nif; 2132 2133 /** Instruction that generates nif::condition. */ 2134 nir_instr *conditional_instr; 2135 2136 /** Block within ::nif that has the break instruction. */ 2137 nir_block *break_block; 2138 2139 /** Last block for the then- or else-path that does not contain the break. */ 2140 nir_block *continue_from_block; 2141 2142 /** True when ::break_block is in the else-path of ::nif. */ 2143 bool continue_from_then; 2144 bool induction_rhs; 2145 2146 /* This is true if the terminators exact trip count is unknown. For 2147 * example: 2148 * 2149 * for (int i = 0; i < imin(x, 4); i++) 2150 * ... 2151 * 2152 * Here loop analysis would have set a max_trip_count of 4 however we dont 2153 * know for sure that this is the exact trip count. 2154 */ 2155 bool exact_trip_count_unknown; 2156 2157 struct list_head loop_terminator_link; 2158} nir_loop_terminator; 2159 2160typedef struct { 2161 /* Estimated cost (in number of instructions) of the loop */ 2162 unsigned instr_cost; 2163 2164 /* Guessed trip count based on array indexing */ 2165 unsigned guessed_trip_count; 2166 2167 /* Maximum number of times the loop is run (if known) */ 2168 unsigned max_trip_count; 2169 2170 /* Do we know the exact number of times the loop will be run */ 2171 bool exact_trip_count_known; 2172 2173 /* Unroll the loop regardless of its size */ 2174 bool force_unroll; 2175 2176 /* Does the loop contain complex loop terminators, continues or other 2177 * complex behaviours? If this is true we can't rely on 2178 * loop_terminator_list to be complete or accurate. 
2179 */ 2180 bool complex_loop; 2181 2182 nir_loop_terminator *limiting_terminator; 2183 2184 /* A list of loop_terminators terminating this loop. */ 2185 struct list_head loop_terminator_list; 2186} nir_loop_info; 2187 2188typedef enum { 2189 nir_loop_control_none = 0x0, 2190 nir_loop_control_unroll = 0x1, 2191 nir_loop_control_dont_unroll = 0x2, 2192} nir_loop_control; 2193 2194typedef struct { 2195 nir_cf_node cf_node; 2196 2197 struct exec_list body; /** < list of nir_cf_node */ 2198 2199 nir_loop_info *info; 2200 nir_loop_control control; 2201 bool partially_unrolled; 2202} nir_loop; 2203 2204/** 2205 * Various bits of metadata that can may be created or required by 2206 * optimization and analysis passes 2207 */ 2208typedef enum { 2209 nir_metadata_none = 0x0, 2210 nir_metadata_block_index = 0x1, 2211 nir_metadata_dominance = 0x2, 2212 nir_metadata_live_ssa_defs = 0x4, 2213 nir_metadata_not_properly_reset = 0x8, 2214 nir_metadata_loop_analysis = 0x10, 2215} nir_metadata; 2216 2217typedef struct { 2218 nir_cf_node cf_node; 2219 2220 /** pointer to the function of which this is an implementation */ 2221 struct nir_function *function; 2222 2223 struct exec_list body; /** < list of nir_cf_node */ 2224 2225 nir_block *end_block; 2226 2227 /** list for all local variables in the function */ 2228 struct exec_list locals; 2229 2230 /** list of local registers in the function */ 2231 struct exec_list registers; 2232 2233 /** next available local register index */ 2234 unsigned reg_alloc; 2235 2236 /** next available SSA value index */ 2237 unsigned ssa_alloc; 2238 2239 /* total number of basic blocks, only valid when block_index_dirty = false */ 2240 unsigned num_blocks; 2241 2242 nir_metadata valid_metadata; 2243} nir_function_impl; 2244 2245ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 2246nir_start_block(nir_function_impl *impl) 2247{ 2248 return (nir_block *) impl->body.head_sentinel.next; 2249} 2250 2251ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 
2252nir_impl_last_block(nir_function_impl *impl) 2253{ 2254 return (nir_block *) impl->body.tail_sentinel.prev; 2255} 2256 2257static inline nir_cf_node * 2258nir_cf_node_next(nir_cf_node *node) 2259{ 2260 struct exec_node *next = exec_node_get_next(&node->node); 2261 if (exec_node_is_tail_sentinel(next)) 2262 return NULL; 2263 else 2264 return exec_node_data(nir_cf_node, next, node); 2265} 2266 2267static inline nir_cf_node * 2268nir_cf_node_prev(nir_cf_node *node) 2269{ 2270 struct exec_node *prev = exec_node_get_prev(&node->node); 2271 if (exec_node_is_head_sentinel(prev)) 2272 return NULL; 2273 else 2274 return exec_node_data(nir_cf_node, prev, node); 2275} 2276 2277static inline bool 2278nir_cf_node_is_first(const nir_cf_node *node) 2279{ 2280 return exec_node_is_head_sentinel(node->node.prev); 2281} 2282 2283static inline bool 2284nir_cf_node_is_last(const nir_cf_node *node) 2285{ 2286 return exec_node_is_tail_sentinel(node->node.next); 2287} 2288 2289NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 2290 type, nir_cf_node_block) 2291NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 2292 type, nir_cf_node_if) 2293NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 2294 type, nir_cf_node_loop) 2295NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 2296 nir_function_impl, cf_node, type, nir_cf_node_function) 2297 2298static inline nir_block * 2299nir_if_first_then_block(nir_if *if_stmt) 2300{ 2301 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 2302 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2303} 2304 2305static inline nir_block * 2306nir_if_last_then_block(nir_if *if_stmt) 2307{ 2308 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 2309 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2310} 2311 2312static inline nir_block * 2313nir_if_first_else_block(nir_if *if_stmt) 2314{ 2315 struct exec_node *head = 
exec_list_get_head(&if_stmt->else_list); 2316 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2317} 2318 2319static inline nir_block * 2320nir_if_last_else_block(nir_if *if_stmt) 2321{ 2322 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 2323 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2324} 2325 2326static inline nir_block * 2327nir_loop_first_block(nir_loop *loop) 2328{ 2329 struct exec_node *head = exec_list_get_head(&loop->body); 2330 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2331} 2332 2333static inline nir_block * 2334nir_loop_last_block(nir_loop *loop) 2335{ 2336 struct exec_node *tail = exec_list_get_tail(&loop->body); 2337 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 2338} 2339 2340/** 2341 * Return true if this list of cf_nodes contains a single empty block. 2342 */ 2343static inline bool 2344nir_cf_list_is_empty_block(struct exec_list *cf_list) 2345{ 2346 if (exec_list_is_singular(cf_list)) { 2347 struct exec_node *head = exec_list_get_head(cf_list); 2348 nir_block *block = 2349 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 2350 return exec_list_is_empty(&block->instr_list); 2351 } 2352 return false; 2353} 2354 2355typedef struct { 2356 uint8_t num_components; 2357 uint8_t bit_size; 2358} nir_parameter; 2359 2360typedef struct nir_function { 2361 struct exec_node node; 2362 2363 const char *name; 2364 struct nir_shader *shader; 2365 2366 unsigned num_params; 2367 nir_parameter *params; 2368 2369 /** The implementation of this function. 2370 * 2371 * If the function is only declared and not implemented, this is NULL. 
/**
 * Driver-specific knobs that tell NIR which operations to lower and how.
 *
 * nir_shader::options points at one of these.  Fields that carry no comment
 * below are not described by anything visible in this header; their names
 * follow the same lower_<opcode> pattern as the documented ones.
 */
typedef struct nir_shader_compiler_options {
   bool lower_fdiv;
   bool lower_ffma;
   bool fuse_ffma;
   bool lower_flrp16;
   bool lower_flrp32;
   /** Lowers flrp when it does not support doubles. */
   bool lower_flrp64;
   bool lower_fpow;
   bool lower_fsat;
   bool lower_fsqrt;
   bool lower_fmod16;
   bool lower_fmod32;
   bool lower_fmod64;
   /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
   bool lower_bitfield_extract;
   /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
   bool lower_bitfield_extract_to_shifts;
   /** Lowers bitfield_insert to bfi/bfm. */
   bool lower_bitfield_insert;
   /** Lowers bitfield_insert to bfm, compares, and shifts. */
   bool lower_bitfield_insert_to_shifts;
   /** Lowers bitfield_reverse to shifts. */
   bool lower_bitfield_reverse;
   /** Lowers bit_count to shifts. */
   bool lower_bit_count;
   /** Lowers bfm to shifts and subtracts. */
   bool lower_bfm;
   /** Lowers ifind_msb to compare and ufind_msb. */
   bool lower_ifind_msb;
   /** Lowers find_lsb to ufind_msb and logic ops. */
   bool lower_find_lsb;
   bool lower_uadd_carry;
   bool lower_usub_borrow;
   /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
   bool lower_mul_high;
   /** Lowers fneg and ineg to fsub and isub. */
   bool lower_negate;
   /** Lowers fsub and isub to fadd+fneg and iadd+ineg. */
   bool lower_sub;

   /** Lowers {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f. */
   bool lower_scmp;

   /** Enables rules to lower idiv by power-of-two. */
   bool lower_idiv;

   /** Enables rules to lower isign to imin+imax. */
   bool lower_isign;

   /** Enables rules to lower fsign to fsub and flt. */
   bool lower_fsign;

   /**
    * Does the native fdot instruction replicate its result for four
    * components?  If so, then opt_algebraic_late will turn all fdotN
    * instructions into fdot_replicatedN instructions.
    */
   bool fdot_replicates;

   /** Lowers ffloor to fsub+ffract. */
   bool lower_ffloor;

   /** Lowers ffract to fsub+ffloor. */
   bool lower_ffract;

   /** Lowers fceil to fneg+ffloor+fneg. */
   bool lower_fceil;

   bool lower_ftrunc;

   bool lower_ldexp;

   bool lower_pack_half_2x16;
   bool lower_pack_unorm_2x16;
   bool lower_pack_snorm_2x16;
   bool lower_pack_unorm_4x8;
   bool lower_pack_snorm_4x8;
   bool lower_unpack_half_2x16;
   bool lower_unpack_unorm_2x16;
   bool lower_unpack_snorm_2x16;
   bool lower_unpack_unorm_4x8;
   bool lower_unpack_snorm_4x8;

   bool lower_extract_byte;
   bool lower_extract_word;

   bool lower_all_io_to_temps;
   bool lower_all_io_to_elements;

   /**
    * Does the driver support real 32-bit integers?  (Otherwise, integers
    * are simulated by floats.)
    */
   bool native_integers;

   /** Indicates that the driver only has zero-based vertex id. */
   bool vertex_id_zero_based;

   /**
    * If enabled, gl_BaseVertex will be lowered as:
    *
    *    is_indexed_draw (~0/0) & firstvertex
    */
   bool lower_base_vertex;

   /**
    * If enabled, gl_HelperInvocation will be lowered as:
    *
    *    !((1 << sample_id) & sample_mask_in)
    *
    * This depends on some possibly hardware-specific implementation
    * details, which may not be true for all hardware.  In particular, it
    * assumes that the FS is only executed for covered samples or for helper
    * invocations.  So, do not blindly enable this option.
    *
    * Note: See also issue #22 in ARB_shader_image_load_store
    */
   bool lower_helper_invocation;

   /**
    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
    *
    *    gl_SampleMaskIn == 0  --->  gl_HelperInvocation
    *    gl_SampleMaskIn != 0  --->  !gl_HelperInvocation
    */
   bool optimize_sample_mask_in;

   bool lower_cs_local_index_from_id;
   bool lower_cs_local_id_from_index;

   bool lower_device_index_to_zero;

   /** Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
   bool lower_wpos_pntc;

   bool lower_hadd;
   bool lower_add_sat;

   /**
    * Should nir_lower_io() create load_interpolated_input intrinsics?
    *
    * If not, it generates regular load_input intrinsics and interpolation
    * information must be inferred from the list of input nir_variables.
    */
   bool use_interpolated_input_intrinsics;

   /** Lowers when 32x32->64 bit multiplication is not supported. */
   bool lower_mul_2x32_64;

   /* NOTE(review): undocumented in the original; presumably an upper bound
    * on the trip count the loop unroller will expand -- confirm.
    */
   unsigned max_unroll_iterations;

   /** Which 64-bit integer operations to lower (nir_lower_int64_options). */
   nir_lower_int64_options lower_int64_options;
   /** Which double operations to lower (nir_lower_doubles_options). */
   nir_lower_doubles_options lower_doubles_options;
} nir_shader_compiler_options;
2581 */ 2582 const struct nir_shader_compiler_options *options; 2583 2584 /** Various bits of compile-time information about a given shader */ 2585 struct shader_info info; 2586 2587 /** list of global variables in the shader (nir_variable) */ 2588 struct exec_list globals; 2589 2590 /** list of system value variables in the shader (nir_variable) */ 2591 struct exec_list system_values; 2592 2593 struct exec_list functions; /** < list of nir_function */ 2594 2595 /** 2596 * the highest index a load_input_*, load_uniform_*, etc. intrinsic can 2597 * access plus one 2598 */ 2599 unsigned num_inputs, num_uniforms, num_outputs, num_shared; 2600 2601 /** Size in bytes of required scratch space */ 2602 unsigned scratch_size; 2603 2604 /** Constant data associated with this shader. 2605 * 2606 * Constant data is loaded through load_constant intrinsics. See also 2607 * nir_opt_large_constants. 2608 */ 2609 void *constant_data; 2610 unsigned constant_data_size; 2611} nir_shader; 2612 2613#define nir_foreach_function(func, shader) \ 2614 foreach_list_typed(nir_function, func, node, &(shader)->functions) 2615 2616static inline nir_function_impl * 2617nir_shader_get_entrypoint(nir_shader *shader) 2618{ 2619 nir_function *func = NULL; 2620 2621 nir_foreach_function(function, shader) { 2622 assert(func == NULL); 2623 if (function->is_entrypoint) { 2624 func = function; 2625#ifndef NDEBUG 2626 break; 2627#endif 2628 } 2629 } 2630 2631 if (!func) 2632 return NULL; 2633 2634 assert(func->num_params == 0); 2635 assert(func->impl); 2636 return func->impl; 2637} 2638 2639nir_shader *nir_shader_create(void *mem_ctx, 2640 gl_shader_stage stage, 2641 const nir_shader_compiler_options *options, 2642 shader_info *si); 2643 2644nir_register *nir_local_reg_create(nir_function_impl *impl); 2645 2646void nir_reg_remove(nir_register *reg); 2647 2648/** Adds a variable to the appropriate list in nir_shader */ 2649void nir_shader_add_variable(nir_shader *shader, nir_variable *var); 2650 2651static 
inline void 2652nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 2653{ 2654 assert(var->data.mode == nir_var_function_temp); 2655 exec_list_push_tail(&impl->locals, &var->node); 2656} 2657 2658/** creates a variable, sets a few defaults, and adds it to the list */ 2659nir_variable *nir_variable_create(nir_shader *shader, 2660 nir_variable_mode mode, 2661 const struct glsl_type *type, 2662 const char *name); 2663/** creates a local variable and adds it to the list */ 2664nir_variable *nir_local_variable_create(nir_function_impl *impl, 2665 const struct glsl_type *type, 2666 const char *name); 2667 2668/** creates a function and adds it to the shader's list of functions */ 2669nir_function *nir_function_create(nir_shader *shader, const char *name); 2670 2671nir_function_impl *nir_function_impl_create(nir_function *func); 2672/** creates a function_impl that isn't tied to any particular function */ 2673nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 2674 2675nir_block *nir_block_create(nir_shader *shader); 2676nir_if *nir_if_create(nir_shader *shader); 2677nir_loop *nir_loop_create(nir_shader *shader); 2678 2679nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 2680 2681/** requests that the given pieces of metadata be generated */ 2682void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 2683/** dirties all but the preserved metadata */ 2684void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 2685 2686/** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ 2687nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 2688 2689nir_deref_instr *nir_deref_instr_create(nir_shader *shader, 2690 nir_deref_type deref_type); 2691 2692nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 2693 2694nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 2695 unsigned num_components, 2696 unsigned bit_size); 2697 2698nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 2699 nir_intrinsic_op op); 2700 2701nir_call_instr *nir_call_instr_create(nir_shader *shader, 2702 nir_function *callee); 2703 2704nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 2705 2706nir_phi_instr *nir_phi_instr_create(nir_shader *shader); 2707 2708nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 2709 2710nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 2711 unsigned num_components, 2712 unsigned bit_size); 2713 2714nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 2715 2716/** 2717 * NIR Cursors and Instruction Insertion API 2718 * @{ 2719 * 2720 * A tiny struct representing a point to insert/extract instructions or 2721 * control flow nodes. Helps reduce the combinatorial explosion of possible 2722 * points to insert/extract. 
2723 * 2724 * \sa nir_control_flow.h 2725 */ 2726typedef enum { 2727 nir_cursor_before_block, 2728 nir_cursor_after_block, 2729 nir_cursor_before_instr, 2730 nir_cursor_after_instr, 2731} nir_cursor_option; 2732 2733typedef struct { 2734 nir_cursor_option option; 2735 union { 2736 nir_block *block; 2737 nir_instr *instr; 2738 }; 2739} nir_cursor; 2740 2741static inline nir_block * 2742nir_cursor_current_block(nir_cursor cursor) 2743{ 2744 if (cursor.option == nir_cursor_before_instr || 2745 cursor.option == nir_cursor_after_instr) { 2746 return cursor.instr->block; 2747 } else { 2748 return cursor.block; 2749 } 2750} 2751 2752bool nir_cursors_equal(nir_cursor a, nir_cursor b); 2753 2754static inline nir_cursor 2755nir_before_block(nir_block *block) 2756{ 2757 nir_cursor cursor; 2758 cursor.option = nir_cursor_before_block; 2759 cursor.block = block; 2760 return cursor; 2761} 2762 2763static inline nir_cursor 2764nir_after_block(nir_block *block) 2765{ 2766 nir_cursor cursor; 2767 cursor.option = nir_cursor_after_block; 2768 cursor.block = block; 2769 return cursor; 2770} 2771 2772static inline nir_cursor 2773nir_before_instr(nir_instr *instr) 2774{ 2775 nir_cursor cursor; 2776 cursor.option = nir_cursor_before_instr; 2777 cursor.instr = instr; 2778 return cursor; 2779} 2780 2781static inline nir_cursor 2782nir_after_instr(nir_instr *instr) 2783{ 2784 nir_cursor cursor; 2785 cursor.option = nir_cursor_after_instr; 2786 cursor.instr = instr; 2787 return cursor; 2788} 2789 2790static inline nir_cursor 2791nir_after_block_before_jump(nir_block *block) 2792{ 2793 nir_instr *last_instr = nir_block_last_instr(block); 2794 if (last_instr && last_instr->type == nir_instr_type_jump) { 2795 return nir_before_instr(last_instr); 2796 } else { 2797 return nir_after_block(block); 2798 } 2799} 2800 2801static inline nir_cursor 2802nir_before_src(nir_src *src, bool is_if_condition) 2803{ 2804 if (is_if_condition) { 2805 nir_block *prev_block = 2806 
nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node)); 2807 assert(!nir_block_ends_in_jump(prev_block)); 2808 return nir_after_block(prev_block); 2809 } else if (src->parent_instr->type == nir_instr_type_phi) { 2810#ifndef NDEBUG 2811 nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr); 2812 bool found = false; 2813 nir_foreach_phi_src(phi_src, cond_phi) { 2814 if (phi_src->src.ssa == src->ssa) { 2815 found = true; 2816 break; 2817 } 2818 } 2819 assert(found); 2820#endif 2821 /* The LIST_ENTRY macro is a generic container-of macro, it just happens 2822 * to have a more specific name. 2823 */ 2824 nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src); 2825 return nir_after_block_before_jump(phi_src->pred); 2826 } else { 2827 return nir_before_instr(src->parent_instr); 2828 } 2829} 2830 2831static inline nir_cursor 2832nir_before_cf_node(nir_cf_node *node) 2833{ 2834 if (node->type == nir_cf_node_block) 2835 return nir_before_block(nir_cf_node_as_block(node)); 2836 2837 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 2838} 2839 2840static inline nir_cursor 2841nir_after_cf_node(nir_cf_node *node) 2842{ 2843 if (node->type == nir_cf_node_block) 2844 return nir_after_block(nir_cf_node_as_block(node)); 2845 2846 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 2847} 2848 2849static inline nir_cursor 2850nir_after_phis(nir_block *block) 2851{ 2852 nir_foreach_instr(instr, block) { 2853 if (instr->type != nir_instr_type_phi) 2854 return nir_before_instr(instr); 2855 } 2856 return nir_after_block(block); 2857} 2858 2859static inline nir_cursor 2860nir_after_cf_node_and_phis(nir_cf_node *node) 2861{ 2862 if (node->type == nir_cf_node_block) 2863 return nir_after_block(nir_cf_node_as_block(node)); 2864 2865 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 2866 2867 return nir_after_phis(block); 2868} 2869 2870static inline nir_cursor 2871nir_before_cf_list(struct exec_list *cf_list) 2872{ 
2873 nir_cf_node *first_node = exec_node_data(nir_cf_node, 2874 exec_list_get_head(cf_list), node); 2875 return nir_before_cf_node(first_node); 2876} 2877 2878static inline nir_cursor 2879nir_after_cf_list(struct exec_list *cf_list) 2880{ 2881 nir_cf_node *last_node = exec_node_data(nir_cf_node, 2882 exec_list_get_tail(cf_list), node); 2883 return nir_after_cf_node(last_node); 2884} 2885 2886/** 2887 * Insert a NIR instruction at the given cursor. 2888 * 2889 * Note: This does not update the cursor. 2890 */ 2891void nir_instr_insert(nir_cursor cursor, nir_instr *instr); 2892 2893static inline void 2894nir_instr_insert_before(nir_instr *instr, nir_instr *before) 2895{ 2896 nir_instr_insert(nir_before_instr(instr), before); 2897} 2898 2899static inline void 2900nir_instr_insert_after(nir_instr *instr, nir_instr *after) 2901{ 2902 nir_instr_insert(nir_after_instr(instr), after); 2903} 2904 2905static inline void 2906nir_instr_insert_before_block(nir_block *block, nir_instr *before) 2907{ 2908 nir_instr_insert(nir_before_block(block), before); 2909} 2910 2911static inline void 2912nir_instr_insert_after_block(nir_block *block, nir_instr *after) 2913{ 2914 nir_instr_insert(nir_after_block(block), after); 2915} 2916 2917static inline void 2918nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 2919{ 2920 nir_instr_insert(nir_before_cf_node(node), before); 2921} 2922 2923static inline void 2924nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 2925{ 2926 nir_instr_insert(nir_after_cf_node(node), after); 2927} 2928 2929static inline void 2930nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 2931{ 2932 nir_instr_insert(nir_before_cf_list(list), before); 2933} 2934 2935static inline void 2936nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 2937{ 2938 nir_instr_insert(nir_after_cf_list(list), after); 2939} 2940 2941void nir_instr_remove_v(nir_instr *instr); 2942 2943static inline nir_cursor 
2944nir_instr_remove(nir_instr *instr) 2945{ 2946 nir_cursor cursor; 2947 nir_instr *prev = nir_instr_prev(instr); 2948 if (prev) { 2949 cursor = nir_after_instr(prev); 2950 } else { 2951 cursor = nir_before_block(instr->block); 2952 } 2953 nir_instr_remove_v(instr); 2954 return cursor; 2955} 2956 2957/** @} */ 2958 2959typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); 2960typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); 2961typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 2962bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, 2963 void *state); 2964bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); 2965bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 2966 2967nir_const_value *nir_src_as_const_value(nir_src src); 2968 2969#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \ 2970static inline c_type * \ 2971nir_src_as_ ## name (nir_src src) \ 2972{ \ 2973 return src.is_ssa && src.ssa->parent_instr->type == type_enum \ 2974 ? 
cast_macro(src.ssa->parent_instr) : NULL; \ 2975} 2976 2977NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 2978NIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 2979 nir_instr_type_intrinsic, nir_instr_as_intrinsic) 2980NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 2981 2982bool nir_src_is_dynamically_uniform(nir_src src); 2983bool nir_srcs_equal(nir_src src1, nir_src src2); 2984bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 2985void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 2986void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 2987void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 2988void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 2989 nir_dest new_dest); 2990 2991void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 2992 unsigned num_components, unsigned bit_size, 2993 const char *name); 2994void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 2995 unsigned num_components, unsigned bit_size, 2996 const char *name); 2997static inline void 2998nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 2999 const struct glsl_type *type, 3000 const char *name) 3001{ 3002 assert(glsl_type_is_vector_or_scalar(type)); 3003 nir_ssa_dest_init(instr, dest, glsl_get_components(type), 3004 glsl_get_bit_size(type), name); 3005} 3006void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); 3007void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, 3008 nir_instr *after_me); 3009 3010nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 3011 3012/* 3013 * finds the next basic block in source-code order, returns NULL if there is 3014 * none 3015 */ 3016 3017nir_block *nir_block_cf_tree_next(nir_block *block); 3018 3019/* Performs the opposite of nir_block_cf_tree_next() */ 3020 3021nir_block *nir_block_cf_tree_prev(nir_block *block); 3022 3023/* Gets the first 
/* Variant of nir_foreach_block() that looks up the successor of the current
 * block before the loop body runs and advances to that saved successor
 * afterwards (presumably so the body may remove or relocate the current
 * block -- confirm against callers).
 *
 * Things to be aware of when using it:
 *  - besides `block`, the loop declares a second variable literally named
 *    `next`, which is visible inside the loop body;
 *  - once the saved successor is NULL, the increment still evaluates
 *    nir_block_cf_tree_next(block) with block == NULL before the loop
 *    condition ends the loop, so that function is called with NULL.
 */
#define nir_foreach_block_safe(block, impl) \
   for (nir_block *block = nir_start_block(impl), \
        *next = nir_block_cf_tree_next(block); \
        block != NULL; \
        block = next, next = nir_block_cf_tree_next(block))
/**
 * Reports whether \p name appears in the comma-separated NIR_SKIP
 * environment variable.
 *
 * The variable is read once and cached in a function-local static; an unset
 * variable is cached as the empty string, for which the answer is always
 * false.
 */
static inline bool
should_skip_nir(const char *name)
{
   static const char *skip_list = NULL;

   if (skip_list == NULL) {
      /* Comma separated list of names to skip. */
      const char *env = getenv("NIR_SKIP");
      skip_list = env != NULL ? env : "";
   }

   return skip_list[0] != '\0' &&
          comma_separated_list_contains(skip_list, name);
}
/* Runs `pass(nir, ...)` through _PASS() and records whether it made progress.
 *
 *  progress  lvalue that is set to true when the pass returns true; it is
 *            never written otherwise, so the caller must initialise it.
 *  nir       shader variable.  _PASS() may assign a new shader to it (clone
 *            or serialize/deserialize round trip), so it must be an lvalue.
 *  pass      pass function; its name is also stringified for the NIR_SKIP
 *            check and for the printed messages.
 *  ...       extra arguments forwarded to the pass after `nir`.
 *
 * The metadata validation flag is set before the pass runs and is checked
 * only when the pass reported progress.  When should_print_nir() is true the
 * pass name is printed before the pass runs, and the shader is printed
 * afterwards only if progress was made.
 */
#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir,          \
   nir_metadata_set_validation_flag(nir);                            \
   if (should_print_nir())                                           \
      printf("%s\n", #pass);                                         \
   if (pass(nir, ##__VA_ARGS__)) {                                   \
      progress = true;                                               \
      if (should_print_nir())                                        \
         nir_print_shader(nir, stdout);                              \
      nir_metadata_check_validation_flag(nir);                       \
   }                                                                 \
)
/**
 * Flags accepted by nir_lower_array_deref_of_vec() through its `options`
 * parameter.  Each enumerator is a distinct bit.
 *
 * NOTE(review): the names suggest that direct/indirect distinguishes
 * constant from non-constant array indices and that load/store selects the
 * kind of access being lowered -- confirm against the pass implementation.
 */
typedef enum {
   nir_lower_direct_array_deref_of_vec_load     = (1 << 0),
   nir_lower_indirect_array_deref_of_vec_load   = (1 << 1),
   nir_lower_direct_array_deref_of_vec_store    = (1 << 2),
   nir_lower_indirect_array_deref_of_vec_store  = (1 << 3),
} nir_lower_array_deref_of_vec_options;
uint64_t *used_by_other_stage, 3268 uint64_t *used_by_other_stage_patches); 3269void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 3270 bool default_to_smooth_interp); 3271void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 3272bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 3273 3274typedef enum { 3275 /* If set, this forces all non-flat fragment shader inputs to be 3276 * interpolated as if with the "sample" qualifier. This requires 3277 * nir_shader_compiler_options::use_interpolated_input_intrinsics. 3278 */ 3279 nir_lower_io_force_sample_interpolation = (1 << 1), 3280} nir_lower_io_options; 3281bool nir_lower_io(nir_shader *shader, 3282 nir_variable_mode modes, 3283 int (*type_size)(const struct glsl_type *, bool), 3284 nir_lower_io_options); 3285 3286typedef enum { 3287 /** 3288 * An address format which is a simple 32-bit global GPU address. 3289 */ 3290 nir_address_format_32bit_global, 3291 3292 /** 3293 * An address format which is a simple 64-bit global GPU address. 3294 */ 3295 nir_address_format_64bit_global, 3296 3297 /** 3298 * An address format which is a bounds-checked 64-bit global GPU address. 3299 * 3300 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 3301 * address stored with the low bits in .x and high bits in .y, .z is a 3302 * size, and .w is an offset. When the final I/O operation is lowered, .w 3303 * is checked against .z and the operation is predicated on the result. 3304 */ 3305 nir_address_format_64bit_bounded_global, 3306 3307 /** 3308 * An address format which is comprised of a vec2 where the first 3309 * component is a buffer index and the second is an offset. 
3310 */ 3311 nir_address_format_32bit_index_offset, 3312} nir_address_format; 3313 3314static inline unsigned 3315nir_address_format_bit_size(nir_address_format addr_format) 3316{ 3317 switch (addr_format) { 3318 case nir_address_format_32bit_global: return 32; 3319 case nir_address_format_64bit_global: return 64; 3320 case nir_address_format_64bit_bounded_global: return 32; 3321 case nir_address_format_32bit_index_offset: return 32; 3322 } 3323 unreachable("Invalid address format"); 3324} 3325 3326static inline unsigned 3327nir_address_format_num_components(nir_address_format addr_format) 3328{ 3329 switch (addr_format) { 3330 case nir_address_format_32bit_global: return 1; 3331 case nir_address_format_64bit_global: return 1; 3332 case nir_address_format_64bit_bounded_global: return 4; 3333 case nir_address_format_32bit_index_offset: return 2; 3334 } 3335 unreachable("Invalid address format"); 3336} 3337 3338static inline const struct glsl_type * 3339nir_address_format_to_glsl_type(nir_address_format addr_format) 3340{ 3341 unsigned bit_size = nir_address_format_bit_size(addr_format); 3342 assert(bit_size == 32 || bit_size == 64); 3343 return glsl_vector_type(bit_size == 32 ? 
GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 3344 nir_address_format_num_components(addr_format)); 3345} 3346 3347nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 3348 nir_deref_instr *deref, 3349 nir_ssa_def *base_addr, 3350 nir_address_format addr_format); 3351void nir_lower_explicit_io_instr(struct nir_builder *b, 3352 nir_intrinsic_instr *io_instr, 3353 nir_ssa_def *addr, 3354 nir_address_format addr_format); 3355 3356bool nir_lower_explicit_io(nir_shader *shader, 3357 nir_variable_mode modes, 3358 nir_address_format); 3359 3360nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 3361nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 3362 3363bool nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage); 3364 3365bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 3366bool nir_lower_regs_to_ssa(nir_shader *shader); 3367bool nir_lower_vars_to_ssa(nir_shader *shader); 3368 3369bool nir_remove_dead_derefs(nir_shader *shader); 3370bool nir_remove_dead_derefs_impl(nir_function_impl *impl); 3371bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); 3372bool nir_lower_constant_initializers(nir_shader *shader, 3373 nir_variable_mode modes); 3374 3375bool nir_move_load_const(nir_shader *shader); 3376bool nir_move_vec_src_uses_to_dest(nir_shader *shader); 3377bool nir_lower_vec_to_movs(nir_shader *shader); 3378void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 3379 bool alpha_to_one); 3380bool nir_lower_alu(nir_shader *shader); 3381bool nir_lower_alu_to_scalar(nir_shader *shader); 3382bool nir_lower_bool_to_float(nir_shader *shader); 3383bool nir_lower_bool_to_int32(nir_shader *shader); 3384bool nir_lower_load_const_to_scalar(nir_shader *shader); 3385bool nir_lower_read_invocation_to_scalar(nir_shader *shader); 3386bool nir_lower_phis_to_scalar(nir_shader *shader); 3387void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 3388void 
nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 3389 bool outputs_only); 3390void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 3391void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 3392bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); 3393 3394void nir_lower_fragcoord_wtrans(nir_shader *shader); 3395void nir_lower_viewport_transform(nir_shader *shader); 3396bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier); 3397 3398typedef struct nir_lower_subgroups_options { 3399 uint8_t subgroup_size; 3400 uint8_t ballot_bit_size; 3401 bool lower_to_scalar:1; 3402 bool lower_vote_trivial:1; 3403 bool lower_vote_eq_to_ballot:1; 3404 bool lower_subgroup_masks:1; 3405 bool lower_shuffle:1; 3406 bool lower_shuffle_to_32bit:1; 3407 bool lower_quad:1; 3408} nir_lower_subgroups_options; 3409 3410bool nir_lower_subgroups(nir_shader *shader, 3411 const nir_lower_subgroups_options *options); 3412 3413bool nir_lower_system_values(nir_shader *shader); 3414 3415enum PACKED nir_lower_tex_packing { 3416 nir_lower_tex_packing_none = 0, 3417 /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed 3418 * or unsigned ints based on the sampler type 3419 */ 3420 nir_lower_tex_packing_16, 3421 /* The sampler returns 1 32-bit word of 4x8 unorm */ 3422 nir_lower_tex_packing_8, 3423}; 3424 3425typedef struct nir_lower_tex_options { 3426 /** 3427 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 3428 * sampler types a texture projector is lowered. 3429 */ 3430 unsigned lower_txp; 3431 3432 /** 3433 * If true, lower away nir_tex_src_offset for all texelfetch instructions. 3434 */ 3435 bool lower_txf_offset; 3436 3437 /** 3438 * If true, lower away nir_tex_src_offset for all rect textures. 
3439 */ 3440 bool lower_rect_offset; 3441 3442 /** 3443 * If true, lower rect textures to 2D, using txs to fetch the 3444 * texture dimensions and dividing the texture coords by the 3445 * texture dims to normalize. 3446 */ 3447 bool lower_rect; 3448 3449 /** 3450 * If true, convert yuv to rgb. 3451 */ 3452 unsigned lower_y_uv_external; 3453 unsigned lower_y_u_v_external; 3454 unsigned lower_yx_xuxv_external; 3455 unsigned lower_xy_uxvx_external; 3456 unsigned lower_ayuv_external; 3457 unsigned lower_xyuv_external; 3458 3459 /** 3460 * To emulate certain texture wrap modes, this can be used 3461 * to saturate the specified tex coord to [0.0, 1.0]. The 3462 * bits are according to sampler #, ie. if, for example: 3463 * 3464 * (conf->saturate_s & (1 << n)) 3465 * 3466 * is true, then the s coord for sampler n is saturated. 3467 * 3468 * Note that clamping must happen *after* projector lowering 3469 * so any projected texture sample instruction with a clamped 3470 * coordinate gets automatically lowered, regardless of the 3471 * 'lower_txp' setting. 3472 */ 3473 unsigned saturate_s; 3474 unsigned saturate_t; 3475 unsigned saturate_r; 3476 3477 /* Bitmask of textures that need swizzling. 3478 * 3479 * If (swizzle_result & (1 << texture_index)), then the swizzle in 3480 * swizzles[texture_index] is applied to the result of the texturing 3481 * operation. 3482 */ 3483 unsigned swizzle_result; 3484 3485 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 3486 * while 4 and 5 represent 0 and 1 respectively. 3487 */ 3488 uint8_t swizzles[32][4]; 3489 3490 /* Can be used to scale sampled values in range required by the format. */ 3491 float scale_factors[32]; 3492 3493 /** 3494 * Bitmap of textures that need srgb to linear conversion. If 3495 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 3496 * of the texture are lowered to linear. 
3497 */ 3498 unsigned lower_srgb; 3499 3500 /** 3501 * If true, lower nir_texop_tex on shaders that doesn't support implicit 3502 * LODs to nir_texop_txl. 3503 */ 3504 bool lower_tex_without_implicit_lod; 3505 3506 /** 3507 * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 3508 */ 3509 bool lower_txd_cube_map; 3510 3511 /** 3512 * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 3513 */ 3514 bool lower_txd_3d; 3515 3516 /** 3517 * If true, lower nir_texop_txd on shadow samplers (except cube maps) 3518 * with nir_texop_txl. Notice that cube map shadow samplers are lowered 3519 * with lower_txd_cube_map. 3520 */ 3521 bool lower_txd_shadow; 3522 3523 /** 3524 * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 3525 * Implies lower_txd_cube_map and lower_txd_shadow. 3526 */ 3527 bool lower_txd; 3528 3529 /** 3530 * If true, lower nir_texop_txb that try to use shadow compare and min_lod 3531 * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 3532 */ 3533 bool lower_txb_shadow_clamp; 3534 3535 /** 3536 * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 3537 * with nir_texop_txl. This includes cube maps. 3538 */ 3539 bool lower_txd_shadow_clamp; 3540 3541 /** 3542 * If true, lower nir_texop_txd on when it uses both offset and min_lod 3543 * with nir_texop_txl. This includes cube maps. 3544 */ 3545 bool lower_txd_offset_clamp; 3546 3547 /** 3548 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 3549 * sampler is bindless. 3550 */ 3551 bool lower_txd_clamp_bindless_sampler; 3552 3553 /** 3554 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 3555 * sampler index is not statically determinable to be less than 16. 3556 */ 3557 bool lower_txd_clamp_if_sampler_index_not_lt_16; 3558 3559 /** 3560 * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 3561 * mixed-up tg4 locations. 
3562 */ 3563 bool lower_tg4_broadcom_swizzle; 3564 3565 /** 3566 * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 3567 */ 3568 bool lower_tg4_offsets; 3569 3570 enum nir_lower_tex_packing lower_tex_packing[32]; 3571} nir_lower_tex_options; 3572 3573bool nir_lower_tex(nir_shader *shader, 3574 const nir_lower_tex_options *options); 3575 3576enum nir_lower_non_uniform_access_type { 3577 nir_lower_non_uniform_ubo_access = (1 << 0), 3578 nir_lower_non_uniform_ssbo_access = (1 << 1), 3579 nir_lower_non_uniform_texture_access = (1 << 2), 3580 nir_lower_non_uniform_image_access = (1 << 3), 3581}; 3582 3583bool nir_lower_non_uniform_access(nir_shader *shader, 3584 enum nir_lower_non_uniform_access_type); 3585 3586bool nir_lower_idiv(nir_shader *shader); 3587 3588bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars); 3589bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); 3590bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); 3591 3592bool nir_lower_frexp(nir_shader *nir); 3593 3594void nir_lower_two_sided_color(nir_shader *shader); 3595 3596bool nir_lower_clamp_color_outputs(nir_shader *shader); 3597 3598void nir_lower_passthrough_edgeflags(nir_shader *shader); 3599bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, 3600 const gl_state_index16 *uniform_state_tokens); 3601 3602typedef struct nir_lower_wpos_ytransform_options { 3603 gl_state_index16 state_tokens[STATE_LENGTH]; 3604 bool fs_coord_origin_upper_left :1; 3605 bool fs_coord_origin_lower_left :1; 3606 bool fs_coord_pixel_center_integer :1; 3607 bool fs_coord_pixel_center_half_integer :1; 3608} nir_lower_wpos_ytransform_options; 3609 3610bool nir_lower_wpos_ytransform(nir_shader *shader, 3611 const nir_lower_wpos_ytransform_options *options); 3612bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); 3613 3614bool nir_lower_fb_read(nir_shader *shader); 3615 3616typedef struct nir_lower_drawpixels_options { 3617 
gl_state_index16 texcoord_state_tokens[STATE_LENGTH]; 3618 gl_state_index16 scale_state_tokens[STATE_LENGTH]; 3619 gl_state_index16 bias_state_tokens[STATE_LENGTH]; 3620 unsigned drawpix_sampler; 3621 unsigned pixelmap_sampler; 3622 bool pixel_maps :1; 3623 bool scale_and_bias :1; 3624} nir_lower_drawpixels_options; 3625 3626void nir_lower_drawpixels(nir_shader *shader, 3627 const nir_lower_drawpixels_options *options); 3628 3629typedef struct nir_lower_bitmap_options { 3630 unsigned sampler; 3631 bool swizzle_xxxx; 3632} nir_lower_bitmap_options; 3633 3634void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); 3635 3636bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); 3637 3638typedef enum { 3639 nir_lower_int_source_mods = 1 << 0, 3640 nir_lower_float_source_mods = 1 << 1, 3641 nir_lower_triop_abs = 1 << 2, 3642 nir_lower_all_source_mods = (1 << 3) - 1 3643} nir_lower_to_source_mods_flags; 3644 3645 3646bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); 3647 3648bool nir_lower_gs_intrinsics(nir_shader *shader); 3649 3650typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); 3651 3652bool nir_lower_bit_size(nir_shader *shader, 3653 nir_lower_bit_size_callback callback, 3654 void *callback_data); 3655 3656nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode); 3657bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options); 3658 3659nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode); 3660bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, 3661 nir_lower_doubles_options options); 3662bool nir_lower_pack(nir_shader *shader); 3663 3664bool nir_normalize_cubemap_coords(nir_shader *shader); 3665 3666void nir_live_ssa_defs_impl(nir_function_impl *impl); 3667 3668void nir_loop_analyze_impl(nir_function_impl *impl, 3669 nir_variable_mode indirect_mask); 3670 3671bool 
nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); 3672 3673bool nir_repair_ssa_impl(nir_function_impl *impl); 3674bool nir_repair_ssa(nir_shader *shader); 3675 3676void nir_convert_loop_to_lcssa(nir_loop *loop); 3677 3678/* If phi_webs_only is true, only convert SSA values involved in phi nodes to 3679 * registers. If false, convert all values (even those not involved in a phi 3680 * node) to registers. 3681 */ 3682bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); 3683 3684bool nir_lower_phis_to_regs_block(nir_block *block); 3685bool nir_lower_ssa_defs_to_regs_block(nir_block *block); 3686bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); 3687 3688/* This is here for unit tests. */ 3689bool nir_opt_comparison_pre_impl(nir_function_impl *impl); 3690 3691bool nir_opt_comparison_pre(nir_shader *shader); 3692 3693bool nir_opt_algebraic(nir_shader *shader); 3694bool nir_opt_algebraic_before_ffma(nir_shader *shader); 3695bool nir_opt_algebraic_late(nir_shader *shader); 3696bool nir_opt_constant_folding(nir_shader *shader); 3697 3698bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); 3699 3700bool nir_copy_prop(nir_shader *shader); 3701 3702bool nir_opt_copy_prop_vars(nir_shader *shader); 3703 3704bool nir_opt_cse(nir_shader *shader); 3705 3706bool nir_opt_dce(nir_shader *shader); 3707 3708bool nir_opt_dead_cf(nir_shader *shader); 3709 3710bool nir_opt_dead_write_vars(nir_shader *shader); 3711 3712bool nir_opt_deref_impl(nir_function_impl *impl); 3713bool nir_opt_deref(nir_shader *shader); 3714 3715bool nir_opt_find_array_copies(nir_shader *shader); 3716 3717bool nir_opt_gcm(nir_shader *shader, bool value_number); 3718 3719bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); 3720 3721bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue); 3722 3723bool nir_opt_intrinsics(nir_shader *shader); 3724 3725bool nir_opt_large_constants(nir_shader *shader, 3726 
glsl_type_size_align_func size_align, 3727 unsigned threshold); 3728 3729bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); 3730 3731bool nir_opt_move_comparisons(nir_shader *shader); 3732 3733bool nir_opt_move_load_ubo(nir_shader *shader); 3734 3735bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, 3736 bool indirect_load_ok, bool expensive_alu_ok); 3737 3738bool nir_opt_remove_phis(nir_shader *shader); 3739bool nir_opt_remove_phis_block(nir_block *block); 3740 3741bool nir_opt_shrink_load(nir_shader *shader); 3742 3743bool nir_opt_trivial_continues(nir_shader *shader); 3744 3745bool nir_opt_undef(nir_shader *shader); 3746 3747bool nir_opt_conditional_discard(nir_shader *shader); 3748 3749void nir_strip(nir_shader *shader); 3750 3751void nir_sweep(nir_shader *shader); 3752 3753void nir_remap_dual_slot_attributes(nir_shader *shader, 3754 uint64_t *dual_slot_inputs); 3755uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); 3756 3757nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 3758gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 3759 3760#ifdef __cplusplus 3761} /* extern "C" */ 3762#endif 3763 3764#endif /* NIR_H */ 3765