1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Connor Abbott (cwabbott0@gmail.com) 25 * 26 */ 27 28#include "nir.h" 29#include "nir_builder.h" 30#include "nir_control_flow_private.h" 31#include "nir_worklist.h" 32#include "util/half_float.h" 33#include <limits.h> 34#include <assert.h> 35#include <math.h> 36#include "util/u_math.h" 37#include "util/u_qsort.h" 38 39#include "main/menums.h" /* BITFIELD64_MASK */ 40 41 42/** Return true if the component mask "mask" with bit size "old_bit_size" can 43 * be re-interpreted to be used with "new_bit_size". 44 */ 45bool 46nir_component_mask_can_reinterpret(nir_component_mask_t mask, 47 unsigned old_bit_size, 48 unsigned new_bit_size) 49{ 50 assert(util_is_power_of_two_nonzero(old_bit_size)); 51 assert(util_is_power_of_two_nonzero(new_bit_size)); 52 53 if (old_bit_size == new_bit_size) 54 return true; 55 56 if (old_bit_size == 1 || new_bit_size == 1) 57 return false; 58 59 if (old_bit_size > new_bit_size) { 60 unsigned ratio = old_bit_size / new_bit_size; 61 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS; 62 } 63 64 unsigned iter = mask; 65 while (iter) { 66 int start, count; 67 u_bit_scan_consecutive_range(&iter, &start, &count); 68 start *= old_bit_size; 69 count *= old_bit_size; 70 if (start % new_bit_size != 0) 71 return false; 72 if (count % new_bit_size != 0) 73 return false; 74 } 75 return true; 76} 77 78/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that 79 * it can be used can be used with "new_bit_size". 80 */ 81nir_component_mask_t 82nir_component_mask_reinterpret(nir_component_mask_t mask, 83 unsigned old_bit_size, 84 unsigned new_bit_size) 85{ 86 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size)); 87 88 if (old_bit_size == new_bit_size) 89 return mask; 90 91 nir_component_mask_t new_mask = 0; 92 unsigned iter = mask; 93 while (iter) { 94 int start, count; 95 u_bit_scan_consecutive_range(&iter, &start, &count); 96 start = start * old_bit_size / new_bit_size; 97 count = count * old_bit_size / new_bit_size; 98 new_mask |= BITFIELD_RANGE(start, count); 99 } 100 return new_mask; 101} 102 103static void 104nir_shader_destructor(void *ptr) 105{ 106 nir_shader *shader = ptr; 107 108 /* Free all instrs from the shader, since they're not ralloced. */ 109 list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) { 110 nir_instr_free(instr); 111 } 112} 113 114nir_shader * 115nir_shader_create(void *mem_ctx, 116 gl_shader_stage stage, 117 const nir_shader_compiler_options *options, 118 shader_info *si) 119{ 120 nir_shader *shader = rzalloc(mem_ctx, nir_shader); 121 ralloc_set_destructor(shader, nir_shader_destructor); 122 123 exec_list_make_empty(&shader->variables); 124 125 shader->options = options; 126 127 if (si) { 128 assert(si->stage == stage); 129 shader->info = *si; 130 } else { 131 shader->info.stage = stage; 132 } 133 134 exec_list_make_empty(&shader->functions); 135 136 list_inithead(&shader->gc_list); 137 138 shader->num_inputs = 0; 139 shader->num_outputs = 0; 140 shader->num_uniforms = 0; 141 142 return shader; 143} 144 145static nir_register * 146reg_create(void *mem_ctx, struct exec_list *list) 147{ 148 nir_register *reg = ralloc(mem_ctx, nir_register); 149 150 list_inithead(®->uses); 151 list_inithead(®->defs); 152 list_inithead(®->if_uses); 153 154 reg->num_components = 0; 155 reg->bit_size = 32; 156 reg->num_array_elems = 0; 157 reg->divergent = false; 158 159 exec_list_push_tail(list, ®->node); 160 161 return reg; 162} 163 164nir_register * 165nir_local_reg_create(nir_function_impl *impl) 166{ 167 nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); 168 reg->index = impl->reg_alloc++; 169 170 return reg; 171} 172 173void 174nir_reg_remove(nir_register *reg) 175{ 176 exec_node_remove(®->node); 177} 178 179void 180nir_shader_add_variable(nir_shader *shader, nir_variable *var) 181{ 182 switch (var->data.mode) { 183 case nir_var_function_temp: 184 assert(!"nir_shader_add_variable cannot be used for local variables"); 185 return; 186 187 case nir_var_shader_temp: 188 case nir_var_shader_in: 189 case nir_var_shader_out: 190 case nir_var_uniform: 191 case nir_var_mem_ubo: 192 case nir_var_mem_ssbo: 193 case nir_var_mem_shared: 194 case nir_var_system_value: 195 case nir_var_mem_push_const: 196 case nir_var_mem_constant: 197 case nir_var_shader_call_data: 198 case nir_var_ray_hit_attrib: 199 break; 200 201 case nir_var_mem_global: 202 assert(!"nir_shader_add_variable cannot be used for global memory"); 203 return; 204 205 default: 206 assert(!"invalid mode"); 207 return; 208 } 209 210 exec_list_push_tail(&shader->variables, &var->node); 211} 212 213nir_variable * 214nir_variable_create(nir_shader *shader, nir_variable_mode mode, 215 const struct glsl_type *type, const char *name) 216{ 217 nir_variable *var = rzalloc(shader, nir_variable); 218 var->name = ralloc_strdup(var, name); 219 var->type = type; 220 var->data.mode = mode; 221 var->data.how_declared = nir_var_declared_normally; 222 223 if ((mode == nir_var_shader_in && 224 shader->info.stage != MESA_SHADER_VERTEX && 225 shader->info.stage != MESA_SHADER_KERNEL) || 226 (mode == nir_var_shader_out && 227 shader->info.stage != MESA_SHADER_FRAGMENT)) 228 var->data.interpolation = INTERP_MODE_SMOOTH; 229 230 if (mode == nir_var_shader_in || mode == nir_var_uniform) 231 var->data.read_only = true; 232 233 nir_shader_add_variable(shader, var); 234 235 return var; 236} 237 238nir_variable * 239nir_local_variable_create(nir_function_impl *impl, 240 const struct glsl_type *type, const char *name) 241{ 242 nir_variable *var = rzalloc(impl->function->shader, nir_variable); 243 var->name = ralloc_strdup(var, name); 244 var->type = type; 245 var->data.mode = nir_var_function_temp; 246 247 nir_function_impl_add_variable(impl, var); 248 249 return var; 250} 251 252nir_variable * 253nir_find_variable_with_location(nir_shader *shader, 254 nir_variable_mode mode, 255 unsigned location) 256{ 257 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); 258 nir_foreach_variable_with_modes(var, shader, mode) { 259 if (var->data.location == location) 260 return var; 261 } 262 return NULL; 263} 264 265nir_variable * 266nir_find_variable_with_driver_location(nir_shader *shader, 267 nir_variable_mode mode, 268 unsigned location) 269{ 270 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); 271 nir_foreach_variable_with_modes(var, shader, mode) { 272 if (var->data.driver_location == location) 273 return var; 274 } 275 return NULL; 276} 277 278/* Annoyingly, qsort_r is not in the C standard library and, in particular, we 279 * can't count on it on MSV and Android. So we stuff the CMP function into 280 * each array element. It's a bit messy and burns more memory but the list of 281 * variables should hever be all that long. 282 */ 283struct var_cmp { 284 nir_variable *var; 285 int (*cmp)(const nir_variable *, const nir_variable *); 286}; 287 288static int 289var_sort_cmp(const void *_a, const void *_b, void *_cmp) 290{ 291 const struct var_cmp *a = _a; 292 const struct var_cmp *b = _b; 293 assert(a->cmp == b->cmp); 294 return a->cmp(a->var, b->var); 295} 296 297void 298nir_sort_variables_with_modes(nir_shader *shader, 299 int (*cmp)(const nir_variable *, 300 const nir_variable *), 301 nir_variable_mode modes) 302{ 303 unsigned num_vars = 0; 304 nir_foreach_variable_with_modes(var, shader, modes) { 305 ++num_vars; 306 } 307 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars); 308 unsigned i = 0; 309 nir_foreach_variable_with_modes_safe(var, shader, modes) { 310 exec_node_remove(&var->node); 311 vars[i++] = (struct var_cmp){ 312 .var = var, 313 .cmp = cmp, 314 }; 315 } 316 assert(i == num_vars); 317 318 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp); 319 320 for (i = 0; i < num_vars; i++) 321 exec_list_push_tail(&shader->variables, &vars[i].var->node); 322 323 ralloc_free(vars); 324} 325 326nir_function * 327nir_function_create(nir_shader *shader, const char *name) 328{ 329 nir_function *func = ralloc(shader, nir_function); 330 331 exec_list_push_tail(&shader->functions, &func->node); 332 333 func->name = ralloc_strdup(func, name); 334 func->shader = shader; 335 func->num_params = 0; 336 func->params = NULL; 337 func->impl = NULL; 338 func->is_entrypoint = false; 339 340 return func; 341} 342 343static bool src_has_indirect(nir_src *src) 344{ 345 return !src->is_ssa && src->reg.indirect; 346} 347 348static void src_free_indirects(nir_src *src) 349{ 350 if (src_has_indirect(src)) { 351 assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect); 352 free(src->reg.indirect); 353 src->reg.indirect = NULL; 354 } 355} 356 357static void dest_free_indirects(nir_dest *dest) 358{ 359 if (!dest->is_ssa && dest->reg.indirect) { 360 assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect); 361 free(dest->reg.indirect); 362 dest->reg.indirect = NULL; 363 } 364} 365 366/* NOTE: if the instruction you are copying a src to is already added 367 * to the IR, use nir_instr_rewrite_src() instead. 368 */ 369void nir_src_copy(nir_src *dest, const nir_src *src) 370{ 371 src_free_indirects(dest); 372 373 dest->is_ssa = src->is_ssa; 374 if (src->is_ssa) { 375 dest->ssa = src->ssa; 376 } else { 377 dest->reg.base_offset = src->reg.base_offset; 378 dest->reg.reg = src->reg.reg; 379 if (src->reg.indirect) { 380 dest->reg.indirect = calloc(1, sizeof(nir_src)); 381 nir_src_copy(dest->reg.indirect, src->reg.indirect); 382 } else { 383 dest->reg.indirect = NULL; 384 } 385 } 386} 387 388void nir_dest_copy(nir_dest *dest, const nir_dest *src) 389{ 390 /* Copying an SSA definition makes no sense whatsoever. */ 391 assert(!src->is_ssa); 392 393 dest_free_indirects(dest); 394 395 dest->is_ssa = false; 396 397 dest->reg.base_offset = src->reg.base_offset; 398 dest->reg.reg = src->reg.reg; 399 if (src->reg.indirect) { 400 dest->reg.indirect = calloc(1, sizeof(nir_src)); 401 nir_src_copy(dest->reg.indirect, src->reg.indirect); 402 } else { 403 dest->reg.indirect = NULL; 404 } 405} 406 407void 408nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src) 409{ 410 nir_src_copy(&dest->src, &src->src); 411 dest->abs = src->abs; 412 dest->negate = src->negate; 413 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) 414 dest->swizzle[i] = src->swizzle[i]; 415} 416 417void 418nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src) 419{ 420 nir_dest_copy(&dest->dest, &src->dest); 421 dest->write_mask = src->write_mask; 422 dest->saturate = src->saturate; 423} 424 425bool 426nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn) 427{ 428 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; 429 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS); 430 431 const nir_alu_src *src = &alu->src[srcn]; 432 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn); 433 434 return src->src.is_ssa && (src->src.ssa->num_components == num_components) && 435 !src->abs && !src->negate && 436 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0); 437} 438 439 440static void 441cf_init(nir_cf_node *node, nir_cf_node_type type) 442{ 443 exec_node_init(&node->node); 444 node->parent = NULL; 445 node->type = type; 446} 447 448nir_function_impl * 449nir_function_impl_create_bare(nir_shader *shader) 450{ 451 nir_function_impl *impl = ralloc(shader, nir_function_impl); 452 453 impl->function = NULL; 454 455 cf_init(&impl->cf_node, nir_cf_node_function); 456 457 exec_list_make_empty(&impl->body); 458 exec_list_make_empty(&impl->registers); 459 exec_list_make_empty(&impl->locals); 460 impl->reg_alloc = 0; 461 impl->ssa_alloc = 0; 462 impl->num_blocks = 0; 463 impl->valid_metadata = nir_metadata_none; 464 impl->structured = true; 465 466 /* create start & end blocks */ 467 nir_block *start_block = nir_block_create(shader); 468 nir_block *end_block = nir_block_create(shader); 469 start_block->cf_node.parent = &impl->cf_node; 470 end_block->cf_node.parent = &impl->cf_node; 471 impl->end_block = end_block; 472 473 exec_list_push_tail(&impl->body, &start_block->cf_node.node); 474 475 start_block->successors[0] = end_block; 476 _mesa_set_add(end_block->predecessors, start_block); 477 return impl; 478} 479 480nir_function_impl * 481nir_function_impl_create(nir_function *function) 482{ 483 assert(function->impl == NULL); 484 485 nir_function_impl *impl = nir_function_impl_create_bare(function->shader); 486 487 function->impl = impl; 488 impl->function = function; 489 490 return impl; 491} 492 493nir_block * 494nir_block_create(nir_shader *shader) 495{ 496 nir_block *block = rzalloc(shader, nir_block); 497 498 cf_init(&block->cf_node, nir_cf_node_block); 499 500 block->successors[0] = block->successors[1] = NULL; 501 block->predecessors = _mesa_pointer_set_create(block); 502 block->imm_dom = NULL; 503 /* XXX maybe it would be worth it to defer allocation? This 504 * way it doesn't get allocated for shader refs that never run 505 * nir_calc_dominance? For example, state-tracker creates an 506 * initial IR, clones that, runs appropriate lowering pass, passes 507 * to driver which does common lowering/opt, and then stores ref 508 * which is later used to do state specific lowering and futher 509 * opt. Do any of the references not need dominance metadata? 510 */ 511 block->dom_frontier = _mesa_pointer_set_create(block); 512 513 exec_list_make_empty(&block->instr_list); 514 515 return block; 516} 517 518static inline void 519src_init(nir_src *src) 520{ 521 src->is_ssa = false; 522 src->reg.reg = NULL; 523 src->reg.indirect = NULL; 524 src->reg.base_offset = 0; 525} 526 527nir_if * 528nir_if_create(nir_shader *shader) 529{ 530 nir_if *if_stmt = ralloc(shader, nir_if); 531 532 if_stmt->control = nir_selection_control_none; 533 534 cf_init(&if_stmt->cf_node, nir_cf_node_if); 535 src_init(&if_stmt->condition); 536 537 nir_block *then = nir_block_create(shader); 538 exec_list_make_empty(&if_stmt->then_list); 539 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); 540 then->cf_node.parent = &if_stmt->cf_node; 541 542 nir_block *else_stmt = nir_block_create(shader); 543 exec_list_make_empty(&if_stmt->else_list); 544 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); 545 else_stmt->cf_node.parent = &if_stmt->cf_node; 546 547 return if_stmt; 548} 549 550nir_loop * 551nir_loop_create(nir_shader *shader) 552{ 553 nir_loop *loop = rzalloc(shader, nir_loop); 554 555 cf_init(&loop->cf_node, nir_cf_node_loop); 556 /* Assume that loops are divergent until proven otherwise */ 557 loop->divergent = true; 558 559 nir_block *body = nir_block_create(shader); 560 exec_list_make_empty(&loop->body); 561 exec_list_push_tail(&loop->body, &body->cf_node.node); 562 body->cf_node.parent = &loop->cf_node; 563 564 body->successors[0] = body; 565 _mesa_set_add(body->predecessors, body); 566 567 return loop; 568} 569 570static void 571instr_init(nir_instr *instr, nir_instr_type type) 572{ 573 instr->type = type; 574 instr->block = NULL; 575 exec_node_init(&instr->node); 576} 577 578static void 579dest_init(nir_dest *dest) 580{ 581 dest->is_ssa = false; 582 dest->reg.reg = NULL; 583 dest->reg.indirect = NULL; 584 dest->reg.base_offset = 0; 585} 586 587static void 588alu_dest_init(nir_alu_dest *dest) 589{ 590 dest_init(&dest->dest); 591 dest->saturate = false; 592 dest->write_mask = 0xf; 593} 594 595static void 596alu_src_init(nir_alu_src *src) 597{ 598 src_init(&src->src); 599 src->abs = src->negate = false; 600 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) 601 src->swizzle[i] = i; 602} 603 604nir_alu_instr * 605nir_alu_instr_create(nir_shader *shader, nir_op op) 606{ 607 unsigned num_srcs = nir_op_infos[op].num_inputs; 608 /* TODO: don't use calloc */ 609 nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); 610 611 instr_init(&instr->instr, nir_instr_type_alu); 612 instr->op = op; 613 alu_dest_init(&instr->dest); 614 for (unsigned i = 0; i < num_srcs; i++) 615 alu_src_init(&instr->src[i]); 616 617 list_add(&instr->instr.gc_node, &shader->gc_list); 618 619 return instr; 620} 621 622nir_deref_instr * 623nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) 624{ 625 nir_deref_instr *instr = calloc(1, sizeof(*instr)); 626 627 instr_init(&instr->instr, nir_instr_type_deref); 628 629 instr->deref_type = deref_type; 630 if (deref_type != nir_deref_type_var) 631 src_init(&instr->parent); 632 633 if (deref_type == nir_deref_type_array || 634 deref_type == nir_deref_type_ptr_as_array) 635 src_init(&instr->arr.index); 636 637 dest_init(&instr->dest); 638 639 list_add(&instr->instr.gc_node, &shader->gc_list); 640 641 return instr; 642} 643 644nir_jump_instr * 645nir_jump_instr_create(nir_shader *shader, nir_jump_type type) 646{ 647 nir_jump_instr *instr = malloc(sizeof(*instr)); 648 instr_init(&instr->instr, nir_instr_type_jump); 649 src_init(&instr->condition); 650 instr->type = type; 651 instr->target = NULL; 652 instr->else_target = NULL; 653 654 list_add(&instr->instr.gc_node, &shader->gc_list); 655 656 return instr; 657} 658 659nir_load_const_instr * 660nir_load_const_instr_create(nir_shader *shader, unsigned num_components, 661 unsigned bit_size) 662{ 663 nir_load_const_instr *instr = 664 calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value)); 665 instr_init(&instr->instr, nir_instr_type_load_const); 666 667 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); 668 669 list_add(&instr->instr.gc_node, &shader->gc_list); 670 671 return instr; 672} 673 674nir_intrinsic_instr * 675nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) 676{ 677 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 678 /* TODO: don't use calloc */ 679 nir_intrinsic_instr *instr = 680 calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); 681 682 instr_init(&instr->instr, nir_instr_type_intrinsic); 683 instr->intrinsic = op; 684 685 if (nir_intrinsic_infos[op].has_dest) 686 dest_init(&instr->dest); 687 688 for (unsigned i = 0; i < num_srcs; i++) 689 src_init(&instr->src[i]); 690 691 list_add(&instr->instr.gc_node, &shader->gc_list); 692 693 return instr; 694} 695 696nir_call_instr * 697nir_call_instr_create(nir_shader *shader, nir_function *callee) 698{ 699 const unsigned num_params = callee->num_params; 700 nir_call_instr *instr = 701 calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0])); 702 703 instr_init(&instr->instr, nir_instr_type_call); 704 instr->callee = callee; 705 instr->num_params = num_params; 706 for (unsigned i = 0; i < num_params; i++) 707 src_init(&instr->params[i]); 708 709 list_add(&instr->instr.gc_node, &shader->gc_list); 710 711 return instr; 712} 713 714static int8_t default_tg4_offsets[4][2] = 715{ 716 { 0, 1 }, 717 { 1, 1 }, 718 { 1, 0 }, 719 { 0, 0 }, 720}; 721 722nir_tex_instr * 723nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) 724{ 725 nir_tex_instr *instr = calloc(1, sizeof(*instr)); 726 instr_init(&instr->instr, nir_instr_type_tex); 727 728 dest_init(&instr->dest); 729 730 instr->num_srcs = num_srcs; 731 instr->src = malloc(sizeof(nir_tex_src) * num_srcs); 732 for (unsigned i = 0; i < num_srcs; i++) 733 src_init(&instr->src[i].src); 734 735 instr->texture_index = 0; 736 instr->sampler_index = 0; 737 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets)); 738 739 list_add(&instr->instr.gc_node, &shader->gc_list); 740 741 return instr; 742} 743 744void 745nir_tex_instr_add_src(nir_tex_instr *tex, 746 nir_tex_src_type src_type, 747 nir_src src) 748{ 749 nir_tex_src *new_srcs = calloc(sizeof(*new_srcs), 750 tex->num_srcs + 1); 751 752 for (unsigned i = 0; i < tex->num_srcs; i++) { 753 new_srcs[i].src_type = tex->src[i].src_type; 754 nir_instr_move_src(&tex->instr, &new_srcs[i].src, 755 &tex->src[i].src); 756 } 757 758 free(tex->src); 759 tex->src = new_srcs; 760 761 tex->src[tex->num_srcs].src_type = src_type; 762 nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src); 763 tex->num_srcs++; 764} 765 766void 767nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx) 768{ 769 assert(src_idx < tex->num_srcs); 770 771 /* First rewrite the source to NIR_SRC_INIT */ 772 nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT); 773 774 /* Now, move all of the other sources down */ 775 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) { 776 tex->src[i-1].src_type = tex->src[i].src_type; 777 nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); 778 } 779 tex->num_srcs--; 780} 781 782bool 783nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex) 784{ 785 if (tex->op != nir_texop_tg4) 786 return false; 787 return memcmp(tex->tg4_offsets, default_tg4_offsets, 788 sizeof(tex->tg4_offsets)) != 0; 789} 790 791nir_phi_instr * 792nir_phi_instr_create(nir_shader *shader) 793{ 794 nir_phi_instr *instr = malloc(sizeof(*instr)); 795 instr_init(&instr->instr, nir_instr_type_phi); 796 797 dest_init(&instr->dest); 798 exec_list_make_empty(&instr->srcs); 799 800 list_add(&instr->instr.gc_node, &shader->gc_list); 801 802 return instr; 803} 804 805/** 806 * Adds a new source to a NIR instruction. 807 * 808 * Note that this does not update the def/use relationship for src, assuming 809 * that the instr is not in the shader. If it is, you have to do: 810 * 811 * list_addtail(&phi_src->src.use_link, &src.ssa->uses); 812 */ 813nir_phi_src * 814nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src) 815{ 816 nir_phi_src *phi_src; 817 818 phi_src = calloc(1, sizeof(nir_phi_src)); 819 phi_src->pred = pred; 820 phi_src->src = src; 821 phi_src->src.parent_instr = &instr->instr; 822 exec_list_push_tail(&instr->srcs, &phi_src->node); 823 824 return phi_src; 825} 826 827nir_parallel_copy_instr * 828nir_parallel_copy_instr_create(nir_shader *shader) 829{ 830 nir_parallel_copy_instr *instr = malloc(sizeof(*instr)); 831 instr_init(&instr->instr, nir_instr_type_parallel_copy); 832 833 exec_list_make_empty(&instr->entries); 834 835 list_add(&instr->instr.gc_node, &shader->gc_list); 836 837 return instr; 838} 839 840nir_ssa_undef_instr * 841nir_ssa_undef_instr_create(nir_shader *shader, 842 unsigned num_components, 843 unsigned bit_size) 844{ 845 nir_ssa_undef_instr *instr = malloc(sizeof(*instr)); 846 instr_init(&instr->instr, nir_instr_type_ssa_undef); 847 848 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); 849 850 list_add(&instr->instr.gc_node, &shader->gc_list); 851 852 return instr; 853} 854 855static nir_const_value 856const_value_float(double d, unsigned bit_size) 857{ 858 nir_const_value v; 859 memset(&v, 0, sizeof(v)); 860 switch (bit_size) { 861 case 16: v.u16 = _mesa_float_to_half(d); break; 862 case 32: v.f32 = d; break; 863 case 64: v.f64 = d; break; 864 default: 865 unreachable("Invalid bit size"); 866 } 867 return v; 868} 869 870static nir_const_value 871const_value_int(int64_t i, unsigned bit_size) 872{ 873 nir_const_value v; 874 memset(&v, 0, sizeof(v)); 875 switch (bit_size) { 876 case 1: v.b = i & 1; break; 877 case 8: v.i8 = i; break; 878 case 16: v.i16 = i; break; 879 case 32: v.i32 = i; break; 880 case 64: v.i64 = i; break; 881 default: 882 unreachable("Invalid bit size"); 883 } 884 return v; 885} 886 887nir_const_value 888nir_alu_binop_identity(nir_op binop, unsigned bit_size) 889{ 890 const int64_t max_int = (1ull << (bit_size - 1)) - 1; 891 const int64_t min_int = -max_int - 1; 892 switch (binop) { 893 case nir_op_iadd: 894 return const_value_int(0, bit_size); 895 case nir_op_fadd: 896 return const_value_float(0, bit_size); 897 case nir_op_imul: 898 return const_value_int(1, bit_size); 899 case nir_op_fmul: 900 return const_value_float(1, bit_size); 901 case nir_op_imin: 902 return const_value_int(max_int, bit_size); 903 case nir_op_umin: 904 return const_value_int(~0ull, bit_size); 905 case nir_op_fmin: 906 return const_value_float(INFINITY, bit_size); 907 case nir_op_imax: 908 return const_value_int(min_int, bit_size); 909 case nir_op_umax: 910 return const_value_int(0, bit_size); 911 case nir_op_fmax: 912 return const_value_float(-INFINITY, bit_size); 913 case nir_op_iand: 914 return const_value_int(~0ull, bit_size); 915 case nir_op_ior: 916 return const_value_int(0, bit_size); 917 case nir_op_ixor: 918 return const_value_int(0, bit_size); 919 default: 920 unreachable("Invalid reduction operation"); 921 } 922} 923 924nir_function_impl * 925nir_cf_node_get_function(nir_cf_node *node) 926{ 927 while (node->type != nir_cf_node_function) { 928 node = node->parent; 929 } 930 931 return nir_cf_node_as_function(node); 932} 933 934/* Reduces a cursor by trying to convert everything to after and trying to 935 * go up to block granularity when possible. 936 */ 937static nir_cursor 938reduce_cursor(nir_cursor cursor) 939{ 940 switch (cursor.option) { 941 case nir_cursor_before_block: 942 if (exec_list_is_empty(&cursor.block->instr_list)) { 943 /* Empty block. After is as good as before. */ 944 cursor.option = nir_cursor_after_block; 945 } 946 return cursor; 947 948 case nir_cursor_after_block: 949 return cursor; 950 951 case nir_cursor_before_instr: { 952 nir_instr *prev_instr = nir_instr_prev(cursor.instr); 953 if (prev_instr) { 954 /* Before this instruction is after the previous */ 955 cursor.instr = prev_instr; 956 cursor.option = nir_cursor_after_instr; 957 } else { 958 /* No previous instruction. Switch to before block */ 959 cursor.block = cursor.instr->block; 960 cursor.option = nir_cursor_before_block; 961 } 962 return reduce_cursor(cursor); 963 } 964 965 case nir_cursor_after_instr: 966 if (nir_instr_next(cursor.instr) == NULL) { 967 /* This is the last instruction, switch to after block */ 968 cursor.option = nir_cursor_after_block; 969 cursor.block = cursor.instr->block; 970 } 971 return cursor; 972 973 default: 974 unreachable("Inavlid cursor option"); 975 } 976} 977 978bool 979nir_cursors_equal(nir_cursor a, nir_cursor b) 980{ 981 /* Reduced cursors should be unique */ 982 a = reduce_cursor(a); 983 b = reduce_cursor(b); 984 985 return a.block == b.block && a.option == b.option; 986} 987 988static bool 989add_use_cb(nir_src *src, void *state) 990{ 991 nir_instr *instr = state; 992 993 src->parent_instr = instr; 994 list_addtail(&src->use_link, 995 src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses); 996 997 return true; 998} 999 1000static bool 1001add_ssa_def_cb(nir_ssa_def *def, void *state) 1002{ 1003 nir_instr *instr = state; 1004 1005 if (instr->block && def->index == UINT_MAX) { 1006 nir_function_impl *impl = 1007 nir_cf_node_get_function(&instr->block->cf_node); 1008 1009 def->index = impl->ssa_alloc++; 1010 1011 impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 1012 } 1013 1014 return true; 1015} 1016 1017static bool 1018add_reg_def_cb(nir_dest *dest, void *state) 1019{ 1020 nir_instr *instr = state; 1021 1022 if (!dest->is_ssa) { 1023 dest->reg.parent_instr = instr; 1024 list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); 1025 } 1026 1027 return true; 1028} 1029 1030static void 1031add_defs_uses(nir_instr *instr) 1032{ 1033 nir_foreach_src(instr, add_use_cb, instr); 1034 nir_foreach_dest(instr, add_reg_def_cb, instr); 1035 nir_foreach_ssa_def(instr, add_ssa_def_cb, instr); 1036} 1037 1038void 1039nir_instr_insert(nir_cursor cursor, nir_instr *instr) 1040{ 1041 switch (cursor.option) { 1042 case nir_cursor_before_block: 1043 /* Only allow inserting jumps into empty blocks. */ 1044 if (instr->type == nir_instr_type_jump) 1045 assert(exec_list_is_empty(&cursor.block->instr_list)); 1046 1047 instr->block = cursor.block; 1048 add_defs_uses(instr); 1049 exec_list_push_head(&cursor.block->instr_list, &instr->node); 1050 break; 1051 case nir_cursor_after_block: { 1052 /* Inserting instructions after a jump is illegal. */ 1053 nir_instr *last = nir_block_last_instr(cursor.block); 1054 assert(last == NULL || last->type != nir_instr_type_jump); 1055 (void) last; 1056 1057 instr->block = cursor.block; 1058 add_defs_uses(instr); 1059 exec_list_push_tail(&cursor.block->instr_list, &instr->node); 1060 break; 1061 } 1062 case nir_cursor_before_instr: 1063 assert(instr->type != nir_instr_type_jump); 1064 instr->block = cursor.instr->block; 1065 add_defs_uses(instr); 1066 exec_node_insert_node_before(&cursor.instr->node, &instr->node); 1067 break; 1068 case nir_cursor_after_instr: 1069 /* Inserting instructions after a jump is illegal. */ 1070 assert(cursor.instr->type != nir_instr_type_jump); 1071 1072 /* Only allow inserting jumps at the end of the block. */ 1073 if (instr->type == nir_instr_type_jump) 1074 assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); 1075 1076 instr->block = cursor.instr->block; 1077 add_defs_uses(instr); 1078 exec_node_insert_after(&cursor.instr->node, &instr->node); 1079 break; 1080 } 1081 1082 if (instr->type == nir_instr_type_jump) 1083 nir_handle_add_jump(instr->block); 1084 1085 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node); 1086 impl->valid_metadata &= ~nir_metadata_instr_index; 1087} 1088 1089bool 1090nir_instr_move(nir_cursor cursor, nir_instr *instr) 1091{ 1092 /* If the cursor happens to refer to this instruction (either before or 1093 * after), don't do anything. 1094 */ 1095 if ((cursor.option == nir_cursor_before_instr || 1096 cursor.option == nir_cursor_after_instr) && 1097 cursor.instr == instr) 1098 return false; 1099 1100 nir_instr_remove(instr); 1101 nir_instr_insert(cursor, instr); 1102 return true; 1103} 1104 1105static bool 1106src_is_valid(const nir_src *src) 1107{ 1108 return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL); 1109} 1110 1111static bool 1112remove_use_cb(nir_src *src, void *state) 1113{ 1114 (void) state; 1115 1116 if (src_is_valid(src)) 1117 list_del(&src->use_link); 1118 1119 return true; 1120} 1121 1122static bool 1123remove_def_cb(nir_dest *dest, void *state) 1124{ 1125 (void) state; 1126 1127 if (!dest->is_ssa) 1128 list_del(&dest->reg.def_link); 1129 1130 return true; 1131} 1132 1133static void 1134remove_defs_uses(nir_instr *instr) 1135{ 1136 nir_foreach_dest(instr, remove_def_cb, instr); 1137 nir_foreach_src(instr, remove_use_cb, instr); 1138} 1139 1140void nir_instr_remove_v(nir_instr *instr) 1141{ 1142 remove_defs_uses(instr); 1143 exec_node_remove(&instr->node); 1144 1145 if (instr->type == nir_instr_type_jump) { 1146 nir_jump_instr *jump_instr = nir_instr_as_jump(instr); 1147 nir_handle_remove_jump(instr->block, jump_instr->type); 1148 } 1149} 1150 1151static bool free_src_indirects_cb(nir_src *src, void *state) 1152{ 1153 src_free_indirects(src); 1154 return true; 1155} 1156 1157static bool free_dest_indirects_cb(nir_dest *dest, void *state) 1158{ 1159 dest_free_indirects(dest); 1160 return true; 1161} 1162 1163void nir_instr_free(nir_instr *instr) 1164{ 1165 nir_foreach_src(instr, free_src_indirects_cb, NULL); 1166 nir_foreach_dest(instr, free_dest_indirects_cb, NULL); 1167 1168 switch (instr->type) { 1169 case nir_instr_type_tex: 1170 free(nir_instr_as_tex(instr)->src); 1171 break; 1172 1173 case nir_instr_type_phi: { 1174 nir_phi_instr *phi = nir_instr_as_phi(instr); 1175 nir_foreach_phi_src_safe(phi_src, phi) { 1176 free(phi_src); 1177 } 1178 break; 1179 } 1180 1181 default: 1182 break; 1183 } 1184 1185 list_del(&instr->gc_node); 1186 free(instr); 1187} 1188 1189void 1190nir_instr_free_list(struct exec_list *list) 1191{ 1192 struct exec_node *node; 1193 while ((node = exec_list_pop_head(list))) { 1194 nir_instr *removed_instr = exec_node_data(nir_instr, node, node); 1195 nir_instr_free(removed_instr); 1196 } 1197} 1198 1199static bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state) 1200{ 1201 bool *live = state; 1202 1203 if (!nir_ssa_def_is_unused(def)) { 1204 *live = true; 1205 return false; 1206 } else { 1207 return true; 1208 } 1209} 1210 1211static bool nir_instr_free_and_dce_is_live(nir_instr *instr) 1212{ 1213 /* Note: don't have to worry about jumps because they don't have dests to 1214 * become unused. 1215 */ 1216 if (instr->type == nir_instr_type_intrinsic) { 1217 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1218 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1219 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE)) 1220 return true; 1221 } 1222 1223 bool live = false; 1224 nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live); 1225 return live; 1226} 1227 1228static bool 1229nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state) 1230{ 1231 nir_instr_worklist *wl = state; 1232 1233 if (src->is_ssa) { 1234 list_del(&src->use_link); 1235 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr)) 1236 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr); 1237 1238 /* Stop nir_instr_remove from trying to delete the link again. */ 1239 src->ssa = NULL; 1240 } 1241 1242 return true; 1243} 1244 1245static void 1246nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr) 1247{ 1248 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl); 1249} 1250 1251/** 1252 * Frees an instruction and any SSA defs that it used that are now dead, 1253 * returning a nir_cursor where the instruction previously was. 1254 */ 1255nir_cursor 1256nir_instr_free_and_dce(nir_instr *instr) 1257{ 1258 nir_instr_worklist *worklist = nir_instr_worklist_create(); 1259 1260 nir_instr_dce_add_dead_ssa_srcs(worklist, instr); 1261 nir_cursor c = nir_instr_remove(instr); 1262 1263 struct exec_list to_free; 1264 exec_list_make_empty(&to_free); 1265 1266 nir_instr *dce_instr; 1267 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) { 1268 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr); 1269 1270 /* If we're removing the instr where our cursor is, then we have to 1271 * point the cursor elsewhere. 1272 */ 1273 if ((c.option == nir_cursor_before_instr || 1274 c.option == nir_cursor_after_instr) && 1275 c.instr == dce_instr) 1276 c = nir_instr_remove(dce_instr); 1277 else 1278 nir_instr_remove(dce_instr); 1279 exec_list_push_tail(&to_free, &dce_instr->node); 1280 } 1281 1282 nir_instr_free_list(&to_free); 1283 1284 nir_instr_worklist_destroy(worklist); 1285 1286 return c; 1287} 1288 1289/*@}*/ 1290 1291void 1292nir_index_local_regs(nir_function_impl *impl) 1293{ 1294 unsigned index = 0; 1295 foreach_list_typed(nir_register, reg, node, &impl->registers) { 1296 reg->index = index++; 1297 } 1298 impl->reg_alloc = index; 1299} 1300 1301struct foreach_ssa_def_state { 1302 nir_foreach_ssa_def_cb cb; 1303 void *client_state; 1304}; 1305 1306static inline bool 1307nir_ssa_def_visitor(nir_dest *dest, void *void_state) 1308{ 1309 struct foreach_ssa_def_state *state = void_state; 1310 1311 if (dest->is_ssa) 1312 return state->cb(&dest->ssa, state->client_state); 1313 else 1314 return true; 1315} 1316 1317bool 1318nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) 1319{ 1320 switch (instr->type) { 1321 case nir_instr_type_alu: 1322 case nir_instr_type_deref: 1323 case nir_instr_type_tex: 1324 case nir_instr_type_intrinsic: 1325 case nir_instr_type_phi: 1326 case nir_instr_type_parallel_copy: { 1327 struct foreach_ssa_def_state foreach_state = {cb, state}; 1328 return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state); 1329 } 1330 1331 case nir_instr_type_load_const: 1332 return cb(&nir_instr_as_load_const(instr)->def, state); 1333 case nir_instr_type_ssa_undef: 1334 return cb(&nir_instr_as_ssa_undef(instr)->def, state); 1335 case nir_instr_type_call: 1336 case nir_instr_type_jump: 1337 return true; 1338 default: 1339 unreachable("Invalid instruction type"); 1340 } 1341} 1342 1343nir_ssa_def * 1344nir_instr_ssa_def(nir_instr *instr) 1345{ 1346 switch (instr->type) { 1347 case nir_instr_type_alu: 1348 assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); 1349 return &nir_instr_as_alu(instr)->dest.dest.ssa; 1350 1351 case nir_instr_type_deref: 1352 assert(nir_instr_as_deref(instr)->dest.is_ssa); 1353 return &nir_instr_as_deref(instr)->dest.ssa; 1354 1355 case nir_instr_type_tex: 1356 assert(nir_instr_as_tex(instr)->dest.is_ssa); 1357 return &nir_instr_as_tex(instr)->dest.ssa; 1358 1359 case nir_instr_type_intrinsic: { 1360 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1361 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) { 1362 assert(intrin->dest.is_ssa); 1363 return &intrin->dest.ssa; 1364 } else { 1365 return NULL; 1366 } 1367 } 1368 1369 case nir_instr_type_phi: 1370 assert(nir_instr_as_phi(instr)->dest.is_ssa); 1371 return &nir_instr_as_phi(instr)->dest.ssa; 1372 1373 case nir_instr_type_parallel_copy: 1374 unreachable("Parallel copies are unsupported by this function"); 1375 1376 case nir_instr_type_load_const: 1377 return &nir_instr_as_load_const(instr)->def; 1378 1379 case nir_instr_type_ssa_undef: 1380 return &nir_instr_as_ssa_undef(instr)->def; 1381 1382 case nir_instr_type_call: 1383 case nir_instr_type_jump: 1384 return NULL; 1385 } 1386 1387 unreachable("Invalid instruction type"); 1388} 1389 1390bool 1391nir_foreach_phi_src_leaving_block(nir_block *block, 1392 nir_foreach_src_cb cb, 1393 void *state) 1394{ 1395 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { 1396 if (block->successors[i] == NULL) 1397 continue; 1398 1399 nir_foreach_instr(instr, block->successors[i]) { 1400 if (instr->type != nir_instr_type_phi) 1401 break; 1402 1403 nir_phi_instr *phi = nir_instr_as_phi(instr); 1404 nir_foreach_phi_src(phi_src, phi) { 1405 if (phi_src->pred == block) { 1406 if (!cb(&phi_src->src, state)) 1407 return false; 1408 } 1409 } 1410 } 1411 } 1412 1413 return true; 1414} 1415 1416nir_const_value 1417nir_const_value_for_float(double f, unsigned bit_size) 1418{ 1419 nir_const_value v; 1420 memset(&v, 0, sizeof(v)); 1421 1422 switch (bit_size) { 1423 case 16: 1424 v.u16 = _mesa_float_to_half(f); 1425 break; 1426 case 32: 1427 v.f32 = f; 1428 break; 1429 case 64: 1430 v.f64 = f; 1431 break; 1432 default: 1433 unreachable("Invalid bit size"); 1434 } 1435 1436 return v; 1437} 1438 1439double 1440nir_const_value_as_float(nir_const_value value, unsigned bit_size) 1441{ 1442 switch (bit_size) { 1443 case 16: return _mesa_half_to_float(value.u16); 1444 case 32: return value.f32; 1445 case 64: return value.f64; 1446 default: 1447 unreachable("Invalid bit size"); 1448 } 1449} 1450 1451nir_const_value * 1452nir_src_as_const_value(nir_src src) 1453{ 1454 if (!src.is_ssa) 1455 return NULL; 1456 1457 if (src.ssa->parent_instr->type != nir_instr_type_load_const) 1458 return NULL; 1459 1460 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); 1461 1462 return load->value; 1463} 1464 1465/** 1466 * Returns true if the source is known to be dynamically uniform. Otherwise it 1467 * returns false which means it may or may not be dynamically uniform but it 1468 * can't be determined. 1469 */ 1470bool 1471nir_src_is_dynamically_uniform(nir_src src) 1472{ 1473 if (!src.is_ssa) 1474 return false; 1475 1476 /* Constants are trivially dynamically uniform */ 1477 if (src.ssa->parent_instr->type == nir_instr_type_load_const) 1478 return true; 1479 1480 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { 1481 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); 1482 /* As are uniform variables */ 1483 if (intr->intrinsic == nir_intrinsic_load_uniform && 1484 nir_src_is_dynamically_uniform(intr->src[0])) 1485 return true; 1486 /* Push constant loads always use uniform offsets. */ 1487 if (intr->intrinsic == nir_intrinsic_load_push_constant) 1488 return true; 1489 if (intr->intrinsic == nir_intrinsic_load_deref && 1490 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const)) 1491 return true; 1492 } 1493 1494 /* Operating together dynamically uniform expressions produces a 1495 * dynamically uniform result 1496 */ 1497 if (src.ssa->parent_instr->type == nir_instr_type_alu) { 1498 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr); 1499 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 1500 if (!nir_src_is_dynamically_uniform(alu->src[i].src)) 1501 return false; 1502 } 1503 1504 return true; 1505 } 1506 1507 /* XXX: this could have many more tests, such as when a sampler function is 1508 * called with dynamically uniform arguments. 1509 */ 1510 return false; 1511} 1512 1513static void 1514src_remove_all_uses(nir_src *src) 1515{ 1516 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { 1517 if (!src_is_valid(src)) 1518 continue; 1519 1520 list_del(&src->use_link); 1521 } 1522} 1523 1524static void 1525src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) 1526{ 1527 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { 1528 if (!src_is_valid(src)) 1529 continue; 1530 1531 if (parent_instr) { 1532 src->parent_instr = parent_instr; 1533 if (src->is_ssa) 1534 list_addtail(&src->use_link, &src->ssa->uses); 1535 else 1536 list_addtail(&src->use_link, &src->reg.reg->uses); 1537 } else { 1538 assert(parent_if); 1539 src->parent_if = parent_if; 1540 if (src->is_ssa) 1541 list_addtail(&src->use_link, &src->ssa->if_uses); 1542 else 1543 list_addtail(&src->use_link, &src->reg.reg->if_uses); 1544 } 1545 } 1546} 1547 1548void 1549nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) 1550{ 1551 assert(!src_is_valid(src) || src->parent_instr == instr); 1552 1553 src_remove_all_uses(src); 1554 nir_src_copy(src, &new_src); 1555 src_add_all_uses(src, instr, NULL); 1556} 1557 1558void 1559nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) 1560{ 1561 assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); 1562 1563 src_remove_all_uses(dest); 1564 src_free_indirects(dest); 1565 src_remove_all_uses(src); 1566 *dest = *src; 1567 *src = NIR_SRC_INIT; 1568 src_add_all_uses(dest, dest_instr, NULL); 1569} 1570 1571void 1572nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) 1573{ 1574 nir_src *src = &if_stmt->condition; 1575 assert(!src_is_valid(src) || src->parent_if == if_stmt); 1576 1577 src_remove_all_uses(src); 1578 nir_src_copy(src, &new_src); 1579 src_add_all_uses(src, NULL, if_stmt); 1580} 1581 1582void 1583nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) 1584{ 1585 if (dest->is_ssa) { 1586 /* We can only overwrite an SSA destination if it has no uses. */ 1587 assert(nir_ssa_def_is_unused(&dest->ssa)); 1588 } else { 1589 list_del(&dest->reg.def_link); 1590 if (dest->reg.indirect) 1591 src_remove_all_uses(dest->reg.indirect); 1592 } 1593 1594 /* We can't re-write with an SSA def */ 1595 assert(!new_dest.is_ssa); 1596 1597 nir_dest_copy(dest, &new_dest); 1598 1599 dest->reg.parent_instr = instr; 1600 list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs); 1601 1602 if (dest->reg.indirect) 1603 src_add_all_uses(dest->reg.indirect, instr, NULL); 1604} 1605 1606/* note: does *not* take ownership of 'name' */ 1607void 1608nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 1609 unsigned num_components, 1610 unsigned bit_size) 1611{ 1612 def->parent_instr = instr; 1613 list_inithead(&def->uses); 1614 list_inithead(&def->if_uses); 1615 def->num_components = num_components; 1616 def->bit_size = bit_size; 1617 def->divergent = true; /* This is the safer default */ 1618 1619 if (instr->block) { 1620 nir_function_impl *impl = 1621 nir_cf_node_get_function(&instr->block->cf_node); 1622 1623 def->index = impl->ssa_alloc++; 1624 1625 impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 1626 } else { 1627 def->index = UINT_MAX; 1628 } 1629} 1630 1631/* note: does *not* take ownership of 'name' */ 1632void 1633nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 1634 unsigned num_components, unsigned bit_size, 1635 const char *name) 1636{ 1637 dest->is_ssa = true; 1638 nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size); 1639} 1640 1641void 1642nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa) 1643{ 1644 assert(def != new_ssa); 1645 nir_foreach_use_safe(use_src, def) 1646 nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa); 1647 1648 nir_foreach_if_use_safe(use_src, def) 1649 nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa); 1650} 1651 1652void 1653nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src) 1654{ 1655 if (new_src.is_ssa) { 1656 nir_ssa_def_rewrite_uses(def, new_src.ssa); 1657 } else { 1658 nir_foreach_use_safe(use_src, def) 1659 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); 1660 1661 nir_foreach_if_use_safe(use_src, def) 1662 nir_if_rewrite_condition(use_src->parent_if, new_src); 1663 } 1664} 1665 1666static bool 1667is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) 1668{ 1669 assert(start->block == end->block); 1670 1671 if (between->block != start->block) 1672 return false; 1673 1674 /* Search backwards looking for "between" */ 1675 while (start != end) { 1676 if (between == end) 1677 return true; 1678 1679 end = nir_instr_prev(end); 1680 assert(end); 1681 } 1682 1683 return false; 1684} 1685 1686/* Replaces all uses of the given SSA def with the given source but only if 1687 * the use comes after the after_me instruction. This can be useful if you 1688 * are emitting code to fix up the result of some instruction: you can freely 1689 * use the result in that code and then call rewrite_uses_after and pass the 1690 * last fixup instruction as after_me and it will replace all of the uses you 1691 * want without touching the fixup code. 1692 * 1693 * This function assumes that after_me is in the same block as 1694 * def->parent_instr and that after_me comes after def->parent_instr. 1695 */ 1696void 1697nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa, 1698 nir_instr *after_me) 1699{ 1700 if (def == new_ssa) 1701 return; 1702 1703 nir_foreach_use_safe(use_src, def) { 1704 assert(use_src->parent_instr != def->parent_instr); 1705 /* Since def already dominates all of its uses, the only way a use can 1706 * not be dominated by after_me is if it is between def and after_me in 1707 * the instruction list. 1708 */ 1709 if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) 1710 nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa); 1711 } 1712 1713 nir_foreach_if_use_safe(use_src, def) { 1714 nir_if_rewrite_condition_ssa(use_src->parent_if, 1715 &use_src->parent_if->condition, 1716 new_ssa); 1717 } 1718} 1719 1720static nir_ssa_def * 1721get_store_value(nir_intrinsic_instr *intrin) 1722{ 1723 assert(nir_intrinsic_has_write_mask(intrin)); 1724 /* deref stores have the deref in src[0] and the store value in src[1] */ 1725 if (intrin->intrinsic == nir_intrinsic_store_deref || 1726 intrin->intrinsic == nir_intrinsic_store_deref_block_intel) 1727 return intrin->src[1].ssa; 1728 1729 /* all other stores have the store value in src[0] */ 1730 return intrin->src[0].ssa; 1731} 1732 1733nir_component_mask_t 1734nir_src_components_read(const nir_src *src) 1735{ 1736 assert(src->is_ssa && src->parent_instr); 1737 1738 if (src->parent_instr->type == nir_instr_type_alu) { 1739 nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); 1740 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src); 1741 int src_idx = alu_src - &alu->src[0]; 1742 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs); 1743 return nir_alu_instr_src_read_mask(alu, src_idx); 1744 } else if (src->parent_instr->type == nir_instr_type_intrinsic) { 1745 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr); 1746 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin)) 1747 return nir_intrinsic_write_mask(intrin); 1748 else 1749 return (1 << src->ssa->num_components) - 1; 1750 } else { 1751 return (1 << src->ssa->num_components) - 1; 1752 } 1753} 1754 1755nir_component_mask_t 1756nir_ssa_def_components_read(const nir_ssa_def *def) 1757{ 1758 nir_component_mask_t read_mask = 0; 1759 1760 if (!list_is_empty(&def->if_uses)) 1761 read_mask |= 1; 1762 1763 nir_foreach_use(use, def) { 1764 read_mask |= nir_src_components_read(use); 1765 if (read_mask == (1 << def->num_components) - 1) 1766 return read_mask; 1767 } 1768 1769 return read_mask; 1770} 1771 1772nir_block * 1773nir_block_unstructured_next(nir_block *block) 1774{ 1775 if (block == NULL) { 1776 /* nir_foreach_block_unstructured_safe() will call this function on a 1777 * NULL block after the last iteration, but it won't use the result so 1778 * just return NULL here. 1779 */ 1780 return NULL; 1781 } 1782 1783 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node); 1784 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function) 1785 return NULL; 1786 1787 if (cf_next && cf_next->type == nir_cf_node_block) 1788 return nir_cf_node_as_block(cf_next); 1789 1790 return nir_block_cf_tree_next(block); 1791} 1792 1793nir_block * 1794nir_unstructured_start_block(nir_function_impl *impl) 1795{ 1796 return nir_start_block(impl); 1797} 1798 1799nir_block * 1800nir_block_cf_tree_next(nir_block *block) 1801{ 1802 if (block == NULL) { 1803 /* nir_foreach_block_safe() will call this function on a NULL block 1804 * after the last iteration, but it won't use the result so just return 1805 * NULL here. 1806 */ 1807 return NULL; 1808 } 1809 1810 assert(nir_cf_node_get_function(&block->cf_node)->structured); 1811 1812 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node); 1813 if (cf_next) 1814 return nir_cf_node_cf_tree_first(cf_next); 1815 1816 nir_cf_node *parent = block->cf_node.parent; 1817 1818 switch (parent->type) { 1819 case nir_cf_node_if: { 1820 /* Are we at the end of the if? Go to the beginning of the else */ 1821 nir_if *if_stmt = nir_cf_node_as_if(parent); 1822 if (block == nir_if_last_then_block(if_stmt)) 1823 return nir_if_first_else_block(if_stmt); 1824 1825 assert(block == nir_if_last_else_block(if_stmt)); 1826 } 1827 FALLTHROUGH; 1828 1829 case nir_cf_node_loop: 1830 return nir_cf_node_as_block(nir_cf_node_next(parent)); 1831 1832 case nir_cf_node_function: 1833 return NULL; 1834 1835 default: 1836 unreachable("unknown cf node type"); 1837 } 1838} 1839 1840nir_block * 1841nir_block_cf_tree_prev(nir_block *block) 1842{ 1843 if (block == NULL) { 1844 /* do this for consistency with nir_block_cf_tree_next() */ 1845 return NULL; 1846 } 1847 1848 assert(nir_cf_node_get_function(&block->cf_node)->structured); 1849 1850 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node); 1851 if (cf_prev) 1852 return nir_cf_node_cf_tree_last(cf_prev); 1853 1854 nir_cf_node *parent = block->cf_node.parent; 1855 1856 switch (parent->type) { 1857 case nir_cf_node_if: { 1858 /* Are we at the beginning of the else? Go to the end of the if */ 1859 nir_if *if_stmt = nir_cf_node_as_if(parent); 1860 if (block == nir_if_first_else_block(if_stmt)) 1861 return nir_if_last_then_block(if_stmt); 1862 1863 assert(block == nir_if_first_then_block(if_stmt)); 1864 } 1865 FALLTHROUGH; 1866 1867 case nir_cf_node_loop: 1868 return nir_cf_node_as_block(nir_cf_node_prev(parent)); 1869 1870 case nir_cf_node_function: 1871 return NULL; 1872 1873 default: 1874 unreachable("unknown cf node type"); 1875 } 1876} 1877 1878nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node) 1879{ 1880 switch (node->type) { 1881 case nir_cf_node_function: { 1882 nir_function_impl *impl = nir_cf_node_as_function(node); 1883 return nir_start_block(impl); 1884 } 1885 1886 case nir_cf_node_if: { 1887 nir_if *if_stmt = nir_cf_node_as_if(node); 1888 return nir_if_first_then_block(if_stmt); 1889 } 1890 1891 case nir_cf_node_loop: { 1892 nir_loop *loop = nir_cf_node_as_loop(node); 1893 return nir_loop_first_block(loop); 1894 } 1895 1896 case nir_cf_node_block: { 1897 return nir_cf_node_as_block(node); 1898 } 1899 1900 default: 1901 unreachable("unknown node type"); 1902 } 1903} 1904 1905nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node) 1906{ 1907 switch (node->type) { 1908 case nir_cf_node_function: { 1909 nir_function_impl *impl = nir_cf_node_as_function(node); 1910 return nir_impl_last_block(impl); 1911 } 1912 1913 case nir_cf_node_if: { 1914 nir_if *if_stmt = nir_cf_node_as_if(node); 1915 return nir_if_last_else_block(if_stmt); 1916 } 1917 1918 case nir_cf_node_loop: { 1919 nir_loop *loop = nir_cf_node_as_loop(node); 1920 return nir_loop_last_block(loop); 1921 } 1922 1923 case nir_cf_node_block: { 1924 return nir_cf_node_as_block(node); 1925 } 1926 1927 default: 1928 unreachable("unknown node type"); 1929 } 1930} 1931 1932nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node) 1933{ 1934 if (node->type == nir_cf_node_block) 1935 return nir_block_cf_tree_next(nir_cf_node_as_block(node)); 1936 else if (node->type == nir_cf_node_function) 1937 return NULL; 1938 else 1939 return nir_cf_node_as_block(nir_cf_node_next(node)); 1940} 1941 1942nir_if * 1943nir_block_get_following_if(nir_block *block) 1944{ 1945 if (exec_node_is_tail_sentinel(&block->cf_node.node)) 1946 return NULL; 1947 1948 if (nir_cf_node_is_last(&block->cf_node)) 1949 return NULL; 1950 1951 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); 1952 1953 if (next_node->type != nir_cf_node_if) 1954 return NULL; 1955 1956 return nir_cf_node_as_if(next_node); 1957} 1958 1959nir_loop * 1960nir_block_get_following_loop(nir_block *block) 1961{ 1962 if (exec_node_is_tail_sentinel(&block->cf_node.node)) 1963 return NULL; 1964 1965 if (nir_cf_node_is_last(&block->cf_node)) 1966 return NULL; 1967 1968 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); 1969 1970 if (next_node->type != nir_cf_node_loop) 1971 return NULL; 1972 1973 return nir_cf_node_as_loop(next_node); 1974} 1975 1976static int 1977compare_block_index(const void *p1, const void *p2) 1978{ 1979 const nir_block *block1 = *((const nir_block **) p1); 1980 const nir_block *block2 = *((const nir_block **) p2); 1981 1982 return (int) block1->index - (int) block2->index; 1983} 1984 1985nir_block ** 1986nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx) 1987{ 1988 nir_block **preds = 1989 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries); 1990 1991 unsigned i = 0; 1992 set_foreach(block->predecessors, entry) 1993 preds[i++] = (nir_block *) entry->key; 1994 assert(i == block->predecessors->entries); 1995 1996 qsort(preds, block->predecessors->entries, sizeof(nir_block *), 1997 compare_block_index); 1998 1999 return preds; 2000} 2001 2002void 2003nir_index_blocks(nir_function_impl *impl) 2004{ 2005 unsigned index = 0; 2006 2007 if (impl->valid_metadata & nir_metadata_block_index) 2008 return; 2009 2010 nir_foreach_block_unstructured(block, impl) { 2011 block->index = index++; 2012 } 2013 2014 /* The end_block isn't really part of the program, which is why its index 2015 * is >= num_blocks. 2016 */ 2017 impl->num_blocks = impl->end_block->index = index; 2018} 2019 2020static bool 2021index_ssa_def_cb(nir_ssa_def *def, void *state) 2022{ 2023 unsigned *index = (unsigned *) state; 2024 def->index = (*index)++; 2025 2026 return true; 2027} 2028 2029/** 2030 * The indices are applied top-to-bottom which has the very nice property 2031 * that, if A dominates B, then A->index <= B->index. 2032 */ 2033void 2034nir_index_ssa_defs(nir_function_impl *impl) 2035{ 2036 unsigned index = 0; 2037 2038 impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 2039 2040 nir_foreach_block_unstructured(block, impl) { 2041 nir_foreach_instr(instr, block) 2042 nir_foreach_ssa_def(instr, index_ssa_def_cb, &index); 2043 } 2044 2045 impl->ssa_alloc = index; 2046} 2047 2048/** 2049 * The indices are applied top-to-bottom which has the very nice property 2050 * that, if A dominates B, then A->index <= B->index. 2051 */ 2052unsigned 2053nir_index_instrs(nir_function_impl *impl) 2054{ 2055 unsigned index = 0; 2056 2057 nir_foreach_block(block, impl) { 2058 block->start_ip = index++; 2059 2060 nir_foreach_instr(instr, block) 2061 instr->index = index++; 2062 2063 block->end_ip = index++; 2064 } 2065 2066 return index; 2067} 2068 2069unsigned 2070nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes) 2071{ 2072 unsigned count = 0; 2073 nir_foreach_variable_with_modes(var, shader, modes) 2074 var->index = count++; 2075 return count; 2076} 2077 2078unsigned 2079nir_function_impl_index_vars(nir_function_impl *impl) 2080{ 2081 unsigned count = 0; 2082 nir_foreach_function_temp_variable(var, impl) 2083 var->index = count++; 2084 return count; 2085} 2086 2087static nir_instr * 2088cursor_next_instr(nir_cursor cursor) 2089{ 2090 switch (cursor.option) { 2091 case nir_cursor_before_block: 2092 for (nir_block *block = cursor.block; block; 2093 block = nir_block_cf_tree_next(block)) { 2094 nir_instr *instr = nir_block_first_instr(block); 2095 if (instr) 2096 return instr; 2097 } 2098 return NULL; 2099 2100 case nir_cursor_after_block: 2101 cursor.block = nir_block_cf_tree_next(cursor.block); 2102 if (cursor.block == NULL) 2103 return NULL; 2104 2105 cursor.option = nir_cursor_before_block; 2106 return cursor_next_instr(cursor); 2107 2108 case nir_cursor_before_instr: 2109 return cursor.instr; 2110 2111 case nir_cursor_after_instr: 2112 if (nir_instr_next(cursor.instr)) 2113 return nir_instr_next(cursor.instr); 2114 2115 cursor.option = nir_cursor_after_block; 2116 cursor.block = cursor.instr->block; 2117 return cursor_next_instr(cursor); 2118 } 2119 2120 unreachable("Inavlid cursor option"); 2121} 2122 2123ASSERTED static bool 2124dest_is_ssa(nir_dest *dest, void *_state) 2125{ 2126 (void) _state; 2127 return dest->is_ssa; 2128} 2129 2130bool 2131nir_function_impl_lower_instructions(nir_function_impl *impl, 2132 nir_instr_filter_cb filter, 2133 nir_lower_instr_cb lower, 2134 void *cb_data) 2135{ 2136 nir_builder b; 2137 nir_builder_init(&b, impl); 2138 2139 nir_metadata preserved = nir_metadata_block_index | 2140 nir_metadata_dominance; 2141 2142 bool progress = false; 2143 nir_cursor iter = nir_before_cf_list(&impl->body); 2144 nir_instr *instr; 2145 while ((instr = cursor_next_instr(iter)) != NULL) { 2146 if (filter && !filter(instr, cb_data)) { 2147 iter = nir_after_instr(instr); 2148 continue; 2149 } 2150 2151 assert(nir_foreach_dest(instr, dest_is_ssa, NULL)); 2152 nir_ssa_def *old_def = nir_instr_ssa_def(instr); 2153 struct list_head old_uses, old_if_uses; 2154 if (old_def != NULL) { 2155 /* We're about to ask the callback to generate a replacement for instr. 2156 * Save off the uses from instr's SSA def so we know what uses to 2157 * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the 2158 * case where the generated replacement code uses the result of instr 2159 * itself. If we use nir_ssa_def_rewrite_uses_after (which is the 2160 * normal solution to this problem), it doesn't work well if control- 2161 * flow is inserted as part of the replacement, doesn't handle cases 2162 * where the replacement is something consumed by instr, and suffers 2163 * from performance issues. This is the only way to 100% guarantee 2164 * that we rewrite the correct set efficiently. 2165 */ 2166 2167 list_replace(&old_def->uses, &old_uses); 2168 list_inithead(&old_def->uses); 2169 list_replace(&old_def->if_uses, &old_if_uses); 2170 list_inithead(&old_def->if_uses); 2171 } 2172 2173 b.cursor = nir_after_instr(instr); 2174 nir_ssa_def *new_def = lower(&b, instr, cb_data); 2175 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS && 2176 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) { 2177 assert(old_def != NULL); 2178 if (new_def->parent_instr->block != instr->block) 2179 preserved = nir_metadata_none; 2180 2181 nir_src new_src = nir_src_for_ssa(new_def); 2182 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link) 2183 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); 2184 2185 list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link) 2186 nir_if_rewrite_condition(use_src->parent_if, new_src); 2187 2188 if (nir_ssa_def_is_unused(old_def)) { 2189 iter = nir_instr_free_and_dce(instr); 2190 } else { 2191 iter = nir_after_instr(instr); 2192 } 2193 progress = true; 2194 } else { 2195 /* We didn't end up lowering after all. Put the uses back */ 2196 if (old_def) { 2197 list_replace(&old_uses, &old_def->uses); 2198 list_replace(&old_if_uses, &old_def->if_uses); 2199 } 2200 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) { 2201 /* Only instructions without a return value can be removed like this */ 2202 assert(!old_def); 2203 iter = nir_instr_free_and_dce(instr); 2204 progress = true; 2205 } else 2206 iter = nir_after_instr(instr); 2207 2208 if (new_def == NIR_LOWER_INSTR_PROGRESS) 2209 progress = true; 2210 } 2211 } 2212 2213 if (progress) { 2214 nir_metadata_preserve(impl, preserved); 2215 } else { 2216 nir_metadata_preserve(impl, nir_metadata_all); 2217 } 2218 2219 return progress; 2220} 2221 2222bool 2223nir_shader_lower_instructions(nir_shader *shader, 2224 nir_instr_filter_cb filter, 2225 nir_lower_instr_cb lower, 2226 void *cb_data) 2227{ 2228 bool progress = false; 2229 2230 nir_foreach_function(function, shader) { 2231 if (function->impl && 2232 nir_function_impl_lower_instructions(function->impl, 2233 filter, lower, cb_data)) 2234 progress = true; 2235 } 2236 2237 return progress; 2238} 2239 2240/** 2241 * Returns true if the shader supports quad-based implicit derivatives on 2242 * texture sampling. 2243 */ 2244bool nir_shader_supports_implicit_lod(nir_shader *shader) 2245{ 2246 return (shader->info.stage == MESA_SHADER_FRAGMENT || 2247 (shader->info.stage == MESA_SHADER_COMPUTE && 2248 shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE)); 2249} 2250 2251nir_intrinsic_op 2252nir_intrinsic_from_system_value(gl_system_value val) 2253{ 2254 switch (val) { 2255 case SYSTEM_VALUE_VERTEX_ID: 2256 return nir_intrinsic_load_vertex_id; 2257 case SYSTEM_VALUE_INSTANCE_ID: 2258 return nir_intrinsic_load_instance_id; 2259 case SYSTEM_VALUE_DRAW_ID: 2260 return nir_intrinsic_load_draw_id; 2261 case SYSTEM_VALUE_BASE_INSTANCE: 2262 return nir_intrinsic_load_base_instance; 2263 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: 2264 return nir_intrinsic_load_vertex_id_zero_base; 2265 case SYSTEM_VALUE_IS_INDEXED_DRAW: 2266 return nir_intrinsic_load_is_indexed_draw; 2267 case SYSTEM_VALUE_FIRST_VERTEX: 2268 return nir_intrinsic_load_first_vertex; 2269 case SYSTEM_VALUE_BASE_VERTEX: 2270 return nir_intrinsic_load_base_vertex; 2271 case SYSTEM_VALUE_INVOCATION_ID: 2272 return nir_intrinsic_load_invocation_id; 2273 case SYSTEM_VALUE_FRAG_COORD: 2274 return nir_intrinsic_load_frag_coord; 2275 case SYSTEM_VALUE_POINT_COORD: 2276 return nir_intrinsic_load_point_coord; 2277 case SYSTEM_VALUE_LINE_COORD: 2278 return nir_intrinsic_load_line_coord; 2279 case SYSTEM_VALUE_FRONT_FACE: 2280 return nir_intrinsic_load_front_face; 2281 case SYSTEM_VALUE_SAMPLE_ID: 2282 return nir_intrinsic_load_sample_id; 2283 case SYSTEM_VALUE_SAMPLE_POS: 2284 return nir_intrinsic_load_sample_pos; 2285 case SYSTEM_VALUE_SAMPLE_MASK_IN: 2286 return nir_intrinsic_load_sample_mask_in; 2287 case SYSTEM_VALUE_LOCAL_INVOCATION_ID: 2288 return nir_intrinsic_load_local_invocation_id; 2289 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: 2290 return nir_intrinsic_load_local_invocation_index; 2291 case SYSTEM_VALUE_WORKGROUP_ID: 2292 return nir_intrinsic_load_workgroup_id; 2293 case SYSTEM_VALUE_NUM_WORKGROUPS: 2294 return nir_intrinsic_load_num_workgroups; 2295 case SYSTEM_VALUE_PRIMITIVE_ID: 2296 return nir_intrinsic_load_primitive_id; 2297 case SYSTEM_VALUE_TESS_COORD: 2298 return nir_intrinsic_load_tess_coord; 2299 case SYSTEM_VALUE_TESS_LEVEL_OUTER: 2300 return nir_intrinsic_load_tess_level_outer; 2301 case SYSTEM_VALUE_TESS_LEVEL_INNER: 2302 return nir_intrinsic_load_tess_level_inner; 2303 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT: 2304 return nir_intrinsic_load_tess_level_outer_default; 2305 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT: 2306 return nir_intrinsic_load_tess_level_inner_default; 2307 case SYSTEM_VALUE_VERTICES_IN: 2308 return nir_intrinsic_load_patch_vertices_in; 2309 case SYSTEM_VALUE_HELPER_INVOCATION: 2310 return nir_intrinsic_load_helper_invocation; 2311 case SYSTEM_VALUE_COLOR0: 2312 return nir_intrinsic_load_color0; 2313 case SYSTEM_VALUE_COLOR1: 2314 return nir_intrinsic_load_color1; 2315 case SYSTEM_VALUE_VIEW_INDEX: 2316 return nir_intrinsic_load_view_index; 2317 case SYSTEM_VALUE_SUBGROUP_SIZE: 2318 return nir_intrinsic_load_subgroup_size; 2319 case SYSTEM_VALUE_SUBGROUP_INVOCATION: 2320 return nir_intrinsic_load_subgroup_invocation; 2321 case SYSTEM_VALUE_SUBGROUP_EQ_MASK: 2322 return nir_intrinsic_load_subgroup_eq_mask; 2323 case SYSTEM_VALUE_SUBGROUP_GE_MASK: 2324 return nir_intrinsic_load_subgroup_ge_mask; 2325 case SYSTEM_VALUE_SUBGROUP_GT_MASK: 2326 return nir_intrinsic_load_subgroup_gt_mask; 2327 case SYSTEM_VALUE_SUBGROUP_LE_MASK: 2328 return nir_intrinsic_load_subgroup_le_mask; 2329 case SYSTEM_VALUE_SUBGROUP_LT_MASK: 2330 return nir_intrinsic_load_subgroup_lt_mask; 2331 case SYSTEM_VALUE_NUM_SUBGROUPS: 2332 return nir_intrinsic_load_num_subgroups; 2333 case SYSTEM_VALUE_SUBGROUP_ID: 2334 return nir_intrinsic_load_subgroup_id; 2335 case SYSTEM_VALUE_WORKGROUP_SIZE: 2336 return nir_intrinsic_load_workgroup_size; 2337 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: 2338 return nir_intrinsic_load_global_invocation_id; 2339 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID: 2340 return nir_intrinsic_load_base_global_invocation_id; 2341 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: 2342 return nir_intrinsic_load_global_invocation_index; 2343 case SYSTEM_VALUE_WORK_DIM: 2344 return nir_intrinsic_load_work_dim; 2345 case SYSTEM_VALUE_USER_DATA_AMD: 2346 return nir_intrinsic_load_user_data_amd; 2347 case SYSTEM_VALUE_RAY_LAUNCH_ID: 2348 return nir_intrinsic_load_ray_launch_id; 2349 case SYSTEM_VALUE_RAY_LAUNCH_SIZE: 2350 return nir_intrinsic_load_ray_launch_size; 2351 case SYSTEM_VALUE_RAY_WORLD_ORIGIN: 2352 return nir_intrinsic_load_ray_world_origin; 2353 case SYSTEM_VALUE_RAY_WORLD_DIRECTION: 2354 return nir_intrinsic_load_ray_world_direction; 2355 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN: 2356 return nir_intrinsic_load_ray_object_origin; 2357 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION: 2358 return nir_intrinsic_load_ray_object_direction; 2359 case SYSTEM_VALUE_RAY_T_MIN: 2360 return nir_intrinsic_load_ray_t_min; 2361 case SYSTEM_VALUE_RAY_T_MAX: 2362 return nir_intrinsic_load_ray_t_max; 2363 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD: 2364 return nir_intrinsic_load_ray_object_to_world; 2365 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT: 2366 return nir_intrinsic_load_ray_world_to_object; 2367 case SYSTEM_VALUE_RAY_HIT_KIND: 2368 return nir_intrinsic_load_ray_hit_kind; 2369 case SYSTEM_VALUE_RAY_FLAGS: 2370 return nir_intrinsic_load_ray_flags; 2371 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX: 2372 return nir_intrinsic_load_ray_geometry_index; 2373 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX: 2374 return nir_intrinsic_load_ray_instance_custom_index; 2375 case SYSTEM_VALUE_FRAG_SHADING_RATE: 2376 return nir_intrinsic_load_frag_shading_rate; 2377 default: 2378 unreachable("system value does not directly correspond to intrinsic"); 2379 } 2380} 2381 2382gl_system_value 2383nir_system_value_from_intrinsic(nir_intrinsic_op intrin) 2384{ 2385 switch (intrin) { 2386 case nir_intrinsic_load_vertex_id: 2387 return SYSTEM_VALUE_VERTEX_ID; 2388 case nir_intrinsic_load_instance_id: 2389 return SYSTEM_VALUE_INSTANCE_ID; 2390 case nir_intrinsic_load_draw_id: 2391 return SYSTEM_VALUE_DRAW_ID; 2392 case nir_intrinsic_load_base_instance: 2393 return SYSTEM_VALUE_BASE_INSTANCE; 2394 case nir_intrinsic_load_vertex_id_zero_base: 2395 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; 2396 case nir_intrinsic_load_first_vertex: 2397 return SYSTEM_VALUE_FIRST_VERTEX; 2398 case nir_intrinsic_load_is_indexed_draw: 2399 return SYSTEM_VALUE_IS_INDEXED_DRAW; 2400 case nir_intrinsic_load_base_vertex: 2401 return SYSTEM_VALUE_BASE_VERTEX; 2402 case nir_intrinsic_load_invocation_id: 2403 return SYSTEM_VALUE_INVOCATION_ID; 2404 case nir_intrinsic_load_frag_coord: 2405 return SYSTEM_VALUE_FRAG_COORD; 2406 case nir_intrinsic_load_point_coord: 2407 return SYSTEM_VALUE_POINT_COORD; 2408 case nir_intrinsic_load_line_coord: 2409 return SYSTEM_VALUE_LINE_COORD; 2410 case nir_intrinsic_load_front_face: 2411 return SYSTEM_VALUE_FRONT_FACE; 2412 case nir_intrinsic_load_sample_id: 2413 return SYSTEM_VALUE_SAMPLE_ID; 2414 case nir_intrinsic_load_sample_pos: 2415 return SYSTEM_VALUE_SAMPLE_POS; 2416 case nir_intrinsic_load_sample_mask_in: 2417 return SYSTEM_VALUE_SAMPLE_MASK_IN; 2418 case nir_intrinsic_load_local_invocation_id: 2419 return SYSTEM_VALUE_LOCAL_INVOCATION_ID; 2420 case nir_intrinsic_load_local_invocation_index: 2421 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; 2422 case nir_intrinsic_load_num_workgroups: 2423 return SYSTEM_VALUE_NUM_WORKGROUPS; 2424 case nir_intrinsic_load_workgroup_id: 2425 return SYSTEM_VALUE_WORKGROUP_ID; 2426 case nir_intrinsic_load_primitive_id: 2427 return SYSTEM_VALUE_PRIMITIVE_ID; 2428 case nir_intrinsic_load_tess_coord: 2429 return SYSTEM_VALUE_TESS_COORD; 2430 case nir_intrinsic_load_tess_level_outer: 2431 return SYSTEM_VALUE_TESS_LEVEL_OUTER; 2432 case nir_intrinsic_load_tess_level_inner: 2433 return SYSTEM_VALUE_TESS_LEVEL_INNER; 2434 case nir_intrinsic_load_tess_level_outer_default: 2435 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT; 2436 case nir_intrinsic_load_tess_level_inner_default: 2437 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT; 2438 case nir_intrinsic_load_patch_vertices_in: 2439 return SYSTEM_VALUE_VERTICES_IN; 2440 case nir_intrinsic_load_helper_invocation: 2441 return SYSTEM_VALUE_HELPER_INVOCATION; 2442 case nir_intrinsic_load_color0: 2443 return SYSTEM_VALUE_COLOR0; 2444 case nir_intrinsic_load_color1: 2445 return SYSTEM_VALUE_COLOR1; 2446 case nir_intrinsic_load_view_index: 2447 return SYSTEM_VALUE_VIEW_INDEX; 2448 case nir_intrinsic_load_subgroup_size: 2449 return SYSTEM_VALUE_SUBGROUP_SIZE; 2450 case nir_intrinsic_load_subgroup_invocation: 2451 return SYSTEM_VALUE_SUBGROUP_INVOCATION; 2452 case nir_intrinsic_load_subgroup_eq_mask: 2453 return SYSTEM_VALUE_SUBGROUP_EQ_MASK; 2454 case nir_intrinsic_load_subgroup_ge_mask: 2455 return SYSTEM_VALUE_SUBGROUP_GE_MASK; 2456 case nir_intrinsic_load_subgroup_gt_mask: 2457 return SYSTEM_VALUE_SUBGROUP_GT_MASK; 2458 case nir_intrinsic_load_subgroup_le_mask: 2459 return SYSTEM_VALUE_SUBGROUP_LE_MASK; 2460 case nir_intrinsic_load_subgroup_lt_mask: 2461 return SYSTEM_VALUE_SUBGROUP_LT_MASK; 2462 case nir_intrinsic_load_num_subgroups: 2463 return SYSTEM_VALUE_NUM_SUBGROUPS; 2464 case nir_intrinsic_load_subgroup_id: 2465 return SYSTEM_VALUE_SUBGROUP_ID; 2466 case nir_intrinsic_load_workgroup_size: 2467 return SYSTEM_VALUE_WORKGROUP_SIZE; 2468 case nir_intrinsic_load_global_invocation_id: 2469 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; 2470 case nir_intrinsic_load_base_global_invocation_id: 2471 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID; 2472 case nir_intrinsic_load_global_invocation_index: 2473 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX; 2474 case nir_intrinsic_load_work_dim: 2475 return SYSTEM_VALUE_WORK_DIM; 2476 case nir_intrinsic_load_user_data_amd: 2477 return SYSTEM_VALUE_USER_DATA_AMD; 2478 case nir_intrinsic_load_barycentric_model: 2479 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL; 2480 case nir_intrinsic_load_gs_header_ir3: 2481 return SYSTEM_VALUE_GS_HEADER_IR3; 2482 case nir_intrinsic_load_tcs_header_ir3: 2483 return SYSTEM_VALUE_TCS_HEADER_IR3; 2484 case nir_intrinsic_load_ray_launch_id: 2485 return SYSTEM_VALUE_RAY_LAUNCH_ID; 2486 case nir_intrinsic_load_ray_launch_size: 2487 return SYSTEM_VALUE_RAY_LAUNCH_SIZE; 2488 case nir_intrinsic_load_ray_world_origin: 2489 return SYSTEM_VALUE_RAY_WORLD_ORIGIN; 2490 case nir_intrinsic_load_ray_world_direction: 2491 return SYSTEM_VALUE_RAY_WORLD_DIRECTION; 2492 case nir_intrinsic_load_ray_object_origin: 2493 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN; 2494 case nir_intrinsic_load_ray_object_direction: 2495 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION; 2496 case nir_intrinsic_load_ray_t_min: 2497 return SYSTEM_VALUE_RAY_T_MIN; 2498 case nir_intrinsic_load_ray_t_max: 2499 return SYSTEM_VALUE_RAY_T_MAX; 2500 case nir_intrinsic_load_ray_object_to_world: 2501 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD; 2502 case nir_intrinsic_load_ray_world_to_object: 2503 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT; 2504 case nir_intrinsic_load_ray_hit_kind: 2505 return SYSTEM_VALUE_RAY_HIT_KIND; 2506 case nir_intrinsic_load_ray_flags: 2507 return SYSTEM_VALUE_RAY_FLAGS; 2508 case nir_intrinsic_load_ray_geometry_index: 2509 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX; 2510 case nir_intrinsic_load_ray_instance_custom_index: 2511 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX; 2512 case nir_intrinsic_load_frag_shading_rate: 2513 return SYSTEM_VALUE_FRAG_SHADING_RATE; 2514 default: 2515 unreachable("intrinsic doesn't produce a system value"); 2516 } 2517} 2518 2519/* OpenGL utility method that remaps the location attributes if they are 2520 * doubles. Not needed for vulkan due the differences on the input location 2521 * count for doubles on vulkan vs OpenGL 2522 * 2523 * The bitfield returned in dual_slot is one bit for each double input slot in 2524 * the original OpenGL single-slot input numbering. The mapping from old 2525 * locations to new locations is as follows: 2526 * 2527 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) 2528 */ 2529void 2530nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot) 2531{ 2532 assert(shader->info.stage == MESA_SHADER_VERTEX); 2533 2534 *dual_slot = 0; 2535 nir_foreach_shader_in_variable(var, shader) { 2536 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) { 2537 unsigned slots = glsl_count_attribute_slots(var->type, true); 2538 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location; 2539 } 2540 } 2541 2542 nir_foreach_shader_in_variable(var, shader) { 2543 var->data.location += 2544 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location)); 2545 } 2546} 2547 2548/* Returns an attribute mask that has been re-compacted using the given 2549 * dual_slot mask. 2550 */ 2551uint64_t 2552nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot) 2553{ 2554 while (dual_slot) { 2555 unsigned loc = u_bit_scan64(&dual_slot); 2556 /* mask of all bits up to and including loc */ 2557 uint64_t mask = BITFIELD64_MASK(loc + 1); 2558 attribs = (attribs & mask) | ((attribs & ~mask) >> 1); 2559 } 2560 return attribs; 2561} 2562 2563void 2564nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, 2565 bool bindless) 2566{ 2567 enum gl_access_qualifier access = nir_intrinsic_access(intrin); 2568 2569 /* Image intrinsics only have one of these */ 2570 assert(!nir_intrinsic_has_src_type(intrin) || 2571 !nir_intrinsic_has_dest_type(intrin)); 2572 2573 nir_alu_type data_type = nir_type_invalid; 2574 if (nir_intrinsic_has_src_type(intrin)) 2575 data_type = nir_intrinsic_src_type(intrin); 2576 if (nir_intrinsic_has_dest_type(intrin)) 2577 data_type = nir_intrinsic_dest_type(intrin); 2578 2579 switch (intrin->intrinsic) { 2580#define CASE(op) \ 2581 case nir_intrinsic_image_deref_##op: \ 2582 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \ 2583 : nir_intrinsic_image_##op; \ 2584 break; 2585 CASE(load) 2586 CASE(sparse_load) 2587 CASE(store) 2588 CASE(atomic_add) 2589 CASE(atomic_imin) 2590 CASE(atomic_umin) 2591 CASE(atomic_imax) 2592 CASE(atomic_umax) 2593 CASE(atomic_and) 2594 CASE(atomic_or) 2595 CASE(atomic_xor) 2596 CASE(atomic_exchange) 2597 CASE(atomic_comp_swap) 2598 CASE(atomic_fadd) 2599 CASE(atomic_fmin) 2600 CASE(atomic_fmax) 2601 CASE(atomic_inc_wrap) 2602 CASE(atomic_dec_wrap) 2603 CASE(size) 2604 CASE(samples) 2605 CASE(load_raw_intel) 2606 CASE(store_raw_intel) 2607#undef CASE 2608 default: 2609 unreachable("Unhanded image intrinsic"); 2610 } 2611 2612 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2613 nir_variable *var = nir_deref_instr_get_variable(deref); 2614 2615 /* Only update the format if the intrinsic doesn't have one set */ 2616 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE) 2617 nir_intrinsic_set_format(intrin, var->data.image.format); 2618 2619 nir_intrinsic_set_access(intrin, access | var->data.access); 2620 if (nir_intrinsic_has_src_type(intrin)) 2621 nir_intrinsic_set_src_type(intrin, data_type); 2622 if (nir_intrinsic_has_dest_type(intrin)) 2623 nir_intrinsic_set_dest_type(intrin, data_type); 2624 2625 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], 2626 nir_src_for_ssa(src)); 2627} 2628 2629unsigned 2630nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr) 2631{ 2632 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); 2633 int coords = glsl_get_sampler_dim_coordinate_components(dim); 2634 if (dim == GLSL_SAMPLER_DIM_CUBE) 2635 return coords; 2636 else 2637 return coords + nir_intrinsic_image_array(instr); 2638} 2639 2640nir_src * 2641nir_get_shader_call_payload_src(nir_intrinsic_instr *call) 2642{ 2643 switch (call->intrinsic) { 2644 case nir_intrinsic_trace_ray: 2645 case nir_intrinsic_rt_trace_ray: 2646 return &call->src[10]; 2647 case nir_intrinsic_execute_callable: 2648 case nir_intrinsic_rt_execute_callable: 2649 return &call->src[1]; 2650 default: 2651 unreachable("Not a call intrinsic"); 2652 return NULL; 2653 } 2654} 2655 2656nir_binding nir_chase_binding(nir_src rsrc) 2657{ 2658 nir_binding res = {0}; 2659 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) { 2660 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type); 2661 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type); 2662 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) { 2663 nir_deref_instr *deref = nir_src_as_deref(rsrc); 2664 2665 if (deref->deref_type == nir_deref_type_var) { 2666 res.success = true; 2667 res.var = deref->var; 2668 res.desc_set = deref->var->data.descriptor_set; 2669 res.binding = deref->var->data.binding; 2670 return res; 2671 } else if (deref->deref_type == nir_deref_type_array && is_image) { 2672 if (res.num_indices == ARRAY_SIZE(res.indices)) 2673 return (nir_binding){0}; 2674 res.indices[res.num_indices++] = deref->arr.index; 2675 } 2676 2677 rsrc = deref->parent; 2678 } 2679 } 2680 2681 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions 2682 * when removing the offset from addresses. We also consider nir_op_is_vec() 2683 * instructions to skip trimming of vec2_index_32bit_offset addresses after 2684 * lowering ALU to scalar. 2685 */ 2686 while (true) { 2687 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc); 2688 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc); 2689 if (alu && alu->op == nir_op_mov) { 2690 for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) { 2691 if (alu->src[0].swizzle[i] != i) 2692 return (nir_binding){0}; 2693 } 2694 rsrc = alu->src[0].src; 2695 } else if (alu && nir_op_is_vec(alu->op)) { 2696 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 2697 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa) 2698 return (nir_binding){0}; 2699 } 2700 rsrc = alu->src[0].src; 2701 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) { 2702 /* The caller might want to be aware if only the first invocation of 2703 * the indices are used. 2704 */ 2705 res.read_first_invocation = true; 2706 rsrc = intrin->src[0]; 2707 } else { 2708 break; 2709 } 2710 } 2711 2712 if (nir_src_is_const(rsrc)) { 2713 /* GL binding model after deref lowering */ 2714 res.success = true; 2715 res.binding = nir_src_as_uint(rsrc); 2716 return res; 2717 } 2718 2719 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */ 2720 2721 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc); 2722 if (!intrin) 2723 return (nir_binding){0}; 2724 2725 /* skip load_vulkan_descriptor */ 2726 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) { 2727 intrin = nir_src_as_intrinsic(intrin->src[0]); 2728 if (!intrin) 2729 return (nir_binding){0}; 2730 } 2731 2732 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index) 2733 return (nir_binding){0}; 2734 2735 assert(res.num_indices == 0); 2736 res.success = true; 2737 res.desc_set = nir_intrinsic_desc_set(intrin); 2738 res.binding = nir_intrinsic_binding(intrin); 2739 res.num_indices = 1; 2740 res.indices[0] = intrin->src[0]; 2741 return res; 2742} 2743 2744nir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding) 2745{ 2746 nir_variable *binding_var = NULL; 2747 unsigned count = 0; 2748 2749 if (!binding.success) 2750 return NULL; 2751 2752 if (binding.var) 2753 return binding.var; 2754 2755 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) { 2756 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) { 2757 binding_var = var; 2758 count++; 2759 } 2760 } 2761 2762 /* Be conservative if another variable is using the same binding/desc_set 2763 * because the access mask might be different and we can't get it reliably. 2764 */ 2765 if (count > 1) 2766 return NULL; 2767 2768 return binding_var; 2769} 2770 2771bool 2772nir_alu_instr_is_copy(nir_alu_instr *instr) 2773{ 2774 assert(instr->src[0].src.is_ssa); 2775 2776 if (instr->op == nir_op_mov) { 2777 return !instr->dest.saturate && 2778 !instr->src[0].abs && 2779 !instr->src[0].negate; 2780 } else if (nir_op_is_vec(instr->op)) { 2781 for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) { 2782 if (instr->src[i].abs || instr->src[i].negate) 2783 return false; 2784 } 2785 return !instr->dest.saturate; 2786 } else { 2787 return false; 2788 } 2789} 2790 2791nir_ssa_scalar 2792nir_ssa_scalar_chase_movs(nir_ssa_scalar s) 2793{ 2794 while (nir_ssa_scalar_is_alu(s)) { 2795 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 2796 if (!nir_alu_instr_is_copy(alu)) 2797 break; 2798 2799 if (alu->op == nir_op_mov) { 2800 s.def = alu->src[0].src.ssa; 2801 s.comp = alu->src[0].swizzle[s.comp]; 2802 } else { 2803 assert(nir_op_is_vec(alu->op)); 2804 s.def = alu->src[s.comp].src.ssa; 2805 s.comp = alu->src[s.comp].swizzle[0]; 2806 } 2807 } 2808 2809 return s; 2810} 2811