nir.c revision b8e80941
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_shader_temp:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs,
                          &var->node);
      break;

   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_mem_shared:
      assert(gl_shader_stage_is_compute(shader->info.stage));
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_mem_global:
      assert(!"nir_shader_add_variable cannot be used for global memory");
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever.
    */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader,
                            unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] =
{
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 1:  v.b   = i & 1;  break;
   case 8:  v.i8  = i;      break;
   case 16: v.i16 = i;      break;
   case 32: v.i32 = i;      break;
   case 64: v.i64 = i;      break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before.
          */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block.
       */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array ||
       instr->deref_type == nir_deref_type_ptr_as_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 16:
      v.u16 = _mesa_float_to_half(f);
      break;
   case 32:
      v.f32 = f;
      break;
   case 64:
      v.f64 = f;
      break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)load->value[comp].b;
   case 8:  return load->value[comp].i8;
   case 16: return load->value[comp].i16;
   case 32: return load->value[comp].i32;
   case 64: return load->value[comp].i64;
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 1:  return load->value[comp].b;
   case 8:  return load->value[comp].u8;
   case 16: return load->value[comp].u16;
   case 32: return load->value[comp].u32;
   case 64: return load->value[comp].u64;
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   int64_t i = nir_src_comp_as_int(src, comp);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value[comp].u16);
   case 32: return load->value[comp].f32;
   case 64: return load->value[comp].f64;
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise it
 * returns false which means it may or may not be dynamically uniform but it
 * can't be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ?
               NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code and then call rewrite_uses_after and pass the
 * last fixup instruction as after_me and it will replace all of the uses you
 * want without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   if (new_src.is_ssa && def == new_src.ssa)
      return;

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
         read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if?
         Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else?  Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
      return nir_intrinsic_load_global_invocation_index;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps the location attributes if they are
 * doubles.  Not needed for Vulkan due to the differences in input location
 * counts for doubles between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}

void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
                            bool bindless)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   switch (intrin->intrinsic) {
#define CASE(op) \
   case nir_intrinsic_image_deref_##op: \
      intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
                                   : nir_intrinsic_image_##op; \
      break;
   CASE(load)
   CASE(store)
   CASE(atomic_add)
   CASE(atomic_min)
   CASE(atomic_max)
   CASE(atomic_and)
   CASE(atomic_or)
   CASE(atomic_xor)
   CASE(atomic_exchange)
   CASE(atomic_comp_swap)
   CASE(atomic_fadd)
   CASE(size)
   CASE(samples)
   CASE(load_raw_intel)
   CASE(store_raw_intel)
#undef CASE
   default:
      unreachable("Unhandled image intrinsic");
   }

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
   nir_intrinsic_set_access(intrin, access | var->data.image.access);
   nir_intrinsic_set_format(intrin, var->data.image.format);

   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(src));
}
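
/* Usage sketch: a hypothetical driver lowering pass that converts
 * image_deref_* intrinsics into their (bindless) image_* forms would first
 * compute an SSA value for the image handle or index (how that value is
 * produced is driver-specific and only assumed here), then hand it to the
 * helper above:
 *
 *    nir_ssa_def *handle = load_image_handle(b, deref);   // assumed helper
 *    nir_rewrite_image_intrinsic(intrin, handle, true);
 *
 * The helper swaps the intrinsic opcode, records the image dimensionality
 * and arrayness from the deref type plus the access and format from the
 * variable, and rewrites src[0] from the deref to the given value.
 */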