nir.c revision 01e04c3f
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_local:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_global:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_shader_storage:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_shared:
      assert(shader->info.stage == MESA_SHADER_COMPUTE);
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_local;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever.
    */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

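/* Example (illustrative, not from the original sources): the constructors
 * above only allocate and initialize an instruction; the caller still has to
 * insert it at a cursor.  A hypothetical pass materializing an immediate zero
 * at the top of a function might do, given a nir_shader *shader and a
 * nir_function_impl *impl:
 *
 *    nir_load_const_instr *zero =
 *       nir_load_const_instr_create(shader, 1, 32);
 *    zero->value.u32[0] = 0;
 *    nir_instr_insert(nir_before_cf_list(&impl->body), &zero->instr);
 *
 * In practice most passes go through the nir_builder helpers rather than
 * calling these constructors and nir_instr_insert() directly.
 */
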
nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d);  break;
   case 32: v.f32[0] = d;                       break;
   case 64: v.f64[0] = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 8:  v.i8[0]  = i;  break;
   case 16: v.i16[0] = i;  break;
   case 32: v.i32[0] = i;  break;
   case 64: v.i64[0] = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.i8[comp];
   case 16: return load->value.i16[comp];
   case 32: return load->value.i32[comp];
   case 64: return load->value.i64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.u8[comp];
   case 16: return load->value.u16[comp];
   case 32: return load->value.u32[comp];
   case 64: return load->value.u64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   assert(load->def.bit_size == 32);
   assert(load->value.u32[comp] == NIR_TRUE ||
          load->value.u32[comp] == NIR_FALSE);

   return load->value.u32[comp];
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value.u16[comp]);
   case 32: return load->value.f32[comp];
   case 64: return load->value.f64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise it
 * returns false which means it may or may not be dynamically uniform but it
 * can't be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code and then call rewrite_uses_after and pass the
 * last fixup instruction as after_me and it will replace all of the uses you
 * want without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

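/* Example (illustrative, not from the original sources): a typical fixup
 * pattern for the function above, assuming a nir_builder *b positioned right
 * after an ALU instruction `instr` and an existing nir_ssa_def *scale:
 *
 *    nir_ssa_def *fixed = nir_fmul(b, &instr->dest.dest.ssa, scale);
 *    nir_ssa_def_rewrite_uses_after(&instr->dest.dest.ssa,
 *                                   nir_src_for_ssa(fixed),
 *                                   fixed->parent_instr);
 *
 * The fixup multiply keeps reading the original def, while every later use
 * is rewritten to read the fixed-up value.
 */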

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);

         for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
            if (!nir_alu_instr_channel_used(alu, src_idx, c))
               continue;

            read_mask |= (1 << alu_src->swizzle[c]);
         }
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if?  Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else?  Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps attribute locations if they are doubles.
 * Not needed for Vulkan due to the differences in the input location count
 * for doubles on Vulkan vs OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}

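/* Example (illustrative, not from the original sources): a GL driver that
 * tracks per-attribute bitmasks would typically remap the shader once and
 * then re-compact its own masks with the returned dual_slot mask, e.g. (with
 * `nir` and `inputs_read` as placeholder names):
 *
 *    uint64_t dual_slot;
 *    nir_remap_dual_slot_attributes(nir, &dual_slot);
 *    inputs_read = nir_get_single_slot_attribs_mask(inputs_read, dual_slot);
 */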