/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef NIR_BUILDER_H
#define NIR_BUILDER_H

#include "nir_control_flow.h"
#include "util/bitscan.h"
#include "util/half_float.h"

struct exec_list;

/* State for incrementally emitting NIR instructions: a cursor (insertion
 * point) plus flags that are applied to every instruction created through
 * the builder.
 */
typedef struct nir_builder {
   nir_cursor cursor;

   /* Whether new ALU instructions will be marked "exact" */
   bool exact;

   /* Whether to run divergence analysis on inserted instructions (loop merge
    * and header phis are not updated).
    */
   bool update_divergence;

   nir_shader *shader;
   nir_function_impl *impl;
} nir_builder;

/* Initialize a builder for rewriting an existing function implementation.
 * The cursor is left unset; callers must position it before inserting.
 */
static inline void
nir_builder_init(nir_builder *build, nir_function_impl *impl)
{
   memset(build, 0, sizeof(*build));
   build->exact = false;
   build->impl = impl;
   build->shader = impl->function->shader;
}

/* Create a fresh shader containing a single entrypoint function ("main") and
 * return a builder whose cursor points at the end of its (empty) body.  The
 * printf-style name, if non-NULL, becomes the shader's info.name.
 */
static inline nir_builder MUST_CHECK PRINTFLIKE(3, 4)
nir_builder_init_simple_shader(gl_shader_stage stage,
                               const nir_shader_compiler_options *options,
                               const char *name, ...)
{
   nir_builder b;

   memset(&b, 0, sizeof(b));
   b.shader = nir_shader_create(NULL, stage, options, NULL);

   if (name) {
      va_list args;
      va_start(args, name);
      b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
      va_end(args);
   }

   nir_function *func = nir_function_create(b.shader, "main");
   func->is_entrypoint = true;
   b.exact = false;
   b.impl = nir_function_impl_create(func);
   b.cursor = nir_after_cf_list(&b.impl->body);

   return b;
}

typedef bool (*nir_instr_pass_cb)(struct nir_builder *, nir_instr *, void *);

/**
 * Iterates over all the instructions in a NIR shader and calls the given pass
 * on them.
 *
 * The pass should return true if it modified the shader.  In that case, only
 * the preserved metadata flags will be preserved in the function impl.
 *
 * The builder will be initialized to point at the function impl, but its
 * cursor is unset.
 */
static inline bool
nir_shader_instructions_pass(nir_shader *shader,
                             nir_instr_pass_cb pass,
                             nir_metadata preserved,
                             void *cb_data)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool func_progress = false;
      nir_builder b;
      nir_builder_init(&b, function->impl);

      /* _safe iteration: the callback may remove or replace the current
       * instruction or block.
       */
      nir_foreach_block_safe(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            func_progress |= pass(&b, instr, cb_data);
         }
      }

      if (func_progress) {
         nir_metadata_preserve(function->impl, preserved);
         progress = true;
      } else {
         /* Untouched impl: all of its metadata remains valid. */
         nir_metadata_preserve(function->impl, nir_metadata_all);
      }
   }

   return progress;
}

/* Insert an instruction at the cursor and advance the cursor past it. */
static inline void
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
{
   nir_instr_insert(build->cursor, instr);

   if (build->update_divergence)
      nir_update_instr_divergence(build->shader, instr);

   /* Move the cursor forward. */
   build->cursor = nir_after_instr(instr);
}

/* Return the most recently inserted instruction.  Only valid while the
 * cursor still sits immediately after an instruction.
 */
static inline nir_instr *
nir_builder_last_instr(nir_builder *build)
{
   assert(build->cursor.option == nir_cursor_after_instr);
   return build->cursor.instr;
}

/* Insert a control-flow node (if/loop) at the cursor. */
static inline void
nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
{
   nir_cf_node_insert(build->cursor, cf);
}

/* Whether the cursor currently sits anywhere inside cf_node, determined by
 * walking the CF-node parent chain up from the cursor's block.
 */
static inline bool
nir_builder_is_inside_cf(nir_builder *build, nir_cf_node *cf_node)
{
   nir_block *block = nir_cursor_current_block(build->cursor);
   for (nir_cf_node *n = &block->cf_node; n; n = n->parent) {
      if (n == cf_node)
         return true;
   }
   return false;
}

/* Open an if-statement on the given condition source and move the cursor
 * into its then-list.  Pair with nir_push_else()/nir_pop_if().
 */
static inline nir_if *
nir_push_if_src(nir_builder *build, nir_src condition)
{
   nir_if *nif = nir_if_create(build->shader);
   nif->condition = condition;
   nir_builder_cf_insert(build, &nif->cf_node);
   build->cursor = nir_before_cf_list(&nif->then_list);
   return nif;
}

static inline nir_if *
nir_push_if(nir_builder *build, nir_ssa_def *condition)
{
   return nir_push_if_src(build, nir_src_for_ssa(condition));
}

/* Move the cursor into the else-list of nif.  nif may be NULL, in which case
 * the enclosing if is recovered from the cursor position.
 */
static inline nir_if *
nir_push_else(nir_builder *build, nir_if *nif)
{
   if (nif) {
      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      nif = nir_cf_node_as_if(block->cf_node.parent);
   }
   build->cursor = nir_before_cf_list(&nif->else_list);
   return nif;
}

/* Close an if-statement: move the cursor just after it.  nif may be NULL
 * (recovered from the cursor position, as in nir_push_else()).
 */
static inline void
nir_pop_if(nir_builder *build, nir_if *nif)
{
   if (nif) {
      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      nif = nir_cf_node_as_if(block->cf_node.parent);
   }
   build->cursor = nir_after_cf_node(&nif->cf_node);
}

/* Emit a phi merging then_def/else_def of the if-statement that immediately
 * precedes the cursor (i.e. call this right after nir_pop_if()).
 */
static inline nir_ssa_def *
nir_if_phi(nir_builder *build, nir_ssa_def *then_def, nir_ssa_def *else_def)
{
   nir_block *block =
      nir_cursor_current_block(build->cursor);
   nir_if *nif = nir_cf_node_as_if(nir_cf_node_prev(&block->cf_node));

   nir_phi_instr *phi = nir_phi_instr_create(build->shader);
   nir_phi_instr_add_src(phi, nir_if_last_then_block(nif), nir_src_for_ssa(then_def));
   nir_phi_instr_add_src(phi, nir_if_last_else_block(nif), nir_src_for_ssa(else_def));

   assert(then_def->num_components == else_def->num_components);
   assert(then_def->bit_size == else_def->bit_size);
   nir_ssa_dest_init(&phi->instr, &phi->dest,
                     then_def->num_components, then_def->bit_size, NULL);

   nir_builder_instr_insert(build, &phi->instr);

   return &phi->dest.ssa;
}

/* Open a loop and move the cursor into its body.  Close with nir_pop_loop(). */
static inline nir_loop *
nir_push_loop(nir_builder *build)
{
   nir_loop *loop = nir_loop_create(build->shader);
   nir_builder_cf_insert(build, &loop->cf_node);
   build->cursor = nir_before_cf_list(&loop->body);
   return loop;
}

/* Close a loop: move the cursor just after it.  loop may be NULL, in which
 * case the enclosing loop is recovered from the cursor position.
 */
static inline void
nir_pop_loop(nir_builder *build, nir_loop *loop)
{
   if (loop) {
      assert(nir_builder_is_inside_cf(build, &loop->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      loop = nir_cf_node_as_loop(block->cf_node.parent);
   }
   build->cursor = nir_after_cf_node(&loop->cf_node);
}

/* Create an SSA undef value.  Note: inserted at the top of the impl rather
 * than at the cursor, so it dominates every possible use.
 */
static inline nir_ssa_def *
nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
   if (!undef)
      return NULL;

   nir_instr_insert(nir_before_cf_list(&build->impl->body), &undef->instr);
   if (build->update_divergence)
      nir_update_instr_divergence(build->shader, &undef->instr);

   return &undef->def;
}

/* Build a load_const instruction from an array of num_components values. */
static inline nir_ssa_def *
nir_build_imm(nir_builder *build, unsigned num_components,
              unsigned bit_size, const nir_const_value *value)
{
   nir_load_const_instr *load_const =
      nir_load_const_instr_create(build->shader, num_components, bit_size);
   if (!load_const)
      return NULL;

   memcpy(load_const->value, value, sizeof(nir_const_value) * num_components);

   nir_builder_instr_insert(build, &load_const->instr);

   return &load_const->def;
}

/* Immediate whose components are all zero. */
static inline nir_ssa_def *
nir_imm_zero(nir_builder *build, unsigned num_components, unsigned bit_size)
{
   nir_load_const_instr *load_const =
      nir_load_const_instr_create(build->shader, num_components, bit_size);

   /* nir_load_const_instr_create uses rzalloc so it's already zero */

   nir_builder_instr_insert(build, &load_const->instr);

   return &load_const->def;
}

/* Scalar boolean immediate of the given bit size. */
static inline nir_ssa_def *
nir_imm_boolN_t(nir_builder *build, bool x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_bool(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

static inline nir_ssa_def *
nir_imm_bool(nir_builder *build, bool x)
{
   return nir_imm_boolN_t(build, x, 1);
}

static inline nir_ssa_def *
nir_imm_true(nir_builder *build)
{
   return nir_imm_bool(build, true);
}

static inline nir_ssa_def *
nir_imm_false(nir_builder *build)
{
   return nir_imm_bool(build, false);
}

/* Scalar float immediate of the given bit size. */
static inline nir_ssa_def *
nir_imm_floatN_t(nir_builder *build, double x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_float(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

static inline nir_ssa_def *
nir_imm_float16(nir_builder *build, float x)
{
   return nir_imm_floatN_t(build, x, 16);
}

static inline nir_ssa_def *
nir_imm_float(nir_builder *build, float x)
{
   return nir_imm_floatN_t(build, x, 32);
}

static inline nir_ssa_def *
nir_imm_double(nir_builder *build, double x)
{
   return nir_imm_floatN_t(build, x, 64);
}

static inline nir_ssa_def *
nir_imm_vec2(nir_builder *build, float x, float y)
{
   nir_const_value v[2] = {
      nir_const_value_for_float(x, 32),
      nir_const_value_for_float(y, 32),
   };
   return nir_build_imm(build, 2, 32, v);
}

static inline nir_ssa_def *
nir_imm_vec3(nir_builder *build, float x, float y, float z)
{
   nir_const_value v[3] = {
      nir_const_value_for_float(x, 32),
      nir_const_value_for_float(y, 32),
      nir_const_value_for_float(z, 32),
   };
   return nir_build_imm(build, 3, 32, v);
}

static inline nir_ssa_def *
nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
{
   nir_const_value v[4] = {
      nir_const_value_for_float(x, 32),
      nir_const_value_for_float(y, 32),
      nir_const_value_for_float(z, 32),
      nir_const_value_for_float(w, 32),
   };

   return nir_build_imm(build, 4, 32, v);
}

static inline nir_ssa_def *
nir_imm_vec4_16(nir_builder *build, float x, float y, float z, float w)
{
   nir_const_value v[4] = {
      nir_const_value_for_float(x, 16),
      nir_const_value_for_float(y, 16),
      nir_const_value_for_float(z, 16),
      nir_const_value_for_float(w, 16),
   };

   return nir_build_imm(build, 4, 16, v);
}

/* Scalar integer immediate of the given bit size (x is taken as raw bits). */
static inline nir_ssa_def *
nir_imm_intN_t(nir_builder *build, uint64_t x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_raw_uint(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

static inline nir_ssa_def *
nir_imm_int(nir_builder *build, int x)
{
   return nir_imm_intN_t(build, x, 32);
}

static inline nir_ssa_def *
nir_imm_int64(nir_builder *build, int64_t x)
{
   return nir_imm_intN_t(build, x, 64);
}

static inline nir_ssa_def *
nir_imm_ivec2(nir_builder *build, int x, int y)
{
   nir_const_value v[2] = {
      nir_const_value_for_int(x, 32),
      nir_const_value_for_int(y, 32),
   };

   return nir_build_imm(build, 2, 32, v);
}

static inline nir_ssa_def *
nir_imm_ivec3(nir_builder *build, int x, int y, int z)
{
   nir_const_value v[3] = {
      nir_const_value_for_int(x, 32),
      nir_const_value_for_int(y, 32),
      nir_const_value_for_int(z, 32),
   };

   return nir_build_imm(build, 3, 32, v);
}

static inline nir_ssa_def *
nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
{
   nir_const_value v[4] = {
      nir_const_value_for_int(x, 32),
      nir_const_value_for_int(y, 32),
      nir_const_value_for_int(z, 32),
      nir_const_value_for_int(w, 32),
   };

   return nir_build_imm(build, 4, 32, v);
}

/* Infer the destination component count and bit width of a partially-built
 * ALU instruction from its sources, initialize the SSA destination, and
 * insert the instruction at the cursor.  The caller must have already filled
 * in all of the instruction's sources.
 */
static inline nir_ssa_def *
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
{
   const nir_op_info *op_info = &nir_op_infos[instr->op];

   instr->exact = build->exact;

   /* Guess the number of components the destination temporary should have
    * based on our input sizes, if it's not fixed for the op.
    */
   unsigned num_components = op_info->output_size;
   if (num_components == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         if (op_info->input_sizes[i] == 0)
            num_components = MAX2(num_components,
                                  instr->src[i].src.ssa->num_components);
      }
   }
   assert(num_components != 0);

   /* Figure out the bitwidth based on the source bitwidth if the instruction
    * is variable-width.
    */
   unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
   if (bit_size == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
         if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
            /* Variable-width source: all such sources must agree. */
            if (bit_size)
               assert(src_bit_size == bit_size);
            else
               bit_size = src_bit_size;
         } else {
            assert(src_bit_size ==
                   nir_alu_type_get_type_size(op_info->input_types[i]));
         }
      }
   }

   /* When in doubt, assume 32. */
   if (bit_size == 0)
      bit_size = 32;

   /* Make sure we don't swizzle from outside of our source vector (like if a
    * scalar value was passed into a multiply with a vector).
    */
   for (unsigned i = 0; i < op_info->num_inputs; i++) {
      for (unsigned j = instr->src[i].src.ssa->num_components;
           j < NIR_MAX_VEC_COMPONENTS; j++) {
         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
      }
   }

   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
                     bit_size, NULL);
   instr->dest.write_mask = (1 << num_components) - 1;

   nir_builder_instr_insert(build, &instr->instr);

   return &instr->dest.dest.ssa;
}

/* Build and insert an ALU instruction with up to four sources; pass NULL for
 * unused trailing sources.
 */
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
{
   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
   if (!instr)
      return NULL;

   instr->src[0].src = nir_src_for_ssa(src0);
   if (src1)
      instr->src[1].src = nir_src_for_ssa(src1);
   if (src2)
      instr->src[2].src = nir_src_for_ssa(src2);
   if (src3)
      instr->src[3].src = nir_src_for_ssa(src3);

   return nir_builder_alu_instr_finish_and_insert(build, instr);
}

/* for the couple special cases with more than 4 src args: */
static inline nir_ssa_def *
nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)
{
   const nir_op_info *op_info = &nir_op_infos[op];
   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
   if (!instr)
      return NULL;

   for (unsigned i = 0; i < op_info->num_inputs; i++)
      instr->src[i].src = nir_src_for_ssa(srcs[i]);

   return nir_builder_alu_instr_finish_and_insert(build, instr);
}

/* Generic builder for system values.
 */
static inline nir_ssa_def *
nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
                      unsigned num_components, unsigned bit_size)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
   if (nir_intrinsic_infos[op].dest_components > 0)
      assert(num_components == nir_intrinsic_infos[op].dest_components);
   else
      load->num_components = num_components;
   load->const_index[0] = index;

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(build, &load->instr);
   return &load->dest.ssa;
}

#include "nir_builder_opcodes.h"
#undef nir_deref_mode_is

/* Build a vecN from an array of num_components scalar components. */
static inline nir_ssa_def *
nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
{
   return nir_build_alu_src_arr(build, nir_op_vec(num_components), comp);
}

/* Build a mov from an ALU source, or return the underlying SSA def directly
 * when the mov would be an identity (same component count, identity swizzle,
 * no modifiers).
 */
static inline nir_ssa_def *
nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
   assert(!src.abs && !src.negate);
   if (src.src.is_ssa && src.src.ssa->num_components == num_components) {
      bool any_swizzles = false;
      for (unsigned i = 0; i < num_components; i++) {
         if (src.swizzle[i] != i)
            any_swizzles = true;
      }
      if (!any_swizzles)
         return src.src.ssa;
   }

   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
                     nir_src_bit_size(src.src), NULL);
   mov->exact = build->exact;
   mov->dest.write_mask = (1 << num_components) - 1;
   mov->src[0] = src;
   nir_builder_instr_insert(build, &mov->instr);

   return &mov->dest.dest.ssa;
}

/**
 * Construct a mov that reswizzles the source's components.
 */
static inline nir_ssa_def *
nir_swizzle(nir_builder *build, nir_ssa_def *src, const unsigned *swiz,
            unsigned num_components)
{
   assert(num_components <= NIR_MAX_VEC_COMPONENTS);
   nir_alu_src alu_src = { NIR_SRC_INIT };
   alu_src.src = nir_src_for_ssa(src);

   bool is_identity_swizzle = true;
   for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; i++) {
      if (swiz[i] != i)
         is_identity_swizzle = false;
      alu_src.swizzle[i] = swiz[i];
   }

   /* An identity swizzle of the full vector needs no mov at all. */
   if (num_components == src->num_components && is_identity_swizzle)
      return src;

   return nir_mov_alu(build, alu_src, num_components);
}

/* Selects the right fdot given the number of components in each source. */
static inline nir_ssa_def *
nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
{
   assert(src0->num_components == src1->num_components);
   switch (src0->num_components) {
   case 1: return nir_fmul(build, src0, src1);
   case 2: return nir_fdot2(build, src0, src1);
   case 3: return nir_fdot3(build, src0, src1);
   case 4: return nir_fdot4(build, src0, src1);
   case 5: return nir_fdot5(build, src0, src1);
   case 8: return nir_fdot8(build, src0, src1);
   case 16: return nir_fdot16(build, src0, src1);
   default:
      unreachable("bad component size");
   }

   return NULL;
}

/* Reduce a componentwise integer equality to one boolean: all-equal. */
static inline nir_ssa_def *
nir_ball_iequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
{
   switch (src0->num_components) {
   case 1: return nir_ieq(b, src0, src1);
   case 2: return nir_ball_iequal2(b, src0, src1);
   case 3: return nir_ball_iequal3(b, src0, src1);
   case 4: return nir_ball_iequal4(b, src0, src1);
   case 5: return nir_ball_iequal5(b, src0, src1);
   case 8: return nir_ball_iequal8(b, src0, src1);
   case 16: return nir_ball_iequal16(b, src0, src1);
   default:
      unreachable("bad component size");
   }
}

/* True iff every component of a boolean vector is true. */
static inline nir_ssa_def *
nir_ball(nir_builder *b, nir_ssa_def *src)
{
   return nir_ball_iequal(b, src, nir_imm_true(b));
}

/* Reduce a componentwise integer inequality to one boolean: any-not-equal. */
static inline nir_ssa_def *
nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
{
   switch (src0->num_components) {
   case 1: return nir_ine(b, src0, src1);
   case 2: return nir_bany_inequal2(b, src0, src1);
   case 3: return nir_bany_inequal3(b, src0, src1);
   case 4: return nir_bany_inequal4(b, src0, src1);
   case 5: return nir_bany_inequal5(b, src0, src1);
   case 8: return nir_bany_inequal8(b, src0, src1);
   case 16: return nir_bany_inequal16(b, src0, src1);
   default:
      unreachable("bad component size");
   }
}

/* True iff any component of a boolean vector is true. */
static inline nir_ssa_def *
nir_bany(nir_builder *b, nir_ssa_def *src)
{
   return nir_bany_inequal(b, src, nir_imm_false(b));
}

/* Extract a single channel of a vector. */
static inline nir_ssa_def *
nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
{
   return nir_swizzle(b, def, &c, 1);
}

/* Gather the channels selected by mask into a tightly-packed vector. */
static inline nir_ssa_def *
nir_channels(nir_builder *b, nir_ssa_def *def, nir_component_mask_t mask)
{
   unsigned num_channels = 0, swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };

   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
      if ((mask & (1 << i)) == 0)
         continue;
      swizzle[num_channels++] = i;
   }

   return nir_swizzle(b, def, swizzle, num_channels);
}

/* Recursive helper for nir_select_from_ssa_def_array: emits a balanced bcsel
 * tree selecting arr[idx] over the half-open range [start, end).
 */
static inline nir_ssa_def *
_nir_select_from_array_helper(nir_builder *b, nir_ssa_def **arr,
                              nir_ssa_def *idx,
                              unsigned start, unsigned end)
{
   if (start == end - 1) {
      return arr[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_intN_t(b, mid, idx->bit_size)),
                       _nir_select_from_array_helper(b, arr, idx, start, mid),
                       _nir_select_from_array_helper(b, arr, idx, mid, end));
   }
}

/* Dynamically index an array of SSA defs via a bcsel tree. */
static inline nir_ssa_def *
nir_select_from_ssa_def_array(nir_builder *b, nir_ssa_def **arr,
                              unsigned arr_len, nir_ssa_def *idx)
{
   return
_nir_select_from_array_helper(b, arr, idx, 0, arr_len); 717} 718 719static inline nir_ssa_def * 720nir_vector_extract(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c) 721{ 722 nir_src c_src = nir_src_for_ssa(c); 723 if (nir_src_is_const(c_src)) { 724 uint64_t c_const = nir_src_as_uint(c_src); 725 if (c_const < vec->num_components) 726 return nir_channel(b, vec, c_const); 727 else 728 return nir_ssa_undef(b, 1, vec->bit_size); 729 } else { 730 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS]; 731 for (unsigned i = 0; i < vec->num_components; i++) 732 comps[i] = nir_channel(b, vec, i); 733 return nir_select_from_ssa_def_array(b, comps, vec->num_components, c); 734 } 735} 736 737/** Replaces the component of `vec` specified by `c` with `scalar` */ 738static inline nir_ssa_def * 739nir_vector_insert_imm(nir_builder *b, nir_ssa_def *vec, 740 nir_ssa_def *scalar, unsigned c) 741{ 742 assert(scalar->num_components == 1); 743 assert(c < vec->num_components); 744 745 nir_op vec_op = nir_op_vec(vec->num_components); 746 nir_alu_instr *vec_instr = nir_alu_instr_create(b->shader, vec_op); 747 748 for (unsigned i = 0; i < vec->num_components; i++) { 749 if (i == c) { 750 vec_instr->src[i].src = nir_src_for_ssa(scalar); 751 vec_instr->src[i].swizzle[0] = 0; 752 } else { 753 vec_instr->src[i].src = nir_src_for_ssa(vec); 754 vec_instr->src[i].swizzle[0] = i; 755 } 756 } 757 758 return nir_builder_alu_instr_finish_and_insert(b, vec_instr); 759} 760 761/** Replaces the component of `vec` specified by `c` with `scalar` */ 762static inline nir_ssa_def * 763nir_vector_insert(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *scalar, 764 nir_ssa_def *c) 765{ 766 assert(scalar->num_components == 1); 767 assert(c->num_components == 1); 768 769 nir_src c_src = nir_src_for_ssa(c); 770 if (nir_src_is_const(c_src)) { 771 uint64_t c_const = nir_src_as_uint(c_src); 772 if (c_const < vec->num_components) 773 return nir_vector_insert_imm(b, vec, scalar, c_const); 774 else 775 return vec; 776 } else { 777 
nir_const_value per_comp_idx_const[NIR_MAX_VEC_COMPONENTS]; 778 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) 779 per_comp_idx_const[i] = nir_const_value_for_int(i, c->bit_size); 780 nir_ssa_def *per_comp_idx = 781 nir_build_imm(b, vec->num_components, 782 c->bit_size, per_comp_idx_const); 783 784 /* nir_builder will automatically splat out scalars to vectors so an 785 * insert is as simple as "if I'm the channel, replace me with the 786 * scalar." 787 */ 788 return nir_bcsel(b, nir_ieq(b, c, per_comp_idx), scalar, vec); 789 } 790} 791 792static inline nir_ssa_def * 793nir_i2i(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size) 794{ 795 if (x->bit_size == dest_bit_size) 796 return x; 797 798 switch (dest_bit_size) { 799 case 64: return nir_i2i64(build, x); 800 case 32: return nir_i2i32(build, x); 801 case 16: return nir_i2i16(build, x); 802 case 8: return nir_i2i8(build, x); 803 default: unreachable("Invalid bit size"); 804 } 805} 806 807static inline nir_ssa_def * 808nir_u2u(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size) 809{ 810 if (x->bit_size == dest_bit_size) 811 return x; 812 813 switch (dest_bit_size) { 814 case 64: return nir_u2u64(build, x); 815 case 32: return nir_u2u32(build, x); 816 case 16: return nir_u2u16(build, x); 817 case 8: return nir_u2u8(build, x); 818 default: unreachable("Invalid bit size"); 819 } 820} 821 822static inline nir_ssa_def * 823nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y) 824{ 825 assert(x->bit_size <= 64); 826 y &= BITFIELD64_MASK(x->bit_size); 827 828 if (y == 0) { 829 return x; 830 } else { 831 return nir_iadd(build, x, nir_imm_intN_t(build, y, x->bit_size)); 832 } 833} 834 835static inline nir_ssa_def * 836nir_iadd_imm_nuw(nir_builder *b, nir_ssa_def *x, uint64_t y) 837{ 838 nir_ssa_def *d = nir_iadd_imm(b, x, y); 839 if (d != x && d->parent_instr->type == nir_instr_type_alu) 840 nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true; 841 return d; 842} 843 844static inline 
nir_ssa_def *
nir_iadd_nuw(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *d = nir_iadd(b, x, y);
   nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
   return d;
}

static inline nir_ssa_def *
nir_ieq_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return nir_ieq(build, x, nir_imm_intN_t(build, y, x->bit_size));
}

/* Use nir_iadd(x, -y) for reversing parameter ordering */
static inline nir_ssa_def *
nir_isub_imm(nir_builder *build, uint64_t y, nir_ssa_def *x)
{
   return nir_isub(build, nir_imm_intN_t(build, y, x->bit_size), x);
}

/* x * y with strength reduction: multiplies by 0 and 1 fold away, powers of
 * two become a shift (unless the backend lowers bitops), and `amul` selects
 * nir_amul (address multiply) over nir_imul.
 */
static inline nir_ssa_def *
_nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 0) {
      return nir_imm_intN_t(build, 0, x->bit_size);
   } else if (y == 1) {
      return x;
   } else if (!build->shader->options->lower_bitops &&
              util_is_power_of_two_or_zero64(y)) {
      return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
   } else if (amul) {
      return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
   } else {
      return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

static inline nir_ssa_def *
nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return _nir_mul_imm(build, x, y, false);
}

static inline nir_ssa_def *
nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return _nir_mul_imm(build, x, y, true);
}

static inline nir_ssa_def *
nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
{
   return nir_fadd(build, x, nir_imm_floatN_t(build, y, x->bit_size));
}

static inline nir_ssa_def *
nir_fmul_imm(nir_builder *build, nir_ssa_def *x, double y)
{
   return nir_fmul(build, x, nir_imm_floatN_t(build, y, x->bit_size));
}

/* x & y for an immediate y; all-zeros and all-ones masks fold away. */
static inline nir_ssa_def *
nir_iand_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 0) {
      return nir_imm_intN_t(build, 0, x->bit_size);
   } else if (y == BITFIELD64_MASK(x->bit_size)) {
      return x;
   } else {
      return nir_iand(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

static inline nir_ssa_def *
nir_ishr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
{
   if (y == 0) {
      return x;
   } else {
      return nir_ishr(build, x, nir_imm_int(build, y));
   }
}

static inline nir_ssa_def *
nir_ushr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
{
   if (y == 0) {
      return x;
   } else {
      return nir_ushr(build, x, nir_imm_int(build, y));
   }
}

/* Unsigned division by an immediate; powers of two become a right shift. */
static inline nir_ssa_def *
nir_udiv_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 1) {
      return x;
   } else if (util_is_power_of_two_nonzero(y)) {
      return nir_ushr_imm(build, x, ffsll(y) - 1);
   } else {
      return nir_udiv(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

static inline nir_ssa_def *
nir_fclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
}

static inline nir_ssa_def *
nir_iclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_imin(b, nir_imax(b, x, min_val), max_val);
}

static inline nir_ssa_def *
nir_uclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_umin(b, nir_umax(b, x, min_val), max_val);
}

/* src0 * src1 + src2 with src1 and src2 both immediates.  Falls back to
 * mul+add when the backend prefers to avoid ternaries with two constants.
 */
static inline nir_ssa_def *
nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
{
   if (build->shader->options->avoid_ternary_with_two_constants)
      return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
   else
      return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
                      nir_imm_floatN_t(build, src2, src0->bit_size));
}

static inline nir_ssa_def *
nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
{
   return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
}

static inline nir_ssa_def *
nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
{
   return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
}

/* src0 - src1 * src2, expressed as a single fused multiply-add. */
static inline nir_ssa_def *
nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
               nir_ssa_def *src2)
{
   return nir_ffma(build, nir_fneg(build, src1), src2, src0);
}

/* Pack a vector of small integers into one scalar of dest_bit_size bits. */
static inline nir_ssa_def *
nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert(src->num_components * src->bit_size == dest_bit_size);

   switch (dest_bit_size) {
   case 64:
      switch (src->bit_size) {
      case 32: return nir_pack_64_2x32(b, src);
      case 16: return nir_pack_64_4x16(b, src);
      default: break;
      }
      break;

   case 32:
      if (src->bit_size == 16)
         return nir_pack_32_2x16(b, src);
      break;

   default:
      break;
   }

   /* If we got here, we have no dedicated pack opcode; shift and OR the
    * channels together instead.
    */
   nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
   for (unsigned i = 0; i < src->num_components; i++) {
      nir_ssa_def *val = nir_u2u(b, nir_channel(b, src, i), dest_bit_size);
      val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
      dest = nir_ior(b, dest, val);
   }
   return dest;
}

/* Unpack a scalar into a vector of dest_bit_size-sized components. */
static inline nir_ssa_def *
nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert(src->num_components == 1);
   assert(src->bit_size > dest_bit_size);
   const unsigned dest_num_components = src->bit_size / dest_bit_size;
   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);

   switch (src->bit_size) {
   case 64:
      switch (dest_bit_size) {
      case 32: return nir_unpack_64_2x32(b, src);
      case 16: return nir_unpack_64_4x16(b, src);
      default: break;
      }
      break;

   case 32:
      if (dest_bit_size == 16)
         return nir_unpack_32_2x16(b, src);
      break;

   default:
      break;
   }

   /* If we got here, we have no dedicated unpack opcode. */
   nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < dest_num_components; i++) {
      nir_ssa_def *val = nir_ushr_imm(b, src, i * dest_bit_size);
      dest_comps[i] = nir_u2u(b, val, dest_bit_size);
   }
   return nir_vec(b, dest_comps, dest_num_components);
}

/**
 * Treats srcs as if it's one big blob of bits and extracts the range of bits
 * given by
 *
 *    [first_bit, first_bit + dest_num_components * dest_bit_size)
 *
 * The range can have any alignment or size as long as it's an integer number
 * of destination components and fits inside the concatenated sources.
 *
 * TODO: The one caveat here is that we can't handle byte alignment if 64-bit
 * values are involved because that would require pack/unpack to/from a vec8
 * which NIR currently does not support.
 */
static inline nir_ssa_def *
nir_extract_bits(nir_builder *b, nir_ssa_def **srcs, unsigned num_srcs,
                 unsigned first_bit,
                 unsigned dest_num_components, unsigned dest_bit_size)
{
   const unsigned num_bits = dest_num_components * dest_bit_size;

   /* Figure out the common bit size */
   unsigned common_bit_size = dest_bit_size;
   for (unsigned i = 0; i < num_srcs; i++)
      common_bit_size = MIN2(common_bit_size, srcs[i]->bit_size);
   if (first_bit > 0)
      common_bit_size = MIN2(common_bit_size, (1u << (ffs(first_bit) - 1)));

   /* We don't want to have to deal with 1-bit values */
   assert(common_bit_size >= 8);

   nir_ssa_def *common_comps[NIR_MAX_VEC_COMPONENTS * sizeof(uint64_t)];
   assert(num_bits / common_bit_size <= ARRAY_SIZE(common_comps));

   /* First, unpack to the common bit size and select the components from the
    * source.
    */
   int src_idx = -1;
   unsigned src_start_bit = 0;
   unsigned src_end_bit = 0;
   for (unsigned i = 0; i < num_bits / common_bit_size; i++) {
      const unsigned bit = first_bit + (i * common_bit_size);
      /* Advance to the source that contains this bit. */
      while (bit >= src_end_bit) {
         src_idx++;
         assert(src_idx < (int) num_srcs);
         src_start_bit = src_end_bit;
         src_end_bit += srcs[src_idx]->bit_size *
                        srcs[src_idx]->num_components;
      }
      assert(bit >= src_start_bit);
      assert(bit + common_bit_size <= src_end_bit);
      const unsigned rel_bit = bit - src_start_bit;
      const unsigned src_bit_size = srcs[src_idx]->bit_size;

      nir_ssa_def *comp = nir_channel(b, srcs[src_idx],
                                      rel_bit / src_bit_size);
      if (srcs[src_idx]->bit_size > common_bit_size) {
         /* Source channel is wider than the common size: split it further. */
         nir_ssa_def *unpacked = nir_unpack_bits(b, comp, common_bit_size);
         comp = nir_channel(b, unpacked, (rel_bit % src_bit_size) /
                                         common_bit_size);
      }
      common_comps[i] = comp;
   }

   /* Now, re-pack the destination if we have to */
   if (dest_bit_size > common_bit_size) {
      unsigned common_per_dest = dest_bit_size / common_bit_size;
      nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 0; i < dest_num_components; i++) {
         nir_ssa_def *unpacked = nir_vec(b, common_comps + i * common_per_dest,
                                         common_per_dest);
         dest_comps[i] = nir_pack_bits(b, unpacked, dest_bit_size);
      }
      return nir_vec(b, dest_comps, dest_num_components);
   } else {
      assert(dest_bit_size == common_bit_size);
      return nir_vec(b, common_comps, dest_num_components);
   }
}

/* Reinterpret a vector as a vector of dest_bit_size components covering the
 * same total number of bits.
 */
static inline nir_ssa_def *
nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert((src->bit_size * src->num_components) % dest_bit_size == 0);
   const unsigned dest_num_components =
      (src->bit_size * src->num_components) / dest_bit_size;
   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);

   return nir_extract_bits(b, &src, 1, 0, dest_num_components, dest_bit_size);
}

/**
 * Pad a value to N components with undefs of matching bit size.
 * If the value already contains >= num_components, it is returned without change.
 */
static inline nir_ssa_def *
nir_pad_vector(nir_builder *b, nir_ssa_def *src, unsigned num_components)
{
   assert(src->num_components <= num_components);
   if (src->num_components == num_components)
      return src;

   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);
   unsigned i = 0;
   for (; i < src->num_components; i++)
      components[i] = nir_channel(b, src, i);
   for (; i < num_components; i++)
      components[i] = undef;

   return nir_vec(b, components, num_components);
}

/**
 * Pad a value to N components with copies of the given immediate of matching
 * bit size. If the value already contains >= num_components, it is returned
 * without change.
1193 */ 1194static inline nir_ssa_def * 1195nir_pad_vector_imm_int(nir_builder *b, nir_ssa_def *src, uint64_t imm_val, 1196 unsigned num_components) 1197{ 1198 assert(src->num_components <= num_components); 1199 if (src->num_components == num_components) 1200 return src; 1201 1202 nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS]; 1203 nir_ssa_def *imm = nir_imm_intN_t(b, imm_val, src->bit_size); 1204 unsigned i = 0; 1205 for (; i < src->num_components; i++) 1206 components[i] = nir_channel(b, src, i); 1207 for (; i < num_components; i++) 1208 components[i] = imm; 1209 1210 return nir_vec(b, components, num_components); 1211} 1212 1213/** 1214 * Pad a value to 4 components with undefs of matching bit size. 1215 * If the value already contains >= 4 components, it is returned without change. 1216 */ 1217static inline nir_ssa_def * 1218nir_pad_vec4(nir_builder *b, nir_ssa_def *src) 1219{ 1220 return nir_pad_vector(b, src, 4); 1221} 1222 1223/** 1224 * Turns a nir_src into a nir_ssa_def * so it can be passed to 1225 * nir_build_alu()-based builder calls. 1226 * 1227 * See nir_ssa_for_alu_src() for alu instructions. 1228 */ 1229static inline nir_ssa_def * 1230nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) 1231{ 1232 if (src.is_ssa && src.ssa->num_components == num_components) 1233 return src.ssa; 1234 1235 assert((unsigned)num_components <= nir_src_num_components(src)); 1236 1237 nir_alu_src alu = { NIR_SRC_INIT }; 1238 alu.src = src; 1239 for (int j = 0; j < NIR_MAX_VEC_COMPONENTS; j++) 1240 alu.swizzle[j] = j; 1241 1242 return nir_mov_alu(build, alu, num_components); 1243} 1244 1245/** 1246 * Similar to nir_ssa_for_src(), but for alu srcs, respecting the 1247 * nir_alu_src's swizzle. 
1248 */ 1249static inline nir_ssa_def * 1250nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) 1251{ 1252 if (nir_alu_src_is_trivial_ssa(instr, srcn)) 1253 return instr->src[srcn].src.ssa; 1254 1255 nir_alu_src *src = &instr->src[srcn]; 1256 unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); 1257 return nir_mov_alu(build, *src, num_components); 1258} 1259 1260static inline unsigned 1261nir_get_ptr_bitsize(nir_shader *shader) 1262{ 1263 if (shader->info.stage == MESA_SHADER_KERNEL) 1264 return shader->info.cs.ptr_size; 1265 return 32; 1266} 1267 1268static inline nir_deref_instr * 1269nir_build_deref_var(nir_builder *build, nir_variable *var) 1270{ 1271 nir_deref_instr *deref = 1272 nir_deref_instr_create(build->shader, nir_deref_type_var); 1273 1274 deref->modes = (nir_variable_mode)var->data.mode; 1275 deref->type = var->type; 1276 deref->var = var; 1277 1278 nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 1279 nir_get_ptr_bitsize(build->shader), NULL); 1280 1281 nir_builder_instr_insert(build, &deref->instr); 1282 1283 return deref; 1284} 1285 1286static inline nir_deref_instr * 1287nir_build_deref_array(nir_builder *build, nir_deref_instr *parent, 1288 nir_ssa_def *index) 1289{ 1290 assert(glsl_type_is_array(parent->type) || 1291 glsl_type_is_matrix(parent->type) || 1292 glsl_type_is_vector(parent->type)); 1293 1294 assert(index->bit_size == parent->dest.ssa.bit_size); 1295 1296 nir_deref_instr *deref = 1297 nir_deref_instr_create(build->shader, nir_deref_type_array); 1298 1299 deref->modes = parent->modes; 1300 deref->type = glsl_get_array_element(parent->type); 1301 deref->parent = nir_src_for_ssa(&parent->dest.ssa); 1302 deref->arr.index = nir_src_for_ssa(index); 1303 1304 nir_ssa_dest_init(&deref->instr, &deref->dest, 1305 parent->dest.ssa.num_components, 1306 parent->dest.ssa.bit_size, NULL); 1307 1308 nir_builder_instr_insert(build, &deref->instr); 1309 1310 return deref; 1311} 1312 1313static inline 
nir_deref_instr * 1314nir_build_deref_array_imm(nir_builder *build, nir_deref_instr *parent, 1315 int64_t index) 1316{ 1317 assert(parent->dest.is_ssa); 1318 nir_ssa_def *idx_ssa = nir_imm_intN_t(build, index, 1319 parent->dest.ssa.bit_size); 1320 1321 return nir_build_deref_array(build, parent, idx_ssa); 1322} 1323 1324static inline nir_deref_instr * 1325nir_build_deref_ptr_as_array(nir_builder *build, nir_deref_instr *parent, 1326 nir_ssa_def *index) 1327{ 1328 assert(parent->deref_type == nir_deref_type_array || 1329 parent->deref_type == nir_deref_type_ptr_as_array || 1330 parent->deref_type == nir_deref_type_cast); 1331 1332 assert(index->bit_size == parent->dest.ssa.bit_size); 1333 1334 nir_deref_instr *deref = 1335 nir_deref_instr_create(build->shader, nir_deref_type_ptr_as_array); 1336 1337 deref->modes = parent->modes; 1338 deref->type = parent->type; 1339 deref->parent = nir_src_for_ssa(&parent->dest.ssa); 1340 deref->arr.index = nir_src_for_ssa(index); 1341 1342 nir_ssa_dest_init(&deref->instr, &deref->dest, 1343 parent->dest.ssa.num_components, 1344 parent->dest.ssa.bit_size, NULL); 1345 1346 nir_builder_instr_insert(build, &deref->instr); 1347 1348 return deref; 1349} 1350 1351static inline nir_deref_instr * 1352nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent) 1353{ 1354 assert(glsl_type_is_array(parent->type) || 1355 glsl_type_is_matrix(parent->type)); 1356 1357 nir_deref_instr *deref = 1358 nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard); 1359 1360 deref->modes = parent->modes; 1361 deref->type = glsl_get_array_element(parent->type); 1362 deref->parent = nir_src_for_ssa(&parent->dest.ssa); 1363 1364 nir_ssa_dest_init(&deref->instr, &deref->dest, 1365 parent->dest.ssa.num_components, 1366 parent->dest.ssa.bit_size, NULL); 1367 1368 nir_builder_instr_insert(build, &deref->instr); 1369 1370 return deref; 1371} 1372 1373static inline nir_deref_instr * 1374nir_build_deref_struct(nir_builder *build, 
nir_deref_instr *parent, 1375 unsigned index) 1376{ 1377 assert(glsl_type_is_struct_or_ifc(parent->type)); 1378 1379 nir_deref_instr *deref = 1380 nir_deref_instr_create(build->shader, nir_deref_type_struct); 1381 1382 deref->modes = parent->modes; 1383 deref->type = glsl_get_struct_field(parent->type, index); 1384 deref->parent = nir_src_for_ssa(&parent->dest.ssa); 1385 deref->strct.index = index; 1386 1387 nir_ssa_dest_init(&deref->instr, &deref->dest, 1388 parent->dest.ssa.num_components, 1389 parent->dest.ssa.bit_size, NULL); 1390 1391 nir_builder_instr_insert(build, &deref->instr); 1392 1393 return deref; 1394} 1395 1396static inline nir_deref_instr * 1397nir_build_deref_cast(nir_builder *build, nir_ssa_def *parent, 1398 nir_variable_mode modes, const struct glsl_type *type, 1399 unsigned ptr_stride) 1400{ 1401 nir_deref_instr *deref = 1402 nir_deref_instr_create(build->shader, nir_deref_type_cast); 1403 1404 deref->modes = modes; 1405 deref->type = type; 1406 deref->parent = nir_src_for_ssa(parent); 1407 deref->cast.ptr_stride = ptr_stride; 1408 1409 nir_ssa_dest_init(&deref->instr, &deref->dest, 1410 parent->num_components, parent->bit_size, NULL); 1411 1412 nir_builder_instr_insert(build, &deref->instr); 1413 1414 return deref; 1415} 1416 1417static inline nir_deref_instr * 1418nir_alignment_deref_cast(nir_builder *build, nir_deref_instr *parent, 1419 uint32_t align_mul, uint32_t align_offset) 1420{ 1421 nir_deref_instr *deref = 1422 nir_deref_instr_create(build->shader, nir_deref_type_cast); 1423 1424 deref->modes = parent->modes; 1425 deref->type = parent->type; 1426 deref->parent = nir_src_for_ssa(&parent->dest.ssa); 1427 deref->cast.ptr_stride = nir_deref_instr_array_stride(deref); 1428 deref->cast.align_mul = align_mul; 1429 deref->cast.align_offset = align_offset; 1430 1431 nir_ssa_dest_init(&deref->instr, &deref->dest, 1432 parent->dest.ssa.num_components, 1433 parent->dest.ssa.bit_size, NULL); 1434 1435 nir_builder_instr_insert(build, 
&deref->instr); 1436 1437 return deref; 1438} 1439 1440/** Returns a deref that follows another but starting from the given parent 1441 * 1442 * The new deref will be the same type and take the same array or struct index 1443 * as the leader deref but it may have a different parent. This is very 1444 * useful for walking deref paths. 1445 */ 1446static inline nir_deref_instr * 1447nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent, 1448 nir_deref_instr *leader) 1449{ 1450 /* If the derefs would have the same parent, don't make a new one */ 1451 assert(leader->parent.is_ssa); 1452 if (leader->parent.ssa == &parent->dest.ssa) 1453 return leader; 1454 1455 UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent); 1456 1457 switch (leader->deref_type) { 1458 case nir_deref_type_var: 1459 unreachable("A var dereference cannot have a parent"); 1460 break; 1461 1462 case nir_deref_type_array: 1463 case nir_deref_type_array_wildcard: 1464 assert(glsl_type_is_matrix(parent->type) || 1465 glsl_type_is_array(parent->type) || 1466 (leader->deref_type == nir_deref_type_array && 1467 glsl_type_is_vector(parent->type))); 1468 assert(glsl_get_length(parent->type) == 1469 glsl_get_length(leader_parent->type)); 1470 1471 if (leader->deref_type == nir_deref_type_array) { 1472 assert(leader->arr.index.is_ssa); 1473 nir_ssa_def *index = nir_i2i(b, leader->arr.index.ssa, 1474 parent->dest.ssa.bit_size); 1475 return nir_build_deref_array(b, parent, index); 1476 } else { 1477 return nir_build_deref_array_wildcard(b, parent); 1478 } 1479 1480 case nir_deref_type_struct: 1481 assert(glsl_type_is_struct_or_ifc(parent->type)); 1482 assert(glsl_get_length(parent->type) == 1483 glsl_get_length(leader_parent->type)); 1484 1485 return nir_build_deref_struct(b, parent, leader->strct.index); 1486 1487 default: 1488 unreachable("Invalid deref instruction type"); 1489 } 1490} 1491 1492static inline nir_ssa_def * 1493nir_load_reg(nir_builder *build, nir_register *reg) 
1494{ 1495 return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components); 1496} 1497 1498static inline void 1499nir_store_reg(nir_builder *build, nir_register *reg, 1500 nir_ssa_def *def, nir_component_mask_t write_mask) 1501{ 1502 assert(reg->num_components == def->num_components); 1503 assert(reg->bit_size == def->bit_size); 1504 1505 nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov); 1506 mov->src[0].src = nir_src_for_ssa(def); 1507 mov->dest.dest = nir_dest_for_reg(reg); 1508 mov->dest.write_mask = write_mask & BITFIELD_MASK(reg->num_components); 1509 nir_builder_instr_insert(build, &mov->instr); 1510} 1511 1512static inline nir_ssa_def * 1513nir_load_deref_with_access(nir_builder *build, nir_deref_instr *deref, 1514 enum gl_access_qualifier access) 1515{ 1516 return nir_build_load_deref(build, glsl_get_vector_elements(deref->type), 1517 glsl_get_bit_size(deref->type), &deref->dest.ssa, 1518 access); 1519} 1520 1521#undef nir_load_deref 1522static inline nir_ssa_def * 1523nir_load_deref(nir_builder *build, nir_deref_instr *deref) 1524{ 1525 return nir_load_deref_with_access(build, deref, (enum gl_access_qualifier)0); 1526} 1527 1528static inline void 1529nir_store_deref_with_access(nir_builder *build, nir_deref_instr *deref, 1530 nir_ssa_def *value, unsigned writemask, 1531 enum gl_access_qualifier access) 1532{ 1533 writemask &= (1u << value->num_components) - 1u; 1534 nir_build_store_deref(build, &deref->dest.ssa, value, writemask, access); 1535} 1536 1537#undef nir_store_deref 1538static inline void 1539nir_store_deref(nir_builder *build, nir_deref_instr *deref, 1540 nir_ssa_def *value, unsigned writemask) 1541{ 1542 nir_store_deref_with_access(build, deref, value, writemask, 1543 (enum gl_access_qualifier)0); 1544} 1545 1546static inline void 1547nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest, 1548 nir_deref_instr *src, 1549 enum gl_access_qualifier dest_access, 1550 enum gl_access_qualifier src_access) 
1551{ 1552 nir_build_copy_deref(build, &dest->dest.ssa, &src->dest.ssa, dest_access, src_access); 1553} 1554 1555#undef nir_copy_deref 1556static inline void 1557nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) 1558{ 1559 nir_copy_deref_with_access(build, dest, src, 1560 (enum gl_access_qualifier) 0, 1561 (enum gl_access_qualifier) 0); 1562} 1563 1564static inline void 1565nir_memcpy_deref_with_access(nir_builder *build, nir_deref_instr *dest, 1566 nir_deref_instr *src, nir_ssa_def *size, 1567 enum gl_access_qualifier dest_access, 1568 enum gl_access_qualifier src_access) 1569{ 1570 nir_build_memcpy_deref(build, &dest->dest.ssa, &src->dest.ssa, 1571 size, dest_access, src_access); 1572} 1573 1574#undef nir_memcpy_deref 1575static inline void 1576nir_memcpy_deref(nir_builder *build, nir_deref_instr *dest, 1577 nir_deref_instr *src, nir_ssa_def *size) 1578{ 1579 nir_memcpy_deref_with_access(build, dest, src, size, 1580 (enum gl_access_qualifier)0, 1581 (enum gl_access_qualifier)0); 1582} 1583 1584static inline nir_ssa_def * 1585nir_load_var(nir_builder *build, nir_variable *var) 1586{ 1587 return nir_load_deref(build, nir_build_deref_var(build, var)); 1588} 1589 1590static inline void 1591nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, 1592 unsigned writemask) 1593{ 1594 nir_store_deref(build, nir_build_deref_var(build, var), value, writemask); 1595} 1596 1597static inline void 1598nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) 1599{ 1600 nir_copy_deref(build, nir_build_deref_var(build, dest), 1601 nir_build_deref_var(build, src)); 1602} 1603 1604#undef nir_load_global 1605static inline nir_ssa_def * 1606nir_load_global(nir_builder *build, nir_ssa_def *addr, unsigned align, 1607 unsigned num_components, unsigned bit_size) 1608{ 1609 nir_intrinsic_instr *load = 1610 nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global); 1611 load->num_components = num_components; 1612 
load->src[0] = nir_src_for_ssa(addr); 1613 nir_intrinsic_set_align(load, align, 0); 1614 nir_ssa_dest_init(&load->instr, &load->dest, 1615 num_components, bit_size, NULL); 1616 nir_builder_instr_insert(build, &load->instr); 1617 return &load->dest.ssa; 1618} 1619 1620#undef nir_store_global 1621static inline void 1622nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align, 1623 nir_ssa_def *value, nir_component_mask_t write_mask) 1624{ 1625 nir_intrinsic_instr *store = 1626 nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_global); 1627 store->num_components = value->num_components; 1628 store->src[0] = nir_src_for_ssa(value); 1629 store->src[1] = nir_src_for_ssa(addr); 1630 nir_intrinsic_set_write_mask(store, 1631 write_mask & BITFIELD_MASK(value->num_components)); 1632 nir_intrinsic_set_align(store, align, 0); 1633 nir_builder_instr_insert(build, &store->instr); 1634} 1635 1636#undef nir_load_global_constant 1637static inline nir_ssa_def * 1638nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align, 1639 unsigned num_components, unsigned bit_size) 1640{ 1641 nir_intrinsic_instr *load = 1642 nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant); 1643 load->num_components = num_components; 1644 load->src[0] = nir_src_for_ssa(addr); 1645 nir_intrinsic_set_align(load, align, 0); 1646 nir_ssa_dest_init(&load->instr, &load->dest, 1647 num_components, bit_size, NULL); 1648 nir_builder_instr_insert(build, &load->instr); 1649 return &load->dest.ssa; 1650} 1651 1652#undef nir_load_param 1653static inline nir_ssa_def * 1654nir_load_param(nir_builder *build, uint32_t param_idx) 1655{ 1656 assert(param_idx < build->impl->function->num_params); 1657 nir_parameter *param = &build->impl->function->params[param_idx]; 1658 return nir_build_load_param(build, param->num_components, param->bit_size, param_idx); 1659} 1660 1661/** 1662 * This function takes an I/O intrinsic like load/store_input, 1663 * and 
emits a sequence that calculates the full offset of that instruction, 1664 * including a stride to the base and component offsets. 1665 */ 1666static inline nir_ssa_def * 1667nir_build_calc_io_offset(nir_builder *b, 1668 nir_intrinsic_instr *intrin, 1669 nir_ssa_def *base_stride, 1670 unsigned component_stride) 1671{ 1672 /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */ 1673 nir_ssa_def *base_op = nir_imul_imm(b, base_stride, nir_intrinsic_base(intrin)); 1674 1675 /* offset should be interpreted in relation to the base, 1676 * so the instruction effectively reads/writes another input/output 1677 * when it has an offset 1678 */ 1679 nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1)); 1680 1681 /* component is in bytes */ 1682 unsigned const_op = nir_intrinsic_component(intrin) * component_stride; 1683 1684 return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op); 1685} 1686 1687/* calculate a `(1 << value) - 1` in ssa without overflows */ 1688static inline nir_ssa_def * 1689nir_mask(nir_builder *b, nir_ssa_def *bits, unsigned dst_bit_size) 1690{ 1691 return nir_ushr(b, nir_imm_intN_t(b, -1, dst_bit_size), 1692 nir_isub_imm(b, dst_bit_size, nir_u2u32(b, bits))); 1693} 1694 1695static inline nir_ssa_def * 1696nir_f2b(nir_builder *build, nir_ssa_def *f) 1697{ 1698 return nir_f2b1(build, f); 1699} 1700 1701static inline nir_ssa_def * 1702nir_i2b(nir_builder *build, nir_ssa_def *i) 1703{ 1704 return nir_i2b1(build, i); 1705} 1706 1707static inline nir_ssa_def * 1708nir_b2f(nir_builder *build, nir_ssa_def *b, uint32_t bit_size) 1709{ 1710 switch (bit_size) { 1711 case 64: return nir_b2f64(build, b); 1712 case 32: return nir_b2f32(build, b); 1713 case 16: return nir_b2f16(build, b); 1714 default: 1715 unreachable("Invalid bit-size"); 1716 }; 1717} 1718 1719static inline nir_ssa_def * 1720nir_b2i(nir_builder *build, nir_ssa_def *b, uint32_t bit_size) 1721{ 1722 switch (bit_size) 
{ 1723 case 64: return nir_b2i64(build, b); 1724 case 32: return nir_b2i32(build, b); 1725 case 16: return nir_b2i16(build, b); 1726 case 8: return nir_b2i8(build, b); 1727 default: 1728 unreachable("Invalid bit-size"); 1729 }; 1730} 1731static inline nir_ssa_def * 1732nir_load_barycentric(nir_builder *build, nir_intrinsic_op op, 1733 unsigned interp_mode) 1734{ 1735 unsigned num_components = op == nir_intrinsic_load_barycentric_model ? 3 : 2; 1736 nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op); 1737 nir_ssa_dest_init(&bary->instr, &bary->dest, num_components, 32, NULL); 1738 nir_intrinsic_set_interp_mode(bary, interp_mode); 1739 nir_builder_instr_insert(build, &bary->instr); 1740 return &bary->dest.ssa; 1741} 1742 1743static inline void 1744nir_jump(nir_builder *build, nir_jump_type jump_type) 1745{ 1746 assert(jump_type != nir_jump_goto && jump_type != nir_jump_goto_if); 1747 nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); 1748 nir_builder_instr_insert(build, &jump->instr); 1749} 1750 1751static inline void 1752nir_goto(nir_builder *build, struct nir_block *target) 1753{ 1754 assert(!build->impl->structured); 1755 nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto); 1756 jump->target = target; 1757 nir_builder_instr_insert(build, &jump->instr); 1758} 1759 1760static inline void 1761nir_goto_if(nir_builder *build, struct nir_block *target, nir_src cond, 1762 struct nir_block *else_target) 1763{ 1764 assert(!build->impl->structured); 1765 nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto_if); 1766 jump->condition = cond; 1767 jump->target = target; 1768 jump->else_target = else_target; 1769 nir_builder_instr_insert(build, &jump->instr); 1770} 1771 1772static inline nir_ssa_def * 1773nir_compare_func(nir_builder *b, enum compare_func func, 1774 nir_ssa_def *src0, nir_ssa_def *src1) 1775{ 1776 switch (func) { 1777 case COMPARE_FUNC_NEVER: 1778 return nir_imm_int(b, 0); 
1779 case COMPARE_FUNC_ALWAYS: 1780 return nir_imm_int(b, ~0); 1781 case COMPARE_FUNC_EQUAL: 1782 return nir_feq(b, src0, src1); 1783 case COMPARE_FUNC_NOTEQUAL: 1784 return nir_fneu(b, src0, src1); 1785 case COMPARE_FUNC_GREATER: 1786 return nir_flt(b, src1, src0); 1787 case COMPARE_FUNC_GEQUAL: 1788 return nir_fge(b, src0, src1); 1789 case COMPARE_FUNC_LESS: 1790 return nir_flt(b, src0, src1); 1791 case COMPARE_FUNC_LEQUAL: 1792 return nir_fge(b, src1, src0); 1793 } 1794 unreachable("bad compare func"); 1795} 1796 1797static inline void 1798nir_scoped_memory_barrier(nir_builder *b, 1799 nir_scope scope, 1800 nir_memory_semantics semantics, 1801 nir_variable_mode modes) 1802{ 1803 nir_scoped_barrier(b, NIR_SCOPE_NONE, scope, semantics, modes); 1804} 1805 1806static inline nir_ssa_def * 1807nir_type_convert(nir_builder *b, 1808 nir_ssa_def *src, 1809 nir_alu_type src_type, 1810 nir_alu_type dest_type) 1811{ 1812 assert(nir_alu_type_get_type_size(src_type) == 0 || 1813 nir_alu_type_get_type_size(src_type) == src->bit_size); 1814 1815 src_type = (nir_alu_type) (src_type | src->bit_size); 1816 1817 nir_op opcode = 1818 nir_type_conversion_op(src_type, dest_type, nir_rounding_mode_undef); 1819 1820 return nir_build_alu(b, opcode, src, NULL, NULL, NULL); 1821} 1822 1823static inline nir_ssa_def * 1824nir_convert_to_bit_size(nir_builder *b, 1825 nir_ssa_def *src, 1826 nir_alu_type type, 1827 unsigned bit_size) 1828{ 1829 return nir_type_convert(b, src, type, (nir_alu_type) (type | bit_size)); 1830} 1831 1832static inline nir_ssa_def * 1833nir_i2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1834{ 1835 return nir_convert_to_bit_size(b, src, nir_type_int, bit_size); 1836} 1837 1838static inline nir_ssa_def * 1839nir_u2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1840{ 1841 return nir_convert_to_bit_size(b, src, nir_type_uint, bit_size); 1842} 1843 1844static inline nir_ssa_def * 1845nir_b2bN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1846{ 
1847 return nir_convert_to_bit_size(b, src, nir_type_bool, bit_size); 1848} 1849 1850static inline nir_ssa_def * 1851nir_f2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1852{ 1853 return nir_convert_to_bit_size(b, src, nir_type_float, bit_size); 1854} 1855 1856static inline nir_ssa_def * 1857nir_i2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1858{ 1859 return nir_type_convert(b, src, nir_type_int, 1860 (nir_alu_type) (nir_type_float | bit_size)); 1861} 1862 1863static inline nir_ssa_def * 1864nir_u2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1865{ 1866 return nir_type_convert(b, src, nir_type_uint, 1867 (nir_alu_type) (nir_type_float | bit_size)); 1868} 1869 1870static inline nir_ssa_def * 1871nir_f2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1872{ 1873 return nir_type_convert(b, src, nir_type_float, 1874 (nir_alu_type) (nir_type_uint | bit_size)); 1875} 1876 1877static inline nir_ssa_def * 1878nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size) 1879{ 1880 return nir_type_convert(b, src, nir_type_float, 1881 (nir_alu_type) (nir_type_int | bit_size)); 1882} 1883 1884#endif /* NIR_BUILDER_H */ 1885