1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Connor Abbott (cwabbott0@gmail.com) 25 * Jason Ekstrand (jason@jlekstrand.net) 26 * 27 */ 28 29/* 30 * This lowering pass converts references to input/output variables with 31 * loads/stores to actual input/output intrinsics. 32 */ 33 34#include "nir.h" 35#include "nir_builder.h" 36#include "nir_deref.h" 37 38struct lower_io_state { 39 void *dead_ctx; 40 nir_builder builder; 41 int (*type_size)(const struct glsl_type *type, bool); 42 nir_variable_mode modes; 43 nir_lower_io_options options; 44}; 45 46static nir_intrinsic_op 47ssbo_atomic_for_deref(nir_intrinsic_op deref_op) 48{ 49 switch (deref_op) { 50#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O; 51 OP(atomic_exchange) 52 OP(atomic_comp_swap) 53 OP(atomic_add) 54 OP(atomic_imin) 55 OP(atomic_umin) 56 OP(atomic_imax) 57 OP(atomic_umax) 58 OP(atomic_and) 59 OP(atomic_or) 60 OP(atomic_xor) 61 OP(atomic_fadd) 62 OP(atomic_fmin) 63 OP(atomic_fmax) 64 OP(atomic_fcomp_swap) 65#undef OP 66 default: 67 unreachable("Invalid SSBO atomic"); 68 } 69} 70 71static nir_intrinsic_op 72global_atomic_for_deref(nir_intrinsic_op deref_op) 73{ 74 switch (deref_op) { 75#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O; 76 OP(atomic_exchange) 77 OP(atomic_comp_swap) 78 OP(atomic_add) 79 OP(atomic_imin) 80 OP(atomic_umin) 81 OP(atomic_imax) 82 OP(atomic_umax) 83 OP(atomic_and) 84 OP(atomic_or) 85 OP(atomic_xor) 86 OP(atomic_fadd) 87 OP(atomic_fmin) 88 OP(atomic_fmax) 89 OP(atomic_fcomp_swap) 90#undef OP 91 default: 92 unreachable("Invalid SSBO atomic"); 93 } 94} 95 96void 97nir_assign_var_locations(struct exec_list *var_list, unsigned *size, 98 int (*type_size)(const struct glsl_type *, bool)) 99{ 100 unsigned location = 0; 101 102 nir_foreach_variable(var, var_list) { 103 /* 104 * UBOs have their own address spaces, so don't count them towards the 105 * number of global uniforms 106 */ 107 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo) 108 continue; 109 110 var->data.driver_location = location; 111 bool bindless_type_size = var->data.mode == nir_var_shader_in || 112 var->data.mode == nir_var_shader_out || 113 var->data.bindless; 114 location += type_size(var->type, bindless_type_size); 115 } 116 117 *size = location; 118} 119 120/** 121 * Return true if the given variable is a per-vertex input/output array. 122 * (such as geometry shader inputs). 123 */ 124bool 125nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage) 126{ 127 if (var->data.patch || !glsl_type_is_array(var->type)) 128 return false; 129 130 if (var->data.mode == nir_var_shader_in) 131 return stage == MESA_SHADER_GEOMETRY || 132 stage == MESA_SHADER_TESS_CTRL || 133 stage == MESA_SHADER_TESS_EVAL; 134 135 if (var->data.mode == nir_var_shader_out) 136 return stage == MESA_SHADER_TESS_CTRL; 137 138 return false; 139} 140 141static nir_ssa_def * 142get_io_offset(nir_builder *b, nir_deref_instr *deref, 143 nir_ssa_def **vertex_index, 144 int (*type_size)(const struct glsl_type *, bool), 145 unsigned *component, bool bts) 146{ 147 nir_deref_path path; 148 nir_deref_path_init(&path, deref, NULL); 149 150 assert(path.path[0]->deref_type == nir_deref_type_var); 151 nir_deref_instr **p = &path.path[1]; 152 153 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the 154 * outermost array index separate. Process the rest normally. 155 */ 156 if (vertex_index != NULL) { 157 assert((*p)->deref_type == nir_deref_type_array); 158 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1); 159 p++; 160 } 161 162 if (path.path[0]->var->data.compact) { 163 assert((*p)->deref_type == nir_deref_type_array); 164 assert(glsl_type_is_scalar((*p)->type)); 165 166 /* We always lower indirect dereferences for "compact" array vars. */ 167 const unsigned index = nir_src_as_uint((*p)->arr.index); 168 const unsigned total_offset = *component + index; 169 const unsigned slot_offset = total_offset / 4; 170 *component = total_offset % 4; 171 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset); 172 } 173 174 /* Just emit code and let constant-folding go to town */ 175 nir_ssa_def *offset = nir_imm_int(b, 0); 176 177 for (; *p; p++) { 178 if ((*p)->deref_type == nir_deref_type_array) { 179 unsigned size = type_size((*p)->type, bts); 180 181 nir_ssa_def *mul = 182 nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size); 183 184 offset = nir_iadd(b, offset, mul); 185 } else if ((*p)->deref_type == nir_deref_type_struct) { 186 /* p starts at path[1], so this is safe */ 187 nir_deref_instr *parent = *(p - 1); 188 189 unsigned field_offset = 0; 190 for (unsigned i = 0; i < (*p)->strct.index; i++) { 191 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts); 192 } 193 offset = nir_iadd_imm(b, offset, field_offset); 194 } else { 195 unreachable("Unsupported deref type"); 196 } 197 } 198 199 nir_deref_path_finish(&path); 200 201 return offset; 202} 203 204static nir_intrinsic_instr * 205lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, 206 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset, 207 unsigned component) 208{ 209 const nir_shader *nir = state->builder.shader; 210 nir_variable_mode mode = var->data.mode; 211 nir_ssa_def *barycentric = NULL; 212 213 nir_intrinsic_op op; 214 switch (mode) { 215 case nir_var_shader_in: 216 if (nir->info.stage == MESA_SHADER_FRAGMENT && 217 nir->options->use_interpolated_input_intrinsics && 218 var->data.interpolation != INTERP_MODE_FLAT) { 219 assert(vertex_index == NULL); 220 221 nir_intrinsic_op bary_op; 222 if (var->data.sample || 223 (state->options & nir_lower_io_force_sample_interpolation)) 224 bary_op = nir_intrinsic_load_barycentric_sample; 225 else if (var->data.centroid) 226 bary_op = nir_intrinsic_load_barycentric_centroid; 227 else 228 bary_op = nir_intrinsic_load_barycentric_pixel; 229 230 barycentric = nir_load_barycentric(&state->builder, bary_op, 231 var->data.interpolation); 232 op = nir_intrinsic_load_interpolated_input; 233 } else { 234 op = vertex_index ? nir_intrinsic_load_per_vertex_input : 235 nir_intrinsic_load_input; 236 } 237 break; 238 case nir_var_shader_out: 239 op = vertex_index ? nir_intrinsic_load_per_vertex_output : 240 nir_intrinsic_load_output; 241 break; 242 case nir_var_uniform: 243 op = nir_intrinsic_load_uniform; 244 break; 245 case nir_var_mem_shared: 246 op = nir_intrinsic_load_shared; 247 break; 248 default: 249 unreachable("Unknown variable mode"); 250 } 251 252 nir_intrinsic_instr *load = 253 nir_intrinsic_instr_create(state->builder.shader, op); 254 load->num_components = intrin->num_components; 255 256 nir_intrinsic_set_base(load, var->data.driver_location); 257 if (mode == nir_var_shader_in || mode == nir_var_shader_out) 258 nir_intrinsic_set_component(load, component); 259 260 if (load->intrinsic == nir_intrinsic_load_uniform) 261 nir_intrinsic_set_range(load, 262 state->type_size(var->type, var->data.bindless)); 263 264 if (vertex_index) { 265 load->src[0] = nir_src_for_ssa(vertex_index); 266 load->src[1] = nir_src_for_ssa(offset); 267 } else if (barycentric) { 268 load->src[0] = nir_src_for_ssa(barycentric); 269 load->src[1] = nir_src_for_ssa(offset); 270 } else { 271 load->src[0] = nir_src_for_ssa(offset); 272 } 273 274 return load; 275} 276 277static nir_intrinsic_instr * 278lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, 279 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset, 280 unsigned component) 281{ 282 nir_variable_mode mode = var->data.mode; 283 284 nir_intrinsic_op op; 285 if (mode == nir_var_mem_shared) { 286 op = nir_intrinsic_store_shared; 287 } else { 288 assert(mode == nir_var_shader_out); 289 op = vertex_index ? nir_intrinsic_store_per_vertex_output : 290 nir_intrinsic_store_output; 291 } 292 293 nir_intrinsic_instr *store = 294 nir_intrinsic_instr_create(state->builder.shader, op); 295 store->num_components = intrin->num_components; 296 297 nir_src_copy(&store->src[0], &intrin->src[1], store); 298 299 nir_intrinsic_set_base(store, var->data.driver_location); 300 301 if (mode == nir_var_shader_out) 302 nir_intrinsic_set_component(store, component); 303 304 nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin)); 305 306 if (vertex_index) 307 store->src[1] = nir_src_for_ssa(vertex_index); 308 309 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset); 310 311 return store; 312} 313 314static nir_intrinsic_instr * 315lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, 316 nir_variable *var, nir_ssa_def *offset) 317{ 318 assert(var->data.mode == nir_var_mem_shared); 319 320 nir_intrinsic_op op; 321 switch (intrin->intrinsic) { 322#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break; 323 OP(atomic_exchange) 324 OP(atomic_comp_swap) 325 OP(atomic_add) 326 OP(atomic_imin) 327 OP(atomic_umin) 328 OP(atomic_imax) 329 OP(atomic_umax) 330 OP(atomic_and) 331 OP(atomic_or) 332 OP(atomic_xor) 333 OP(atomic_fadd) 334 OP(atomic_fmin) 335 OP(atomic_fmax) 336 OP(atomic_fcomp_swap) 337#undef OP 338 default: 339 unreachable("Invalid atomic"); 340 } 341 342 nir_intrinsic_instr *atomic = 343 nir_intrinsic_instr_create(state->builder.shader, op); 344 345 nir_intrinsic_set_base(atomic, var->data.driver_location); 346 347 atomic->src[0] = nir_src_for_ssa(offset); 348 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs == 349 nir_intrinsic_infos[op].num_srcs); 350 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) { 351 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic); 352 } 353 354 return atomic; 355} 356 357static nir_intrinsic_instr * 358lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, 359 nir_variable *var, nir_ssa_def *offset, unsigned component) 360{ 361 assert(var->data.mode == nir_var_shader_in); 362 363 /* Ignore interpolateAt() for flat variables - flat is flat. */ 364 if (var->data.interpolation == INTERP_MODE_FLAT) 365 return lower_load(intrin, state, NULL, var, offset, component); 366 367 nir_intrinsic_op bary_op; 368 switch (intrin->intrinsic) { 369 case nir_intrinsic_interp_deref_at_centroid: 370 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ? 371 nir_intrinsic_load_barycentric_sample : 372 nir_intrinsic_load_barycentric_centroid; 373 break; 374 case nir_intrinsic_interp_deref_at_sample: 375 bary_op = nir_intrinsic_load_barycentric_at_sample; 376 break; 377 case nir_intrinsic_interp_deref_at_offset: 378 bary_op = nir_intrinsic_load_barycentric_at_offset; 379 break; 380 default: 381 unreachable("Bogus interpolateAt() intrinsic."); 382 } 383 384 nir_intrinsic_instr *bary_setup = 385 nir_intrinsic_instr_create(state->builder.shader, bary_op); 386 387 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL); 388 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation); 389 390 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample || 391 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) 392 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup); 393 394 nir_builder_instr_insert(&state->builder, &bary_setup->instr); 395 396 nir_intrinsic_instr *load = 397 nir_intrinsic_instr_create(state->builder.shader, 398 nir_intrinsic_load_interpolated_input); 399 load->num_components = intrin->num_components; 400 401 nir_intrinsic_set_base(load, var->data.driver_location); 402 nir_intrinsic_set_component(load, component); 403 404 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa); 405 load->src[1] = nir_src_for_ssa(offset); 406 407 return load; 408} 409 410static bool 411nir_lower_io_block(nir_block *block, 412 struct lower_io_state *state) 413{ 414 nir_builder *b = &state->builder; 415 const nir_shader_compiler_options *options = b->shader->options; 416 bool progress = false; 417 418 nir_foreach_instr_safe(instr, block) { 419 if (instr->type != nir_instr_type_intrinsic) 420 continue; 421 422 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 423 424 switch (intrin->intrinsic) { 425 case nir_intrinsic_load_deref: 426 case nir_intrinsic_store_deref: 427 case nir_intrinsic_deref_atomic_add: 428 case nir_intrinsic_deref_atomic_imin: 429 case nir_intrinsic_deref_atomic_umin: 430 case nir_intrinsic_deref_atomic_imax: 431 case nir_intrinsic_deref_atomic_umax: 432 case nir_intrinsic_deref_atomic_and: 433 case nir_intrinsic_deref_atomic_or: 434 case nir_intrinsic_deref_atomic_xor: 435 case nir_intrinsic_deref_atomic_exchange: 436 case nir_intrinsic_deref_atomic_comp_swap: 437 case nir_intrinsic_deref_atomic_fadd: 438 case nir_intrinsic_deref_atomic_fmin: 439 case nir_intrinsic_deref_atomic_fmax: 440 case nir_intrinsic_deref_atomic_fcomp_swap: 441 /* We can lower the io for this nir instrinsic */ 442 break; 443 case nir_intrinsic_interp_deref_at_centroid: 444 case nir_intrinsic_interp_deref_at_sample: 445 case nir_intrinsic_interp_deref_at_offset: 446 /* We can optionally lower these to load_interpolated_input */ 447 if (options->use_interpolated_input_intrinsics) 448 break; 449 default: 450 /* We can't lower the io for this nir instrinsic, so skip it */ 451 continue; 452 } 453 454 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 455 456 nir_variable *var = nir_deref_instr_get_variable(deref); 457 nir_variable_mode mode = var->data.mode; 458 459 if ((state->modes & mode) == 0) 460 continue; 461 462 if (mode != nir_var_shader_in && 463 mode != nir_var_shader_out && 464 mode != nir_var_mem_shared && 465 mode != nir_var_uniform) 466 continue; 467 468 b->cursor = nir_before_instr(instr); 469 470 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage); 471 472 nir_ssa_def *offset; 473 nir_ssa_def *vertex_index = NULL; 474 unsigned component_offset = var->data.location_frac; 475 bool bindless_type_size = mode == nir_var_shader_in || 476 mode == nir_var_shader_out || 477 var->data.bindless; 478 479 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL, 480 state->type_size, &component_offset, 481 bindless_type_size); 482 483 nir_intrinsic_instr *replacement; 484 485 switch (intrin->intrinsic) { 486 case nir_intrinsic_load_deref: 487 replacement = lower_load(intrin, state, vertex_index, var, offset, 488 component_offset); 489 break; 490 491 case nir_intrinsic_store_deref: 492 replacement = lower_store(intrin, state, vertex_index, var, offset, 493 component_offset); 494 break; 495 496 case nir_intrinsic_deref_atomic_add: 497 case nir_intrinsic_deref_atomic_imin: 498 case nir_intrinsic_deref_atomic_umin: 499 case nir_intrinsic_deref_atomic_imax: 500 case nir_intrinsic_deref_atomic_umax: 501 case nir_intrinsic_deref_atomic_and: 502 case nir_intrinsic_deref_atomic_or: 503 case nir_intrinsic_deref_atomic_xor: 504 case nir_intrinsic_deref_atomic_exchange: 505 case nir_intrinsic_deref_atomic_comp_swap: 506 case nir_intrinsic_deref_atomic_fadd: 507 case nir_intrinsic_deref_atomic_fmin: 508 case nir_intrinsic_deref_atomic_fmax: 509 case nir_intrinsic_deref_atomic_fcomp_swap: 510 assert(vertex_index == NULL); 511 replacement = lower_atomic(intrin, state, var, offset); 512 break; 513 514 case nir_intrinsic_interp_deref_at_centroid: 515 case nir_intrinsic_interp_deref_at_sample: 516 case nir_intrinsic_interp_deref_at_offset: 517 assert(vertex_index == NULL); 518 replacement = lower_interpolate_at(intrin, state, var, offset, 519 component_offset); 520 break; 521 522 default: 523 continue; 524 } 525 526 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) { 527 if (intrin->dest.is_ssa) { 528 nir_ssa_dest_init(&replacement->instr, &replacement->dest, 529 intrin->dest.ssa.num_components, 530 intrin->dest.ssa.bit_size, NULL); 531 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 532 nir_src_for_ssa(&replacement->dest.ssa)); 533 } else { 534 nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr); 535 } 536 } 537 538 nir_instr_insert_before(&intrin->instr, &replacement->instr); 539 nir_instr_remove(&intrin->instr); 540 progress = true; 541 } 542 543 return progress; 544} 545 546static bool 547nir_lower_io_impl(nir_function_impl *impl, 548 nir_variable_mode modes, 549 int (*type_size)(const struct glsl_type *, bool), 550 nir_lower_io_options options) 551{ 552 struct lower_io_state state; 553 bool progress = false; 554 555 nir_builder_init(&state.builder, impl); 556 state.dead_ctx = ralloc_context(NULL); 557 state.modes = modes; 558 state.type_size = type_size; 559 state.options = options; 560 561 nir_foreach_block(block, impl) { 562 progress |= nir_lower_io_block(block, &state); 563 } 564 565 ralloc_free(state.dead_ctx); 566 567 nir_metadata_preserve(impl, nir_metadata_block_index | 568 nir_metadata_dominance); 569 return progress; 570} 571 572bool 573nir_lower_io(nir_shader *shader, nir_variable_mode modes, 574 int (*type_size)(const struct glsl_type *, bool), 575 nir_lower_io_options options) 576{ 577 bool progress = false; 578 579 nir_foreach_function(function, shader) { 580 if (function->impl) { 581 progress |= nir_lower_io_impl(function->impl, modes, 582 type_size, options); 583 } 584 } 585 586 return progress; 587} 588 589static unsigned 590type_scalar_size_bytes(const struct glsl_type *type) 591{ 592 assert(glsl_type_is_vector_or_scalar(type) || 593 glsl_type_is_matrix(type)); 594 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; 595} 596 597static nir_ssa_def * 598build_addr_iadd(nir_builder *b, nir_ssa_def *addr, 599 nir_address_format addr_format, nir_ssa_def *offset) 600{ 601 assert(offset->num_components == 1); 602 assert(addr->bit_size == offset->bit_size); 603 604 switch (addr_format) { 605 case nir_address_format_32bit_global: 606 case nir_address_format_64bit_global: 607 assert(addr->num_components == 1); 608 return nir_iadd(b, addr, offset); 609 610 case nir_address_format_64bit_bounded_global: 611 assert(addr->num_components == 4); 612 return nir_vec4(b, nir_channel(b, addr, 0), 613 nir_channel(b, addr, 1), 614 nir_channel(b, addr, 2), 615 nir_iadd(b, nir_channel(b, addr, 3), offset)); 616 617 case nir_address_format_32bit_index_offset: 618 assert(addr->num_components == 2); 619 return nir_vec2(b, nir_channel(b, addr, 0), 620 nir_iadd(b, nir_channel(b, addr, 1), offset)); 621 } 622 unreachable("Invalid address format"); 623} 624 625static nir_ssa_def * 626build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr, 627 nir_address_format addr_format, int64_t offset) 628{ 629 return build_addr_iadd(b, addr, addr_format, 630 nir_imm_intN_t(b, offset, addr->bit_size)); 631} 632 633static nir_ssa_def * 634addr_to_index(nir_builder *b, nir_ssa_def *addr, 635 nir_address_format addr_format) 636{ 637 assert(addr_format == nir_address_format_32bit_index_offset); 638 assert(addr->num_components == 2); 639 return nir_channel(b, addr, 0); 640} 641 642static nir_ssa_def * 643addr_to_offset(nir_builder *b, nir_ssa_def *addr, 644 nir_address_format addr_format) 645{ 646 assert(addr_format == nir_address_format_32bit_index_offset); 647 assert(addr->num_components == 2); 648 return nir_channel(b, addr, 1); 649} 650 651/** Returns true if the given address format resolves to a global address */ 652static bool 653addr_format_is_global(nir_address_format addr_format) 654{ 655 return addr_format == nir_address_format_32bit_global || 656 addr_format == nir_address_format_64bit_global || 657 addr_format == nir_address_format_64bit_bounded_global; 658} 659 660static nir_ssa_def * 661addr_to_global(nir_builder *b, nir_ssa_def *addr, 662 nir_address_format addr_format) 663{ 664 switch (addr_format) { 665 case nir_address_format_32bit_global: 666 case nir_address_format_64bit_global: 667 assert(addr->num_components == 1); 668 return addr; 669 670 case nir_address_format_64bit_bounded_global: 671 assert(addr->num_components == 4); 672 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)), 673 nir_u2u64(b, nir_channel(b, addr, 3))); 674 675 case nir_address_format_32bit_index_offset: 676 unreachable("Cannot get a 64-bit address with this address format"); 677 } 678 679 unreachable("Invalid address format"); 680} 681 682static bool 683addr_format_needs_bounds_check(nir_address_format addr_format) 684{ 685 return addr_format == nir_address_format_64bit_bounded_global; 686} 687 688static nir_ssa_def * 689addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr, 690 nir_address_format addr_format, unsigned size) 691{ 692 assert(addr_format == nir_address_format_64bit_bounded_global); 693 assert(addr->num_components == 4); 694 return nir_ige(b, nir_channel(b, addr, 2), 695 nir_iadd_imm(b, nir_channel(b, addr, 3), size)); 696} 697 698static nir_ssa_def * 699build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, 700 nir_ssa_def *addr, nir_address_format addr_format, 701 unsigned num_components) 702{ 703 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode; 704 705 nir_intrinsic_op op; 706 switch (mode) { 707 case nir_var_mem_ubo: 708 op = nir_intrinsic_load_ubo; 709 break; 710 case nir_var_mem_ssbo: 711 if (addr_format_is_global(addr_format)) 712 op = nir_intrinsic_load_global; 713 else 714 op = nir_intrinsic_load_ssbo; 715 break; 716 case nir_var_mem_global: 717 assert(addr_format_is_global(addr_format)); 718 op = nir_intrinsic_load_global; 719 break; 720 case nir_var_shader_in: 721 assert(addr_format_is_global(addr_format)); 722 op = nir_intrinsic_load_kernel_input; 723 break; 724 default: 725 unreachable("Unsupported explicit IO variable mode"); 726 } 727 728 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); 729 730 if (addr_format_is_global(addr_format)) { 731 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 732 } else { 733 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 734 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 735 } 736 737 if (mode != nir_var_mem_ubo && mode != nir_var_shader_in) 738 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin)); 739 740 /* TODO: We should try and provide a better alignment. For OpenCL, we need 741 * to plumb the alignment through from SPIR-V when we have one. 742 */ 743 nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0); 744 745 assert(intrin->dest.is_ssa); 746 load->num_components = num_components; 747 nir_ssa_dest_init(&load->instr, &load->dest, num_components, 748 intrin->dest.ssa.bit_size, intrin->dest.ssa.name); 749 750 assert(load->dest.ssa.bit_size % 8 == 0); 751 752 if (addr_format_needs_bounds_check(addr_format)) { 753 /* The Vulkan spec for robustBufferAccess gives us quite a few options 754 * as to what we can do with an OOB read. Unfortunately, returning 755 * undefined values isn't one of them so we return an actual zero. 756 */ 757 nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 758 load->dest.ssa.bit_size); 759 760 const unsigned load_size = 761 (load->dest.ssa.bit_size / 8) * load->num_components; 762 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size)); 763 764 nir_builder_instr_insert(b, &load->instr); 765 766 nir_pop_if(b, NULL); 767 768 return nir_if_phi(b, &load->dest.ssa, zero); 769 } else { 770 nir_builder_instr_insert(b, &load->instr); 771 return &load->dest.ssa; 772 } 773} 774 775static void 776build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, 777 nir_ssa_def *addr, nir_address_format addr_format, 778 nir_ssa_def *value, nir_component_mask_t write_mask) 779{ 780 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode; 781 782 nir_intrinsic_op op; 783 switch (mode) { 784 case nir_var_mem_ssbo: 785 if (addr_format_is_global(addr_format)) 786 op = nir_intrinsic_store_global; 787 else 788 op = nir_intrinsic_store_ssbo; 789 break; 790 case nir_var_mem_global: 791 assert(addr_format_is_global(addr_format)); 792 op = nir_intrinsic_store_global; 793 break; 794 default: 795 unreachable("Unsupported explicit IO variable mode"); 796 } 797 798 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); 799 800 store->src[0] = nir_src_for_ssa(value); 801 if (addr_format_is_global(addr_format)) { 802 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 803 } else { 804 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 805 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 806 } 807 808 nir_intrinsic_set_write_mask(store, write_mask); 809 810 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin)); 811 812 /* TODO: We should try and provide a better alignment. For OpenCL, we need 813 * to plumb the alignment through from SPIR-V when we have one. 814 */ 815 nir_intrinsic_set_align(store, value->bit_size / 8, 0); 816 817 assert(value->num_components == 1 || 818 value->num_components == intrin->num_components); 819 store->num_components = value->num_components; 820 821 assert(value->bit_size % 8 == 0); 822 823 if (addr_format_needs_bounds_check(addr_format)) { 824 const unsigned store_size = (value->bit_size / 8) * store->num_components; 825 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); 826 827 nir_builder_instr_insert(b, &store->instr); 828 829 nir_pop_if(b, NULL); 830 } else { 831 nir_builder_instr_insert(b, &store->instr); 832 } 833} 834 835static nir_ssa_def * 836build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, 837 nir_ssa_def *addr, nir_address_format addr_format) 838{ 839 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode; 840 const unsigned num_data_srcs = 841 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1; 842 843 nir_intrinsic_op op; 844 switch (mode) { 845 case nir_var_mem_ssbo: 846 if (addr_format_is_global(addr_format)) 847 op = global_atomic_for_deref(intrin->intrinsic); 848 else 849 op = ssbo_atomic_for_deref(intrin->intrinsic); 850 break; 851 case nir_var_mem_global: 852 assert(addr_format_is_global(addr_format)); 853 op = global_atomic_for_deref(intrin->intrinsic); 854 break; 855 default: 856 unreachable("Unsupported explicit IO variable mode"); 857 } 858 859 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op); 860 861 unsigned src = 0; 862 if (addr_format_is_global(addr_format)) { 863 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 864 } else { 865 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 866 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 867 } 868 for (unsigned i = 0; i < num_data_srcs; i++) { 869 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa); 870 } 871 872 /* Global atomics don't have access flags because they assume that the 873 * address may be non-uniform. 874 */ 875 if (!addr_format_is_global(addr_format)) 876 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin)); 877 878 assert(intrin->dest.ssa.num_components == 1); 879 nir_ssa_dest_init(&atomic->instr, &atomic->dest, 880 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name); 881 882 assert(atomic->dest.ssa.bit_size % 8 == 0); 883 884 if (addr_format_needs_bounds_check(addr_format)) { 885 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8; 886 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size)); 887 888 nir_builder_instr_insert(b, &atomic->instr); 889 890 nir_pop_if(b, NULL); 891 return nir_if_phi(b, &atomic->dest.ssa, 892 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size)); 893 } else { 894 nir_builder_instr_insert(b, &atomic->instr); 895 return &atomic->dest.ssa; 896 } 897} 898 899nir_ssa_def * 900nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref, 901 nir_ssa_def *base_addr, 902 nir_address_format addr_format) 903{ 904 assert(deref->dest.is_ssa); 905 switch (deref->deref_type) { 906 case nir_deref_type_var: 907 assert(deref->mode == nir_var_shader_in); 908 return nir_imm_intN_t(b, deref->var->data.driver_location, 909 deref->dest.ssa.bit_size); 910 911 case nir_deref_type_array: { 912 nir_deref_instr *parent = nir_deref_instr_parent(deref); 913 914 unsigned stride = glsl_get_explicit_stride(parent->type); 915 if ((glsl_type_is_matrix(parent->type) && 916 glsl_matrix_type_is_row_major(parent->type)) || 917 (glsl_type_is_vector(parent->type) && stride == 0)) 918 stride = type_scalar_size_bytes(parent->type); 919 920 assert(stride > 0); 921 922 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 923 index = nir_i2i(b, index, base_addr->bit_size); 924 return build_addr_iadd(b, base_addr, addr_format, 925 nir_imul_imm(b, index, stride)); 926 } 927 928 case nir_deref_type_ptr_as_array: { 929 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 930 index = nir_i2i(b, index, base_addr->bit_size); 931 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref); 932 return build_addr_iadd(b, base_addr, addr_format, 933 nir_imul_imm(b, index, stride)); 934 } 935 936 case nir_deref_type_array_wildcard: 937 unreachable("Wildcards should be lowered by now"); 938 break; 939 940 case nir_deref_type_struct: { 941 nir_deref_instr *parent = nir_deref_instr_parent(deref); 942 int offset = glsl_get_struct_field_offset(parent->type, 943 deref->strct.index); 944 assert(offset >= 0); 945 return build_addr_iadd_imm(b, base_addr, addr_format, offset); 946 } 947 948 case nir_deref_type_cast: 949 /* Nothing to do here */ 950 return base_addr; 951 } 952 953 unreachable("Invalid NIR deref type"); 954} 955 956void 957nir_lower_explicit_io_instr(nir_builder *b, 958 nir_intrinsic_instr *intrin, 959 nir_ssa_def *addr, 960 nir_address_format addr_format) 961{ 962 b->cursor = nir_after_instr(&intrin->instr); 963 964 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 965 unsigned vec_stride = glsl_get_explicit_stride(deref->type); 966 unsigned scalar_size = type_scalar_size_bytes(deref->type); 967 assert(vec_stride == 0 || glsl_type_is_vector(deref->type)); 968 assert(vec_stride == 0 || vec_stride >= scalar_size); 969 970 if (intrin->intrinsic == nir_intrinsic_load_deref) { 971 nir_ssa_def *value; 972 if (vec_stride > scalar_size) { 973 nir_ssa_def *comps[4] = { NULL, }; 974 for (unsigned i = 0; i < intrin->num_components; i++) { 975 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 976 vec_stride * i); 977 comps[i] = build_explicit_io_load(b, intrin, comp_addr, 978 addr_format, 1); 979 } 980 value = nir_vec(b, comps, intrin->num_components); 981 } else { 982 value = build_explicit_io_load(b, intrin, addr, addr_format, 983 intrin->num_components); 984 } 985 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value)); 986 } else if (intrin->intrinsic == nir_intrinsic_store_deref) { 987 assert(intrin->src[1].is_ssa); 988 nir_ssa_def *value = intrin->src[1].ssa; 989 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 990 if (vec_stride > scalar_size) { 991 for (unsigned i = 0; i < intrin->num_components; i++) { 992 if (!(write_mask & (1 << i))) 993 continue; 994 995 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 996 vec_stride * i); 997 build_explicit_io_store(b, intrin, comp_addr, addr_format, 998 nir_channel(b, value, i), 1); 999 } 1000 } else { 1001 build_explicit_io_store(b, intrin, addr, addr_format, 1002 value, write_mask); 1003 } 1004 } else { 1005 nir_ssa_def *value = 1006 build_explicit_io_atomic(b, intrin, addr, addr_format); 1007 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value)); 1008 } 1009 1010 nir_instr_remove(&intrin->instr); 1011} 1012 1013static void 1014lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref, 1015 nir_address_format addr_format) 1016{ 1017 /* Just delete the deref if it's not used. We can't use 1018 * nir_deref_instr_remove_if_unused here because it may remove more than 1019 * one deref which could break our list walking since we walk the list 1020 * backwards. 1021 */ 1022 assert(list_empty(&deref->dest.ssa.if_uses)); 1023 if (list_empty(&deref->dest.ssa.uses)) { 1024 nir_instr_remove(&deref->instr); 1025 return; 1026 } 1027 1028 b->cursor = nir_after_instr(&deref->instr); 1029 1030 nir_ssa_def *base_addr = NULL; 1031 if (deref->deref_type != nir_deref_type_var) { 1032 assert(deref->parent.is_ssa); 1033 base_addr = deref->parent.ssa; 1034 } 1035 1036 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr, 1037 addr_format); 1038 1039 nir_instr_remove(&deref->instr); 1040 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr)); 1041} 1042 1043static void 1044lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin, 1045 nir_address_format addr_format) 1046{ 1047 assert(intrin->src[0].is_ssa); 1048 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format); 1049} 1050 1051static void 1052lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin, 1053 nir_address_format addr_format) 1054{ 1055 b->cursor = nir_after_instr(&intrin->instr); 1056 1057 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1058 1059 assert(glsl_type_is_array(deref->type)); 1060 assert(glsl_get_length(deref->type) == 0); 1061 unsigned stride = glsl_get_explicit_stride(deref->type); 1062 assert(stride > 0); 1063 1064 assert(addr_format == nir_address_format_32bit_index_offset); 1065 nir_ssa_def *addr = &deref->dest.ssa; 1066 nir_ssa_def *index = addr_to_index(b, addr, addr_format); 1067 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format); 1068 1069 nir_intrinsic_instr *bsize = 1070 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size); 1071 bsize->src[0] = nir_src_for_ssa(index); 1072 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL); 1073 nir_builder_instr_insert(b, &bsize->instr); 1074 1075 nir_ssa_def *arr_size = 1076 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset), 1077 nir_imm_int(b, stride)); 1078 1079 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size)); 1080 nir_instr_remove(&intrin->instr); 1081} 1082 1083static bool 1084nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, 1085 nir_address_format addr_format) 1086{ 1087 bool progress = false; 1088 1089 nir_builder b; 1090 nir_builder_init(&b, impl); 1091 1092 /* Walk in reverse order so that we can see the full deref chain when we 1093 * lower the access operations. We lower them assuming that the derefs 1094 * will be turned into address calculations later. 1095 */ 1096 nir_foreach_block_reverse(block, impl) { 1097 nir_foreach_instr_reverse_safe(instr, block) { 1098 switch (instr->type) { 1099 case nir_instr_type_deref: { 1100 nir_deref_instr *deref = nir_instr_as_deref(instr); 1101 if (deref->mode & modes) { 1102 lower_explicit_io_deref(&b, deref, addr_format); 1103 progress = true; 1104 } 1105 break; 1106 } 1107 1108 case nir_instr_type_intrinsic: { 1109 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1110 switch (intrin->intrinsic) { 1111 case nir_intrinsic_load_deref: 1112 case nir_intrinsic_store_deref: 1113 case nir_intrinsic_deref_atomic_add: 1114 case nir_intrinsic_deref_atomic_imin: 1115 case nir_intrinsic_deref_atomic_umin: 1116 case nir_intrinsic_deref_atomic_imax: 1117 case nir_intrinsic_deref_atomic_umax: 1118 case nir_intrinsic_deref_atomic_and: 1119 case nir_intrinsic_deref_atomic_or: 1120 case nir_intrinsic_deref_atomic_xor: 1121 case nir_intrinsic_deref_atomic_exchange: 1122 case nir_intrinsic_deref_atomic_comp_swap: 1123 case nir_intrinsic_deref_atomic_fadd: 1124 case nir_intrinsic_deref_atomic_fmin: 1125 case nir_intrinsic_deref_atomic_fmax: 1126 case nir_intrinsic_deref_atomic_fcomp_swap: { 1127 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1128 if (deref->mode & modes) { 1129 lower_explicit_io_access(&b, intrin, addr_format); 1130 progress = true; 1131 } 1132 break; 1133 } 1134 1135 case nir_intrinsic_deref_buffer_array_length: { 1136 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1137 if (deref->mode & modes) { 1138 lower_explicit_io_array_length(&b, intrin, addr_format); 1139 progress = true; 1140 } 1141 break; 1142 } 1143 1144 default: 1145 break; 1146 } 1147 break; 1148 } 1149 1150 default: 1151 /* Nothing to do */ 1152 break; 1153 } 1154 } 1155 } 1156 1157 if (progress) { 1158 nir_metadata_preserve(impl, nir_metadata_block_index | 1159 nir_metadata_dominance); 1160 } 1161 1162 return progress; 1163} 1164 1165bool 1166nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, 1167 nir_address_format addr_format) 1168{ 1169 bool progress = false; 1170 1171 nir_foreach_function(function, shader) { 1172 if (function->impl && 1173 nir_lower_explicit_io_impl(function->impl, modes, addr_format)) 1174 progress = true; 1175 } 1176 1177 return progress; 1178} 1179 1180/** 1181 * Return the offset source for a load/store intrinsic. 1182 */ 1183nir_src * 1184nir_get_io_offset_src(nir_intrinsic_instr *instr) 1185{ 1186 switch (instr->intrinsic) { 1187 case nir_intrinsic_load_input: 1188 case nir_intrinsic_load_output: 1189 case nir_intrinsic_load_shared: 1190 case nir_intrinsic_load_uniform: 1191 case nir_intrinsic_load_global: 1192 case nir_intrinsic_load_scratch: 1193 return &instr->src[0]; 1194 case nir_intrinsic_load_ubo: 1195 case nir_intrinsic_load_ssbo: 1196 case nir_intrinsic_load_per_vertex_input: 1197 case nir_intrinsic_load_per_vertex_output: 1198 case nir_intrinsic_load_interpolated_input: 1199 case nir_intrinsic_store_output: 1200 case nir_intrinsic_store_shared: 1201 case nir_intrinsic_store_global: 1202 case nir_intrinsic_store_scratch: 1203 return &instr->src[1]; 1204 case nir_intrinsic_store_ssbo: 1205 case nir_intrinsic_store_per_vertex_output: 1206 return &instr->src[2]; 1207 default: 1208 return NULL; 1209 } 1210} 1211 1212/** 1213 * Return the vertex index source for a load/store per_vertex intrinsic. 1214 */ 1215nir_src * 1216nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) 1217{ 1218 switch (instr->intrinsic) { 1219 case nir_intrinsic_load_per_vertex_input: 1220 case nir_intrinsic_load_per_vertex_output: 1221 return &instr->src[0]; 1222 case nir_intrinsic_store_per_vertex_output: 1223 return &instr->src[1]; 1224 default: 1225 return NULL; 1226 } 1227} 1228