nir_lower_io.c revision 7ec681f3
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts references to input/output variables with
 * loads/stores to actual input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid global atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid shared atomic");
   }
}

void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}
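
/*
 * Illustrative sketch of a type_size callback (the name is hypothetical;
 * drivers supply their own, often built on glsl_count_attribute_slots()):
 *
 *    static int
 *    example_type_size_vec4(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
 *                             example_type_size_vec4);
 */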

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader.  For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}

static unsigned get_number_of_slots(struct lower_io_state *state,
                                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return state->type_size(type, var->data.bindless);
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
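
/*
 * Worked example (illustrative): with a vec4-slot type_size callback, a
 * deref chain  var -> .b -> [i]  over  struct { vec4 a; vec4 b[3]; }
 * produces offset = 1 + i * 1 slots: one slot to skip field "a" plus one
 * slot per element of "b".  When i is constant the resulting iadd/amul
 * chain folds away, as the comment in get_io_offset() suggests.
 */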

static nir_ssa_def *
emit_load(struct lower_io_state *state,
          nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample ||
                (state->options & nir_lower_io_force_sample_interpolation))
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = array_index ? nir_intrinsic_load_per_vertex_input :
                            nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index ? nir_intrinsic_load_output :
           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
                                     nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = {0};
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, const struct glsl_type *type)
{
   assert(intrin->dest.is_ssa);
   if (intrin->dest.ssa.bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_ssa_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      while (dest_comp < intrin->dest.ssa.num_components) {
         const unsigned num_comps =
            MIN2(intrin->dest.ssa.num_components - dest_comp,
                 (4 - component) / 2);

         nir_ssa_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first store has a component offset */
         component = 0;
         dest_comp += num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }

      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
   } else if (intrin->dest.ssa.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->dest.ssa.num_components, 32,
                                nir_type_bool32));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->dest.ssa.num_components,
                       intrin->dest.ssa.bit_size,
                       nir_get_nir_type_for_glsl_type(type));
   }
}

static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index ? nir_intrinsic_store_output :
      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
                                nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
   semantics.per_view = var->data.per_view;
   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component, const struct glsl_type *type)
{
   assert(intrin->src[1].is_ssa);
   if (intrin->src[1].ssa->bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_ssa_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);

            nir_component_mask_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}
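
/*
 * Illustrative example: with nir_lower_io_lower_64bit_to_32, a dvec3 store
 * becomes two 32-bit stores.  The first iteration covers components x/y as
 * four 32-bit channels (write mask 0xf); the second covers z as two 32-bit
 * channels (write mask 0x3) one vec4 slot further on, matching the loop in
 * lower_store() above.
 */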

static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat.  Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_ssa_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1]);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;

   assert(intrin->dest.is_ssa);
   nir_ssa_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->dest.ssa.num_components,
                                  intrin->dest.ssa.bit_size,
                                  &bary_setup->dest.ssa,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return an out-of-bound offset, which
          * may result in out-of-bound loading/storing of inputs/outputs that
          * could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_ssa_def *zero =
               nir_imm_zero(b, intrin->dest.ssa.num_components,
                            intrin->dest.ssa.bit_size);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_ssa_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets.  For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics.  This pass requires that all
 * deref chains are complete and contain no casts.
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
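
/*
 * Typical driver usage (sketch; the callback name is illustrative):
 *
 *    NIR_PASS_V(nir, nir_lower_io,
 *               nir_var_shader_in | nir_var_shader_out,
 *               example_type_size_vec4, nir_lower_io_lower_64bit_to_32);
 *
 * Afterwards, cross-stage I/O appears as load_input/store_output style
 * intrinsics carrying base, offset and component instead of variable derefs.
 */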

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format,
                nir_variable_mode modes,
                nir_ssa_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format, modes,
                          nir_imm_intN_t(b, offset,
                                         addr_get_offset_bit_size(addr, addr_format)));
}

static nir_ssa_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_ssa_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                 var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}

static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}
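
/*
 * Summary of the 62bit_generic encoding used above (see build_addr_for_var
 * and the run-time check): the top two bits of the 64-bit pointer select the
 * mode (0x0 or 0x3 for global, 0x1 for shared, 0x2 for shader/function
 * temporaries) and the low 62 bits hold the offset within that mode's
 * storage.
 */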

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channels(b, addr, 0x3);
   default: unreachable("Invalid address format");
   }
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   return nir_ige(b, nir_channel(b, addr, 2),
                  nir_iadd_imm(b, nir_channel(b, addr, 3), size));
}
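
/*
 * Layout note (derived from the helpers above): a 64bit_bounded_global
 * address is a uvec4 of {addr_lo, addr_hi, buffer_size, offset}.  The bounds
 * check passes when offset + access_size <= buffer_size, and the final
 * 64-bit pointer is (addr_hi:addr_lo) + offset.
 */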

static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
                             nir_address_format addr_format,
                             uint32_t *out_base,
                             uint32_t *out_range)
{
   uint32_t base = 0;
   uint32_t range = glsl_get_explicit_size(deref->type, false);

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
      case nir_deref_type_ptr_as_array: {
         const unsigned stride = nir_deref_instr_array_stride(deref);
         if (stride == 0)
            goto fail;

         if (!parent)
            goto fail;

         if (deref->deref_type != nir_deref_type_array_wildcard &&
             nir_src_is_const(deref->arr.index)) {
            base += stride * nir_src_as_uint(deref->arr.index);
         } else {
            if (glsl_get_length(parent->type) == 0)
               goto fail;
            range += stride * (glsl_get_length(parent->type) - 1);
         }
         break;
      }

      case nir_deref_type_struct: {
         if (!parent)
            goto fail;

         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
         break;
      }

      case nir_deref_type_cast: {
         nir_instr *parent_instr = deref->parent.ssa->parent_instr;

         switch (parent_instr->type) {
         case nir_instr_type_load_const: {
            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);

            switch (addr_format) {
            case nir_address_format_32bit_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_32bit_index_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_vec2_index_32bit_offset:
               base += load->value[2].u32;
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_load_vulkan_descriptor:
               /* Assume that a load_vulkan_descriptor won't contribute to an
                * offset within the resource.
                */
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         default:
            goto fail;
         }
      }

      default:
         goto fail;
      }

      deref = parent;
   }

fail:
   *out_base = 0;
   *out_range = ~0;
}

static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}

static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       nir_variable_mode modes,
                       uint32_t align_mul, uint32_t align_offset,
                       unsigned num_components)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         return build_explicit_io_load(b, intrin, addr, addr_format,
                                       nir_var_mem_global,
                                       align_mul, align_offset,
                                       num_components);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   modes & ~nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_shared,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_global,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
      switch (mode) {
      case nir_var_mem_ubo:
         if (addr_format == nir_address_format_64bit_global_32bit_offset)
            op = nir_intrinsic_load_global_constant_offset;
         else if (addr_format == nir_address_format_64bit_bounded_global)
            op = nir_intrinsic_load_global_constant_bounded;
         else if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_constant;
         else
            op = nir_intrinsic_load_ubo;
         break;
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global;
         else
            op = nir_intrinsic_load_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = nir_intrinsic_load_global;
         break;
      case nir_var_uniform:
         assert(addr_format_is_offset(addr_format, mode));
         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
         op = nir_intrinsic_load_kernel_input;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_shared;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_load_global;
         }
         break;
      case nir_var_mem_push_const:
         assert(addr_format == nir_address_format_32bit_offset);
         op = nir_intrinsic_load_push_constant;
         break;
      case nir_var_mem_constant:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_constant;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_load_global_constant;
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_load_deref_block_intel:
      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_block_intel;
         else
            op = nir_intrinsic_load_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_load_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_load_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   if (op == nir_intrinsic_load_global_constant_offset) {
      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
   } else if (op == nir_intrinsic_load_global_constant_bounded) {
      assert(addr_format == nir_address_format_64bit_bounded_global);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
   } else if (addr_format_is_global(addr_format, mode)) {
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   if (op == nir_intrinsic_load_constant) {
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, b->shader->constant_data_size);
   } else if (mode == nir_var_mem_push_const) {
      /* Push constants are required to be able to be chased back to the
       * variable so we can provide a base/range.
       */
      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
   }

   unsigned bit_size = intrin->dest.ssa.bit_size;
   if (bit_size == 1) {
      /* TODO: Make the native bool bit_size an option. */
      bit_size = 32;
   }

   if (nir_intrinsic_has_align(load))
      nir_intrinsic_set_align(load, align_mul, align_offset);

   if (nir_intrinsic_has_range_base(load)) {
      unsigned base, range;
      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
      nir_intrinsic_set_range_base(load, base);
      nir_intrinsic_set_range(load, range);
   }

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     bit_size, NULL);

   assert(bit_size % 8 == 0);

   nir_ssa_def *result;
   if (addr_format_needs_bounds_check(addr_format) &&
       op != nir_intrinsic_load_global_constant_bounded) {
      /* We don't need to bounds-check global_constant_bounded because bounds
       * checking is handled by the intrinsic itself.
       *
       * The Vulkan spec for robustBufferAccess gives us quite a few options
       * as to what we can do with an OOB read.  Unfortunately, returning
       * undefined values isn't one of them so we return an actual zero.
       */
      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);

      /* TODO: Better handle block_intel. */
      const unsigned load_size = (bit_size / 8) * load->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));

      nir_builder_instr_insert(b, &load->instr);

      nir_pop_if(b, NULL);

      result = nir_if_phi(b, &load->dest.ssa, zero);
   } else {
      nir_builder_instr_insert(b, &load->instr);
      result = &load->dest.ssa;
   }

   if (intrin->dest.ssa.bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         result = nir_b2b1(b, result);
      else
         result = nir_i2b(b, result);
   }

   return result;
}

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_variable_mode modes,
                        uint32_t align_mul, uint32_t align_offset,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 modes & ~nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_shared,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      }
      return;
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      assert(write_mask != 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global;
         else
            op = nir_intrinsic_store_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = nir_intrinsic_store_global;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_shared;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_store_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_store_global;
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_store_deref_block_intel:
      assert(write_mask == 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global_block_intel;
         else
            op = nir_intrinsic_store_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_store_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_store_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   if (value->bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       *
       * TODO: Make the native bool bit_size an option.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         value = nir_b2b32(b, value);
      else
         value = nir_b2i(b, value, 32);
   }

   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format, mode)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   nir_intrinsic_set_align(store, align_mul, align_offset);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;

   assert(value->bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* TODO: Better handle block_intel. */
      const unsigned store_size = (value->bit_size / 8) * store->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));

      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   } else {
      nir_builder_instr_insert(b, &store->instr);
   }
}

static nir_ssa_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_ssa_def *addr, nir_address_format addr_format,
                         nir_variable_mode modes)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         return build_explicit_io_atomic(b, intrin, addr, addr_format,
                                         nir_var_mem_global);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_ssa_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_function_temp);
         nir_push_else(b, NULL);
         nir_ssa_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     modes & ~nir_var_function_temp);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_ssa_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_shared);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_ssa_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_global);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format, mode))
         op = global_atomic_for_deref(intrin->intrinsic);
      else
         op = ssbo_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format, mode));
      op = global_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_shared:
      assert(addr_format_is_offset(addr_format, mode));
      op = shared_atomic_for_deref(intrin->intrinsic);
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);

   unsigned src = 0;
   if (addr_format_is_global(addr_format, mode)) {
      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
   for (unsigned i = 0; i < num_data_srcs; i++) {
      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   /* Global atomics don't have access flags because they assume that the
    * address may be non-uniform.
    */
   if (nir_intrinsic_has_access(atomic))
      nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));

   assert(intrin->dest.ssa.num_components == 1);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                     1, intrin->dest.ssa.bit_size, NULL);

   assert(atomic->dest.ssa.bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));

      nir_builder_instr_insert(b, &atomic->instr);

      nir_pop_if(b, NULL);
      return nir_if_phi(b, &atomic->dest.ssa,
                        nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
   } else {
      nir_builder_instr_insert(b, &atomic->instr);
      return &atomic->dest.ssa;
   }
}

nir_ssa_def *
nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
                                   nir_ssa_def *base_addr,
                                   nir_address_format addr_format)
{
   assert(deref->dest.is_ssa);
   switch (deref->deref_type) {
   case nir_deref_type_var:
      return build_addr_for_var(b, deref->var, addr_format);

   case nir_deref_type_array: {
      unsigned stride = nir_deref_instr_array_stride(deref);
      assert(stride > 0);

      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
      return build_addr_iadd(b, base_addr, addr_format, deref->modes,
                             nir_amul_imm(b, index, stride));
   }

   case nir_deref_type_ptr_as_array: {
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
      unsigned stride = nir_deref_instr_array_stride(deref);
      return build_addr_iadd(b, base_addr, addr_format, deref->modes,
                             nir_amul_imm(b, index, stride));
   }

   case nir_deref_type_array_wildcard:
      unreachable("Wildcards should be lowered by now");
      break;

   case nir_deref_type_struct: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      int offset = glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
      assert(offset >= 0);
      return build_addr_iadd_imm(b, base_addr, addr_format,
                                 deref->modes, offset);
   }

   case nir_deref_type_cast:
      /* Nothing to do here */
      return base_addr;
   }

   unreachable("Invalid NIR deref type");
}
unreachable("Wildcards should be lowered by now"); 1737 break; 1738 1739 case nir_deref_type_struct: { 1740 nir_deref_instr *parent = nir_deref_instr_parent(deref); 1741 int offset = glsl_get_struct_field_offset(parent->type, 1742 deref->strct.index); 1743 assert(offset >= 0); 1744 return build_addr_iadd_imm(b, base_addr, addr_format, 1745 deref->modes, offset); 1746 } 1747 1748 case nir_deref_type_cast: 1749 /* Nothing to do here */ 1750 return base_addr; 1751 } 1752 1753 unreachable("Invalid NIR deref type"); 1754} 1755 1756void 1757nir_lower_explicit_io_instr(nir_builder *b, 1758 nir_intrinsic_instr *intrin, 1759 nir_ssa_def *addr, 1760 nir_address_format addr_format) 1761{ 1762 b->cursor = nir_after_instr(&intrin->instr); 1763 1764 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1765 unsigned vec_stride = glsl_get_explicit_stride(deref->type); 1766 unsigned scalar_size = type_scalar_size_bytes(deref->type); 1767 assert(vec_stride == 0 || glsl_type_is_vector(deref->type)); 1768 assert(vec_stride == 0 || vec_stride >= scalar_size); 1769 1770 uint32_t align_mul, align_offset; 1771 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) { 1772 /* If we don't have an alignment from the deref, assume scalar */ 1773 align_mul = scalar_size; 1774 align_offset = 0; 1775 } 1776 1777 switch (intrin->intrinsic) { 1778 case nir_intrinsic_load_deref: { 1779 nir_ssa_def *value; 1780 if (vec_stride > scalar_size) { 1781 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, }; 1782 for (unsigned i = 0; i < intrin->num_components; i++) { 1783 unsigned comp_offset = i * vec_stride; 1784 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1785 deref->modes, 1786 comp_offset); 1787 comps[i] = build_explicit_io_load(b, intrin, comp_addr, 1788 addr_format, deref->modes, 1789 align_mul, 1790 (align_offset + comp_offset) % 1791 align_mul, 1792 1); 1793 } 1794 value = nir_vec(b, comps, intrin->num_components); 1795 } else { 1796 value = build_explicit_io_load(b, intrin, addr, addr_format, 1797 deref->modes, align_mul, align_offset, 1798 intrin->num_components); 1799 } 1800 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1801 break; 1802 } 1803 1804 case nir_intrinsic_store_deref: { 1805 assert(intrin->src[1].is_ssa); 1806 nir_ssa_def *value = intrin->src[1].ssa; 1807 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 1808 if (vec_stride > scalar_size) { 1809 for (unsigned i = 0; i < intrin->num_components; i++) { 1810 if (!(write_mask & (1 << i))) 1811 continue; 1812 1813 unsigned comp_offset = i * vec_stride; 1814 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1815 deref->modes, 1816 comp_offset); 1817 build_explicit_io_store(b, intrin, comp_addr, addr_format, 1818 deref->modes, align_mul, 1819 (align_offset + comp_offset) % align_mul, 1820 nir_channel(b, value, i), 1); 1821 } 1822 } else { 1823 build_explicit_io_store(b, intrin, addr, addr_format, 1824 deref->modes, align_mul, align_offset, 1825 value, write_mask); 1826 } 1827 break; 1828 } 1829 1830 case nir_intrinsic_load_deref_block_intel: { 1831 nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format, 1832 deref->modes, 1833 align_mul, align_offset, 1834 intrin->num_components); 1835 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1836 break; 1837 } 1838 1839 case nir_intrinsic_store_deref_block_intel: { 1840 assert(intrin->src[1].is_ssa); 1841 nir_ssa_def *value = intrin->src[1].ssa; 1842 const nir_component_mask_t write_mask = 0; 1843 
build_explicit_io_store(b, intrin, addr, addr_format, 1844 deref->modes, align_mul, align_offset, 1845 value, write_mask); 1846 break; 1847 } 1848 1849 default: { 1850 nir_ssa_def *value = 1851 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes); 1852 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1853 break; 1854 } 1855 } 1856 1857 nir_instr_remove(&intrin->instr); 1858} 1859 1860bool 1861nir_get_explicit_deref_align(nir_deref_instr *deref, 1862 bool default_to_type_align, 1863 uint32_t *align_mul, 1864 uint32_t *align_offset) 1865{ 1866 if (deref->deref_type == nir_deref_type_var) { 1867 /* If we see a variable, align_mul is effectively infinite because we 1868 * know the offset exactly (up to the offset of the base pointer for the 1869 * given variable mode). We have to pick something so we choose 256B 1870 * as an arbitrary alignment which seems high enough for any reasonable 1871 * wide-load use-case. Back-ends should clamp alignments down if 256B 1872 * is too large for some reason. 1873 */ 1874 *align_mul = 256; 1875 *align_offset = deref->var->data.driver_location % 256; 1876 return true; 1877 } 1878 1879 /* If we're a cast deref that has an alignment, use that. */ 1880 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) { 1881 *align_mul = deref->cast.align_mul; 1882 *align_offset = deref->cast.align_offset; 1883 return true; 1884 } 1885 1886 /* Otherwise, we need to compute the alignment based on the parent */ 1887 nir_deref_instr *parent = nir_deref_instr_parent(deref); 1888 if (parent == NULL) { 1889 assert(deref->deref_type == nir_deref_type_cast); 1890 if (default_to_type_align) { 1891 /* If we don't have a parent, assume the type's alignment, if any. */ 1892 unsigned type_align = glsl_get_explicit_alignment(deref->type); 1893 if (type_align == 0) 1894 return false; 1895 1896 *align_mul = type_align; 1897 *align_offset = 0; 1898 return true; 1899 } else { 1900 return false; 1901 } 1902 } 1903 1904 uint32_t parent_mul, parent_offset; 1905 if (!nir_get_explicit_deref_align(parent, default_to_type_align, 1906 &parent_mul, &parent_offset)) 1907 return false; 1908 1909 switch (deref->deref_type) { 1910 case nir_deref_type_var: 1911 unreachable("Handled above"); 1912 1913 case nir_deref_type_array: 1914 case nir_deref_type_array_wildcard: 1915 case nir_deref_type_ptr_as_array: { 1916 const unsigned stride = nir_deref_instr_array_stride(deref); 1917 if (stride == 0) 1918 return false; 1919 1920 if (deref->deref_type != nir_deref_type_array_wildcard && 1921 nir_src_is_const(deref->arr.index)) { 1922 unsigned offset = nir_src_as_uint(deref->arr.index) * stride; 1923 *align_mul = parent_mul; 1924 *align_offset = (parent_offset + offset) % parent_mul; 1925 } else { 1926 /* If this is a wildcard or an indirect deref, we have to go with the 1927 * power-of-two gcd. 1928 */ 1929 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1)); 1930 *align_offset = parent_offset % *align_mul; 1931 } 1932 return true; 1933 } 1934 1935 case nir_deref_type_struct: { 1936 const int offset = glsl_get_struct_field_offset(parent->type, 1937 deref->strct.index); 1938 if (offset < 0) 1939 return false; 1940 1941 *align_mul = parent_mul; 1942 *align_offset = (parent_offset + offset) % parent_mul; 1943 return true; 1944 } 1945 1946 case nir_deref_type_cast: 1947 /* We handled the explicit alignment case above. 
*/ 1948 assert(deref->cast.align_mul == 0); 1949 *align_mul = parent_mul; 1950 *align_offset = parent_offset; 1951 return true; 1952 } 1953 1954 unreachable("Invalid deref_instr_type"); 1955} 1956 1957static void 1958lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref, 1959 nir_address_format addr_format) 1960{ 1961 /* Just delete the deref if it's not used. We can't use 1962 * nir_deref_instr_remove_if_unused here because it may remove more than 1963 * one deref which could break our list walking since we walk the list 1964 * backwards. 1965 */ 1966 assert(list_is_empty(&deref->dest.ssa.if_uses)); 1967 if (list_is_empty(&deref->dest.ssa.uses)) { 1968 nir_instr_remove(&deref->instr); 1969 return; 1970 } 1971 1972 b->cursor = nir_after_instr(&deref->instr); 1973 1974 nir_ssa_def *base_addr = NULL; 1975 if (deref->deref_type != nir_deref_type_var) { 1976 assert(deref->parent.is_ssa); 1977 base_addr = deref->parent.ssa; 1978 } 1979 1980 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr, 1981 addr_format); 1982 assert(addr->bit_size == deref->dest.ssa.bit_size); 1983 assert(addr->num_components == deref->dest.ssa.num_components); 1984 1985 nir_instr_remove(&deref->instr); 1986 nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr); 1987} 1988 1989static void 1990lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin, 1991 nir_address_format addr_format) 1992{ 1993 assert(intrin->src[0].is_ssa); 1994 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format); 1995} 1996 1997static void 1998lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin, 1999 nir_address_format addr_format) 2000{ 2001 b->cursor = nir_after_instr(&intrin->instr); 2002 2003 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2004 2005 assert(glsl_type_is_array(deref->type)); 2006 assert(glsl_get_length(deref->type) == 0); 2007 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo)); 2008 unsigned stride = glsl_get_explicit_stride(deref->type); 2009 assert(stride > 0); 2010 2011 nir_ssa_def *addr = &deref->dest.ssa; 2012 nir_ssa_def *index = addr_to_index(b, addr, addr_format); 2013 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format); 2014 unsigned access = nir_intrinsic_access(intrin); 2015 2016 nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access); 2017 arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u)); 2018 arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride)); 2019 2020 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size); 2021 nir_instr_remove(&intrin->instr); 2022} 2023 2024static void 2025lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin, 2026 nir_address_format addr_format) 2027{ 2028 if (addr_format_is_global(addr_format, 0)) { 2029 /* If the address format is always global, then the driver can use 2030 * global addresses regardless of the mode. In that case, don't create 2031 * a check, just whack the intrinsic to addr_mode_is and delegate to the 2032 * driver lowering. 
2033 */ 2034 intrin->intrinsic = nir_intrinsic_addr_mode_is; 2035 return; 2036 } 2037 2038 assert(intrin->src[0].is_ssa); 2039 nir_ssa_def *addr = intrin->src[0].ssa; 2040 2041 b->cursor = nir_instr_remove(&intrin->instr); 2042 2043 nir_ssa_def *is_mode = 2044 build_runtime_addr_mode_check(b, addr, addr_format, 2045 nir_intrinsic_memory_modes(intrin)); 2046 2047 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode); 2048} 2049 2050static bool 2051nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, 2052 nir_address_format addr_format) 2053{ 2054 bool progress = false; 2055 2056 nir_builder b; 2057 nir_builder_init(&b, impl); 2058 2059 /* Walk in reverse order so that we can see the full deref chain when we 2060 * lower the access operations. We lower them assuming that the derefs 2061 * will be turned into address calculations later. 2062 */ 2063 nir_foreach_block_reverse(block, impl) { 2064 nir_foreach_instr_reverse_safe(instr, block) { 2065 switch (instr->type) { 2066 case nir_instr_type_deref: { 2067 nir_deref_instr *deref = nir_instr_as_deref(instr); 2068 if (nir_deref_mode_is_in_set(deref, modes)) { 2069 lower_explicit_io_deref(&b, deref, addr_format); 2070 progress = true; 2071 } 2072 break; 2073 } 2074 2075 case nir_instr_type_intrinsic: { 2076 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2077 switch (intrin->intrinsic) { 2078 case nir_intrinsic_load_deref: 2079 case nir_intrinsic_store_deref: 2080 case nir_intrinsic_load_deref_block_intel: 2081 case nir_intrinsic_store_deref_block_intel: 2082 case nir_intrinsic_deref_atomic_add: 2083 case nir_intrinsic_deref_atomic_imin: 2084 case nir_intrinsic_deref_atomic_umin: 2085 case nir_intrinsic_deref_atomic_imax: 2086 case nir_intrinsic_deref_atomic_umax: 2087 case nir_intrinsic_deref_atomic_and: 2088 case nir_intrinsic_deref_atomic_or: 2089 case nir_intrinsic_deref_atomic_xor: 2090 case nir_intrinsic_deref_atomic_exchange: 2091 case nir_intrinsic_deref_atomic_comp_swap: 2092 case nir_intrinsic_deref_atomic_fadd: 2093 case nir_intrinsic_deref_atomic_fmin: 2094 case nir_intrinsic_deref_atomic_fmax: 2095 case nir_intrinsic_deref_atomic_fcomp_swap: { 2096 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2097 if (nir_deref_mode_is_in_set(deref, modes)) { 2098 lower_explicit_io_access(&b, intrin, addr_format); 2099 progress = true; 2100 } 2101 break; 2102 } 2103 2104 case nir_intrinsic_deref_buffer_array_length: { 2105 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2106 if (nir_deref_mode_is_in_set(deref, modes)) { 2107 lower_explicit_io_array_length(&b, intrin, addr_format); 2108 progress = true; 2109 } 2110 break; 2111 } 2112 2113 case nir_intrinsic_deref_mode_is: { 2114 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2115 if (nir_deref_mode_is_in_set(deref, modes)) { 2116 lower_explicit_io_mode_check(&b, intrin, addr_format); 2117 progress = true; 2118 } 2119 break; 2120 } 2121 2122 default: 2123 break; 2124 } 2125 break; 2126 } 2127 2128 default: 2129 /* Nothing to do */ 2130 break; 2131 } 2132 } 2133 } 2134 2135 if (progress) { 2136 nir_metadata_preserve(impl, nir_metadata_block_index | 2137 nir_metadata_dominance); 2138 } else { 2139 nir_metadata_preserve(impl, nir_metadata_all); 2140 } 2141 2142 return progress; 2143} 2144 2145/** Lower explicitly laid out I/O access to byte offset/address intrinsics 2146 * 2147 * This pass is intended to be used for any I/O which touches memory external 2148 * to the shader or which is directly visible to the client. 
It requires that 2149 * all data types in the given modes have explicit stride/offset decorations 2150 * to tell it exactly how to calculate the offset/address for the given load, 2151 * store, or atomic operation. If the offset/stride information does not come 2152 * from the client explicitly (as with shared variables in GL or Vulkan), 2153 * nir_lower_vars_to_explicit_types() can be used to add them. 2154 * 2155 * Unlike nir_lower_io, this pass is fully capable of handling incomplete 2156 * pointer chains which may contain cast derefs. It does so by walking the 2157 * deref chain backwards and simply replacing each deref, one at a time, with 2158 * the appropriate address calculation. The pass takes a nir_address_format 2159 * parameter which describes how the offset or address is to be represented 2160 * during calculations. By ensuring that the address is always in a 2161 * consistent format, pointers can safely be conjured from thin air by the 2162 * driver, stored to variables, passed through phis, etc. 2163 * 2164 * The one exception to the simple algorithm described above is for handling 2165 * row-major matrices in which case we may look down one additional level of 2166 * the deref chain. 2167 * 2168 * This pass is also capable of handling OpenCL generic pointers. If the 2169 * address format is global, it will lower any ambiguous (more than one mode) 2170 * access to global and pass through the deref_mode_is run-time checks as 2171 * addr_mode_is. This assumes the driver has somehow mapped shared and 2172 * scratch memory to the global address space. For other modes such as 2173 * 62bit_generic, there is an enum embedded in the address and we lower 2174 * ambiguous access to an if-ladder and deref_mode_is to a check against the 2175 * embedded enum. If nir_lower_explicit_io is called on any shader that 2176 * contains generic pointers, it must either be used on all of the generic 2177 * modes or none.
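 *
 * A minimal usage sketch (illustrative only; the address formats are a
 * driver choice and driver_shared_type_size_align stands in for whatever
 * glsl_type_size_align_func the driver provides):
 *
 *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
 *                                     driver_shared_type_size_align);
 *    nir_lower_explicit_io(shader, nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 *    nir_lower_explicit_io(shader, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);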
2178 */ 2179bool 2180nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, 2181 nir_address_format addr_format) 2182{ 2183 bool progress = false; 2184 2185 nir_foreach_function(function, shader) { 2186 if (function->impl && 2187 nir_lower_explicit_io_impl(function->impl, modes, addr_format)) 2188 progress = true; 2189 } 2190 2191 return progress; 2192} 2193 2194static bool 2195nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl, 2196 nir_variable_mode modes, 2197 glsl_type_size_align_func type_info) 2198{ 2199 bool progress = false; 2200 2201 nir_foreach_block(block, impl) { 2202 nir_foreach_instr(instr, block) { 2203 if (instr->type != nir_instr_type_deref) 2204 continue; 2205 2206 nir_deref_instr *deref = nir_instr_as_deref(instr); 2207 if (!nir_deref_mode_is_in_set(deref, modes)) 2208 continue; 2209 2210 unsigned size, alignment; 2211 const struct glsl_type *new_type = 2212 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment); 2213 if (new_type != deref->type) { 2214 progress = true; 2215 deref->type = new_type; 2216 } 2217 if (deref->deref_type == nir_deref_type_cast) { 2218 /* See also glsl_type::get_explicit_type_for_size_align() */ 2219 unsigned new_stride = align(size, alignment); 2220 if (new_stride != deref->cast.ptr_stride) { 2221 deref->cast.ptr_stride = new_stride; 2222 progress = true; 2223 } 2224 } 2225 } 2226 } 2227 2228 if (progress) { 2229 nir_metadata_preserve(impl, nir_metadata_block_index | 2230 nir_metadata_dominance | 2231 nir_metadata_live_ssa_defs | 2232 nir_metadata_loop_analysis); 2233 } else { 2234 nir_metadata_preserve(impl, nir_metadata_all); 2235 } 2236 2237 return progress; 2238} 2239 2240static bool 2241lower_vars_to_explicit(nir_shader *shader, 2242 struct exec_list *vars, nir_variable_mode mode, 2243 glsl_type_size_align_func type_info) 2244{ 2245 bool progress = false; 2246 unsigned offset; 2247 switch (mode) { 2248 case nir_var_uniform: 2249 assert(shader->info.stage == MESA_SHADER_KERNEL); 2250 offset = 0; 2251 break; 2252 case nir_var_function_temp: 2253 case nir_var_shader_temp: 2254 offset = shader->scratch_size; 2255 break; 2256 case nir_var_mem_shared: 2257 offset = shader->info.shared_size; 2258 break; 2259 case nir_var_mem_constant: 2260 offset = shader->constant_data_size; 2261 break; 2262 case nir_var_shader_call_data: 2263 case nir_var_ray_hit_attrib: 2264 offset = 0; 2265 break; 2266 default: 2267 unreachable("Unsupported mode"); 2268 } 2269 nir_foreach_variable_in_list(var, vars) { 2270 if (var->data.mode != mode) 2271 continue; 2272 2273 unsigned size, align; 2274 const struct glsl_type *explicit_type = 2275 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align); 2276 2277 if (explicit_type != var->type) 2278 var->type = explicit_type; 2279 2280 UNUSED bool is_empty_struct = 2281 glsl_type_is_struct_or_ifc(explicit_type) && 2282 glsl_get_length(explicit_type) == 0; 2283 2284 assert(util_is_power_of_two_nonzero(align) || is_empty_struct); 2285 var->data.driver_location = ALIGN_POT(offset, align); 2286 offset = var->data.driver_location + size; 2287 progress = true; 2288 } 2289 2290 switch (mode) { 2291 case nir_var_uniform: 2292 assert(shader->info.stage == MESA_SHADER_KERNEL); 2293 shader->num_uniforms = offset; 2294 break; 2295 case nir_var_shader_temp: 2296 case nir_var_function_temp: 2297 shader->scratch_size = offset; 2298 break; 2299 case nir_var_mem_shared: 2300 shader->info.shared_size = offset; 2301 break; 2302 case nir_var_mem_constant: 2303 
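      /* The running total becomes the shader's constant-data size below; the
       * variables' initializers can then be written out at their newly
       * assigned driver_locations, e.g. with
       * nir_gather_explicit_io_initializers().
       */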
shader->constant_data_size = offset; 2304 break; 2305 case nir_var_shader_call_data: 2306 case nir_var_ray_hit_attrib: 2307 break; 2308 default: 2309 unreachable("Unsupported mode"); 2310 } 2311 2312 return progress; 2313} 2314 2315/* If nir_lower_vars_to_explicit_types is called on any shader that contains 2316 * generic pointers, it must either be used on all of the generic modes or 2317 * none. 2318 */ 2319bool 2320nir_lower_vars_to_explicit_types(nir_shader *shader, 2321 nir_variable_mode modes, 2322 glsl_type_size_align_func type_info) 2323{ 2324 /* TODO: Situations which need to be handled to support more modes: 2325 * - row-major matrices 2326 * - compact shader inputs/outputs 2327 * - interface types 2328 */ 2329 ASSERTED nir_variable_mode supported = 2330 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant | 2331 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform | 2332 nir_var_shader_call_data | nir_var_ray_hit_attrib; 2333 assert(!(modes & ~supported) && "unsupported"); 2334 2335 bool progress = false; 2336 2337 if (modes & nir_var_uniform) 2338 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info); 2339 2340 if (modes & nir_var_mem_shared) { 2341 assert(!shader->info.shared_memory_explicit_layout); 2342 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info); 2343 } 2344 2345 if (modes & nir_var_shader_temp) 2346 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info); 2347 if (modes & nir_var_mem_constant) 2348 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info); 2349 if (modes & nir_var_shader_call_data) 2350 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info); 2351 if (modes & nir_var_ray_hit_attrib) 2352 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info); 2353 2354 nir_foreach_function(function, shader) { 2355 if (function->impl) { 2356 if (modes & nir_var_function_temp) 2357 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info); 2358 2359 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info); 2360 } 2361 } 2362 2363 return progress; 2364} 2365 2366static void 2367write_constant(void *dst, size_t dst_size, 2368 const nir_constant *c, const struct glsl_type *type) 2369{ 2370 if (glsl_type_is_vector_or_scalar(type)) { 2371 const unsigned num_components = glsl_get_vector_elements(type); 2372 const unsigned bit_size = glsl_get_bit_size(type); 2373 if (bit_size == 1) { 2374 /* Booleans are special-cased to be 32-bit 2375 * 2376 * TODO: Make the native bool bit_size an option. 2377 */ 2378 assert(num_components * 4 <= dst_size); 2379 for (unsigned i = 0; i < num_components; i++) { 2380 int32_t b32 = -(int)c->values[i].b; 2381 memcpy((char *)dst + i * 4, &b32, 4); 2382 } 2383 } else { 2384 assert(bit_size >= 8 && bit_size % 8 == 0); 2385 const unsigned byte_size = bit_size / 8; 2386 assert(num_components * byte_size <= dst_size); 2387 for (unsigned i = 0; i < num_components; i++) { 2388 /* Annoyingly, thanks to packed structs, we can't make any 2389 * assumptions about the alignment of dst. To avoid any strange 2390 * issues with unaligned writes, we always use memcpy. 
2391 */ 2392 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size); 2393 } 2394 } 2395 } else if (glsl_type_is_array_or_matrix(type)) { 2396 const unsigned array_len = glsl_get_length(type); 2397 const unsigned stride = glsl_get_explicit_stride(type); 2398 assert(stride > 0); 2399 const struct glsl_type *elem_type = glsl_get_array_element(type); 2400 for (unsigned i = 0; i < array_len; i++) { 2401 unsigned elem_offset = i * stride; 2402 assert(elem_offset < dst_size); 2403 write_constant((char *)dst + elem_offset, dst_size - elem_offset, 2404 c->elements[i], elem_type); 2405 } 2406 } else { 2407 assert(glsl_type_is_struct_or_ifc(type)); 2408 const unsigned num_fields = glsl_get_length(type); 2409 for (unsigned i = 0; i < num_fields; i++) { 2410 const int field_offset = glsl_get_struct_field_offset(type, i); 2411 assert(field_offset >= 0 && field_offset < dst_size); 2412 const struct glsl_type *field_type = glsl_get_struct_field(type, i); 2413 write_constant((char *)dst + field_offset, dst_size - field_offset, 2414 c->elements[i], field_type); 2415 } 2416 } 2417} 2418 2419void 2420nir_gather_explicit_io_initializers(nir_shader *shader, 2421 void *dst, size_t dst_size, 2422 nir_variable_mode mode) 2423{ 2424 /* It doesn't really make sense to gather initializers for more than one 2425 * mode at a time. If this ever becomes well-defined, we can drop the 2426 * assert then. 2427 */ 2428 assert(util_bitcount(mode) == 1); 2429 2430 nir_foreach_variable_with_modes(var, shader, mode) { 2431 assert(var->data.driver_location < dst_size); 2432 write_constant((char *)dst + var->data.driver_location, 2433 dst_size - var->data.driver_location, 2434 var->constant_initializer, var->type); 2435 } 2436} 2437 2438/** 2439 * Return the offset source for a load/store intrinsic. 
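 *
 * A typical caller (hypothetical sketch) folds a constant offset into its
 * own base bookkeeping:
 *
 *    nir_src *off = nir_get_io_offset_src(intrin);
 *    if (off != NULL && nir_src_is_const(*off))
 *       base += nir_src_as_uint(*off);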
2440 */ 2441nir_src * 2442nir_get_io_offset_src(nir_intrinsic_instr *instr) 2443{ 2444 switch (instr->intrinsic) { 2445 case nir_intrinsic_load_input: 2446 case nir_intrinsic_load_output: 2447 case nir_intrinsic_load_shared: 2448 case nir_intrinsic_load_uniform: 2449 case nir_intrinsic_load_kernel_input: 2450 case nir_intrinsic_load_global: 2451 case nir_intrinsic_load_global_constant: 2452 case nir_intrinsic_load_scratch: 2453 case nir_intrinsic_load_fs_input_interp_deltas: 2454 case nir_intrinsic_shared_atomic_add: 2455 case nir_intrinsic_shared_atomic_and: 2456 case nir_intrinsic_shared_atomic_comp_swap: 2457 case nir_intrinsic_shared_atomic_exchange: 2458 case nir_intrinsic_shared_atomic_fadd: 2459 case nir_intrinsic_shared_atomic_fcomp_swap: 2460 case nir_intrinsic_shared_atomic_fmax: 2461 case nir_intrinsic_shared_atomic_fmin: 2462 case nir_intrinsic_shared_atomic_imax: 2463 case nir_intrinsic_shared_atomic_imin: 2464 case nir_intrinsic_shared_atomic_or: 2465 case nir_intrinsic_shared_atomic_umax: 2466 case nir_intrinsic_shared_atomic_umin: 2467 case nir_intrinsic_shared_atomic_xor: 2468 case nir_intrinsic_global_atomic_add: 2469 case nir_intrinsic_global_atomic_and: 2470 case nir_intrinsic_global_atomic_comp_swap: 2471 case nir_intrinsic_global_atomic_exchange: 2472 case nir_intrinsic_global_atomic_fadd: 2473 case nir_intrinsic_global_atomic_fcomp_swap: 2474 case nir_intrinsic_global_atomic_fmax: 2475 case nir_intrinsic_global_atomic_fmin: 2476 case nir_intrinsic_global_atomic_imax: 2477 case nir_intrinsic_global_atomic_imin: 2478 case nir_intrinsic_global_atomic_or: 2479 case nir_intrinsic_global_atomic_umax: 2480 case nir_intrinsic_global_atomic_umin: 2481 case nir_intrinsic_global_atomic_xor: 2482 return &instr->src[0]; 2483 case nir_intrinsic_load_ubo: 2484 case nir_intrinsic_load_ssbo: 2485 case nir_intrinsic_load_input_vertex: 2486 case nir_intrinsic_load_per_vertex_input: 2487 case nir_intrinsic_load_per_vertex_output: 2488 case nir_intrinsic_load_per_primitive_output: 2489 case nir_intrinsic_load_interpolated_input: 2490 case nir_intrinsic_store_output: 2491 case nir_intrinsic_store_shared: 2492 case nir_intrinsic_store_global: 2493 case nir_intrinsic_store_scratch: 2494 case nir_intrinsic_ssbo_atomic_add: 2495 case nir_intrinsic_ssbo_atomic_imin: 2496 case nir_intrinsic_ssbo_atomic_umin: 2497 case nir_intrinsic_ssbo_atomic_imax: 2498 case nir_intrinsic_ssbo_atomic_umax: 2499 case nir_intrinsic_ssbo_atomic_and: 2500 case nir_intrinsic_ssbo_atomic_or: 2501 case nir_intrinsic_ssbo_atomic_xor: 2502 case nir_intrinsic_ssbo_atomic_exchange: 2503 case nir_intrinsic_ssbo_atomic_comp_swap: 2504 case nir_intrinsic_ssbo_atomic_fadd: 2505 case nir_intrinsic_ssbo_atomic_fmin: 2506 case nir_intrinsic_ssbo_atomic_fmax: 2507 case nir_intrinsic_ssbo_atomic_fcomp_swap: 2508 return &instr->src[1]; 2509 case nir_intrinsic_store_ssbo: 2510 case nir_intrinsic_store_per_vertex_output: 2511 case nir_intrinsic_store_per_primitive_output: 2512 return &instr->src[2]; 2513 default: 2514 return NULL; 2515 } 2516} 2517 2518/** 2519 * Return the vertex index source for a load/store per_vertex intrinsic. 
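 *
 * Returns NULL for intrinsics that do not take a vertex index.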
2520 */ 2521 nir_src * 2522 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) 2523 { 2524 switch (instr->intrinsic) { 2525 case nir_intrinsic_load_per_vertex_input: 2526 case nir_intrinsic_load_per_vertex_output: 2527 return &instr->src[0]; 2528 case nir_intrinsic_store_per_vertex_output: 2529 return &instr->src[1]; 2530 default: 2531 return NULL; 2532 } 2533 } 2534 2535 /** 2536 * Return the numeric constant that identifies a NULL pointer for each address 2537 * format. 2538 */ 2539 const nir_const_value * 2540 nir_address_format_null_value(nir_address_format addr_format) 2541 { 2542 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = { 2543 [nir_address_format_32bit_global] = {{0}}, 2544 [nir_address_format_64bit_global] = {{0}}, 2545 [nir_address_format_64bit_global_32bit_offset] = {{0}}, 2546 [nir_address_format_64bit_bounded_global] = {{0}}, 2547 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}}, 2548 [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}}, 2549 [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}}, 2550 [nir_address_format_32bit_offset] = {{.u32 = ~0}}, 2551 [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}}, 2552 [nir_address_format_62bit_generic] = {{.u64 = 0}}, 2553 [nir_address_format_logical] = {{.u32 = ~0}}, 2554 }; 2555 2556 assert(addr_format < ARRAY_SIZE(null_values)); 2557 return null_values[addr_format]; 2558 } 2559 2560 nir_ssa_def * 2561 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2562 nir_address_format addr_format) 2563 { 2564 switch (addr_format) { 2565 case nir_address_format_32bit_global: 2566 case nir_address_format_64bit_global: 2567 case nir_address_format_64bit_bounded_global: 2568 case nir_address_format_32bit_index_offset: 2569 case nir_address_format_vec2_index_32bit_offset: 2570 case nir_address_format_32bit_offset: 2571 case nir_address_format_62bit_generic: 2572 return nir_ball_iequal(b, addr0, addr1); 2573 2574 case nir_address_format_64bit_global_32bit_offset: 2575 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb), 2576 nir_channels(b, addr1, 0xb)); 2577 2578 case nir_address_format_32bit_offset_as_64bit: 2579 assert(addr0->num_components == 1 && addr1->num_components == 1); 2580 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)); 2581 2582 case nir_address_format_32bit_index_offset_pack64: 2583 assert(addr0->num_components == 1 && addr1->num_components == 1); 2584 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1)); 2585 2586 case nir_address_format_logical: 2587 unreachable("Unsupported address format"); 2588 } 2589 2590 unreachable("Invalid address format"); 2591 } 2592 2593 nir_ssa_def * 2594 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2595 nir_address_format addr_format) 2596 { 2597 switch (addr_format) { 2598 case nir_address_format_32bit_global: 2599 case nir_address_format_64bit_global: 2600 case nir_address_format_32bit_offset: 2601 case nir_address_format_32bit_index_offset_pack64: 2602 case nir_address_format_62bit_generic: 2603 assert(addr0->num_components == 1); 2604 assert(addr1->num_components == 1); 2605 return nir_isub(b, addr0, addr1); 2606 2607 case nir_address_format_32bit_offset_as_64bit: 2608 assert(addr0->num_components == 1); 2609 assert(addr1->num_components == 1); 2610 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1))); 2611 2612 case nir_address_format_64bit_global_32bit_offset: 2613 case
nir_address_format_64bit_bounded_global: 2614 return nir_isub(b, addr_to_global(b, addr0, addr_format), 2615 addr_to_global(b, addr1, addr_format)); 2616 2617 case nir_address_format_32bit_index_offset: 2618 assert(addr0->num_components == 2); 2619 assert(addr1->num_components == 2); 2620 /* Assume the same buffer index. */ 2621 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1)); 2622 2623 case nir_address_format_vec2_index_32bit_offset: 2624 assert(addr0->num_components == 3); 2625 assert(addr1->num_components == 3); 2626 /* Assume the same buffer index. */ 2627 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2)); 2628 2629 case nir_address_format_logical: 2630 unreachable("Unsupported address format"); 2631 } 2632 2633 unreachable("Invalid address format"); 2634} 2635 2636static bool 2637is_input(nir_intrinsic_instr *intrin) 2638{ 2639 return intrin->intrinsic == nir_intrinsic_load_input || 2640 intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 2641 intrin->intrinsic == nir_intrinsic_load_interpolated_input || 2642 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas; 2643} 2644 2645static bool 2646is_output(nir_intrinsic_instr *intrin) 2647{ 2648 return intrin->intrinsic == nir_intrinsic_load_output || 2649 intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 2650 intrin->intrinsic == nir_intrinsic_load_per_primitive_output || 2651 intrin->intrinsic == nir_intrinsic_store_output || 2652 intrin->intrinsic == nir_intrinsic_store_per_vertex_output || 2653 intrin->intrinsic == nir_intrinsic_store_per_primitive_output; 2654} 2655 2656static bool is_dual_slot(nir_intrinsic_instr *intrin) 2657{ 2658 if (intrin->intrinsic == nir_intrinsic_store_output || 2659 intrin->intrinsic == nir_intrinsic_store_per_vertex_output) { 2660 return nir_src_bit_size(intrin->src[0]) == 64 && 2661 nir_src_num_components(intrin->src[0]) >= 3; 2662 } 2663 2664 return nir_dest_bit_size(intrin->dest) == 64 && 2665 nir_dest_num_components(intrin->dest) >= 3; 2666} 2667 2668/** 2669 * This pass adds constant offsets to instr->const_index[0] for input/output 2670 * intrinsics, and resets the offset source to 0. Non-constant offsets remain 2671 * unchanged - since we don't know what part of a compound variable is 2672 * accessed, we allocate storage for the entire thing. For drivers that use 2673 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that 2674 * the offset source will be 0, so that they don't have to add it in manually. 2675 */ 2676 2677static bool 2678add_const_offset_to_base_block(nir_block *block, nir_builder *b, 2679 nir_variable_mode modes) 2680{ 2681 bool progress = false; 2682 nir_foreach_instr_safe(instr, block) { 2683 if (instr->type != nir_instr_type_intrinsic) 2684 continue; 2685 2686 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2687 2688 if (((modes & nir_var_shader_in) && is_input(intrin)) || 2689 ((modes & nir_var_shader_out) && is_output(intrin))) { 2690 nir_src *offset = nir_get_io_offset_src(intrin); 2691 2692 /* TODO: Better handling of per-view variables here */ 2693 if (nir_src_is_const(*offset) && 2694 !nir_intrinsic_io_semantics(intrin).per_view) { 2695 unsigned off = nir_src_as_uint(*offset); 2696 2697 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off); 2698 2699 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); 2700 sem.location += off; 2701 /* non-indirect indexing should reduce num_slots */ 2702 sem.num_slots = is_dual_slot(intrin) ? 
2 : 1; 2703 nir_intrinsic_set_io_semantics(intrin, sem); 2704 2705 b->cursor = nir_before_instr(&intrin->instr); 2706 nir_instr_rewrite_src(&intrin->instr, offset, 2707 nir_src_for_ssa(nir_imm_int(b, 0))); 2708 progress = true; 2709 } 2710 } 2711 } 2712 2713 return progress; 2714} 2715 2716bool 2717nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes) 2718{ 2719 bool progress = false; 2720 2721 nir_foreach_function(f, nir) { 2722 if (f->impl) { 2723 nir_builder b; 2724 nir_builder_init(&b, f->impl); 2725 nir_foreach_block(block, f->impl) { 2726 progress |= add_const_offset_to_base_block(block, &b, modes); 2727 } 2728 } 2729 } 2730 2731 return progress; 2732} 2733 2734
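
/* Illustrative note (not part of the original pass code): after
 * nir_io_add_const_offset_to_base(), a load_input with base=1 and a constant
 * offset source of 2 ends up with base=3, io_semantics.location bumped by 2,
 * and the offset source rewritten to 0.  A driver would typically run it
 * right after nir_lower_io(); type_size_vec4 below stands in for whatever
 * type-size callback the driver uses:
 *
 *    NIR_PASS_V(shader, nir_lower_io,
 *               nir_var_shader_in | nir_var_shader_out,
 *               type_size_vec4, (nir_lower_io_options)0);
 *    NIR_PASS_V(shader, nir_io_add_const_offset_to_base,
 *               nir_var_shader_in | nir_var_shader_out);
 */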