1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "anv_nir.h" 25#include "program/prog_parameter.h" 26#include "nir/nir_builder.h" 27#include "compiler/brw_nir.h" 28#include "util/mesa-sha1.h" 29#include "util/set.h" 30 31/* Sampler tables don't actually have a maximum size but we pick one just so 32 * that we don't end up emitting too much state on-the-fly. 33 */ 34#define MAX_SAMPLER_TABLE_SIZE 128 35#define BINDLESS_OFFSET 255 36 37struct apply_pipeline_layout_state { 38 const struct anv_physical_device *pdevice; 39 40 const struct anv_pipeline_layout *layout; 41 bool add_bounds_checks; 42 nir_address_format desc_addr_format; 43 nir_address_format ssbo_addr_format; 44 nir_address_format ubo_addr_format; 45 46 /* Place to flag lowered instructions so we don't lower them twice */ 47 struct set *lowered_instrs; 48 49 bool uses_constants; 50 bool has_dynamic_buffers; 51 uint8_t constants_offset; 52 struct { 53 bool desc_buffer_used; 54 uint8_t desc_offset; 55 56 uint8_t *use_count; 57 uint8_t *surface_offsets; 58 uint8_t *sampler_offsets; 59 } set[MAX_SETS]; 60}; 61 62static nir_address_format 63addr_format_for_desc_type(VkDescriptorType desc_type, 64 struct apply_pipeline_layout_state *state) 65{ 66 switch (desc_type) { 67 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 68 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 69 return state->ssbo_addr_format; 70 71 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 72 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 73 return state->ubo_addr_format; 74 75 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: 76 return state->desc_addr_format; 77 78 default: 79 unreachable("Unsupported descriptor type"); 80 } 81} 82 83static void 84add_binding(struct apply_pipeline_layout_state *state, 85 uint32_t set, uint32_t binding) 86{ 87 const struct anv_descriptor_set_binding_layout *bind_layout = 88 &state->layout->set[set].layout->binding[binding]; 89 90 if (state->set[set].use_count[binding] < UINT8_MAX) 91 state->set[set].use_count[binding]++; 92 93 /* Only flag the descriptor buffer as used if there's actually data for 94 * this binding. This lets us be lazy and call this function constantly 95 * without worrying about unnecessarily enabling the buffer. 96 */ 97 if (anv_descriptor_size(bind_layout)) 98 state->set[set].desc_buffer_used = true; 99} 100 101static void 102add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src) 103{ 104 nir_deref_instr *deref = nir_src_as_deref(src); 105 nir_variable *var = nir_deref_instr_get_variable(deref); 106 add_binding(state, var->data.descriptor_set, var->data.binding); 107} 108 109static void 110add_tex_src_binding(struct apply_pipeline_layout_state *state, 111 nir_tex_instr *tex, nir_tex_src_type deref_src_type) 112{ 113 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type); 114 if (deref_src_idx < 0) 115 return; 116 117 add_deref_src_binding(state, tex->src[deref_src_idx].src); 118} 119 120static bool 121get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state) 122{ 123 struct apply_pipeline_layout_state *state = _state; 124 125 switch (instr->type) { 126 case nir_instr_type_intrinsic: { 127 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 128 switch (intrin->intrinsic) { 129 case nir_intrinsic_vulkan_resource_index: 130 add_binding(state, nir_intrinsic_desc_set(intrin), 131 nir_intrinsic_binding(intrin)); 132 break; 133 134 case nir_intrinsic_image_deref_load: 135 case nir_intrinsic_image_deref_store: 136 case nir_intrinsic_image_deref_atomic_add: 137 case nir_intrinsic_image_deref_atomic_imin: 138 case nir_intrinsic_image_deref_atomic_umin: 139 case nir_intrinsic_image_deref_atomic_imax: 140 case nir_intrinsic_image_deref_atomic_umax: 141 case nir_intrinsic_image_deref_atomic_and: 142 case nir_intrinsic_image_deref_atomic_or: 143 case nir_intrinsic_image_deref_atomic_xor: 144 case nir_intrinsic_image_deref_atomic_exchange: 145 case nir_intrinsic_image_deref_atomic_comp_swap: 146 case nir_intrinsic_image_deref_atomic_fadd: 147 case nir_intrinsic_image_deref_size: 148 case nir_intrinsic_image_deref_samples: 149 case nir_intrinsic_image_deref_load_param_intel: 150 case nir_intrinsic_image_deref_load_raw_intel: 151 case nir_intrinsic_image_deref_store_raw_intel: 152 add_deref_src_binding(state, intrin->src[0]); 153 break; 154 155 case nir_intrinsic_load_constant: 156 state->uses_constants = true; 157 break; 158 159 default: 160 break; 161 } 162 break; 163 } 164 case nir_instr_type_tex: { 165 nir_tex_instr *tex = nir_instr_as_tex(instr); 166 add_tex_src_binding(state, tex, nir_tex_src_texture_deref); 167 add_tex_src_binding(state, tex, nir_tex_src_sampler_deref); 168 break; 169 } 170 default: 171 break; 172 } 173 174 return false; 175} 176 177static nir_intrinsic_instr * 178find_descriptor_for_index_src(nir_src src, 179 struct apply_pipeline_layout_state *state) 180{ 181 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src); 182 183 while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) 184 intrin = nir_src_as_intrinsic(intrin->src[0]); 185 186 if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index) 187 return NULL; 188 189 return intrin; 190} 191 192static bool 193descriptor_has_bti(nir_intrinsic_instr *intrin, 194 struct apply_pipeline_layout_state *state) 195{ 196 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index); 197 198 uint32_t set = nir_intrinsic_desc_set(intrin); 199 uint32_t binding = nir_intrinsic_binding(intrin); 200 const struct anv_descriptor_set_binding_layout *bind_layout = 201 &state->layout->set[set].layout->binding[binding]; 202 203 uint32_t surface_index; 204 if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) 205 surface_index = state->set[set].desc_offset; 206 else 207 surface_index = state->set[set].surface_offsets[binding]; 208 209 /* Only lower to a BTI message if we have a valid binding table index. */ 210 return surface_index < MAX_BINDING_TABLE_SIZE; 211} 212 213static nir_address_format 214descriptor_address_format(nir_intrinsic_instr *intrin, 215 struct apply_pipeline_layout_state *state) 216{ 217 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index); 218 219 uint32_t set = nir_intrinsic_desc_set(intrin); 220 uint32_t binding = nir_intrinsic_binding(intrin); 221 const struct anv_descriptor_set_binding_layout *bind_layout = 222 &state->layout->set[set].layout->binding[binding]; 223 224 return addr_format_for_desc_type(bind_layout->type, state); 225} 226 227static nir_intrinsic_instr * 228nir_deref_find_descriptor(nir_deref_instr *deref, 229 struct apply_pipeline_layout_state *state) 230{ 231 while (1) { 232 /* Nothing we will use this on has a variable */ 233 assert(deref->deref_type != nir_deref_type_var); 234 235 nir_deref_instr *parent = nir_src_as_deref(deref->parent); 236 if (!parent) 237 break; 238 239 deref = parent; 240 } 241 assert(deref->deref_type == nir_deref_type_cast); 242 243 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent); 244 if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor) 245 return false; 246 247 return find_descriptor_for_index_src(intrin->src[0], state); 248} 249 250static nir_ssa_def * 251build_load_descriptor_mem(nir_builder *b, 252 nir_ssa_def *desc_addr, unsigned desc_offset, 253 unsigned num_components, unsigned bit_size, 254 struct apply_pipeline_layout_state *state) 255 256{ 257 switch (state->desc_addr_format) { 258 case nir_address_format_64bit_global_32bit_offset: { 259 nir_ssa_def *base_addr = 260 nir_pack_64_2x32(b, nir_channels(b, desc_addr, 0x3)); 261 nir_ssa_def *offset32 = 262 nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset); 263 264 return nir_load_global_constant_offset(b, num_components, bit_size, 265 base_addr, offset32, 266 .align_mul = 8, 267 .align_offset = desc_offset % 8); 268 } 269 270 case nir_address_format_32bit_index_offset: { 271 nir_ssa_def *surface_index = nir_channel(b, desc_addr, 0); 272 nir_ssa_def *offset32 = 273 nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset); 274 275 return nir_load_ubo(b, num_components, bit_size, 276 surface_index, offset32, 277 .align_mul = 8, 278 .align_offset = desc_offset % 8, 279 .range_base = 0, 280 .range = ~0); 281 } 282 283 default: 284 unreachable("Unsupported address format"); 285 } 286} 287 288/** Build a Vulkan resource index 289 * 290 * A "resource index" is the term used by our SPIR-V parser and the relevant 291 * NIR intrinsics for a reference into a descriptor set. It acts much like a 292 * deref in NIR except that it accesses opaque descriptors instead of memory. 293 * 294 * Coming out of SPIR-V, both the resource indices (in the form of 295 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form 296 * of nir_deref_instr) use the same vector component/bit size. The meaning 297 * of those values for memory derefs (nir_deref_instr) is given by the 298 * nir_address_format associated with the descriptor type. For resource 299 * indices, it's an entirely internal to ANV encoding which describes, in some 300 * sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules, it 301 * must be packed into the same size SSA values as a memory address. For this 302 * reason, the actual encoding may depend both on the address format for 303 * memory derefs and the descriptor address format. 304 * 305 * The load_vulkan_descriptor intrinsic exists to provide a transition point 306 * between these two forms of derefs: descriptor and memory. 307 */ 308static nir_ssa_def * 309build_res_index(nir_builder *b, uint32_t set, uint32_t binding, 310 nir_ssa_def *array_index, nir_address_format addr_format, 311 struct apply_pipeline_layout_state *state) 312{ 313 const struct anv_descriptor_set_binding_layout *bind_layout = 314 &state->layout->set[set].layout->binding[binding]; 315 316 uint32_t array_size = bind_layout->array_size; 317 318 switch (addr_format) { 319 case nir_address_format_64bit_global_32bit_offset: 320 case nir_address_format_64bit_bounded_global: { 321 uint32_t set_idx; 322 switch (state->desc_addr_format) { 323 case nir_address_format_64bit_global_32bit_offset: 324 set_idx = set; 325 break; 326 327 case nir_address_format_32bit_index_offset: 328 assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE); 329 set_idx = state->set[set].desc_offset; 330 break; 331 332 default: 333 unreachable("Unsupported address format"); 334 } 335 336 assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS); 337 uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */ 338 if (bind_layout->dynamic_offset_index >= 0) { 339 dynamic_offset_index = 340 state->layout->set[set].dynamic_offset_start + 341 bind_layout->dynamic_offset_index; 342 } 343 344 const uint32_t packed = (set_idx << 16) | dynamic_offset_index; 345 346 return nir_vec4(b, nir_imm_int(b, packed), 347 nir_imm_int(b, bind_layout->descriptor_offset), 348 nir_imm_int(b, array_size - 1), 349 array_index); 350 } 351 352 case nir_address_format_32bit_index_offset: { 353 assert(state->desc_addr_format == nir_address_format_32bit_index_offset); 354 if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { 355 uint32_t surface_index = state->set[set].desc_offset; 356 return nir_imm_ivec2(b, surface_index, 357 bind_layout->descriptor_offset); 358 } else { 359 uint32_t surface_index = state->set[set].surface_offsets[binding]; 360 assert(array_size > 0 && array_size <= UINT16_MAX); 361 assert(surface_index <= UINT16_MAX); 362 uint32_t packed = ((array_size - 1) << 16) | surface_index; 363 return nir_vec2(b, array_index, nir_imm_int(b, packed)); 364 } 365 } 366 367 default: 368 unreachable("Unsupported address format"); 369 } 370} 371 372struct res_index_defs { 373 nir_ssa_def *set_idx; 374 nir_ssa_def *dyn_offset_base; 375 nir_ssa_def *desc_offset_base; 376 nir_ssa_def *array_index; 377}; 378 379static struct res_index_defs 380unpack_res_index(nir_builder *b, nir_ssa_def *index) 381{ 382 struct res_index_defs defs; 383 384 nir_ssa_def *packed = nir_channel(b, index, 0); 385 defs.set_idx = nir_extract_u16(b, packed, nir_imm_int(b, 1)); 386 defs.dyn_offset_base = nir_extract_u16(b, packed, nir_imm_int(b, 0)); 387 388 defs.desc_offset_base = nir_channel(b, index, 1); 389 defs.array_index = nir_umin(b, nir_channel(b, index, 2), 390 nir_channel(b, index, 3)); 391 392 return defs; 393} 394 395/** Adjust a Vulkan resource index 396 * 397 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices. 398 * For array descriptors, it allows us to adjust the array index. Thanks to 399 * variable pointers, we cannot always fold this re-index operation into the 400 * vulkan_resource_index intrinsic and we have to do it based on nothing but 401 * the address format. 402 */ 403static nir_ssa_def * 404build_res_reindex(nir_builder *b, nir_ssa_def *orig, nir_ssa_def *delta, 405 nir_address_format addr_format) 406{ 407 switch (addr_format) { 408 case nir_address_format_64bit_global_32bit_offset: 409 case nir_address_format_64bit_bounded_global: 410 return nir_vec4(b, nir_channel(b, orig, 0), 411 nir_channel(b, orig, 1), 412 nir_channel(b, orig, 2), 413 nir_iadd(b, nir_channel(b, orig, 3), delta)); 414 415 case nir_address_format_32bit_index_offset: 416 return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta), 417 nir_channel(b, orig, 1)); 418 419 default: 420 unreachable("Unhandled address format"); 421 } 422} 423 424/** Get the address for a descriptor given its resource index 425 * 426 * Because of the re-indexing operations, we can't bounds check descriptor 427 * array access until we have the final index. That means we end up doing the 428 * bounds check here, if needed. See unpack_res_index() for more details. 429 * 430 * This function takes both a bind_layout and a desc_type which are used to 431 * determine the descriptor stride for array descriptors. The bind_layout is 432 * optional for buffer descriptor types. 433 */ 434static nir_ssa_def * 435build_desc_addr(nir_builder *b, 436 const struct anv_descriptor_set_binding_layout *bind_layout, 437 const VkDescriptorType desc_type, 438 nir_ssa_def *index, nir_address_format addr_format, 439 struct apply_pipeline_layout_state *state) 440{ 441 switch (addr_format) { 442 case nir_address_format_64bit_global_32bit_offset: 443 case nir_address_format_64bit_bounded_global: { 444 struct res_index_defs res = unpack_res_index(b, index); 445 446 nir_ssa_def *desc_offset = res.desc_offset_base; 447 if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { 448 /* Compute the actual descriptor offset. For inline uniform blocks, 449 * the array index is ignored as they are only allowed to be a single 450 * descriptor (not an array) and there is no concept of a "stride". 451 * 452 * We use the bind_layout, if available, because it provides a more 453 * accurate descriptor size. 454 */ 455 const unsigned stride = bind_layout ? 456 anv_descriptor_size(bind_layout) : 457 anv_descriptor_type_size(state->pdevice, desc_type); 458 459 desc_offset = 460 nir_iadd(b, desc_offset, nir_imul_imm(b, res.array_index, stride)); 461 } 462 463 switch (state->desc_addr_format) { 464 case nir_address_format_64bit_global_32bit_offset: { 465 nir_ssa_def *base_addr = 466 nir_load_desc_set_address_intel(b, res.set_idx); 467 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr), 468 nir_unpack_64_2x32_split_y(b, base_addr), 469 nir_imm_int(b, UINT32_MAX), 470 desc_offset); 471 } 472 473 case nir_address_format_32bit_index_offset: 474 return nir_vec2(b, res.set_idx, desc_offset); 475 476 default: 477 unreachable("Unhandled address format"); 478 } 479 } 480 481 case nir_address_format_32bit_index_offset: 482 assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT); 483 assert(state->desc_addr_format == nir_address_format_32bit_index_offset); 484 return index; 485 486 default: 487 unreachable("Unhandled address format"); 488 } 489} 490 491/** Convert a Vulkan resource index into a buffer address 492 * 493 * In some cases, this does a memory load from the descriptor set and, in 494 * others, it simply converts from one form to another. 495 * 496 * See build_res_index for details about each resource index format. 497 */ 498static nir_ssa_def * 499build_buffer_addr_for_res_index(nir_builder *b, 500 const VkDescriptorType desc_type, 501 nir_ssa_def *res_index, 502 nir_address_format addr_format, 503 struct apply_pipeline_layout_state *state) 504{ 505 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { 506 assert(addr_format == state->desc_addr_format); 507 return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state); 508 } else if (addr_format == nir_address_format_32bit_index_offset) { 509 nir_ssa_def *array_index = nir_channel(b, res_index, 0); 510 nir_ssa_def *packed = nir_channel(b, res_index, 1); 511 nir_ssa_def *array_max = nir_extract_u16(b, packed, nir_imm_int(b, 1)); 512 nir_ssa_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0)); 513 514 if (state->add_bounds_checks) 515 array_index = nir_umin(b, array_index, array_max); 516 517 return nir_vec2(b, nir_iadd(b, surface_index, array_index), 518 nir_imm_int(b, 0)); 519 } 520 521 nir_ssa_def *desc_addr = 522 build_desc_addr(b, NULL, desc_type, res_index, addr_format, state); 523 524 nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state); 525 526 if (state->has_dynamic_buffers) { 527 struct res_index_defs res = unpack_res_index(b, res_index); 528 529 /* This shader has dynamic offsets and we have no way of knowing 530 * (save from the dynamic offset base index) if this buffer has a 531 * dynamic offset. 532 */ 533 nir_ssa_def *dyn_offset_idx = 534 nir_iadd(b, res.dyn_offset_base, res.array_index); 535 if (state->add_bounds_checks) { 536 dyn_offset_idx = nir_umin(b, dyn_offset_idx, 537 nir_imm_int(b, MAX_DYNAMIC_BUFFERS)); 538 } 539 540 nir_ssa_def *dyn_load = 541 nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4), 542 .base = offsetof(struct anv_push_constants, dynamic_offsets), 543 .range = MAX_DYNAMIC_BUFFERS * 4); 544 545 nir_ssa_def *dynamic_offset = 546 nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff), 547 nir_imm_int(b, 0), dyn_load); 548 549 /* The dynamic offset gets added to the base pointer so that we 550 * have a sliding window range. 551 */ 552 nir_ssa_def *base_ptr = 553 nir_pack_64_2x32(b, nir_channels(b, desc, 0x3)); 554 base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset)); 555 desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr), 556 nir_unpack_64_2x32_split_y(b, base_ptr), 557 nir_channel(b, desc, 2), 558 nir_channel(b, desc, 3)); 559 } 560 561 /* The last element of the vec4 is always zero. 562 * 563 * See also struct anv_address_range_descriptor 564 */ 565 return nir_vec4(b, nir_channel(b, desc, 0), 566 nir_channel(b, desc, 1), 567 nir_channel(b, desc, 2), 568 nir_imm_int(b, 0)); 569} 570 571/** Loads descriptor memory for a variable-based deref chain 572 * 573 * The deref chain has to terminate at a variable with a descriptor_set and 574 * binding set. This is used for images, textures, and samplers. 575 */ 576static nir_ssa_def * 577build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref, 578 unsigned desc_offset, 579 unsigned num_components, unsigned bit_size, 580 struct apply_pipeline_layout_state *state) 581{ 582 nir_variable *var = nir_deref_instr_get_variable(deref); 583 584 const uint32_t set = var->data.descriptor_set; 585 const uint32_t binding = var->data.binding; 586 const struct anv_descriptor_set_binding_layout *bind_layout = 587 &state->layout->set[set].layout->binding[binding]; 588 589 nir_ssa_def *array_index; 590 if (deref->deref_type != nir_deref_type_var) { 591 assert(deref->deref_type == nir_deref_type_array); 592 assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var); 593 assert(deref->arr.index.is_ssa); 594 array_index = deref->arr.index.ssa; 595 } else { 596 array_index = nir_imm_int(b, 0); 597 } 598 599 /* It doesn't really matter what address format we choose as everything 600 * will constant-fold nicely. Choose one that uses the actual descriptor 601 * buffer so we don't run into issues index/offset assumptions. 602 */ 603 const nir_address_format addr_format = 604 nir_address_format_64bit_bounded_global; 605 606 nir_ssa_def *res_index = 607 build_res_index(b, set, binding, array_index, addr_format, state); 608 609 nir_ssa_def *desc_addr = 610 build_desc_addr(b, bind_layout, bind_layout->type, 611 res_index, addr_format, state); 612 613 return build_load_descriptor_mem(b, desc_addr, desc_offset, 614 num_components, bit_size, state); 615} 616 617/** A recursive form of build_res_index() 618 * 619 * This recursively walks a resource [re]index chain and builds the resource 620 * index. It places the new code with the resource [re]index operation in the 621 * hopes of better CSE. This means the cursor is not where you left it when 622 * this function returns. 623 */ 624static nir_ssa_def * 625build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin, 626 nir_address_format addr_format, 627 uint32_t *set, uint32_t *binding, 628 struct apply_pipeline_layout_state *state) 629{ 630 if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { 631 b->cursor = nir_before_instr(&intrin->instr); 632 assert(intrin->src[0].is_ssa); 633 *set = nir_intrinsic_desc_set(intrin); 634 *binding = nir_intrinsic_binding(intrin); 635 return build_res_index(b, *set, *binding, intrin->src[0].ssa, 636 addr_format, state); 637 } else { 638 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex); 639 nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]); 640 nir_ssa_def *index = 641 build_res_index_for_chain(b, parent, addr_format, 642 set, binding, state); 643 644 b->cursor = nir_before_instr(&intrin->instr); 645 646 assert(intrin->src[1].is_ssa); 647 return build_res_reindex(b, index, intrin->src[1].ssa, addr_format); 648 } 649} 650 651/** Builds a buffer address for a given vulkan [re]index intrinsic 652 * 653 * The cursor is not where you left it when this function returns. 654 */ 655static nir_ssa_def * 656build_buffer_addr_for_idx_intrin(nir_builder *b, 657 nir_intrinsic_instr *idx_intrin, 658 nir_address_format addr_format, 659 struct apply_pipeline_layout_state *state) 660{ 661 uint32_t set = UINT32_MAX, binding = UINT32_MAX; 662 nir_ssa_def *res_index = 663 build_res_index_for_chain(b, idx_intrin, addr_format, 664 &set, &binding, state); 665 666 const struct anv_descriptor_set_binding_layout *bind_layout = 667 &state->layout->set[set].layout->binding[binding]; 668 669 return build_buffer_addr_for_res_index(b, bind_layout->type, 670 res_index, addr_format, state); 671} 672 673/** Builds a buffer address for deref chain 674 * 675 * This assumes that you can chase the chain all the way back to the original 676 * vulkan_resource_index intrinsic. 677 * 678 * The cursor is not where you left it when this function returns. 679 */ 680static nir_ssa_def * 681build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref, 682 nir_address_format addr_format, 683 struct apply_pipeline_layout_state *state) 684{ 685 nir_deref_instr *parent = nir_deref_instr_parent(deref); 686 if (parent) { 687 nir_ssa_def *addr = 688 build_buffer_addr_for_deref(b, parent, addr_format, state); 689 690 b->cursor = nir_before_instr(&deref->instr); 691 return nir_explicit_io_address_from_deref(b, deref, addr, addr_format); 692 } 693 694 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent); 695 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor); 696 697 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]); 698 699 b->cursor = nir_before_instr(&deref->instr); 700 701 return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state); 702} 703 704static bool 705try_lower_direct_buffer_intrinsic(nir_builder *b, 706 nir_intrinsic_instr *intrin, bool is_atomic, 707 struct apply_pipeline_layout_state *state) 708{ 709 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 710 if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo)) 711 return false; 712 713 nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state); 714 if (desc == NULL) { 715 /* We should always be able to find the descriptor for UBO access. */ 716 assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo)); 717 return false; 718 } 719 720 nir_address_format addr_format = descriptor_address_format(desc, state); 721 722 if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) { 723 /* 64-bit atomics only support A64 messages so we can't lower them to 724 * the index+offset model. 725 */ 726 if (is_atomic && nir_dest_bit_size(intrin->dest) == 64 && 727 !state->pdevice->info.has_lsc) 728 return false; 729 730 /* Normal binding table-based messages can't handle non-uniform access 731 * so we have to fall back to A64. 732 */ 733 if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) 734 return false; 735 736 if (!descriptor_has_bti(desc, state)) 737 return false; 738 739 /* Rewrite to 32bit_index_offset whenever we can */ 740 addr_format = nir_address_format_32bit_index_offset; 741 } else { 742 assert(nir_deref_mode_is(deref, nir_var_mem_ubo)); 743 744 /* Rewrite to 32bit_index_offset whenever we can */ 745 if (descriptor_has_bti(desc, state)) 746 addr_format = nir_address_format_32bit_index_offset; 747 } 748 749 nir_ssa_def *addr = 750 build_buffer_addr_for_deref(b, deref, addr_format, state); 751 752 b->cursor = nir_before_instr(&intrin->instr); 753 nir_lower_explicit_io_instr(b, intrin, addr, addr_format); 754 755 return true; 756} 757 758static bool 759lower_load_accel_struct_desc(nir_builder *b, 760 nir_intrinsic_instr *load_desc, 761 struct apply_pipeline_layout_state *state) 762{ 763 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor); 764 765 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]); 766 767 /* It doesn't really matter what address format we choose as 768 * everything will constant-fold nicely. Choose one that uses the 769 * actual descriptor buffer. 770 */ 771 const nir_address_format addr_format = 772 nir_address_format_64bit_bounded_global; 773 774 uint32_t set = UINT32_MAX, binding = UINT32_MAX; 775 nir_ssa_def *res_index = 776 build_res_index_for_chain(b, idx_intrin, addr_format, 777 &set, &binding, state); 778 779 const struct anv_descriptor_set_binding_layout *bind_layout = 780 &state->layout->set[set].layout->binding[binding]; 781 782 b->cursor = nir_before_instr(&load_desc->instr); 783 784 nir_ssa_def *desc_addr = 785 build_desc_addr(b, bind_layout, bind_layout->type, 786 res_index, addr_format, state); 787 788 /* Acceleration structure descriptors are always uint64_t */ 789 nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state); 790 791 assert(load_desc->dest.is_ssa); 792 assert(load_desc->dest.ssa.bit_size == 64); 793 assert(load_desc->dest.ssa.num_components == 1); 794 nir_ssa_def_rewrite_uses(&load_desc->dest.ssa, desc); 795 nir_instr_remove(&load_desc->instr); 796 797 return true; 798} 799 800static bool 801lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state) 802{ 803 struct apply_pipeline_layout_state *state = _state; 804 805 if (instr->type != nir_instr_type_intrinsic) 806 return false; 807 808 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 809 switch (intrin->intrinsic) { 810 case nir_intrinsic_load_deref: 811 case nir_intrinsic_store_deref: 812 return try_lower_direct_buffer_intrinsic(b, intrin, false, state); 813 814 case nir_intrinsic_deref_atomic_add: 815 case nir_intrinsic_deref_atomic_imin: 816 case nir_intrinsic_deref_atomic_umin: 817 case nir_intrinsic_deref_atomic_imax: 818 case nir_intrinsic_deref_atomic_umax: 819 case nir_intrinsic_deref_atomic_and: 820 case nir_intrinsic_deref_atomic_or: 821 case nir_intrinsic_deref_atomic_xor: 822 case nir_intrinsic_deref_atomic_exchange: 823 case nir_intrinsic_deref_atomic_comp_swap: 824 case nir_intrinsic_deref_atomic_fadd: 825 case nir_intrinsic_deref_atomic_fmin: 826 case nir_intrinsic_deref_atomic_fmax: 827 case nir_intrinsic_deref_atomic_fcomp_swap: 828 return try_lower_direct_buffer_intrinsic(b, intrin, true, state); 829 830 case nir_intrinsic_get_ssbo_size: { 831 /* The get_ssbo_size intrinsic always just takes a 832 * index/reindex intrinsic. 833 */ 834 nir_intrinsic_instr *idx_intrin = 835 find_descriptor_for_index_src(intrin->src[0], state); 836 if (idx_intrin == NULL || !descriptor_has_bti(idx_intrin, state)) 837 return false; 838 839 b->cursor = nir_before_instr(&intrin->instr); 840 841 /* We just checked that this is a BTI descriptor */ 842 const nir_address_format addr_format = 843 nir_address_format_32bit_index_offset; 844 845 nir_ssa_def *buffer_addr = 846 build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state); 847 848 b->cursor = nir_before_instr(&intrin->instr); 849 nir_ssa_def *bti = nir_channel(b, buffer_addr, 0); 850 851 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], 852 nir_src_for_ssa(bti)); 853 _mesa_set_add(state->lowered_instrs, intrin); 854 return true; 855 } 856 857 case nir_intrinsic_load_vulkan_descriptor: 858 if (nir_intrinsic_desc_type(intrin) == 859 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) 860 return lower_load_accel_struct_desc(b, intrin, state); 861 return false; 862 863 default: 864 return false; 865 } 866} 867 868static bool 869lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, 870 struct apply_pipeline_layout_state *state) 871{ 872 b->cursor = nir_before_instr(&intrin->instr); 873 874 nir_address_format addr_format = 875 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state); 876 877 assert(intrin->src[0].is_ssa); 878 nir_ssa_def *index = 879 build_res_index(b, nir_intrinsic_desc_set(intrin), 880 nir_intrinsic_binding(intrin), 881 intrin->src[0].ssa, 882 addr_format, state); 883 884 assert(intrin->dest.is_ssa); 885 assert(intrin->dest.ssa.bit_size == index->bit_size); 886 assert(intrin->dest.ssa.num_components == index->num_components); 887 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index); 888 nir_instr_remove(&intrin->instr); 889 890 return true; 891} 892 893static bool 894lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, 895 struct apply_pipeline_layout_state *state) 896{ 897 b->cursor = nir_before_instr(&intrin->instr); 898 899 nir_address_format addr_format = 900 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state); 901 902 assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa); 903 nir_ssa_def *index = 904 build_res_reindex(b, intrin->src[0].ssa, 905 intrin->src[1].ssa, 906 addr_format); 907 908 assert(intrin->dest.is_ssa); 909 assert(intrin->dest.ssa.bit_size == index->bit_size); 910 assert(intrin->dest.ssa.num_components == index->num_components); 911 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index); 912 nir_instr_remove(&intrin->instr); 913 914 return true; 915} 916 917static bool 918lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin, 919 struct apply_pipeline_layout_state *state) 920{ 921 b->cursor = nir_before_instr(&intrin->instr); 922 923 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); 924 nir_address_format addr_format = addr_format_for_desc_type(desc_type, state); 925 926 assert(intrin->dest.is_ssa); 927 nir_foreach_use(src, &intrin->dest.ssa) { 928 if (src->parent_instr->type != nir_instr_type_deref) 929 continue; 930 931 nir_deref_instr *cast = nir_instr_as_deref(src->parent_instr); 932 assert(cast->deref_type == nir_deref_type_cast); 933 switch (desc_type) { 934 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 935 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 936 cast->cast.align_mul = ANV_UBO_ALIGNMENT; 937 cast->cast.align_offset = 0; 938 break; 939 940 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 941 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 942 cast->cast.align_mul = ANV_SSBO_ALIGNMENT; 943 cast->cast.align_offset = 0; 944 break; 945 946 default: 947 break; 948 } 949 } 950 951 assert(intrin->src[0].is_ssa); 952 nir_ssa_def *desc = 953 build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa, 954 addr_format, state); 955 956 assert(intrin->dest.is_ssa); 957 assert(intrin->dest.ssa.bit_size == desc->bit_size); 958 assert(intrin->dest.ssa.num_components == desc->num_components); 959 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc); 960 nir_instr_remove(&intrin->instr); 961 962 return true; 963} 964 965static bool 966lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin, 967 struct apply_pipeline_layout_state *state) 968{ 969 if (_mesa_set_search(state->lowered_instrs, intrin)) 970 return false; 971 972 b->cursor = nir_before_instr(&intrin->instr); 973 974 nir_address_format addr_format = 975 addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state); 976 977 assert(intrin->src[0].is_ssa); 978 nir_ssa_def *desc = 979 build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 980 intrin->src[0].ssa, addr_format, state); 981 982 switch (addr_format) { 983 case nir_address_format_64bit_global_32bit_offset: 984 case nir_address_format_64bit_bounded_global: { 985 nir_ssa_def *size = nir_channel(b, desc, 2); 986 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size); 987 nir_instr_remove(&intrin->instr); 988 break; 989 } 990 991 case nir_address_format_32bit_index_offset: 992 /* The binding table index is the first component of the address. The 993 * back-end wants a scalar binding table index source. 994 */ 995 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], 996 nir_src_for_ssa(nir_channel(b, desc, 0))); 997 break; 998 999 default: 1000 unreachable("Unsupported address format"); 1001 } 1002 1003 return true; 1004} 1005 1006static bool 1007image_binding_needs_lowered_surface(nir_variable *var) 1008{ 1009 return !(var->data.access & ACCESS_NON_READABLE) && 1010 var->data.image.format != PIPE_FORMAT_NONE; 1011} 1012 1013static bool 1014lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, 1015 struct apply_pipeline_layout_state *state) 1016{ 1017 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1018 nir_variable *var = nir_deref_instr_get_variable(deref); 1019 1020 unsigned set = var->data.descriptor_set; 1021 unsigned binding = var->data.binding; 1022 unsigned binding_offset = state->set[set].surface_offsets[binding]; 1023 1024 b->cursor = nir_before_instr(&intrin->instr); 1025 1026 ASSERTED const bool use_bindless = state->pdevice->has_bindless_images; 1027 1028 if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) { 1029 b->cursor = nir_instr_remove(&intrin->instr); 1030 1031 assert(!use_bindless); /* Otherwise our offsets would be wrong */ 1032 const unsigned param = nir_intrinsic_base(intrin); 1033 1034 nir_ssa_def *desc = 1035 build_load_var_deref_descriptor_mem(b, deref, param * 16, 1036 intrin->dest.ssa.num_components, 1037 intrin->dest.ssa.bit_size, state); 1038 1039 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc); 1040 } else if (binding_offset > MAX_BINDING_TABLE_SIZE) { 1041 const unsigned desc_comp = 1042 image_binding_needs_lowered_surface(var) ? 1 : 0; 1043 nir_ssa_def *desc = 1044 build_load_var_deref_descriptor_mem(b, deref, 0, 2, 32, state); 1045 nir_ssa_def *handle = nir_channel(b, desc, desc_comp); 1046 nir_rewrite_image_intrinsic(intrin, handle, true); 1047 } else { 1048 unsigned array_size = 1049 state->layout->set[set].layout->binding[binding].array_size; 1050 1051 nir_ssa_def *index = NULL; 1052 if (deref->deref_type != nir_deref_type_var) { 1053 assert(deref->deref_type == nir_deref_type_array); 1054 index = nir_ssa_for_src(b, deref->arr.index, 1); 1055 if (state->add_bounds_checks) 1056 index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); 1057 } else { 1058 index = nir_imm_int(b, 0); 1059 } 1060 1061 index = nir_iadd_imm(b, index, binding_offset); 1062 nir_rewrite_image_intrinsic(intrin, index, false); 1063 } 1064 1065 return true; 1066} 1067 1068static bool 1069lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin, 1070 struct apply_pipeline_layout_state *state) 1071{ 1072 b->cursor = nir_instr_remove(&intrin->instr); 1073 1074 /* Any constant-offset load_constant instructions should have been removed 1075 * by constant folding. 1076 */ 1077 assert(!nir_src_is_const(intrin->src[0])); 1078 nir_ssa_def *offset = nir_iadd_imm(b, nir_ssa_for_src(b, intrin->src[0], 1), 1079 nir_intrinsic_base(intrin)); 1080 1081 nir_ssa_def *data; 1082 if (state->pdevice->use_softpin) { 1083 unsigned load_size = intrin->dest.ssa.num_components * 1084 intrin->dest.ssa.bit_size / 8; 1085 unsigned load_align = intrin->dest.ssa.bit_size / 8; 1086 1087 assert(load_size < b->shader->constant_data_size); 1088 unsigned max_offset = b->shader->constant_data_size - load_size; 1089 offset = nir_umin(b, offset, nir_imm_int(b, max_offset)); 1090 1091 nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(b, 1092 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), 1093 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH)); 1094 1095 data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr, 1096 nir_u2u64(b, offset)), 1097 load_align, 1098 intrin->dest.ssa.num_components, 1099 intrin->dest.ssa.bit_size); 1100 } else { 1101 nir_ssa_def *index = nir_imm_int(b, state->constants_offset); 1102 1103 data = nir_load_ubo(b, intrin->num_components, intrin->dest.ssa.bit_size, 1104 index, offset, 1105 .align_mul = intrin->dest.ssa.bit_size / 8, 1106 .align_offset = 0, 1107 .range_base = nir_intrinsic_base(intrin), 1108 .range = nir_intrinsic_range(intrin)); 1109 } 1110 1111 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, data); 1112 1113 return true; 1114} 1115 1116static void 1117lower_tex_deref(nir_builder *b, nir_tex_instr *tex, 1118 nir_tex_src_type deref_src_type, 1119 unsigned *base_index, unsigned plane, 1120 struct apply_pipeline_layout_state *state) 1121{ 1122 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type); 1123 if (deref_src_idx < 0) 1124 return; 1125 1126 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); 1127 nir_variable *var = nir_deref_instr_get_variable(deref); 1128 1129 unsigned set = var->data.descriptor_set; 1130 unsigned binding = var->data.binding; 1131 unsigned array_size = 1132 state->layout->set[set].layout->binding[binding].array_size; 1133 1134 unsigned binding_offset; 1135 if (deref_src_type == nir_tex_src_texture_deref) { 1136 binding_offset = state->set[set].surface_offsets[binding]; 1137 } else { 1138 assert(deref_src_type == nir_tex_src_sampler_deref); 1139 binding_offset = state->set[set].sampler_offsets[binding]; 1140 } 1141 1142 nir_tex_src_type offset_src_type; 1143 nir_ssa_def *index = NULL; 1144 if (binding_offset > MAX_BINDING_TABLE_SIZE) { 1145 const unsigned plane_offset = 1146 plane * sizeof(struct anv_sampled_image_descriptor); 1147 1148 nir_ssa_def *desc = 1149 build_load_var_deref_descriptor_mem(b, deref, plane_offset, 1150 2, 32, state); 1151 1152 if (deref_src_type == nir_tex_src_texture_deref) { 1153 offset_src_type = nir_tex_src_texture_handle; 1154 index = nir_channel(b, desc, 0); 1155 } else { 1156 assert(deref_src_type == nir_tex_src_sampler_deref); 1157 offset_src_type = nir_tex_src_sampler_handle; 1158 index = nir_channel(b, desc, 1); 1159 } 1160 } else { 1161 if (deref_src_type == nir_tex_src_texture_deref) { 1162 offset_src_type = nir_tex_src_texture_offset; 1163 } else { 1164 assert(deref_src_type == nir_tex_src_sampler_deref); 1165 offset_src_type = nir_tex_src_sampler_offset; 1166 } 1167 1168 *base_index = binding_offset + plane; 1169 1170 if (deref->deref_type != nir_deref_type_var) { 1171 assert(deref->deref_type == nir_deref_type_array); 1172 1173 if (nir_src_is_const(deref->arr.index)) { 1174 unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1); 1175 struct anv_sampler **immutable_samplers = 1176 state->layout->set[set].layout->binding[binding].immutable_samplers; 1177 if (immutable_samplers) { 1178 /* Array of YCbCr samplers are tightly packed in the binding 1179 * tables, compute the offset of an element in the array by 1180 * adding the number of planes of all preceding elements. 1181 */ 1182 unsigned desc_arr_index = 0; 1183 for (int i = 0; i < arr_index; i++) 1184 desc_arr_index += immutable_samplers[i]->n_planes; 1185 *base_index += desc_arr_index; 1186 } else { 1187 *base_index += arr_index; 1188 } 1189 } else { 1190 /* From VK_KHR_sampler_ycbcr_conversion: 1191 * 1192 * If sampler Y’CBCR conversion is enabled, the combined image 1193 * sampler must be indexed only by constant integral expressions 1194 * when aggregated into arrays in shader code, irrespective of 1195 * the shaderSampledImageArrayDynamicIndexing feature. 1196 */ 1197 assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1); 1198 1199 index = nir_ssa_for_src(b, deref->arr.index, 1); 1200 1201 if (state->add_bounds_checks) 1202 index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); 1203 } 1204 } 1205 } 1206 1207 if (index) { 1208 nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src, 1209 nir_src_for_ssa(index)); 1210 tex->src[deref_src_idx].src_type = offset_src_type; 1211 } else { 1212 nir_tex_instr_remove_src(tex, deref_src_idx); 1213 } 1214} 1215 1216static uint32_t 1217tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) 1218{ 1219 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane); 1220 if (plane_src_idx < 0) 1221 return 0; 1222 1223 unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src); 1224 1225 nir_tex_instr_remove_src(tex, plane_src_idx); 1226 1227 return plane; 1228} 1229 1230static nir_ssa_def * 1231build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx, 1232 unsigned start, unsigned end) 1233{ 1234 if (start == end - 1) { 1235 return srcs[start]; 1236 } else { 1237 unsigned mid = start + (end - start) / 2; 1238 return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)), 1239 build_def_array_select(b, srcs, idx, start, mid), 1240 build_def_array_select(b, srcs, idx, mid, end)); 1241 } 1242} 1243 1244static void 1245lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane, 1246 struct apply_pipeline_layout_state *state) 1247{ 1248 assert(state->pdevice->info.verx10 == 70); 1249 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF || 1250 nir_tex_instr_is_query(tex) || 1251 tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */ 1252 (tex->is_shadow && tex->is_new_style_shadow)) 1253 return; 1254 1255 int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); 1256 assert(deref_src_idx >= 0); 1257 1258 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); 1259 nir_variable *var = nir_deref_instr_get_variable(deref); 1260 1261 unsigned set = var->data.descriptor_set; 1262 unsigned binding = var->data.binding; 1263 const struct anv_descriptor_set_binding_layout *bind_layout = 1264 &state->layout->set[set].layout->binding[binding]; 1265 1266 if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0) 1267 return; 1268 1269 b->cursor = nir_before_instr(&tex->instr); 1270 1271 const unsigned plane_offset = 1272 plane * sizeof(struct anv_texture_swizzle_descriptor); 1273 nir_ssa_def *swiz = 1274 build_load_var_deref_descriptor_mem(b, deref, plane_offset, 1275 1, 32, state); 1276 1277 b->cursor = nir_after_instr(&tex->instr); 1278 1279 assert(tex->dest.ssa.bit_size == 32); 1280 assert(tex->dest.ssa.num_components == 4); 1281 1282 /* Initializing to undef is ok; nir_opt_undef will clean it up. */ 1283 nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); 1284 nir_ssa_def *comps[8]; 1285 for (unsigned i = 0; i < ARRAY_SIZE(comps); i++) 1286 comps[i] = undef; 1287 1288 comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0); 1289 if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float) 1290 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1); 1291 else 1292 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1); 1293 comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0); 1294 comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1); 1295 comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2); 1296 comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3); 1297 1298 nir_ssa_def *swiz_comps[4]; 1299 for (unsigned i = 0; i < 4; i++) { 1300 nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i)); 1301 swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8); 1302 } 1303 nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4); 1304 1305 /* Rewrite uses before we insert so we don't rewrite this use */ 1306 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, 1307 swiz_tex_res, 1308 swiz_tex_res->parent_instr); 1309} 1310 1311static bool 1312lower_tex(nir_builder *b, nir_tex_instr *tex, 1313 struct apply_pipeline_layout_state *state) 1314{ 1315 unsigned plane = tex_instr_get_and_remove_plane_src(tex); 1316 1317 /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this 1318 * before we lower the derefs away so we can still find the descriptor. 1319 */ 1320 if (state->pdevice->info.verx10 == 70) 1321 lower_gfx7_tex_swizzle(b, tex, plane, state); 1322 1323 b->cursor = nir_before_instr(&tex->instr); 1324 1325 lower_tex_deref(b, tex, nir_tex_src_texture_deref, 1326 &tex->texture_index, plane, state); 1327 1328 lower_tex_deref(b, tex, nir_tex_src_sampler_deref, 1329 &tex->sampler_index, plane, state); 1330 1331 return true; 1332} 1333 1334static bool 1335apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state) 1336{ 1337 struct apply_pipeline_layout_state *state = _state; 1338 1339 switch (instr->type) { 1340 case nir_instr_type_intrinsic: { 1341 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1342 switch (intrin->intrinsic) { 1343 case nir_intrinsic_vulkan_resource_index: 1344 return lower_res_index_intrinsic(b, intrin, state); 1345 case nir_intrinsic_vulkan_resource_reindex: 1346 return lower_res_reindex_intrinsic(b, intrin, state); 1347 case nir_intrinsic_load_vulkan_descriptor: 1348 return lower_load_vulkan_descriptor(b, intrin, state); 1349 case nir_intrinsic_get_ssbo_size: 1350 return lower_get_ssbo_size(b, intrin, state); 1351 case nir_intrinsic_image_deref_load: 1352 case nir_intrinsic_image_deref_store: 1353 case nir_intrinsic_image_deref_atomic_add: 1354 case nir_intrinsic_image_deref_atomic_imin: 1355 case nir_intrinsic_image_deref_atomic_umin: 1356 case nir_intrinsic_image_deref_atomic_imax: 1357 case nir_intrinsic_image_deref_atomic_umax: 1358 case nir_intrinsic_image_deref_atomic_and: 1359 case nir_intrinsic_image_deref_atomic_or: 1360 case nir_intrinsic_image_deref_atomic_xor: 1361 case nir_intrinsic_image_deref_atomic_exchange: 1362 case nir_intrinsic_image_deref_atomic_comp_swap: 1363 case nir_intrinsic_image_deref_atomic_fadd: 1364 case nir_intrinsic_image_deref_size: 1365 case nir_intrinsic_image_deref_samples: 1366 case nir_intrinsic_image_deref_load_param_intel: 1367 case nir_intrinsic_image_deref_load_raw_intel: 1368 case nir_intrinsic_image_deref_store_raw_intel: 1369 return lower_image_intrinsic(b, intrin, state); 1370 case nir_intrinsic_load_constant: 1371 return lower_load_constant(b, intrin, state); 1372 default: 1373 return false; 1374 } 1375 break; 1376 } 1377 case nir_instr_type_tex: 1378 return lower_tex(b, nir_instr_as_tex(instr), state); 1379 default: 1380 return false; 1381 } 1382} 1383 1384struct binding_info { 1385 uint32_t binding; 1386 uint8_t set; 1387 uint16_t score; 1388}; 1389 1390static int 1391compare_binding_infos(const void *_a, const void *_b) 1392{ 1393 const struct binding_info *a = _a, *b = _b; 1394 if (a->score != b->score) 1395 return b->score - a->score; 1396 1397 if (a->set != b->set) 1398 return a->set - b->set; 1399 1400 return a->binding - b->binding; 1401} 1402 1403void 1404anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, 1405 bool robust_buffer_access, 1406 const struct anv_pipeline_layout *layout, 1407 nir_shader *shader, 1408 struct anv_pipeline_bind_map *map) 1409{ 1410 void *mem_ctx = ralloc_context(NULL); 1411 1412 struct apply_pipeline_layout_state state = { 1413 .pdevice = pdevice, 1414 .layout = layout, 1415 .add_bounds_checks = robust_buffer_access, 1416 .desc_addr_format = brw_shader_stage_is_bindless(shader->info.stage) ? 1417 nir_address_format_64bit_global_32bit_offset : 1418 nir_address_format_32bit_index_offset, 1419 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access), 1420 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_buffer_access), 1421 .lowered_instrs = _mesa_pointer_set_create(mem_ctx), 1422 }; 1423 1424 for (unsigned s = 0; s < layout->num_sets; s++) { 1425 const unsigned count = layout->set[s].layout->binding_count; 1426 state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count); 1427 state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); 1428 state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); 1429 } 1430 1431 nir_shader_instructions_pass(shader, get_used_bindings, 1432 nir_metadata_all, &state); 1433 1434 for (unsigned s = 0; s < layout->num_sets; s++) { 1435 if (state.desc_addr_format != nir_address_format_32bit_index_offset) { 1436 state.set[s].desc_offset = BINDLESS_OFFSET; 1437 } else if (state.set[s].desc_buffer_used) { 1438 map->surface_to_descriptor[map->surface_count] = 1439 (struct anv_pipeline_binding) { 1440 .set = ANV_DESCRIPTOR_SET_DESCRIPTORS, 1441 .index = s, 1442 }; 1443 state.set[s].desc_offset = map->surface_count; 1444 map->surface_count++; 1445 } 1446 } 1447 1448 if (state.uses_constants && !pdevice->use_softpin) { 1449 state.constants_offset = map->surface_count; 1450 map->surface_to_descriptor[map->surface_count].set = 1451 ANV_DESCRIPTOR_SET_SHADER_CONSTANTS; 1452 map->surface_count++; 1453 } 1454 1455 unsigned used_binding_count = 0; 1456 for (uint32_t set = 0; set < layout->num_sets; set++) { 1457 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; 1458 for (unsigned b = 0; b < set_layout->binding_count; b++) { 1459 if (state.set[set].use_count[b] == 0) 1460 continue; 1461 1462 used_binding_count++; 1463 } 1464 } 1465 1466 struct binding_info *infos = 1467 rzalloc_array(mem_ctx, struct binding_info, used_binding_count); 1468 used_binding_count = 0; 1469 for (uint32_t set = 0; set < layout->num_sets; set++) { 1470 const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; 1471 for (unsigned b = 0; b < set_layout->binding_count; b++) { 1472 if (state.set[set].use_count[b] == 0) 1473 continue; 1474 1475 const struct anv_descriptor_set_binding_layout *binding = 1476 &layout->set[set].layout->binding[b]; 1477 1478 /* Do a fixed-point calculation to generate a score based on the 1479 * number of uses and the binding array size. We shift by 7 instead 1480 * of 8 because we're going to use the top bit below to make 1481 * everything which does not support bindless super higher priority 1482 * than things which do. 1483 */ 1484 uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) / 1485 binding->array_size; 1486 1487 /* If the descriptor type doesn't support bindless then put it at the 1488 * beginning so we guarantee it gets a slot. 1489 */ 1490 if (!anv_descriptor_supports_bindless(pdevice, binding, true) || 1491 !anv_descriptor_supports_bindless(pdevice, binding, false)) 1492 score |= 1 << 15; 1493 1494 infos[used_binding_count++] = (struct binding_info) { 1495 .set = set, 1496 .binding = b, 1497 .score = score, 1498 }; 1499 } 1500 } 1501 1502 /* Order the binding infos based on score with highest scores first. If 1503 * scores are equal we then order by set and binding. 1504 */ 1505 qsort(infos, used_binding_count, sizeof(struct binding_info), 1506 compare_binding_infos); 1507 1508 for (unsigned i = 0; i < used_binding_count; i++) { 1509 unsigned set = infos[i].set, b = infos[i].binding; 1510 const struct anv_descriptor_set_binding_layout *binding = 1511 &layout->set[set].layout->binding[b]; 1512 1513 const uint32_t array_size = binding->array_size; 1514 1515 if (binding->dynamic_offset_index >= 0) 1516 state.has_dynamic_buffers = true; 1517 1518 if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) { 1519 if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE || 1520 anv_descriptor_requires_bindless(pdevice, binding, false) || 1521 brw_shader_stage_is_bindless(shader->info.stage)) { 1522 /* If this descriptor doesn't fit in the binding table or if it 1523 * requires bindless for some reason, flag it as bindless. 1524 */ 1525 assert(anv_descriptor_supports_bindless(pdevice, binding, false)); 1526 state.set[set].surface_offsets[b] = BINDLESS_OFFSET; 1527 } else { 1528 state.set[set].surface_offsets[b] = map->surface_count; 1529 if (binding->dynamic_offset_index < 0) { 1530 struct anv_sampler **samplers = binding->immutable_samplers; 1531 for (unsigned i = 0; i < binding->array_size; i++) { 1532 uint8_t planes = samplers ? samplers[i]->n_planes : 1; 1533 for (uint8_t p = 0; p < planes; p++) { 1534 map->surface_to_descriptor[map->surface_count++] = 1535 (struct anv_pipeline_binding) { 1536 .set = set, 1537 .index = binding->descriptor_index + i, 1538 .plane = p, 1539 }; 1540 } 1541 } 1542 } else { 1543 for (unsigned i = 0; i < binding->array_size; i++) { 1544 map->surface_to_descriptor[map->surface_count++] = 1545 (struct anv_pipeline_binding) { 1546 .set = set, 1547 .index = binding->descriptor_index + i, 1548 .dynamic_offset_index = 1549 layout->set[set].dynamic_offset_start + 1550 binding->dynamic_offset_index + i, 1551 }; 1552 } 1553 } 1554 } 1555 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE); 1556 } 1557 1558 if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) { 1559 if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE || 1560 anv_descriptor_requires_bindless(pdevice, binding, true) || 1561 brw_shader_stage_is_bindless(shader->info.stage)) { 1562 /* If this descriptor doesn't fit in the binding table or if it 1563 * requires bindless for some reason, flag it as bindless. 1564 * 1565 * We also make large sampler arrays bindless because we can avoid 1566 * using indirect sends thanks to bindless samplers being packed 1567 * less tightly than the sampler table. 1568 */ 1569 assert(anv_descriptor_supports_bindless(pdevice, binding, true)); 1570 state.set[set].sampler_offsets[b] = BINDLESS_OFFSET; 1571 } else { 1572 state.set[set].sampler_offsets[b] = map->sampler_count; 1573 struct anv_sampler **samplers = binding->immutable_samplers; 1574 for (unsigned i = 0; i < binding->array_size; i++) { 1575 uint8_t planes = samplers ? samplers[i]->n_planes : 1; 1576 for (uint8_t p = 0; p < planes; p++) { 1577 map->sampler_to_descriptor[map->sampler_count++] = 1578 (struct anv_pipeline_binding) { 1579 .set = set, 1580 .index = binding->descriptor_index + i, 1581 .plane = p, 1582 }; 1583 } 1584 } 1585 } 1586 } 1587 } 1588 1589 nir_foreach_uniform_variable(var, shader) { 1590 const struct glsl_type *glsl_type = glsl_without_array(var->type); 1591 1592 if (!glsl_type_is_image(glsl_type)) 1593 continue; 1594 1595 enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type); 1596 1597 const uint32_t set = var->data.descriptor_set; 1598 const uint32_t binding = var->data.binding; 1599 const struct anv_descriptor_set_binding_layout *bind_layout = 1600 &layout->set[set].layout->binding[binding]; 1601 const uint32_t array_size = bind_layout->array_size; 1602 1603 if (state.set[set].use_count[binding] == 0) 1604 continue; 1605 1606 if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE) 1607 continue; 1608 1609 struct anv_pipeline_binding *pipe_binding = 1610 &map->surface_to_descriptor[state.set[set].surface_offsets[binding]]; 1611 for (unsigned i = 0; i < array_size; i++) { 1612 assert(pipe_binding[i].set == set); 1613 assert(pipe_binding[i].index == bind_layout->descriptor_index + i); 1614 1615 if (dim == GLSL_SAMPLER_DIM_SUBPASS || 1616 dim == GLSL_SAMPLER_DIM_SUBPASS_MS) 1617 pipe_binding[i].input_attachment_index = var->data.index + i; 1618 1619 pipe_binding[i].lowered_storage_surface = 1620 image_binding_needs_lowered_surface(var); 1621 } 1622 } 1623 1624 /* Before we do the normal lowering, we look for any SSBO operations 1625 * that we can lower to the BTI model and lower them up-front. The BTI 1626 * model can perform better than the A64 model for a couple reasons: 1627 * 1628 * 1. 48-bit address calculations are potentially expensive and using 1629 * the BTI model lets us simply compute 32-bit offsets and the 1630 * hardware adds the 64-bit surface base address. 1631 * 1632 * 2. The BTI messages, because they use surface states, do bounds 1633 * checking for us. With the A64 model, we have to do our own 1634 * bounds checking and this means wider pointers and extra 1635 * calculations and branching in the shader. 1636 * 1637 * The solution to both of these is to convert things to the BTI model 1638 * opportunistically. The reason why we need to do this as a pre-pass 1639 * is for two reasons: 1640 * 1641 * 1. The BTI model requires nir_address_format_32bit_index_offset 1642 * pointers which are not the same type as the pointers needed for 1643 * the A64 model. Because all our derefs are set up for the A64 1644 * model (in case we have variable pointers), we have to crawl all 1645 * the way back to the vulkan_resource_index intrinsic and build a 1646 * completely fresh index+offset calculation. 1647 * 1648 * 2. Because the variable-pointers-capable lowering that we do as part 1649 * of apply_pipeline_layout_block is destructive (It really has to 1650 * be to handle variable pointers properly), we've lost the deref 1651 * information by the time we get to the load/store/atomic 1652 * intrinsics in that pass. 1653 */ 1654 nir_shader_instructions_pass(shader, lower_direct_buffer_instr, 1655 nir_metadata_block_index | 1656 nir_metadata_dominance, 1657 &state); 1658 1659 /* We just got rid of all the direct access. Delete it so it's not in the 1660 * way when we do our indirect lowering. 1661 */ 1662 nir_opt_dce(shader); 1663 1664 nir_shader_instructions_pass(shader, apply_pipeline_layout, 1665 nir_metadata_block_index | 1666 nir_metadata_dominance, 1667 &state); 1668 1669 ralloc_free(mem_ctx); 1670 1671 if (brw_shader_stage_is_bindless(shader->info.stage)) { 1672 assert(map->surface_count == 0); 1673 assert(map->sampler_count == 0); 1674 } 1675 1676 /* Now that we're done computing the surface and sampler portions of the 1677 * bind map, hash them. This lets us quickly determine if the actual 1678 * mapping has changed and not just a no-op pipeline change. 1679 */ 1680 _mesa_sha1_compute(map->surface_to_descriptor, 1681 map->surface_count * sizeof(struct anv_pipeline_binding), 1682 map->surface_sha1); 1683 _mesa_sha1_compute(map->sampler_to_descriptor, 1684 map->sampler_count * sizeof(struct anv_pipeline_binding), 1685 map->sampler_sha1); 1686} 1687