/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/set.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET        255

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;
   nir_address_format ssbo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   int dynamic_offset_uniform_start;

   bool uses_constants;
   uint8_t constants_offset;
   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding. This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}

static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   uint32_t surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index.
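    *
    * Bindless bindings are assigned BINDLESS_OFFSET, which is deliberately
    * larger than MAX_BINDING_TABLE_SIZE, so they fail this check and are
    * handled through the A64 path instead.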
    */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}

static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}

static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}

static bool
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return false;

   /* 64-bit atomics only support A64 messages so we can't lower them to the
    * index+offset model.
    */
   if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
      return false;

   /* Normal binding table-based messages can't handle non-uniform access so
    * we have to fall back to A64.
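    *
    * (A BTI message carries a single binding table index for the whole SIMD
    * message, so divergent resource indices cannot be expressed with it.)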
    */
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
      return false;

   if (!nir_deref_find_descriptor(deref, state))
      return false;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
   return true;
}

static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            try_lower_direct_buffer_intrinsic(intrin, false, state);
            break;
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, true, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
         }

         default:
            break;
         }
      }
   }
}

static nir_address_format
desc_addr_format(VkDescriptorType desc_type,
                 struct apply_pipeline_layout_state *state)
{
   return (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
           desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) ?
          state->ssbo_addr_format : nir_address_format_32bit_index_offset;
}

static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      /* We store the descriptor offset as 16.8.8 where the top 16 bits are
       * the offset into the descriptor set, the next 8 are the binding table
       * index of the descriptor buffer, and the bottom 8 bits are the index
       * into the dynamic offset table.
       */
      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t desc_offset =
         bind_layout->descriptor_offset << 16 |
         (uint32_t)state->set[set].desc_offset << 8 |
         dynamic_offset_index;

      if (state->add_bounds_checks) {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_bounded_global);
         assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         index = nir_vec4(b, nir_imm_int(b, desc_offset),
                          nir_ssa_for_src(b, intrin->src[0], 1),
                          nir_imm_int(b, array_size - 1),
                          nir_ssa_undef(b, 1, 32));
      } else {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_global);
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
                                        nir_ssa_for_src(b, intrin->src[0], 1));
      }
   } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block. Just reference the descriptor set
       * and use the descriptor offset as the base.
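       *
       * With 32bit_index_offset addressing, the first component selects the
       * descriptor buffer's binding table entry and the second is the byte
       * offset of the block within that buffer.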
415 */ 416 assert(desc_addr_format(desc_type, state) == 417 nir_address_format_32bit_index_offset); 418 assert(intrin->dest.ssa.num_components == 2); 419 assert(intrin->dest.ssa.bit_size == 32); 420 index = nir_imm_ivec2(b, state->set[set].desc_offset, 421 bind_layout->descriptor_offset); 422 } else { 423 assert(desc_addr_format(desc_type, state) == 424 nir_address_format_32bit_index_offset); 425 assert(intrin->dest.ssa.num_components == 2); 426 assert(intrin->dest.ssa.bit_size == 32); 427 index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index), 428 nir_imm_int(b, 0)); 429 } 430 431 assert(intrin->dest.is_ssa); 432 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index)); 433 nir_instr_remove(&intrin->instr); 434} 435 436static void 437lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, 438 struct apply_pipeline_layout_state *state) 439{ 440 nir_builder *b = &state->builder; 441 442 b->cursor = nir_before_instr(&intrin->instr); 443 444 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); 445 446 /* For us, the resource indices are just indices into the binding table and 447 * array elements are sequential. A resource_reindex just turns into an 448 * add of the two indices. 449 */ 450 assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa); 451 nir_ssa_def *old_index = intrin->src[0].ssa; 452 nir_ssa_def *offset = intrin->src[1].ssa; 453 454 nir_ssa_def *new_index; 455 switch (desc_addr_format(desc_type, state)) { 456 case nir_address_format_64bit_bounded_global: 457 /* See also lower_res_index_intrinsic() */ 458 assert(intrin->dest.ssa.num_components == 4); 459 assert(intrin->dest.ssa.bit_size == 32); 460 new_index = nir_vec4(b, nir_channel(b, old_index, 0), 461 nir_iadd(b, nir_channel(b, old_index, 1), 462 offset), 463 nir_channel(b, old_index, 2), 464 nir_ssa_undef(b, 1, 32)); 465 break; 466 467 case nir_address_format_64bit_global: { 468 /* See also lower_res_index_intrinsic() */ 469 assert(intrin->dest.ssa.num_components == 1); 470 assert(intrin->dest.ssa.bit_size == 64); 471 nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index); 472 nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index); 473 new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset)); 474 break; 475 } 476 477 case nir_address_format_32bit_index_offset: 478 assert(intrin->dest.ssa.num_components == 2); 479 assert(intrin->dest.ssa.bit_size == 32); 480 new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset), 481 nir_channel(b, old_index, 1)); 482 break; 483 484 default: 485 unreachable("Uhandled address format"); 486 } 487 488 assert(intrin->dest.is_ssa); 489 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index)); 490 nir_instr_remove(&intrin->instr); 491} 492 493static nir_ssa_def * 494build_ssbo_descriptor_load(const VkDescriptorType desc_type, 495 nir_ssa_def *index, 496 struct apply_pipeline_layout_state *state) 497{ 498 nir_builder *b = &state->builder; 499 500 nir_ssa_def *desc_offset, *array_index; 501 switch (state->ssbo_addr_format) { 502 case nir_address_format_64bit_bounded_global: 503 /* See also lower_res_index_intrinsic() */ 504 desc_offset = nir_channel(b, index, 0); 505 array_index = nir_umin(b, nir_channel(b, index, 1), 506 nir_channel(b, index, 2)); 507 break; 508 509 case nir_address_format_64bit_global: 510 /* See also lower_res_index_intrinsic() */ 511 desc_offset = nir_unpack_64_2x32_split_x(b, index); 512 array_index = nir_unpack_64_2x32_split_y(b, index); 513 break; 514 515 default: 516 
unreachable("Unhandled address format for SSBO"); 517 } 518 519 /* The desc_offset is actually 16.8.8 */ 520 nir_ssa_def *desc_buffer_index = 521 nir_extract_u8(b, desc_offset, nir_imm_int(b, 1)); 522 nir_ssa_def *desc_offset_base = 523 nir_extract_u16(b, desc_offset, nir_imm_int(b, 1)); 524 525 /* Compute the actual descriptor offset */ 526 const unsigned descriptor_size = 527 anv_descriptor_type_size(state->pdevice, desc_type); 528 desc_offset = nir_iadd(b, desc_offset_base, 529 nir_imul_imm(b, array_index, descriptor_size)); 530 531 nir_intrinsic_instr *desc_load = 532 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); 533 desc_load->src[0] = nir_src_for_ssa(desc_buffer_index); 534 desc_load->src[1] = nir_src_for_ssa(desc_offset); 535 desc_load->num_components = 4; 536 nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL); 537 nir_builder_instr_insert(b, &desc_load->instr); 538 539 return &desc_load->dest.ssa; 540} 541 542static void 543lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin, 544 struct apply_pipeline_layout_state *state) 545{ 546 nir_builder *b = &state->builder; 547 548 b->cursor = nir_before_instr(&intrin->instr); 549 550 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); 551 552 assert(intrin->src[0].is_ssa); 553 nir_ssa_def *index = intrin->src[0].ssa; 554 555 nir_ssa_def *desc; 556 if (state->pdevice->has_a64_buffer_access && 557 (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || 558 desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) { 559 desc = build_ssbo_descriptor_load(desc_type, index, state); 560 561 /* We want nir_address_format_64bit_global */ 562 if (!state->add_bounds_checks) 563 desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3)); 564 565 if (state->dynamic_offset_uniform_start >= 0) { 566 /* This shader has dynamic offsets and we have no way of knowing 567 * (save from the dynamic offset base index) if this buffer has a 568 * dynamic offset. 
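          *
          * So we always fetch the dynamic offset here and rely on the 0xff
          * sentinel in the bottom byte of the descriptor offset to select an
          * offset of zero for buffers that don't have one.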
          */
         nir_ssa_def *desc_offset, *array_index;
         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_channel(b, index, 0);
            array_index = nir_umin(b, nir_channel(b, index, 1),
                                   nir_channel(b, index, 2));
            break;

         case nir_address_format_64bit_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_unpack_64_2x32_split_x(b, index);
            array_index = nir_unpack_64_2x32_split_y(b, index);
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }

         nir_ssa_def *dyn_offset_base =
            nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
         nir_ssa_def *dyn_offset_idx =
            nir_iadd(b, dyn_offset_base, array_index);
         if (state->add_bounds_checks) {
            dyn_offset_idx = nir_umin(b, dyn_offset_idx,
                                      nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
         }

         nir_intrinsic_instr *dyn_load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
         nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
         dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
         dyn_load->num_components = 1;
         nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &dyn_load->instr);

         nir_ssa_def *dynamic_offset =
            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
                      nir_imm_int(b, 0), &dyn_load->dest.ssa);

         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global: {
            /* The dynamic offset gets added to the base pointer so that we
             * have a sliding window range.
             */
            nir_ssa_def *base_ptr =
               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
            base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
            desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                            nir_unpack_64_2x32_split_y(b, base_ptr),
                            nir_channel(b, desc, 2),
                            nir_channel(b, desc, 3));
            break;
         }

         case nir_address_format_64bit_global:
            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }
      }
   } else {
      /* We follow the nir_address_format_32bit_index_offset model */
      desc = index;
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   nir_instr_remove(&intrin->instr);
}

static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   if (state->pdevice->has_a64_buffer_access) {
      nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
      nir_ssa_def *size = nir_channel(b, desc, 2);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
      nir_instr_remove(&intrin->instr);
   } else {
      /* We're following the nir_address_format_32bit_index_offset model so
       * the binding table index is the first component of the address. The
       * back-end wants a scalar binding table index source.
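       *
       * The offset component is simply dropped; the back-end gets the buffer
       * size from the surface state for that binding table entry.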
       */
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(nir_channel(b, index, 0)));
   }
}

static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}

static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned binding_offset = state->set[set].surface_offsets[binding];

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   const bool use_bindless = state->pdevice->has_bindless_images;

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      assert(!use_bindless); /* Otherwise our offsets would be wrong */
      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const bool write_only =
         (var->data.image.access & ACCESS_NON_READABLE) != 0;
      nir_ssa_def *desc =
         build_descriptor_load(deref, 0, 2, 32, state);
      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
      nir_rewrite_image_intrinsic(intrin, handle, true);
   } else {
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}

static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}

static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_builder *b = &state->builder;

   nir_tex_src_type offset_src_type;
   nir_ssa_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_ssa_def *desc =
         build_descriptor_load(deref, plane_offset, 2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
            struct anv_sampler **immutable_samplers =
               state->layout->set[set].layout->binding[binding].immutable_samplers;
            if (immutable_samplers) {
               /* Arrays of YCbCr samplers are tightly packed in the binding
                * tables; compute the offset of an element in the array by
                * adding the number of planes of all preceding elements.
                */
               unsigned desc_arr_index = 0;
               for (int i = 0; i < arr_index; i++)
                  desc_arr_index += immutable_samplers[i]->n_planes;
               *base_index += desc_arr_index;
            } else {
               *base_index += arr_index;
            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y’CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = nir_ssa_for_src(b, deref->arr.index, 1);

            if (state->add_bounds_checks)
               index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
         }
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

static nir_ssa_def *
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
                       unsigned start, unsigned end)
{
   if (start == end - 1) {
      return srcs[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
                       build_def_array_select(b, srcs, idx, start, mid),
                       build_def_array_select(b, srcs, idx, mid, end));
   }
}

static void
lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
                       struct apply_pipeline_layout_state *state)
{
   assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
       nir_tex_instr_is_query(tex) ||
       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
       (tex->is_shadow && tex->is_new_style_shadow))
      return;

   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
      return;

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&tex->instr);

   const unsigned plane_offset =
      plane * sizeof(struct anv_texture_swizzle_descriptor);
   nir_ssa_def *swiz =
      build_descriptor_load(deref, plane_offset, 1, 32, state);

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->dest.ssa.bit_size == 32);
   assert(tex->dest.ssa.num_components == 4);

   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
   nir_ssa_def *comps[8];
   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
      comps[i] = undef;

   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
   else
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);

   nir_ssa_def *swiz_comps[4];
   for (unsigned i = 0; i < 4; i++) {
      nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
   }
   nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);

   /* Rewrite uses before we insert so we don't rewrite this use */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
                                  nir_src_for_ssa(swiz_tex_res),
                                  swiz_tex_res->parent_instr);
}

static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
    * before we lower the derefs away so we can still find the descriptor.
    */
   if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
      lower_gen7_tex_swizzle(tex, plane, state);

   state->builder.cursor = nir_before_instr(&tex->instr);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   /* The backend only ever uses this to mark used surfaces. We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}

static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
      .dynamic_offset_uniform_start = -1,
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size. We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first. If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   bool have_dynamic_buffers = false;

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      if (binding->dynamic_offset_index >= 0)
         have_dynamic_buffers = true;

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
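             *
             * Bindless descriptors are then fetched from the descriptor
             * buffer at run time (see build_descriptor_load()) rather than
             * through a binding table entry.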
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   if (have_dynamic_buffers) {
      state.dynamic_offset_uniform_start = shader->num_uniforms;
      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
                                                       MAX_DYNAMIC_BUFFERS);
      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
         param[i] = ANV_PARAM_DYN_OFFSET(i);
      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
      assert(shader->num_uniforms == prog_data->nr_params * 4);
   }

   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only =
            (var->data.image.access & ACCESS_NON_READABLE) != 0;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front. The BTI
       * model can perform better than the A64 model for a couple of reasons:
       *
       * 1. 48-bit address calculations are potentially expensive and using
       *    the BTI model lets us simply compute 32-bit offsets and the
       *    hardware adds the 64-bit surface base address.
       *
       * 2. The BTI messages, because they use surface states, do bounds
       *    checking for us. With the A64 model, we have to do our own
       *    bounds checking and this means wider pointers and extra
       *    calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically. We need to do this as a pre-pass for two reasons:
       *
       * 1. The BTI model requires nir_address_format_32bit_index_offset
       *    pointers which are not the same type as the pointers needed for
       *    the A64 model. Because all our derefs are set up for the A64
       *    model (in case we have variable pointers), we have to crawl all
       *    the way back to the vulkan_resource_index intrinsic and build a
       *    completely fresh index+offset calculation.
       *
       * 2. Because the variable-pointers-capable lowering that we do as part
       *    of apply_pipeline_layout_block is destructive (it really has to
       *    be to handle variable pointers properly), we've lost the deref
       *    information by the time we get to the load/store/atomic
       *    intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}