1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

// Shader functions

/* First word of every SPIR-V module, per the SPIR-V specification. */
#define SPIR_V_MAGIC_NUMBER 0x07230203

/* Context threaded through spirv_to_nir() so the debug callback below can
 * attach log messages to the right device/module objects.
 */
struct anv_spirv_debug_data {
   struct anv_device *device;
   const struct vk_shader_module *module;
};

/* spirv_to_nir debug callback: forwards SPIR-V parser diagnostics to the
 * Vulkan logging machinery at the matching severity.  Unknown levels are
 * silently dropped.
 */
static void anv_spirv_nir_debug(void *private_data,
                                enum nir_spirv_debug_level level,
                                size_t spirv_offset,
                                const char *message)
{
   struct anv_spirv_debug_data *debug_data = private_data;

   switch (level) {
   case NIR_SPIRV_DEBUG_LEVEL_INFO:
      vk_logi(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_WARNING:
      vk_logw(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_ERROR:
      vk_loge(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   default:
      break;
   }
}

/* Eventually, this will become part of anv_CreateShader. Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
/* Translate a vk_shader_module's SPIR-V into a NIR shader for the given
 * stage, apply the standard early lowering passes, and steal the result
 * onto mem_ctx.  Returns NULL if spirv_to_nir() fails.  The SPIR-V
 * capability bits below are gated on hardware generation (pdevice->info).
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          void *mem_ctx,
                          const struct vk_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   /* Convert VkSpecializationInfo into the form spirv_to_nir expects;
    * spec_entries is heap-allocated and must be freed on every path below.
    */
   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);

   struct anv_spirv_debug_data spirv_debug_data = {
      .device = device,
      .module = module,
   };
   struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When KHR_format_feature_flags2 is enabled, the read/write without
          * format is per format, so just report true.  It's up to the
          * application to check.
          */
         .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
         anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format that has the NULL
       * pointer equals to 0.  That might be a better format to play nice
       * with certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
      .debug = {
         .func = anv_spirv_nir_debug,
         .private_data = &spirv_debug_data,
      },
   };


   nir_shader *nir =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, &spirv_options, nir_options);
   if (!nir) {
      free(spec_entries);
      return NULL;
   }

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   /* Re-parent the shader so its lifetime follows the caller's context. */
   ralloc_steal(mem_ctx, nir);

   free(spec_entries);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
              nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}

/* Common initialization shared by all pipeline types (graphics, compute,
 * ray tracing).  Zeroes the struct, sets up the base object, the batch
 * allocator/reloc list, and a ralloc memory context owned by the pipeline.
 */
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ?
pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}

/* Tear down what anv_pipeline_init() set up: the reloc list, the pipeline's
 * ralloc context (which also frees the executables dynarray), and the base
 * object.  Does NOT free the pipeline struct itself.
 */
void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

/* vkDestroyPipeline entry point.  Releases the per-type resources (dynamic
 * states, shader binary references) before the common teardown and the final
 * free of the pipeline allocation.
 */
void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   /* Destroying VK_NULL_HANDLE is a no-op per the Vulkan spec. */
   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      if (gfx_pipeline->blend_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
      if (gfx_pipeline->cps_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

/* Maps VkPrimitiveTopology values to the hardware 3DPRIM_* encodings. */
static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

/* Fill in the sampler portion of a program key with the driver-wide
 * defaults described in the comments below.
 */
static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed.  The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k which is a bit of an edge case.  Since the sampler
    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
    * to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA.  With this came a new message for
    * reading from a 16x MSAA surface with compression.  The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x.
 The key->msaa_16 bit-field controls which
    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
    * so we can just use it unconditionally.  This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

/* Fields common to every stage's program key: subgroup sizing, robust
 * buffer access, and the sampler defaults above.
 */
static void
populate_base_prog_key(const struct intel_device_info *devinfo,
                       enum brw_subgroup_size_type subgroup_size_type,
                       bool robust_buffer_acccess,
                       struct brw_base_prog_key *key)
{
   key->subgroup_size_type = subgroup_size_type;
   key->robust_buffer_access = robust_buffer_acccess;

   populate_sampler_prog_key(devinfo, &key->tex);
}

/* Vertex shader key: currently nothing beyond the base key. */
static void
populate_vs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

/* Tessellation control shader key: base key plus patch input vertex count. */
static void
populate_tcs_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);

   key->input_vertices = input_vertices;
}

/* Tessellation evaluation shader key: only the base key. */
static void
populate_tes_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

/* Geometry shader key: only the base key. */
static void
populate_gs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

/* Decide whether coarse pixel (fragment shading rate) shading can be in
 * effect for this pipeline.  Returns false when sample shading is enabled
 * or when the shading rate is statically known to be 1x1 with KEEP
 * combiners.  Note: fsr_info is only dereferenced after the first check
 * has guaranteed it is non-NULL for the static (non-dynamic) case.
 */
static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
{
   if (pipeline->sample_shading_enable)
      return false;

   /* Not dynamic & not specified for the pipeline. */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
      return false;

   /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
    * possibility for element of the pipeline to change the value.
    */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
       fsr_info->fragmentSize.width <= 1 &&
       fsr_info->fragmentSize.height <= 1 &&
       fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

/* Fragment shader key: base key plus render-target, multisample and
 * shading-rate state derived from the subpass and create infos.
 */
static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_acccess,
                     const struct anv_subpass *subpass,
                     const VkPipelineMultisampleStateCreateInfo *ms_info,
                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = &device->info;

   memset(key, 0, sizeof(*key));

   /* NOTE(review): 'flags' (VkPipelineShaderStageCreateFlags) is passed
    * where populate_base_prog_key() declares an enum brw_subgroup_size_type
    * parameter — this relies on an implicit integer conversion; confirm it
    * is intentional.
    */
   populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);

   /* We set this to 0 here
 and set to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   /* One bit per color attachment actually wired to an output. */
   assert(subpass->color_count <= MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
         key->color_outputs_valid |= (1 << i);
   }

   key->nr_color_regions = subpass->color_count;

   /* To reduce possible shader recompilations we would need to know if
    * there is a SampleMask output variable to compute if we should emit
    * code to workaround the issue that hardware disables alpha to coverage
    * when there is SampleMask output.
    */
   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms_info) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms_info->rasterizationSamples > 1) {
         key->persample_interp = ms_info->sampleShadingEnable &&
            (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
         key->multisample_fbo = true;
      }

      key->frag_coord_adds_sample_pos = key->persample_interp;
   }

   key->coarse_pixel =
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, fsr_info);
}

/* Compute shader key: only the base key. */
static void
populate_cs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

/* Ray-tracing (bindless) shader key: only the base key.
 * NOTE(review): like populate_wm_prog_key, this passes the stage create
 * 'flags' where an enum brw_subgroup_size_type is expected — confirm.
 */
static void
populate_bs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
}

/* Scratch state carried for one shader stage while a pipeline is being
 * compiled: source module/entrypoint, hashing keys, the NIR, bind maps,
 * compile output and stats.
 */
struct anv_pipeline_stage {
   gl_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedbackEXT feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

/* SHA1 of everything that identifies a single shader stage's source:
 * module hash, entrypoint name, stage enum, and specialization data.
 */
static void
anv_pipeline_hash_shader(const struct vk_shader_module *module,
                         const char
 *entrypoint,
                         gl_shader_stage stage,
                         const VkSpecializationInfo *spec_info,
                         unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
   if (spec_info) {
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount *
                        sizeof(*spec_info->pMapEntries));
      _mesa_sha1_update(&ctx, spec_info->pData,
                        spec_info->dataSize);
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Cache key for a whole graphics pipeline: view mask, layout, robustness,
 * and the per-stage shader hashes + prog keys of every active stage.
 */
static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
                     sizeof(pipeline->subpass->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (stages[s].entrypoint) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Cache key for a compute pipeline: layout, robustness, shader hash and
 * the CS prog key.
 */
static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Cache key for a single ray-tracing shader stage.  Note &stage->key is the
 * union's address, so hashing sizeof(stage->key.bs) bytes covers the bs key.
 */
static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Cache key for a fused intersection + any-hit ray-tracing shader pair. */
static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Return the NIR for a stage: first try the pipeline cache (keyed on
 * shader_sha1), otherwise compile from SPIR-V and upload the result to
 * the cache.  Returns NULL on compile failure.
 */
static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
pipeline->device->physical->compiler; 751 const nir_shader_compiler_options *nir_options = 752 compiler->glsl_compiler_options[stage->stage].NirOptions; 753 nir_shader *nir; 754 755 nir = anv_device_search_for_nir(pipeline->device, cache, 756 nir_options, 757 stage->shader_sha1, 758 mem_ctx); 759 if (nir) { 760 assert(nir->info.stage == stage->stage); 761 return nir; 762 } 763 764 nir = anv_shader_compile_to_nir(pipeline->device, 765 mem_ctx, 766 stage->module, 767 stage->entrypoint, 768 stage->stage, 769 stage->spec_info); 770 if (nir) { 771 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1); 772 return nir; 773 } 774 775 return NULL; 776} 777 778static void 779shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 780{ 781 assert(glsl_type_is_vector_or_scalar(type)); 782 783 uint32_t comp_size = glsl_type_is_boolean(type) 784 ? 4 : glsl_get_bit_size(type) / 8; 785 unsigned length = glsl_get_vector_elements(type); 786 *size = comp_size * length, 787 *align = comp_size * (length == 3 ? 4 : length); 788} 789 790static void 791anv_pipeline_lower_nir(struct anv_pipeline *pipeline, 792 void *mem_ctx, 793 struct anv_pipeline_stage *stage, 794 struct anv_pipeline_layout *layout) 795{ 796 const struct anv_physical_device *pdevice = pipeline->device->physical; 797 const struct brw_compiler *compiler = pdevice->compiler; 798 799 struct brw_stage_prog_data *prog_data = &stage->prog_data.base; 800 nir_shader *nir = stage->nir; 801 802 if (nir->info.stage == MESA_SHADER_FRAGMENT) { 803 /* Check if sample shading is enabled in the shader and toggle 804 * it on for the pipeline independent if sampleShadingEnable is set. 
805 */ 806 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 807 if (nir->info.fs.uses_sample_shading) 808 anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true; 809 810 NIR_PASS_V(nir, nir_lower_wpos_center, 811 anv_pipeline_to_graphics(pipeline)->sample_shading_enable); 812 NIR_PASS_V(nir, nir_lower_input_attachments, 813 &(nir_input_attachment_options) { 814 .use_fragcoord_sysval = true, 815 .use_layer_id_sysval = true, 816 }); 817 } 818 819 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout); 820 821 if (pipeline->type == ANV_PIPELINE_GRAPHICS) { 822 NIR_PASS_V(nir, anv_nir_lower_multiview, 823 anv_pipeline_to_graphics(pipeline)); 824 } 825 826 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 827 828 NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo); 829 830 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global, 831 nir_address_format_64bit_global); 832 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const, 833 nir_address_format_32bit_offset); 834 835 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ 836 anv_nir_apply_pipeline_layout(pdevice, 837 pipeline->device->robust_buffer_access, 838 layout, nir, &stage->bind_map); 839 840 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo, 841 anv_nir_ubo_addr_format(pdevice, 842 pipeline->device->robust_buffer_access)); 843 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo, 844 anv_nir_ssbo_addr_format(pdevice, 845 pipeline->device->robust_buffer_access)); 846 847 /* First run copy-prop to get rid of all of the vec() that address 848 * calculations often create and then constant-fold so that, when we 849 * get to anv_nir_lower_ubo_loads, we can detect constant offsets. 
850 */ 851 NIR_PASS_V(nir, nir_copy_prop); 852 NIR_PASS_V(nir, nir_opt_constant_folding); 853 854 NIR_PASS_V(nir, anv_nir_lower_ubo_loads); 855 856 /* We don't support non-uniform UBOs and non-uniform SSBO access is 857 * handled naturally by falling back to A64 messages. 858 */ 859 NIR_PASS_V(nir, nir_lower_non_uniform_access, 860 &(nir_lower_non_uniform_access_options) { 861 .types = nir_lower_non_uniform_texture_access | 862 nir_lower_non_uniform_image_access, 863 .callback = NULL, 864 }); 865 866 anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access, 867 nir, prog_data, &stage->bind_map, mem_ctx); 868 869 if (gl_shader_stage_uses_workgroup(nir->info.stage)) { 870 if (!nir->info.shared_memory_explicit_layout) { 871 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, 872 nir_var_mem_shared, shared_type_info); 873 } 874 875 NIR_PASS_V(nir, nir_lower_explicit_io, 876 nir_var_mem_shared, nir_address_format_32bit_offset); 877 878 if (nir->info.zero_initialize_shared_memory && 879 nir->info.shared_size > 0) { 880 /* The effective Shared Local Memory size is at least 1024 bytes and 881 * is always rounded to a power of two, so it is OK to align the size 882 * used by the shader to chunk_size -- which does simplify the logic. 
883 */ 884 const unsigned chunk_size = 16; 885 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size); 886 assert(shared_size <= 887 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size)); 888 889 NIR_PASS_V(nir, nir_zero_initialize_shared_memory, 890 shared_size, chunk_size); 891 } 892 } 893 894 stage->nir = nir; 895} 896 897static void 898anv_pipeline_link_vs(const struct brw_compiler *compiler, 899 struct anv_pipeline_stage *vs_stage, 900 struct anv_pipeline_stage *next_stage) 901{ 902 if (next_stage) 903 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir); 904} 905 906static void 907anv_pipeline_compile_vs(const struct brw_compiler *compiler, 908 void *mem_ctx, 909 struct anv_graphics_pipeline *pipeline, 910 struct anv_pipeline_stage *vs_stage) 911{ 912 /* When using Primitive Replication for multiview, each view gets its own 913 * position slot. 914 */ 915 uint32_t pos_slots = pipeline->use_primitive_replication ? 916 anv_subpass_view_count(pipeline->subpass) : 1; 917 918 brw_compute_vue_map(compiler->devinfo, 919 &vs_stage->prog_data.vs.base.vue_map, 920 vs_stage->nir->info.outputs_written, 921 vs_stage->nir->info.separate_shader, 922 pos_slots); 923 924 vs_stage->num_stats = 1; 925 926 struct brw_compile_vs_params params = { 927 .nir = vs_stage->nir, 928 .key = &vs_stage->key.vs, 929 .prog_data = &vs_stage->prog_data.vs, 930 .stats = vs_stage->stats, 931 .log_data = pipeline->base.device, 932 }; 933 934 vs_stage->code = brw_compile_vs(compiler, mem_ctx, ¶ms); 935} 936 937static void 938merge_tess_info(struct shader_info *tes_info, 939 const struct shader_info *tcs_info) 940{ 941 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: 942 * 943 * "PointMode. Controls generation of points rather than triangles 944 * or lines. This functionality defaults to disabled, and is 945 * enabled if either shader stage includes the execution mode. 
 *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess.primitive_mode == 0 ||
          tes_info->tess.primitive_mode == 0 ||
          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

/* Link TCS and TES together: lower the TES patch-vertex count, merge the
 * tessellation execution modes into the TES, and patch the TCS key with
 * TES-derived values.
 */
static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache.  So it should be safe.
    */
   tcs_stage->key.tcs.tes_primitive_mode =
      tes_stage->nir->info.tess.primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

/* Finish the TCS key from gathered NIR info and run the backend compiler. */
static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;
   tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
                                     &tcs_stage->key.tcs,
                                     &tcs_stage->prog_data.tcs,
                                     tcs_stage->nir, -1,
                                     tcs_stage->stats, NULL);
}

/* Link the TES against the next enabled stage (if any). */
static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

/* Finish the TES key from the TCS outputs and run the backend compiler. */
static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;
   tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
                                     &tes_stage->key.tes,
                                     &tcs_stage->prog_data.tcs.base.vue_map,
                                     &tes_stage->prog_data.tes,
                                     tes_stage->nir, -1,
                                     tes_stage->stats, NULL);
}

/* Link the GS against the next enabled stage (if any). */
static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}

/* Compute the GS VUE map and run the backend GS compiler. */
static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;
   gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
                                   &gs_stage->key.gs,
                                   &gs_stage->prog_data.gs,
                                   gs_stage->nir, -1,
                                   gs_stage->stats, NULL);
}

/* Set up the fragment stage's render-target bindings (one per valid color
 * region, or a single null RT when there are none) and prune shader outputs
 * that map to unused attachments.
 */
static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage)
{
   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
typed_memcpy(stage->bind_map.surface_to_descriptor, 1118 rt_bindings, num_rt_bindings); 1119 stage->bind_map.surface_count += num_rt_bindings; 1120 1121 /* Now that we've set up the color attachments, we can go through and 1122 * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the 1123 * hopes that dead code can clean them up in this and any earlier shader 1124 * stages. 1125 */ 1126 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir); 1127 bool deleted_output = false; 1128 nir_foreach_shader_out_variable_safe(var, stage->nir) { 1129 /* TODO: We don't delete depth/stencil writes. We probably could if the 1130 * subpass doesn't have a depth/stencil attachment. 1131 */ 1132 if (var->data.location < FRAG_RESULT_DATA0) 1133 continue; 1134 1135 const unsigned rt = var->data.location - FRAG_RESULT_DATA0; 1136 1137 /* If this is the RT at location 0 and we have alpha to coverage 1138 * enabled we still need that write because it will affect the coverage 1139 * mask even if it's never written to a color target. 1140 */ 1141 if (rt == 0 && stage->key.wm.alpha_to_coverage) 1142 continue; 1143 1144 const unsigned array_len = 1145 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; 1146 assert(rt + array_len <= MAX_RTS); 1147 1148 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & 1149 BITFIELD_RANGE(rt, array_len))) { 1150 deleted_output = true; 1151 var->data.mode = nir_var_function_temp; 1152 exec_node_remove(&var->node); 1153 exec_list_push_tail(&impl->locals, &var->node); 1154 } 1155 } 1156 1157 if (deleted_output) 1158 nir_fixup_deref_modes(stage->nir); 1159 1160 /* Initially the valid outputs value is based off the renderpass color 1161 * attachments (see populate_wm_prog_key()), now that we've potentially 1162 * deleted variables that map to unused attachments, we need to update the 1163 * valid outputs for the backend compiler based on what output variables 1164 * are actually used. 
    */
   stage->key.wm.color_outputs_valid = 0;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
   }

   /* We stored the number of subpass color attachments in nr_color_regions
    * when calculating the key for caching.  Now that we've computed the bind
    * map, we can reduce this to the actual max before we go into the back-end
    * compiler.
    */
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);
}

/* Compile the fragment shader.  Requires a non-NULL prev_stage: the WM key's
 * input_slots_valid is taken from the previous stage's VUE map.  May produce
 * up to three SIMD variants (8/16/32); num_stats counts the ones actually
 * generated.
 */
static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader, but
    * we need this before we call spirv_to_nir.
    */
   assert(prev_stage);
   fs_stage->key.wm.input_slots_valid =
      prev_stage->prog_data.vue.vue_map.slots_valid;

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   /* One stats entry per SIMD variant the back-end decided to emit. */
   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;

   if (fs_stage->key.wm.color_outputs_valid == 0 &&
       !fs_stage->prog_data.wm.has_side_effects &&
       !fs_stage->prog_data.wm.uses_omask &&
       !fs_stage->key.wm.alpha_to_coverage &&
       !fs_stage->prog_data.wm.uses_kill &&
       fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
       !fs_stage->prog_data.wm.computed_stencil) {
      /* This fragment shader has no outputs and no side effects.  Go ahead
       * and return the code pointer so we don't accidentally think the
       * compile failed but zero out prog_data which will set program_size to
       * zero and disable the stage.
       */
      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
   }
}

/* Record one pipeline-executable entry (for VK_KHR_pipeline_executable_
 * properties).  When the pipeline was created with the CAPTURE_INTERNAL_
 * REPRESENTATIONS flag, also capture the NIR as text and a disassembly of
 * the generated code (prefixed with a dump of the push-constant ranges).
 */
static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      /* NOTE(review): the open_memstream() result is not checked for NULL;
       * the fprintf calls below would crash on allocation failure.
       */
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            /* Lengths/starts are in 32-byte units, hence the "* 32". */
            fprintf(stream, " RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks.  It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->info,
                        stage->code, code_offset, stream);

      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

/* Add executable entries for a compiled shader binary.  Fragment shaders may
 * have up to three dispatch variants (SIMD8/16/32), each with its own stats
 * entry and code offset; all other stages contribute exactly one entry.
 */
static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }
}

/* Translate the VK_EXT_subgroup_size_control stage flags (and optional
 * required-subgroup-size struct) into the back-end's subgroup size type.
 */
static enum brw_subgroup_size_type
anv_subgroup_size_type(gl_shader_stage stage,
                       VkPipelineShaderStageCreateFlags flags,
                       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
{
   enum brw_subgroup_size_type subgroup_size_type;

   if (rss_info) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* These enum values are expressly chosen to be equal to the subgroup
       * size that they require.
       */
      assert(rss_info->requiredSubgroupSize == 8 ||
             rss_info->requiredSubgroupSize == 16 ||
             rss_info->requiredSubgroupSize == 32);
      subgroup_size_type = rss_info->requiredSubgroupSize;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
      subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* If the client expressly requests full subgroups and they don't
       * specify a subgroup size neither allow varying subgroups, we need to
       * pick one.  So we specify the API value of 32.  Performance will
       * likely be terrible in this case but there's nothing we can do about
       * that.  The client should have chosen a size.
       */
      subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
   } else {
      subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
   }

   return subgroup_size_type;
}

/* Recompute pipeline-wide state that is normally derived during compilation
 * when all shaders were instead found in the pipeline cache.
 */
static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
{
   /* TODO: Cache this pipeline-wide information. */

   if (anv_pipeline_is_primitive(pipeline)) {
      /* Primitive replication depends on information from all the shaders.
       * Recover this bit from the fact that we have more than one position
       * slot in the vertex shader when using it.
       */
      assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
      int pos_slots = 0;
      const struct brw_vue_prog_data *vue_prog_data =
         (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
      const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
      for (int i = 0; i < vue_map->num_slots; i++) {
         if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
            pos_slots++;
      }
      pipeline->use_primitive_replication = pos_slots > 1;
   }
}

/* Compile (or fetch from the cache) every shader stage of a graphics
 * pipeline: builds per-stage keys, hashes them, looks them up in the
 * pipeline cache, and otherwise runs the full NIR link/lower/compile
 * pipeline and uploads the resulting kernels.  Also fills in
 * VK_EXT_pipeline_creation_feedback data.
 */
static VkResult
anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
                              struct anv_pipeline_cache *cache,
                              const VkGraphicsPipelineCreateInfo *info)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
   struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};

   /* Information on which states are considered dynamic.
    */
   const VkPipelineDynamicStateCreateInfo *dyn_info =
      info->pDynamicState;
   uint32_t dynamic_states = 0;
   if (dyn_info) {
      for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
         dynamic_states |=
            anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
   }

   /* Pass 1: gather module/entrypoint/spec info for each stage, hash it,
    * and build the per-stage program key.
    */
   VkResult result;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

      int64_t stage_start = os_time_get_nano();

      stages[stage].stage = stage;
      stages[stage].module = vk_shader_module_from_handle(sinfo->module);
      stages[stage].entrypoint = sinfo->pName;
      stages[stage].spec_info = sinfo->pSpecializationInfo;
      anv_pipeline_hash_shader(stages[stage].module,
                               stages[stage].entrypoint,
                               stage,
                               stages[stage].spec_info,
                               stages[stage].shader_sha1);

      enum brw_subgroup_size_type subgroup_size_type =
         anv_subgroup_size_type(stage, sinfo->flags, NULL);

      const struct intel_device_info *devinfo = &pipeline->base.device->info;
      switch (stage) {
      case MESA_SHADER_VERTEX:
         populate_vs_prog_key(devinfo, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.vs);
         break;
      case MESA_SHADER_TESS_CTRL:
         populate_tcs_prog_key(devinfo, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               info->pTessellationState->patchControlPoints,
                               &stages[stage].key.tcs);
         break;
      case MESA_SHADER_TESS_EVAL:
         populate_tes_prog_key(devinfo, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               &stages[stage].key.tes);
         break;
      case MESA_SHADER_GEOMETRY:
         populate_gs_prog_key(devinfo, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.gs);
         break;
      case MESA_SHADER_FRAGMENT: {
         /* Rasterization counts as enabled if discard is off statically or
          * if the discard-enable itself is dynamic state.
          */
         const bool raster_enabled =
            !info->pRasterizationState->rasterizerDiscardEnable ||
            dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
         populate_wm_prog_key(pipeline, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              pipeline->subpass,
                              raster_enabled ? info->pMultisampleState : NULL,
                              vk_find_struct_const(info->pNext,
                                                   PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
                              &stages[stage].key.wm);
         break;
      }
      default:
         unreachable("Invalid graphics shader stage");
      }

      stages[stage].feedback.duration += os_time_get_nano() - stage_start;
      stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
   }

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* One SHA-1 covers the whole pipeline; each stage's cache key is that
    * hash plus the stage enum.
    */
   unsigned char sha1[20];
   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      stages[s].cache_key.stage = s;
      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
   }

   /* Skip the cache when internal representations are being captured, so
    * that NIR/disassembly are always regenerated.
    */
   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   if (!skip_cache_lookup) {
      unsigned found = 0;
      unsigned cache_hits = 0;
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         if (!stages[s].entrypoint)
            continue;

         int64_t stage_start = os_time_get_nano();

         bool cache_hit;
         struct anv_shader_bin *bin =
            anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stages[s].cache_key,
                                         sizeof(stages[s].cache_key), &cache_hit);
         if (bin) {
            found++;
            pipeline->shaders[s] = bin;
         }

         if (cache_hit) {
            cache_hits++;
            stages[s].feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         stages[s].feedback.duration += os_time_get_nano() - stage_start;
      }

      if (found == __builtin_popcount(pipeline->active_stages)) {
         if (cache_hits == found) {
            pipeline_feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         /* We found all our shaders in the cache.  We're done. */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            if (!stages[s].entrypoint)
               continue;

            anv_pipeline_add_executables(&pipeline->base, &stages[s],
                                         pipeline->shaders[s]);
         }
         anv_pipeline_init_from_cached_graphics(pipeline);
         goto done;
      } else if (found > 0) {
         /* We found some but not all of our shaders.  This shouldn't happen
          * most of the time but it can if we have a partially populated
          * pipeline cache.
          */
         assert(found < __builtin_popcount(pipeline->active_stages));

         vk_perf(VK_LOG_OBJS(&cache->base),
                 "Found a partial pipeline in the cache.  This is "
                 "most likely caused by an incomplete pipeline cache "
                 "import or export");

         /* We're going to have to recompile anyway, so just throw away our
          * references to the shaders in the cache.  We'll get them out of the
          * cache again as part of the compilation process.
          */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            stages[s].feedback.flags = 0;
            if (pipeline->shaders[s]) {
               anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
               pipeline->shaders[s] = NULL;
            }
         }
      }
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   /* Pipeline-lifetime allocations (NIR for all stages) live here; per-stage
    * compile scratch gets its own short-lived contexts below.
    */
   void *pipeline_ctx = ralloc_context(NULL);

   /* Pass 2: get NIR for every stage. */
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      assert(stages[s].stage == s);
      assert(pipeline->shaders[s] == NULL);

      stages[s].bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stages[s].surface_to_descriptor,
         .sampler_to_descriptor = stages[s].sampler_to_descriptor
      };

      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx,
                                                 &stages[s]);
      if (stages[s].nir == NULL) {
         /* NOTE(review): here vk_error() is passed `pipeline` but the
          * code == NULL path below passes `pipeline->base.device` -- the
          * mixed usage looks unintentional; confirm which log object is
          * intended.
          */
         result = vk_error(pipeline, VK_ERROR_UNKNOWN);
         goto fail;
      }

      /* This is rather ugly.
       *
       * Any variable annotated as interpolated by sample essentially disables
       * coarse pixel shading.  Unfortunately the CTS tests exercising this
       * set the varying value in the previous stage using a constant.  Our
       * NIR infrastructure is clever enough to look up variables across
       * stages and constant fold, removing the variable.  So in order to
       * comply with CTS we have to check variables here.
       */
      if (s == MESA_SHADER_FRAGMENT) {
         nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
            if (var->data.sample) {
               stages[s].key.wm.coarse_pixel = false;
               break;
            }
         }
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Walk backwards to link */
   struct anv_pipeline_stage *next_stage = NULL;
   for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
      if (!stages[s].entrypoint)
         continue;

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s]);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      next_stage = &stages[s];
   }

   if (pipeline->base.device->info.ver >= 12 &&
       pipeline->subpass->view_mask != 0) {
      /* For some pipelines HW Primitive Replication can be used instead of
       * instancing to implement Multiview.  This depend on how viewIndex is
       * used in all the active shaders, so this check can't be done per
       * individual shaders.
       */
      nir_shader *shaders[MESA_SHADER_STAGES] = {};
      for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
         shaders[s] = stages[s].nir;

      pipeline->use_primitive_replication =
         anv_check_for_primitive_replication(shaders, pipeline);
   } else {
      pipeline->use_primitive_replication = false;
   }

   /* Pass 3 (forward): lower each stage's NIR and unify inter-stage
    * interfaces where the back-end requires it.
    */
   struct anv_pipeline_stage *prev_stage = NULL;
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);

      if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
         /* Tess levels are excluded: they are not ordinary varyings. */
         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
      }

      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   /* Pass 4 (forward): back-end compile each stage and upload the kernel. */
   prev_stage = NULL;
   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      nir_xfb_info *xfb_info = NULL;
      if (s == MESA_SHADER_VERTEX ||
          s == MESA_SHADER_TESS_EVAL ||
          s == MESA_SHADER_GEOMETRY)
         xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
                                 &stages[s]);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }
      if (stages[s].code == NULL) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_nir_validate_push_layout(&stages[s].prog_data.base,
                                   &stages[s].bind_map);

      struct anv_shader_bin *bin =
         anv_device_upload_kernel(pipeline->base.device, cache, s,
                                  &stages[s].cache_key,
                                  sizeof(stages[s].cache_key),
                                  stages[s].code,
                                  stages[s].prog_data.base.program_size,
                                  &stages[s].prog_data.base,
                                  brw_prog_data_size(s),
                                  stages[s].stats, stages[s].num_stats,
                                  xfb_info, &stages[s].bind_map);
      if (!bin) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);

      pipeline->shaders[s] = bin;
      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   ralloc_free(pipeline_ctx);

done:

   if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
       pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
      /* This can happen if we decided to implicitly disable the fragment
       * shader.  See anv_pipeline_compile_fs().
       */
      anv_shader_bin_unref(pipeline->base.device,
                           pipeline->shaders[MESA_SHADER_FRAGMENT]);
      pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
      pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
   }

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   /* Report creation feedback if the application asked for it. */
   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;

fail:
   ralloc_free(pipeline_ctx);

   /* Drop references to any shader bins that were already created/found. */
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
   }

   return result;
}

/* Compile (or fetch from the cache) the compute shader of a compute
 * pipeline.  Handles the required-subgroup-size extension, sets up the
 * implicit gl_NumWorkGroups binding, and fills in creation feedback.
 */
VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info,
                        const struct vk_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;

   struct anv_pipeline_stage stage = {
      .stage = MESA_SHADER_COMPUTE,
      .module = module,
      .entrypoint = entrypoint,
      .spec_info = spec_info,
      .cache_key = {
         .stage = MESA_SHADER_COMPUTE,
      },
      .feedback = {
         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
      },
   };
   anv_pipeline_hash_shader(stage.module,
                            stage.entrypoint,
                            MESA_SHADER_COMPUTE,
                            stage.spec_info,
                            stage.shader_sha1);

   struct anv_shader_bin *bin = NULL;

   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
      vk_find_struct_const(info->stage.pNext,
                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);

   const enum brw_subgroup_size_type subgroup_size_type =
      anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);

   populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
                        pipeline->base.device->robust_buffer_access,
                        &stage.key.cs);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Skip the cache when internal representations are being captured. */
   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);

   bool cache_hit = false;
   if (!skip_cache_lookup) {
      bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stage.cache_key,
                                         sizeof(stage.cache_key),
                                         &cache_hit);
   }

   if (bin == NULL &&
       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   void *mem_ctx = ralloc_context(NULL);
   if (bin == NULL) {
      int64_t stage_start = os_time_get_nano();

      stage.bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stage.surface_to_descriptor,
         .sampler_to_descriptor = stage.sampler_to_descriptor
      };

      /* Set up a binding for the gl_NumWorkGroups */
      stage.bind_map.surface_count = 1;
      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
      };

      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
      if (stage.nir == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);

      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);

      NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);

      stage.num_stats = 1;

      struct brw_compile_cs_params params = {
         .nir = stage.nir,
         .key = &stage.key.cs,
         .prog_data = &stage.prog_data.cs,
         .stats = stage.stats,
         .log_data = pipeline->base.device,
      };

      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
      if (stage.code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);

      /* Neutralize the gl_NumWorkGroups binding if the compiled program
       * turned out not to use it.
       */
      if (!stage.prog_data.cs.uses_num_work_groups) {
         assert(stage.bind_map.surface_to_descriptor[0].set ==
                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
      }

      const unsigned code_size = stage.prog_data.base.program_size;
      bin = anv_device_upload_kernel(pipeline->base.device, cache,
                                     MESA_SHADER_COMPUTE,
                                     &stage.cache_key, sizeof(stage.cache_key),
                                     stage.code, code_size,
                                     &stage.prog_data.base,
                                     sizeof(stage.prog_data.cs),
                                     stage.stats, stage.num_stats,
                                     NULL, &stage.bind_map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      stage.feedback.duration = os_time_get_nano() - stage_start;
   }

   anv_pipeline_add_executables(&pipeline->base, &stage, bin);

   ralloc_free(mem_ctx);

   if (cache_hit) {
      stage.feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
   }
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   /* Report creation feedback if the application asked for it; a compute
    * pipeline has exactly one stage.
    */
   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
      create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
   }

   pipeline->cs = bin;

   return VK_SUCCESS;
}

/**
 * Copy pipeline state not marked as dynamic.
 * Dynamic state is pipeline state which hasn't been provided at pipeline
 * creation time, but is dynamically provided afterwards using various
 * vkCmdSet* functions.
 *
 * The set of state considered "non_dynamic" is determined by the pieces of
 * state that have their corresponding VkDynamicState enums omitted from
 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
 *
 * @param[out] pipeline    Destination non_dynamic state.
 * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
 */
static void
copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
   struct anv_subpass *subpass = pipeline->subpass;

   pipeline->dynamic_state = default_dynamic_state;

   /* Only state bits that are NOT dynamic get copied from pCreateInfo. */
   states &= ~pipeline->dynamic_states;

   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;

   /* Discard only counts as statically enabled if it isn't itself dynamic. */
   bool raster_discard =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pViewportState is [...] NULL if the pipeline
    *    has rasterization disabled.
    */
   if (!raster_discard) {
      assert(pCreateInfo->pViewportState);

      /* Counts are always baked; the arrays are only copied when the
       * corresponding state is not dynamic.
       */
      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->viewport.viewports,
                      pCreateInfo->pViewportState->pViewports,
                      pCreateInfo->pViewportState->viewportCount);
      }

      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->scissor.scissors,
                      pCreateInfo->pViewportState->pScissors,
                      pCreateInfo->pViewportState->scissorCount);
      }
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias.bias =
         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
      dynamic->depth_bias.clamp =
         pCreateInfo->pRasterizationState->depthBiasClamp;
      dynamic->depth_bias.slope =
         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->cull_mode =
         pCreateInfo->pRasterizationState->cullMode;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->front_face =
         pCreateInfo->pRasterizationState->frontFace;
   }

   /* Input-assembly state only exists when a vertex shader is present
    * (it is not used for mesh-style pipelines).
    */
   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
       (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->raster_discard =
         pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias_enable =
         pCreateInfo->pRasterizationState->depthBiasEnable;
   }

   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
       (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_restart_enable =
         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
   }

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   bool uses_color_att = false;
   for (unsigned i = 0; i < subpass->color_count; ++i) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
         uses_color_att = true;
         break;
      }
   }

   if (uses_color_att && !raster_discard) {
      assert(pCreateInfo->pColorBlendState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
         typed_memcpy(dynamic->blend_constants,
                      pCreateInfo->pColorBlendState->blendConstants, 4);
   }

   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * anv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is created
    *    against does not use a depth/stencil attachment.
    */
   if (!raster_discard && subpass->depth_stencil_attachment) {
      assert(pCreateInfo->pDepthStencilState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
         dynamic->depth_bounds.min =
            pCreateInfo->pDepthStencilState->minDepthBounds;
         dynamic->depth_bounds.max =
            pCreateInfo->pDepthStencilState->maxDepthBounds;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
         dynamic->stencil_compare_mask.front =
            pCreateInfo->pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pCreateInfo->pDepthStencilState->back.compareMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
         dynamic->stencil_write_mask.front =
            pCreateInfo->pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back =
            pCreateInfo->pDepthStencilState->back.writeMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
         dynamic->stencil_reference.front =
            pCreateInfo->pDepthStencilState->front.reference;
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
         dynamic->depth_test_enable =
            pCreateInfo->pDepthStencilState->depthTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
         dynamic->depth_write_enable =
            pCreateInfo->pDepthStencilState->depthWriteEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
         dynamic->depth_compare_op =
            pCreateInfo->pDepthStencilState->depthCompareOp;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
         dynamic->depth_bounds_test_enable =
            pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
         dynamic->stencil_test_enable =
            pCreateInfo->pDepthStencilState->stencilTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
         const VkPipelineDepthStencilStateCreateInfo *info =
            pCreateInfo->pDepthStencilState;
         memcpy(&dynamic->stencil_op.front, &info->front,
                sizeof(dynamic->stencil_op.front));
         memcpy(&dynamic->stencil_op.back, &info->back,
                sizeof(dynamic->stencil_op.back));
      }
   }

   /* Line stipple (VK_EXT_line_rasterization) only matters when stippling
    * is actually enabled and rasterization is on.
    */
   const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
   if (!raster_discard && line_state && line_state->stippledLineEnable) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
         dynamic->line_stipple.factor = line_state->lineStippleFactor;
         dynamic->line_stipple.pattern = line_state->lineStipplePattern;
      }
   }

   /* Multisample state must be ignored when rasterization is disabled. */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
      pCreateInfo->pMultisampleState;
   if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
      /* NOTE(review): the chain walk starts at ms_info itself rather than
       * ms_info->pNext as other lookups in this function do — confirm this
       * is intentional (ms_info's own sType can never match, so behavior
       * should be equivalent).
       */
      const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
         vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;

      if (sl_info) {
         dynamic->sample_locations.samples =
            sl_info->sampleLocationsInfo.sampleLocationsCount;
         const VkSampleLocationEXT *positions =
            sl_info->sampleLocationsInfo.pSampleLocations;
         for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
            dynamic->sample_locations.locations[i].x = positions[i].x;
            dynamic->sample_locations.locations[i].y = positions[i].y;
         }
      }
   }
   /* Ensure we always have valid values for sample_locations. */
   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
       dynamic->sample_locations.samples == 0) {
      dynamic->sample_locations.samples =
         ms_info ?
         ms_info->rasterizationSamples : 1;
      /* Fall back to the hardware's standard sample positions. */
      const struct intel_sample_position *positions =
         intel_get_sample_positions(dynamic->sample_locations.samples);
      for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
         dynamic->sample_locations.locations[i].x = positions[i].x;
         dynamic->sample_locations.locations[i].y = positions[i].y;
      }
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
      if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
          uses_color_att) {
         assert(pCreateInfo->pColorBlendState);
         const VkPipelineColorWriteCreateInfoEXT *color_write_info =
            vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
                                 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);

         /* Collapse the per-attachment enables into one bitmask. */
         if (color_write_info) {
            dynamic->color_writes = 0;
            for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
               dynamic->color_writes |=
                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
            }
         }
      }
   }

   const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
      vk_find_struct_const(pCreateInfo->pNext,
                           PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
   if (fsr_state) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
         dynamic->fragment_shading_rate = fsr_state->fragmentSize;
   }

   pipeline->dynamic_state_mask = states;

   /* Mark states that can either be dynamic or fully baked into the pipeline.
    */
   pipeline->static_state_mask = states &
      (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
       ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
       ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
       ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
}

/* Debug-build-only sanity checks on VkGraphicsPipelineCreateInfo; compiles
 * to an empty function in release builds.
 */
static void
anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
{
#ifdef DEBUG
   struct anv_render_pass *renderpass = NULL;
   struct anv_subpass *subpass = NULL;

   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
    * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
    */
   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   renderpass = anv_render_pass_from_handle(info->renderPass);
   assert(renderpass);

   assert(info->subpass < renderpass->subpass_count);
   subpass = &renderpass->subpasses[info->subpass];

   assert(info->stageCount >= 1);
   assert(info->pRasterizationState);
   if (!info->pRasterizationState->rasterizerDiscardEnable) {
      assert(info->pViewportState);
      assert(info->pMultisampleState);

      if (subpass && subpass->depth_stencil_attachment)
         assert(info->pDepthStencilState);

      if (subpass && subpass->color_count > 0) {
         bool all_color_unused = true;
         for (int i = 0; i < subpass->color_count; i++) {
            if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
               all_color_unused = false;
         }
         /* pColorBlendState is ignored if the pipeline has rasterization
          * disabled or if the subpass of the render pass the pipeline is
          * created against does not use any color attachments.
          */
         assert(info->pColorBlendState || all_color_unused);
      }
   }

   /* Per-stage required state: vertex shaders need VI/IA state and
    * tessellation shaders need tessellation state.
    */
   for (uint32_t i = 0; i < info->stageCount; ++i) {
      switch (info->pStages[i].stage) {
      case VK_SHADER_STAGE_VERTEX_BIT:
         assert(info->pVertexInputState);
         assert(info->pInputAssemblyState);
         break;
      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
         assert(info->pTessellationState);
         break;
      default:
         break;
      }
   }
#endif
}

/**
 * Calculate the desired L3 partitioning based on the current state of the
 * pipeline.  For now this simply returns the conservative defaults calculated
 * by get_default_l3_weights(), but we could probably do better by gathering
 * more statistics from the pipeline state (e.g. guess of expected URB usage
 * and bound surfaces), or by using feed-back from performance counters.
 */
void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
{
   const struct intel_device_info *devinfo = &pipeline->device->info;

   const struct intel_l3_weights w =
      intel_get_default_l3_weights(devinfo, true, needs_slm);

   pipeline->l3_config = intel_get_l3_config(devinfo, w);
}

/* Resolve the effective line rasterization mode: an explicit mode from
 * VK_EXT_line_rasterization wins; DEFAULT maps to rectangular when
 * multisampling, Bresenham otherwise.
 */
static VkLineRasterizationModeEXT
vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
                           const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   VkLineRasterizationModeEXT line_mode =
      line_info ?
         line_info->lineRasterizationMode :
      VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;

   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
      if (ms_info && ms_info->rasterizationSamples > 1) {
         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
      } else {
         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      }
   }

   return line_mode;
}

/* Initialize a graphics pipeline: validate the create info, set up base
 * pipeline state and batch storage, record dynamic/active-stage masks,
 * snapshot non-dynamic state, compile the shaders and derive vertex-buffer
 * and rasterization bookkeeping used at draw time.
 */
VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
                           struct anv_device *device,
                           struct anv_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *alloc)
{
   VkResult result;

   anv_pipeline_validate_create_info(pCreateInfo);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
                              alloc);
   if (result != VK_SUCCESS)
      return result;

   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
                         pipeline->batch_data, sizeof(pipeline->batch_data));

   ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
   assert(pCreateInfo->subpass < render_pass->subpass_count);
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

   assert(pCreateInfo->pRasterizationState);

   if (pCreateInfo->pDynamicState) {
      /* Remove all of the states that are marked as dynamic */
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
            pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }

   pipeline->active_stages = 0;
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      pipeline->active_stages |= pCreateInfo->pStages[i].stage;

   /* A tess-eval shader implies a tess-control shader per the spec. */
   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;

   copy_non_dynamic_state(pipeline, pCreateInfo);

   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;

   /* Previously we enabled depth clipping when !depthClampEnable.
    * DepthClipStateCreateInfo now makes depth clipping explicit so if the
    * clipping info is available, use its enable value to determine clipping,
    * otherwise fallback to the previous !depthClampEnable logic.
    */
   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
   pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;

   pipeline->sample_shading_enable =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      pCreateInfo->pMultisampleState &&
      pCreateInfo->pMultisampleState->sampleShadingEnable;

   result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, alloc);
      return result;
   }

   anv_pipeline_setup_l3_config(&pipeline->base, false);

   if (anv_pipeline_is_primitive(pipeline)) {
      const VkPipelineVertexInputStateCreateInfo *vi_info =
         pCreateInfo->pVertexInputState;

      const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;

      /* Only mark vertex buffers used by attributes the VS actually reads. */
      for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
         const VkVertexInputAttributeDescription *desc =
            &vi_info->pVertexAttributeDescriptions[i];

         if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
            pipeline->vb_used |= 1 << desc->binding;
      }

      for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
         const VkVertexInputBindingDescription *desc =
            &vi_info->pVertexBindingDescriptions[i];

         pipeline->vb[desc->binding].stride =
            desc->stride;

         /* Step rate is programmed per vertex element (attribute), not
          * binding. Set up a map of which bindings step per instance, for
          * reference by vertex element setup. */
         switch (desc->inputRate) {
         default:
         case VK_VERTEX_INPUT_RATE_VERTEX:
            pipeline->vb[desc->binding].instanced = false;
            break;
         case VK_VERTEX_INPUT_RATE_INSTANCE:
            pipeline->vb[desc->binding].instanced = true;
            break;
         }

         /* Default divisor; may be overridden below by
          * VK_EXT_vertex_attribute_divisor.
          */
         pipeline->vb[desc->binding].instance_divisor = 1;
      }

      const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
         vk_find_struct_const(vi_info->pNext,
                              PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
      if (vi_div_state) {
         for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
            const VkVertexInputBindingDivisorDescriptionEXT *desc =
               &vi_div_state->pVertexBindingDivisors[i];

            pipeline->vb[desc->binding].instance_divisor = desc->divisor;
         }
      }

      /* Our implementation of VK_KHR_multiview uses instancing to draw the
       * different views.  If the client asks for instancing, we need to multiply
       * the instance divisor by the number of views to ensure that we repeat
       * the client's per-instance data once for each view.
       */
      if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
         const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
         for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
            if (pipeline->vb[vb].instanced)
               pipeline->vb[vb].instance_divisor *= view_count;
         }
      }

      const VkPipelineInputAssemblyStateCreateInfo *ia_info =
         pCreateInfo->pInputAssemblyState;
      const VkPipelineTessellationStateCreateInfo *tess_info =
         pCreateInfo->pTessellationState;

      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
      else
         pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
   }

   /* If rasterization is not enabled, ms_info must be ignored. */
   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
      (pipeline->dynamic_states &
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;

   const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);

   /* Store line mode, polygon mode and rasterization samples, these are used
    * for dynamic primitive topology.
    */
   pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
   pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
   pipeline->rasterization_samples =
      ms_info ?
         ms_info->rasterizationSamples : 1;

   return VK_SUCCESS;
}

/* Compile one ray-tracing shader (plus any resume shaders produced by
 * shader-call lowering), upload the binary to the device/cache and record
 * it on the pipeline.  On success *shader_out receives the uploaded binary.
 */
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   /* Lower shader calls (traceRay/callable) into resumable continuations;
    * compute-stage shaders have no calls to lower.
    */
   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_shader_calls,
                 nir_address_format_64bit_global,
                 BRW_BTD_STACK_ALIGN,
                 &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS_V(nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   stage->code =
      brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
                     &stage->key.bs, &stage->prog_data.bs, nir,
                     num_resume_shaders, resume_shaders, stage->stats, NULL);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

/* Whether the app made the ray-tracing stack size dynamic via
 * VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR.
 */
static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

/* Compute the pipeline's scratch stack size from the per-stage maxima in
 * stack_max, following the default formula from the Vulkan spec; 0 is
 * reserved to mean "dynamic".
 */
static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
       *       rayGenStackMax +
       *       min(1, maxPipelineRayRecursionDepth) ×
       *       max(closestHitStackMax, missStackMax,
       *           intersectionStackMax + anyHitStackMax) +
       *       max(0, maxPipelineRayRecursionDepth-1) ×
       *       max(closestHitStackMax, missStackMax) +
       *       2 × callableStackMax"
       */
      pipeline->stack_size =
         stack_max[MESA_SHADER_RAYGEN] +
         MIN2(1, info->maxPipelineRayRecursionDepth) *
         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS],
              stack_max[MESA_SHADER_INTERSECTION],
              stack_max[MESA_SHADER_ANY_HIT]) +
         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS]) +
         2 * stack_max[MESA_SHADER_CALLABLE];

      /* This is an extremely unlikely case but we need to set it to some
       * non-zero value so that we don't accidentally think it's
       dynamic.
       * Our minimum stack size is 2KB anyway so we could set to any small
       * value we like.
       */
      if (pipeline->stack_size == 0)
         pipeline->stack_size = 1;
   }
}

/* Build the per-stage bookkeeping (keys, hashes, feedback timers) for all
 * shader stages of a ray-tracing pipeline.  The returned array, allocated
 * from pipeline_ctx, has one entry per info->pStages element; intersection
 * shaders get their cache key hashed per-group below since they may be
 * combined with an any-hit shader.
 */
static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus potential
    * combinations in the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (sinfo->module == VK_NULL_HANDLE)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .module = vk_shader_module_from_handle(sinfo->module),
         .entrypoint = sinfo->pName,
         .spec_info = sinfo->pSpecializationInfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
         },
      };

      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      anv_pipeline_hash_shader(stages[i].module,
                               stages[i].entrypoint,
                               stages[i].stage,
                               stages[i].spec_info,
                               stages[i].shader_sha1);

      /* Intersection shaders are hashed in the group loop below because
       * their key depends on the group's any-hit shader.
       */
      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      /* Hash intersection together with its any-hit partner (if any) since
       * the two get compiled as one combined shader.
       */
      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}

/* Look every stage up in the pipeline cache; record stack sizes for hits.
 * Returns true only when every stage was a cache hit (so compilation can be
 * skipped entirely).
 */
static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   return cache_hits == shaders;
}

/* Compile (or load from cache) every shader of a ray-tracing pipeline,
 * wire the resulting binaries into the shader groups, and compute the
 * pipeline stack size and creation feedback.
 */
static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info)
{
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   VkResult result;

   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   /* All transient compilation data (including the stages array) lives on
    * this ralloc context.
    */
   void *pipeline_ctx = ralloc_context(NULL);

   struct anv_pipeline_stage *stages =
      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};

   if (!skip_cache_lookup &&
       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
      ralloc_free(pipeline_ctx);
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
   }

   /* First pass: get SPIR-V -> NIR for every stage and apply the common
    * anv lowering.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx, &stages[i]);
      if (stages[i].nir == NULL) {
         ralloc_free(pipeline_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Second pass: compile every stage that wasn't a cache hit.  Intersection
    * shaders are skipped here and compiled per-group (possibly combined with
    * an any-hit shader) further down.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      /* Shader found in cache already. */
      if (stages[i].bin != NULL)
         continue;

      /* We handle intersection shaders as part of the group */
      if (stages[i].stage == MESA_SHADER_INTERSECTION)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(pipeline_ctx);

      /* Clone so the pristine NIR stays available for group combining. */
      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
      switch (stages[i].stage) {
      case MESA_SHADER_RAYGEN:
         brw_nir_lower_raygen(nir);
         break;

      case MESA_SHADER_ANY_HIT:
         brw_nir_lower_any_hit(nir, devinfo);
         break;

      case MESA_SHADER_CLOSEST_HIT:
         brw_nir_lower_closest_hit(nir);
         break;

      case MESA_SHADER_MISS:
         brw_nir_lower_miss(nir);
         break;

      case MESA_SHADER_INTERSECTION:
         unreachable("These are handled later");

      case MESA_SHADER_CALLABLE:
         brw_nir_lower_callable(nir);
         break;

      default:
         unreachable("Invalid ray-tracing shader stage");
      }

      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
                                        &stages[i].bin, stage_ctx);
      if (result != VK_SUCCESS) {
         ralloc_free(pipeline_ctx);
         return result;
      }

      uint32_t stack_size =
         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);

      ralloc_free(stage_ctx);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Wire compiled binaries into the shader groups. */
   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
      struct anv_rt_shader_group *group = &pipeline->groups[i];
      group->type = ginfo->type;
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < info->stageCount);
         group->general = stages[ginfo->generalShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (ginfo->anyHitShader < info->stageCount)
            group->any_hit = stages[ginfo->anyHitShader].bin;

         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;

         uint32_t intersection_idx = info->pGroups[i].intersectionShader;
         assert(intersection_idx < info->stageCount);

         /* Only compile this stage if not already found in the cache.
*/ 2919 if (stages[intersection_idx].bin == NULL) { 2920 /* The any-hit and intersection shader have to be combined */ 2921 uint32_t any_hit_idx = info->pGroups[i].anyHitShader; 2922 const nir_shader *any_hit = NULL; 2923 if (any_hit_idx < info->stageCount) 2924 any_hit = stages[any_hit_idx].nir; 2925 2926 void *group_ctx = ralloc_context(pipeline_ctx); 2927 nir_shader *intersection = 2928 nir_shader_clone(group_ctx, stages[intersection_idx].nir); 2929 2930 brw_nir_lower_combined_intersection_any_hit(intersection, any_hit, 2931 devinfo); 2932 2933 result = compile_upload_rt_shader(pipeline, cache, 2934 intersection, 2935 &stages[intersection_idx], 2936 &group->intersection, 2937 group_ctx); 2938 ralloc_free(group_ctx); 2939 if (result != VK_SUCCESS) 2940 return result; 2941 } else { 2942 group->intersection = stages[intersection_idx].bin; 2943 } 2944 2945 uint32_t stack_size = 2946 brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size; 2947 stack_max[MESA_SHADER_INTERSECTION] = 2948 MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size); 2949 2950 break; 2951 } 2952 2953 default: 2954 unreachable("Invalid ray tracing shader group type"); 2955 } 2956 } 2957 2958 done: 2959 ralloc_free(pipeline_ctx); 2960 2961 anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max); 2962 2963 pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 2964 2965 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = 2966 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 2967 if (create_feedback) { 2968 *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 2969 2970 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount); 2971 for (uint32_t i = 0; i < info->stageCount; i++) { 2972 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); 2973 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 2974 } 2975 } 2976 2977 return VK_SUCCESS; 2978} 2979 
2980VkResult 2981anv_device_init_rt_shaders(struct anv_device *device) 2982{ 2983 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 2984 return VK_SUCCESS; 2985 2986 bool cache_hit; 2987 2988 struct brw_rt_trampoline { 2989 char name[16]; 2990 struct brw_cs_prog_key key; 2991 } trampoline_key = { 2992 .name = "rt-trampoline", 2993 .key = { 2994 /* TODO: Other subgroup sizes? */ 2995 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8, 2996 }, 2997 }; 2998 device->rt_trampoline = 2999 anv_device_search_for_kernel(device, &device->default_pipeline_cache, 3000 &trampoline_key, sizeof(trampoline_key), 3001 &cache_hit); 3002 if (device->rt_trampoline == NULL) { 3003 3004 void *tmp_ctx = ralloc_context(NULL); 3005 nir_shader *trampoline_nir = 3006 brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx); 3007 3008 struct anv_pipeline_bind_map bind_map = { 3009 .surface_count = 0, 3010 .sampler_count = 0, 3011 }; 3012 uint32_t dummy_params[4] = { 0, }; 3013 struct brw_cs_prog_data trampoline_prog_data = { 3014 .base.nr_params = 4, 3015 .base.param = dummy_params, 3016 .uses_inline_data = true, 3017 .uses_btd_stack_ids = true, 3018 }; 3019 struct brw_compile_cs_params params = { 3020 .nir = trampoline_nir, 3021 .key = &trampoline_key.key, 3022 .prog_data = &trampoline_prog_data, 3023 .log_data = device, 3024 }; 3025 const unsigned *tramp_data = 3026 brw_compile_cs(device->physical->compiler, tmp_ctx, ¶ms); 3027 3028 device->rt_trampoline = 3029 anv_device_upload_kernel(device, &device->default_pipeline_cache, 3030 MESA_SHADER_COMPUTE, 3031 &trampoline_key, sizeof(trampoline_key), 3032 tramp_data, 3033 trampoline_prog_data.base.program_size, 3034 &trampoline_prog_data.base, 3035 sizeof(trampoline_prog_data), 3036 NULL, 0, NULL, &bind_map); 3037 3038 ralloc_free(tmp_ctx); 3039 3040 if (device->rt_trampoline == NULL) 3041 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3042 } 3043 3044 struct brw_rt_trivial_return { 3045 char name[16]; 3046 
struct brw_bs_prog_key key; 3047 } return_key = { 3048 .name = "rt-trivial-ret", 3049 }; 3050 device->rt_trivial_return = 3051 anv_device_search_for_kernel(device, &device->default_pipeline_cache, 3052 &return_key, sizeof(return_key), 3053 &cache_hit); 3054 if (device->rt_trivial_return == NULL) { 3055 void *tmp_ctx = ralloc_context(NULL); 3056 nir_shader *trivial_return_nir = 3057 brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx); 3058 3059 NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info); 3060 3061 struct anv_pipeline_bind_map bind_map = { 3062 .surface_count = 0, 3063 .sampler_count = 0, 3064 }; 3065 struct brw_bs_prog_data return_prog_data = { 0, }; 3066 const unsigned *return_data = 3067 brw_compile_bs(device->physical->compiler, device, tmp_ctx, 3068 &return_key.key, &return_prog_data, trivial_return_nir, 3069 0, 0, NULL, NULL); 3070 3071 device->rt_trivial_return = 3072 anv_device_upload_kernel(device, &device->default_pipeline_cache, 3073 MESA_SHADER_CALLABLE, 3074 &return_key, sizeof(return_key), 3075 return_data, return_prog_data.base.program_size, 3076 &return_prog_data.base, sizeof(return_prog_data), 3077 NULL, 0, NULL, &bind_map); 3078 3079 ralloc_free(tmp_ctx); 3080 3081 if (device->rt_trivial_return == NULL) { 3082 anv_shader_bin_unref(device, device->rt_trampoline); 3083 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 3084 } 3085 } 3086 3087 return VK_SUCCESS; 3088} 3089 3090void 3091anv_device_finish_rt_shaders(struct anv_device *device) 3092{ 3093 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 3094 return; 3095 3096 anv_shader_bin_unref(device, device->rt_trampoline); 3097} 3098 3099VkResult 3100anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, 3101 struct anv_device *device, 3102 struct anv_pipeline_cache *cache, 3103 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 3104 const VkAllocationCallbacks *alloc) 3105{ 3106 VkResult result; 3107 3108 
util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx); 3109 3110 result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo); 3111 if (result != VK_SUCCESS) 3112 goto fail; 3113 3114 anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false); 3115 3116 return VK_SUCCESS; 3117 3118fail: 3119 util_dynarray_foreach(&pipeline->shaders, 3120 struct anv_shader_bin *, shader) { 3121 anv_shader_bin_unref(device, *shader); 3122 } 3123 return result; 3124} 3125 3126#define WRITE_STR(field, ...) ({ \ 3127 memset(field, 0, sizeof(field)); \ 3128 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \ 3129 assert(i > 0 && i < sizeof(field)); \ 3130}) 3131 3132VkResult anv_GetPipelineExecutablePropertiesKHR( 3133 VkDevice device, 3134 const VkPipelineInfoKHR* pPipelineInfo, 3135 uint32_t* pExecutableCount, 3136 VkPipelineExecutablePropertiesKHR* pProperties) 3137{ 3138 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline); 3139 VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount); 3140 3141 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) { 3142 vk_outarray_append(&out, props) { 3143 gl_shader_stage stage = exe->stage; 3144 props->stages = mesa_to_vk_shader_stage(stage); 3145 3146 unsigned simd_width = exe->stats.dispatch_width; 3147 if (stage == MESA_SHADER_FRAGMENT) { 3148 WRITE_STR(props->name, "%s%d %s", 3149 simd_width ? "SIMD" : "vec", 3150 simd_width ? simd_width : 4, 3151 _mesa_shader_stage_to_string(stage)); 3152 } else { 3153 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage)); 3154 } 3155 WRITE_STR(props->description, "%s%d %s shader", 3156 simd_width ? "SIMD" : "vec", 3157 simd_width ? simd_width : 4, 3158 _mesa_shader_stage_to_string(stage)); 3159 3160 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan 3161 * wants a subgroup size of 1. 
3162 */ 3163 props->subgroupSize = MAX2(simd_width, 1); 3164 } 3165 } 3166 3167 return vk_outarray_status(&out); 3168} 3169 3170static const struct anv_pipeline_executable * 3171anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index) 3172{ 3173 assert(index < util_dynarray_num_elements(&pipeline->executables, 3174 struct anv_pipeline_executable)); 3175 return util_dynarray_element( 3176 &pipeline->executables, struct anv_pipeline_executable, index); 3177} 3178 3179VkResult anv_GetPipelineExecutableStatisticsKHR( 3180 VkDevice device, 3181 const VkPipelineExecutableInfoKHR* pExecutableInfo, 3182 uint32_t* pStatisticCount, 3183 VkPipelineExecutableStatisticKHR* pStatistics) 3184{ 3185 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3186 VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount); 3187 3188 const struct anv_pipeline_executable *exe = 3189 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3190 3191 const struct brw_stage_prog_data *prog_data; 3192 switch (pipeline->type) { 3193 case ANV_PIPELINE_GRAPHICS: { 3194 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data; 3195 break; 3196 } 3197 case ANV_PIPELINE_COMPUTE: { 3198 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data; 3199 break; 3200 } 3201 default: 3202 unreachable("invalid pipeline type"); 3203 } 3204 3205 vk_outarray_append(&out, stat) { 3206 WRITE_STR(stat->name, "Instruction Count"); 3207 WRITE_STR(stat->description, 3208 "Number of GEN instructions in the final generated " 3209 "shader executable."); 3210 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3211 stat->value.u64 = exe->stats.instructions; 3212 } 3213 3214 vk_outarray_append(&out, stat) { 3215 WRITE_STR(stat->name, "SEND Count"); 3216 WRITE_STR(stat->description, 3217 "Number of instructions in the final generated shader " 3218 "executable which access external units such as the " 3219 "constant cache or the sampler."); 
3220 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3221 stat->value.u64 = exe->stats.sends; 3222 } 3223 3224 vk_outarray_append(&out, stat) { 3225 WRITE_STR(stat->name, "Loop Count"); 3226 WRITE_STR(stat->description, 3227 "Number of loops (not unrolled) in the final generated " 3228 "shader executable."); 3229 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3230 stat->value.u64 = exe->stats.loops; 3231 } 3232 3233 vk_outarray_append(&out, stat) { 3234 WRITE_STR(stat->name, "Cycle Count"); 3235 WRITE_STR(stat->description, 3236 "Estimate of the number of EU cycles required to execute " 3237 "the final generated executable. This is an estimate only " 3238 "and may vary greatly from actual run-time performance."); 3239 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3240 stat->value.u64 = exe->stats.cycles; 3241 } 3242 3243 vk_outarray_append(&out, stat) { 3244 WRITE_STR(stat->name, "Spill Count"); 3245 WRITE_STR(stat->description, 3246 "Number of scratch spill operations. This gives a rough " 3247 "estimate of the cost incurred due to spilling temporary " 3248 "values to memory. If this is non-zero, you may want to " 3249 "adjust your shader to reduce register pressure."); 3250 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3251 stat->value.u64 = exe->stats.spills; 3252 } 3253 3254 vk_outarray_append(&out, stat) { 3255 WRITE_STR(stat->name, "Fill Count"); 3256 WRITE_STR(stat->description, 3257 "Number of scratch fill operations. This gives a rough " 3258 "estimate of the cost incurred due to spilling temporary " 3259 "values to memory. 
If this is non-zero, you may want to " 3260 "adjust your shader to reduce register pressure."); 3261 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3262 stat->value.u64 = exe->stats.fills; 3263 } 3264 3265 vk_outarray_append(&out, stat) { 3266 WRITE_STR(stat->name, "Scratch Memory Size"); 3267 WRITE_STR(stat->description, 3268 "Number of bytes of scratch memory required by the " 3269 "generated shader executable. If this is non-zero, you " 3270 "may want to adjust your shader to reduce register " 3271 "pressure."); 3272 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3273 stat->value.u64 = prog_data->total_scratch; 3274 } 3275 3276 if (gl_shader_stage_uses_workgroup(exe->stage)) { 3277 vk_outarray_append(&out, stat) { 3278 WRITE_STR(stat->name, "Workgroup Memory Size"); 3279 WRITE_STR(stat->description, 3280 "Number of bytes of workgroup shared memory used by this " 3281 "shader including any padding."); 3282 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 3283 stat->value.u64 = prog_data->total_shared; 3284 } 3285 } 3286 3287 return vk_outarray_status(&out); 3288} 3289 3290static bool 3291write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, 3292 const char *data) 3293{ 3294 ir->isText = VK_TRUE; 3295 3296 size_t data_len = strlen(data) + 1; 3297 3298 if (ir->pData == NULL) { 3299 ir->dataSize = data_len; 3300 return true; 3301 } 3302 3303 strncpy(ir->pData, data, ir->dataSize); 3304 if (ir->dataSize < data_len) 3305 return false; 3306 3307 ir->dataSize = data_len; 3308 return true; 3309} 3310 3311VkResult anv_GetPipelineExecutableInternalRepresentationsKHR( 3312 VkDevice device, 3313 const VkPipelineExecutableInfoKHR* pExecutableInfo, 3314 uint32_t* pInternalRepresentationCount, 3315 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations) 3316{ 3317 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 3318 VK_OUTARRAY_MAKE(out, pInternalRepresentations, 3319 
pInternalRepresentationCount); 3320 bool incomplete_text = false; 3321 3322 const struct anv_pipeline_executable *exe = 3323 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 3324 3325 if (exe->nir) { 3326 vk_outarray_append(&out, ir) { 3327 WRITE_STR(ir->name, "Final NIR"); 3328 WRITE_STR(ir->description, 3329 "Final NIR before going into the back-end compiler"); 3330 3331 if (!write_ir_text(ir, exe->nir)) 3332 incomplete_text = true; 3333 } 3334 } 3335 3336 if (exe->disasm) { 3337 vk_outarray_append(&out, ir) { 3338 WRITE_STR(ir->name, "GEN Assembly"); 3339 WRITE_STR(ir->description, 3340 "Final GEN assembly for the generated shader binary"); 3341 3342 if (!write_ir_text(ir, exe->disasm)) 3343 incomplete_text = true; 3344 } 3345 } 3346 3347 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out); 3348} 3349 3350VkResult 3351anv_GetRayTracingShaderGroupHandlesKHR( 3352 VkDevice _device, 3353 VkPipeline _pipeline, 3354 uint32_t firstGroup, 3355 uint32_t groupCount, 3356 size_t dataSize, 3357 void* pData) 3358{ 3359 ANV_FROM_HANDLE(anv_device, device, _device); 3360 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3361 3362 if (pipeline->type != ANV_PIPELINE_RAY_TRACING) 3363 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 3364 3365 struct anv_ray_tracing_pipeline *rt_pipeline = 3366 anv_pipeline_to_ray_tracing(pipeline); 3367 3368 for (uint32_t i = 0; i < groupCount; i++) { 3369 struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; 3370 memcpy(pData, group->handle, sizeof(group->handle)); 3371 pData += sizeof(group->handle); 3372 } 3373 3374 return VK_SUCCESS; 3375} 3376 3377VkResult 3378anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( 3379 VkDevice _device, 3380 VkPipeline pipeline, 3381 uint32_t firstGroup, 3382 uint32_t groupCount, 3383 size_t dataSize, 3384 void* pData) 3385{ 3386 ANV_FROM_HANDLE(anv_device, device, _device); 3387 unreachable("Unimplemented"); 3388 return vk_error(device, 
VK_ERROR_FEATURE_NOT_PRESENT); 3389} 3390 3391VkDeviceSize 3392anv_GetRayTracingShaderGroupStackSizeKHR( 3393 VkDevice device, 3394 VkPipeline _pipeline, 3395 uint32_t group, 3396 VkShaderGroupShaderKHR groupShader) 3397{ 3398 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 3399 assert(pipeline->type == ANV_PIPELINE_RAY_TRACING); 3400 3401 struct anv_ray_tracing_pipeline *rt_pipeline = 3402 anv_pipeline_to_ray_tracing(pipeline); 3403 3404 assert(group < rt_pipeline->group_count); 3405 3406 struct anv_shader_bin *bin; 3407 switch (groupShader) { 3408 case VK_SHADER_GROUP_SHADER_GENERAL_KHR: 3409 bin = rt_pipeline->groups[group].general; 3410 break; 3411 3412 case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: 3413 bin = rt_pipeline->groups[group].closest_hit; 3414 break; 3415 3416 case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: 3417 bin = rt_pipeline->groups[group].any_hit; 3418 break; 3419 3420 case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: 3421 bin = rt_pipeline->groups[group].intersection; 3422 break; 3423 3424 default: 3425 unreachable("Invalid VkShaderGroupShader enum"); 3426 } 3427 3428 if (bin == NULL) 3429 return 0; 3430 3431 return brw_bs_prog_data_const(bin->prog_data)->max_stack_size; 3432} 3433