101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 
2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <assert.h> 2501e04c3fSmrg#include <stdbool.h> 2601e04c3fSmrg#include <string.h> 2701e04c3fSmrg#include <unistd.h> 2801e04c3fSmrg#include <fcntl.h> 2901e04c3fSmrg 3001e04c3fSmrg#include "util/mesa-sha1.h" 319f464c52Smaya#include "util/os_time.h" 327ec681f3Smrg#include "common/intel_l3_config.h" 337ec681f3Smrg#include "common/intel_disasm.h" 347ec681f3Smrg#include "common/intel_sample_positions.h" 3501e04c3fSmrg#include "anv_private.h" 3601e04c3fSmrg#include "compiler/brw_nir.h" 377ec681f3Smrg#include "compiler/brw_nir_rt.h" 3801e04c3fSmrg#include "anv_nir.h" 399f464c52Smaya#include "nir/nir_xfb_info.h" 4001e04c3fSmrg#include "spirv/nir_spirv.h" 4101e04c3fSmrg#include "vk_util.h" 4201e04c3fSmrg 4301e04c3fSmrg/* Needed for SWIZZLE macros */ 4401e04c3fSmrg#include "program/prog_instruction.h" 4501e04c3fSmrg 4601e04c3fSmrg// Shader functions 477ec681f3Smrg#define SPIR_V_MAGIC_NUMBER 0x07230203 4801e04c3fSmrg 497ec681f3Smrgstruct anv_spirv_debug_data { 507ec681f3Smrg struct anv_device *device; 517ec681f3Smrg const struct vk_shader_module *module; 527ec681f3Smrg}; 5301e04c3fSmrg 547ec681f3Smrgstatic void anv_spirv_nir_debug(void *private_data, 557ec681f3Smrg enum nir_spirv_debug_level level, 567ec681f3Smrg size_t spirv_offset, 577ec681f3Smrg const char *message) 5801e04c3fSmrg{ 597ec681f3Smrg struct anv_spirv_debug_data *debug_data = private_data; 607ec681f3Smrg 617ec681f3Smrg switch (level) { 627ec681f3Smrg case NIR_SPIRV_DEBUG_LEVEL_INFO: 637ec681f3Smrg vk_logi(VK_LOG_OBJS(&debug_data->module->base), 647ec681f3Smrg "SPIR-V offset %lu: %s", 657ec681f3Smrg (unsigned long) spirv_offset, message); 667ec681f3Smrg break; 677ec681f3Smrg case NIR_SPIRV_DEBUG_LEVEL_WARNING: 687ec681f3Smrg vk_logw(VK_LOG_OBJS(&debug_data->module->base), 697ec681f3Smrg "SPIR-V offset %lu: %s", 707ec681f3Smrg (unsigned long) spirv_offset, message); 717ec681f3Smrg break; 727ec681f3Smrg case NIR_SPIRV_DEBUG_LEVEL_ERROR: 737ec681f3Smrg 
vk_loge(VK_LOG_OBJS(&debug_data->module->base), 747ec681f3Smrg "SPIR-V offset %lu: %s", 757ec681f3Smrg (unsigned long) spirv_offset, message); 767ec681f3Smrg break; 777ec681f3Smrg default: 787ec681f3Smrg break; 797ec681f3Smrg } 8001e04c3fSmrg} 8101e04c3fSmrg 8201e04c3fSmrg/* Eventually, this will become part of anv_CreateShader. Unfortunately, 8301e04c3fSmrg * we can't do that yet because we don't have the ability to copy nir. 8401e04c3fSmrg */ 8501e04c3fSmrgstatic nir_shader * 869f464c52Smayaanv_shader_compile_to_nir(struct anv_device *device, 8701e04c3fSmrg void *mem_ctx, 887ec681f3Smrg const struct vk_shader_module *module, 8901e04c3fSmrg const char *entrypoint_name, 9001e04c3fSmrg gl_shader_stage stage, 9101e04c3fSmrg const VkSpecializationInfo *spec_info) 9201e04c3fSmrg{ 937ec681f3Smrg const struct anv_physical_device *pdevice = device->physical; 949f464c52Smaya const struct brw_compiler *compiler = pdevice->compiler; 9501e04c3fSmrg const nir_shader_compiler_options *nir_options = 9601e04c3fSmrg compiler->glsl_compiler_options[stage].NirOptions; 9701e04c3fSmrg 9801e04c3fSmrg uint32_t *spirv = (uint32_t *) module->data; 9901e04c3fSmrg assert(spirv[0] == SPIR_V_MAGIC_NUMBER); 10001e04c3fSmrg assert(module->size % 4 == 0); 10101e04c3fSmrg 10201e04c3fSmrg uint32_t num_spec_entries = 0; 1037ec681f3Smrg struct nir_spirv_specialization *spec_entries = 1047ec681f3Smrg vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries); 10501e04c3fSmrg 1067ec681f3Smrg struct anv_spirv_debug_data spirv_debug_data = { 1077ec681f3Smrg .device = device, 1087ec681f3Smrg .module = module, 1097ec681f3Smrg }; 11001e04c3fSmrg struct spirv_to_nir_options spirv_options = { 11101e04c3fSmrg .caps = { 1127ec681f3Smrg .demote_to_helper_invocation = true, 1139f464c52Smaya .derivative_group = true, 1149f464c52Smaya .descriptor_array_dynamic_indexing = true, 1159f464c52Smaya .descriptor_array_non_uniform_indexing = true, 1169f464c52Smaya .descriptor_indexing = true, 11701e04c3fSmrg .device_group = 
true, 11801e04c3fSmrg .draw_parameters = true, 1197ec681f3Smrg .float16 = pdevice->info.ver >= 8, 1207ec681f3Smrg .float32_atomic_add = pdevice->info.has_lsc, 1217ec681f3Smrg .float32_atomic_min_max = pdevice->info.ver >= 9, 1227ec681f3Smrg .float64 = pdevice->info.ver >= 8, 1237ec681f3Smrg .float64_atomic_min_max = pdevice->info.has_lsc, 1247ec681f3Smrg .fragment_shader_sample_interlock = pdevice->info.ver >= 9, 1257ec681f3Smrg .fragment_shader_pixel_interlock = pdevice->info.ver >= 9, 1269f464c52Smaya .geometry_streams = true, 1277ec681f3Smrg /* When KHR_format_feature_flags2 is enabled, the read/write without 1287ec681f3Smrg * format is per format, so just report true. It's up to the 1297ec681f3Smrg * application to check. 1307ec681f3Smrg */ 1317ec681f3Smrg .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2, 13201e04c3fSmrg .image_write_without_format = true, 1337ec681f3Smrg .int8 = pdevice->info.ver >= 8, 1347ec681f3Smrg .int16 = pdevice->info.ver >= 8, 1357ec681f3Smrg .int64 = pdevice->info.ver >= 8, 1367ec681f3Smrg .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin, 1377ec681f3Smrg .integer_functions2 = pdevice->info.ver >= 8, 1389f464c52Smaya .min_lod = true, 13901e04c3fSmrg .multiview = true, 1409f464c52Smaya .physical_storage_buffer_address = pdevice->has_a64_buffer_access, 1417ec681f3Smrg .post_depth_coverage = pdevice->info.ver >= 9, 1429f464c52Smaya .runtime_descriptor_array = true, 1437ec681f3Smrg .float_controls = pdevice->info.ver >= 8, 1447ec681f3Smrg .ray_tracing = pdevice->info.has_ray_tracing, 1457ec681f3Smrg .shader_clock = true, 14601e04c3fSmrg .shader_viewport_index_layer = true, 1477ec681f3Smrg .stencil_export = pdevice->info.ver >= 9, 1487ec681f3Smrg .storage_8bit = pdevice->info.ver >= 8, 1497ec681f3Smrg .storage_16bit = pdevice->info.ver >= 8, 15001e04c3fSmrg .subgroup_arithmetic = true, 15101e04c3fSmrg .subgroup_basic = true, 15201e04c3fSmrg .subgroup_ballot = true, 1537ec681f3Smrg 
.subgroup_dispatch = true, 15401e04c3fSmrg .subgroup_quad = true, 1557ec681f3Smrg .subgroup_uniform_control_flow = true, 15601e04c3fSmrg .subgroup_shuffle = true, 15701e04c3fSmrg .subgroup_vote = true, 1589f464c52Smaya .tessellation = true, 1597ec681f3Smrg .transform_feedback = pdevice->info.ver >= 8, 1609f464c52Smaya .variable_pointers = true, 1617ec681f3Smrg .vk_memory_model = true, 1627ec681f3Smrg .vk_memory_model_device_scope = true, 1637ec681f3Smrg .workgroup_memory_explicit_layout = true, 1647ec681f3Smrg .fragment_shading_rate = pdevice->info.ver >= 11, 1657ec681f3Smrg }, 1667ec681f3Smrg .ubo_addr_format = 1677ec681f3Smrg anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access), 1687ec681f3Smrg .ssbo_addr_format = 1697ec681f3Smrg anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access), 1707ec681f3Smrg .phys_ssbo_addr_format = nir_address_format_64bit_global, 1717ec681f3Smrg .push_const_addr_format = nir_address_format_logical, 1727ec681f3Smrg 1737ec681f3Smrg /* TODO: Consider changing this to an address format that has the NULL 1747ec681f3Smrg * pointer equals to 0. That might be a better format to play nice 1757ec681f3Smrg * with certain code / code generators. 
1767ec681f3Smrg */ 1777ec681f3Smrg .shared_addr_format = nir_address_format_32bit_offset, 1787ec681f3Smrg .debug = { 1797ec681f3Smrg .func = anv_spirv_nir_debug, 1807ec681f3Smrg .private_data = &spirv_debug_data, 18101e04c3fSmrg }, 18201e04c3fSmrg }; 18301e04c3fSmrg 1849f464c52Smaya 1857ec681f3Smrg nir_shader *nir = 18601e04c3fSmrg spirv_to_nir(spirv, module->size / 4, 18701e04c3fSmrg spec_entries, num_spec_entries, 18801e04c3fSmrg stage, entrypoint_name, &spirv_options, nir_options); 1897ec681f3Smrg if (!nir) { 1907ec681f3Smrg free(spec_entries); 1917ec681f3Smrg return NULL; 1927ec681f3Smrg } 1937ec681f3Smrg 19401e04c3fSmrg assert(nir->info.stage == stage); 19501e04c3fSmrg nir_validate_shader(nir, "after spirv_to_nir"); 1967ec681f3Smrg nir_validate_ssa_dominance(nir, "after spirv_to_nir"); 19701e04c3fSmrg ralloc_steal(mem_ctx, nir); 19801e04c3fSmrg 19901e04c3fSmrg free(spec_entries); 20001e04c3fSmrg 2017ec681f3Smrg const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { 2027ec681f3Smrg .point_coord = true, 2037ec681f3Smrg }; 2047ec681f3Smrg NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); 2057ec681f3Smrg 2067ec681f3Smrg if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) { 20701e04c3fSmrg fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n", 20801e04c3fSmrg gl_shader_stage_name(stage)); 20901e04c3fSmrg nir_print_shader(nir, stderr); 21001e04c3fSmrg } 21101e04c3fSmrg 21201e04c3fSmrg /* We have to lower away local constant initializers right before we 21301e04c3fSmrg * inline functions. That way they get properly initialized at the top 21401e04c3fSmrg * of the function and not at the top of its caller. 
21501e04c3fSmrg */ 2167ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); 21701e04c3fSmrg NIR_PASS_V(nir, nir_lower_returns); 21801e04c3fSmrg NIR_PASS_V(nir, nir_inline_functions); 2197ec681f3Smrg NIR_PASS_V(nir, nir_copy_prop); 2209f464c52Smaya NIR_PASS_V(nir, nir_opt_deref); 22101e04c3fSmrg 22201e04c3fSmrg /* Pick off the single entrypoint that we want */ 22301e04c3fSmrg foreach_list_typed_safe(nir_function, func, node, &nir->functions) { 2247ec681f3Smrg if (!func->is_entrypoint) 22501e04c3fSmrg exec_node_remove(&func->node); 22601e04c3fSmrg } 22701e04c3fSmrg assert(exec_list_length(&nir->functions) == 1); 22801e04c3fSmrg 22901e04c3fSmrg /* Now that we've deleted all but the main function, we can go ahead and 23001e04c3fSmrg * lower the rest of the constant initializers. We do this here so that 23101e04c3fSmrg * nir_remove_dead_variables and split_per_member_structs below see the 23201e04c3fSmrg * corresponding stores. 23301e04c3fSmrg */ 2347ec681f3Smrg NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); 23501e04c3fSmrg 23601e04c3fSmrg /* Split member structs. We do this before lower_io_to_temporaries so that 23701e04c3fSmrg * it doesn't lower system values to temporaries by accident. 
23801e04c3fSmrg */ 23901e04c3fSmrg NIR_PASS_V(nir, nir_split_var_copies); 24001e04c3fSmrg NIR_PASS_V(nir, nir_split_per_member_structs); 24101e04c3fSmrg 24201e04c3fSmrg NIR_PASS_V(nir, nir_remove_dead_variables, 2437ec681f3Smrg nir_var_shader_in | nir_var_shader_out | nir_var_system_value | 2447ec681f3Smrg nir_var_shader_call_data | nir_var_ray_hit_attrib, 2457ec681f3Smrg NULL); 24601e04c3fSmrg 2477ec681f3Smrg NIR_PASS_V(nir, nir_propagate_invariant, false); 24801e04c3fSmrg NIR_PASS_V(nir, nir_lower_io_to_temporaries, 2497ec681f3Smrg nir_shader_get_entrypoint(nir), true, false); 25001e04c3fSmrg 2519f464c52Smaya NIR_PASS_V(nir, nir_lower_frexp); 2529f464c52Smaya 25301e04c3fSmrg /* Vulkan uses the separate-shader linking model */ 25401e04c3fSmrg nir->info.separate_shader = true; 25501e04c3fSmrg 2567ec681f3Smrg brw_preprocess_nir(compiler, nir, NULL); 25701e04c3fSmrg 25801e04c3fSmrg return nir; 25901e04c3fSmrg} 26001e04c3fSmrg 2617ec681f3SmrgVkResult 2627ec681f3Smrganv_pipeline_init(struct anv_pipeline *pipeline, 2637ec681f3Smrg struct anv_device *device, 2647ec681f3Smrg enum anv_pipeline_type type, 2657ec681f3Smrg VkPipelineCreateFlags flags, 2667ec681f3Smrg const VkAllocationCallbacks *pAllocator) 2677ec681f3Smrg{ 2687ec681f3Smrg VkResult result; 2697ec681f3Smrg 2707ec681f3Smrg memset(pipeline, 0, sizeof(*pipeline)); 2717ec681f3Smrg 2727ec681f3Smrg vk_object_base_init(&device->vk, &pipeline->base, 2737ec681f3Smrg VK_OBJECT_TYPE_PIPELINE); 2747ec681f3Smrg pipeline->device = device; 2757ec681f3Smrg 2767ec681f3Smrg /* It's the job of the child class to provide actual backing storage for 2777ec681f3Smrg * the batch by setting batch.start, batch.next, and batch.end. 2787ec681f3Smrg */ 2797ec681f3Smrg pipeline->batch.alloc = pAllocator ? 
pAllocator : &device->vk.alloc; 2807ec681f3Smrg pipeline->batch.relocs = &pipeline->batch_relocs; 2817ec681f3Smrg pipeline->batch.status = VK_SUCCESS; 2827ec681f3Smrg 2837ec681f3Smrg result = anv_reloc_list_init(&pipeline->batch_relocs, 2847ec681f3Smrg pipeline->batch.alloc); 2857ec681f3Smrg if (result != VK_SUCCESS) 2867ec681f3Smrg return result; 2877ec681f3Smrg 2887ec681f3Smrg pipeline->mem_ctx = ralloc_context(NULL); 2897ec681f3Smrg 2907ec681f3Smrg pipeline->type = type; 2917ec681f3Smrg pipeline->flags = flags; 2927ec681f3Smrg 2937ec681f3Smrg util_dynarray_init(&pipeline->executables, pipeline->mem_ctx); 2947ec681f3Smrg 2957ec681f3Smrg return VK_SUCCESS; 2967ec681f3Smrg} 2977ec681f3Smrg 2987ec681f3Smrgvoid 2997ec681f3Smrganv_pipeline_finish(struct anv_pipeline *pipeline, 3007ec681f3Smrg struct anv_device *device, 3017ec681f3Smrg const VkAllocationCallbacks *pAllocator) 3027ec681f3Smrg{ 3037ec681f3Smrg anv_reloc_list_finish(&pipeline->batch_relocs, 3047ec681f3Smrg pAllocator ? pAllocator : &device->vk.alloc); 3057ec681f3Smrg ralloc_free(pipeline->mem_ctx); 3067ec681f3Smrg vk_object_base_finish(&pipeline->base); 3077ec681f3Smrg} 3087ec681f3Smrg 30901e04c3fSmrgvoid anv_DestroyPipeline( 31001e04c3fSmrg VkDevice _device, 31101e04c3fSmrg VkPipeline _pipeline, 31201e04c3fSmrg const VkAllocationCallbacks* pAllocator) 31301e04c3fSmrg{ 31401e04c3fSmrg ANV_FROM_HANDLE(anv_device, device, _device); 31501e04c3fSmrg ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 31601e04c3fSmrg 31701e04c3fSmrg if (!pipeline) 31801e04c3fSmrg return; 31901e04c3fSmrg 3207ec681f3Smrg switch (pipeline->type) { 3217ec681f3Smrg case ANV_PIPELINE_GRAPHICS: { 3227ec681f3Smrg struct anv_graphics_pipeline *gfx_pipeline = 3237ec681f3Smrg anv_pipeline_to_graphics(pipeline); 32401e04c3fSmrg 3257ec681f3Smrg if (gfx_pipeline->blend_state.map) 3267ec681f3Smrg anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state); 3277ec681f3Smrg if (gfx_pipeline->cps_state.map) 3287ec681f3Smrg 
anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state); 3297ec681f3Smrg 3307ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) { 3317ec681f3Smrg if (gfx_pipeline->shaders[s]) 3327ec681f3Smrg anv_shader_bin_unref(device, gfx_pipeline->shaders[s]); 3337ec681f3Smrg } 3347ec681f3Smrg break; 3357ec681f3Smrg } 3367ec681f3Smrg 3377ec681f3Smrg case ANV_PIPELINE_COMPUTE: { 3387ec681f3Smrg struct anv_compute_pipeline *compute_pipeline = 3397ec681f3Smrg anv_pipeline_to_compute(pipeline); 3407ec681f3Smrg 3417ec681f3Smrg if (compute_pipeline->cs) 3427ec681f3Smrg anv_shader_bin_unref(device, compute_pipeline->cs); 3437ec681f3Smrg 3447ec681f3Smrg break; 3457ec681f3Smrg } 3467ec681f3Smrg 3477ec681f3Smrg case ANV_PIPELINE_RAY_TRACING: { 3487ec681f3Smrg struct anv_ray_tracing_pipeline *rt_pipeline = 3497ec681f3Smrg anv_pipeline_to_ray_tracing(pipeline); 3507ec681f3Smrg 3517ec681f3Smrg util_dynarray_foreach(&rt_pipeline->shaders, 3527ec681f3Smrg struct anv_shader_bin *, shader) { 3537ec681f3Smrg anv_shader_bin_unref(device, *shader); 3547ec681f3Smrg } 3557ec681f3Smrg break; 3567ec681f3Smrg } 3577ec681f3Smrg 3587ec681f3Smrg default: 3597ec681f3Smrg unreachable("invalid pipeline type"); 36001e04c3fSmrg } 36101e04c3fSmrg 3627ec681f3Smrg anv_pipeline_finish(pipeline, device, pAllocator); 3637ec681f3Smrg vk_free2(&device->vk.alloc, pAllocator, pipeline); 36401e04c3fSmrg} 36501e04c3fSmrg 3667ec681f3Smrgstatic const uint32_t vk_to_intel_primitive_type[] = { 36701e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, 36801e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, 36901e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, 37001e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, 37101e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, 37201e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, 37301e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] 
= _3DPRIM_LINELIST_ADJ, 37401e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, 37501e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, 37601e04c3fSmrg [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, 37701e04c3fSmrg}; 37801e04c3fSmrg 37901e04c3fSmrgstatic void 3807ec681f3Smrgpopulate_sampler_prog_key(const struct intel_device_info *devinfo, 38101e04c3fSmrg struct brw_sampler_prog_key_data *key) 38201e04c3fSmrg{ 38301e04c3fSmrg /* Almost all multisampled textures are compressed. The only time when we 38401e04c3fSmrg * don't compress a multisampled texture is for 16x MSAA with a surface 38501e04c3fSmrg * width greater than 8k which is a bit of an edge case. Since the sampler 38601e04c3fSmrg * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe 38701e04c3fSmrg * to tell the compiler to always assume compression. 38801e04c3fSmrg */ 38901e04c3fSmrg key->compressed_multisample_layout_mask = ~0; 39001e04c3fSmrg 39101e04c3fSmrg /* SkyLake added support for 16x MSAA. With this came a new message for 39201e04c3fSmrg * reading from a 16x MSAA surface with compression. The new message was 39301e04c3fSmrg * needed because now the MCS data is 64 bits instead of 32 or lower as is 39401e04c3fSmrg * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which 39501e04c3fSmrg * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x 39601e04c3fSmrg * so we can just use it unconditionally. This may not be quite as 39701e04c3fSmrg * efficient but it saves us from recompiling. 39801e04c3fSmrg */ 3997ec681f3Smrg if (devinfo->ver >= 9) 40001e04c3fSmrg key->msaa_16 = ~0; 40101e04c3fSmrg 40201e04c3fSmrg /* XXX: Handle texture swizzle on HSW- */ 40301e04c3fSmrg for (int i = 0; i < MAX_SAMPLERS; i++) { 40401e04c3fSmrg /* Assume color sampler, no swizzling. 
(Works for BDW+) */ 40501e04c3fSmrg key->swizzles[i] = SWIZZLE_XYZW; 40601e04c3fSmrg } 40701e04c3fSmrg} 40801e04c3fSmrg 40901e04c3fSmrgstatic void 4107ec681f3Smrgpopulate_base_prog_key(const struct intel_device_info *devinfo, 4117ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type, 4127ec681f3Smrg bool robust_buffer_acccess, 4137ec681f3Smrg struct brw_base_prog_key *key) 4147ec681f3Smrg{ 4157ec681f3Smrg key->subgroup_size_type = subgroup_size_type; 4167ec681f3Smrg key->robust_buffer_access = robust_buffer_acccess; 4177ec681f3Smrg 4187ec681f3Smrg populate_sampler_prog_key(devinfo, &key->tex); 4197ec681f3Smrg} 4207ec681f3Smrg 4217ec681f3Smrgstatic void 4227ec681f3Smrgpopulate_vs_prog_key(const struct intel_device_info *devinfo, 4237ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type, 4247ec681f3Smrg bool robust_buffer_acccess, 42501e04c3fSmrg struct brw_vs_prog_key *key) 42601e04c3fSmrg{ 42701e04c3fSmrg memset(key, 0, sizeof(*key)); 42801e04c3fSmrg 4297ec681f3Smrg populate_base_prog_key(devinfo, subgroup_size_type, 4307ec681f3Smrg robust_buffer_acccess, &key->base); 43101e04c3fSmrg 43201e04c3fSmrg /* XXX: Handle vertex input work-arounds */ 43301e04c3fSmrg 43401e04c3fSmrg /* XXX: Handle sampler_prog_key */ 43501e04c3fSmrg} 43601e04c3fSmrg 43701e04c3fSmrgstatic void 4387ec681f3Smrgpopulate_tcs_prog_key(const struct intel_device_info *devinfo, 4397ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type, 4407ec681f3Smrg bool robust_buffer_acccess, 44101e04c3fSmrg unsigned input_vertices, 44201e04c3fSmrg struct brw_tcs_prog_key *key) 44301e04c3fSmrg{ 44401e04c3fSmrg memset(key, 0, sizeof(*key)); 44501e04c3fSmrg 4467ec681f3Smrg populate_base_prog_key(devinfo, subgroup_size_type, 4477ec681f3Smrg robust_buffer_acccess, &key->base); 44801e04c3fSmrg 44901e04c3fSmrg key->input_vertices = input_vertices; 45001e04c3fSmrg} 45101e04c3fSmrg 45201e04c3fSmrgstatic void 4537ec681f3Smrgpopulate_tes_prog_key(const struct intel_device_info *devinfo, 4547ec681f3Smrg enum 
brw_subgroup_size_type subgroup_size_type, 4557ec681f3Smrg bool robust_buffer_acccess, 45601e04c3fSmrg struct brw_tes_prog_key *key) 45701e04c3fSmrg{ 45801e04c3fSmrg memset(key, 0, sizeof(*key)); 45901e04c3fSmrg 4607ec681f3Smrg populate_base_prog_key(devinfo, subgroup_size_type, 4617ec681f3Smrg robust_buffer_acccess, &key->base); 46201e04c3fSmrg} 46301e04c3fSmrg 46401e04c3fSmrgstatic void 4657ec681f3Smrgpopulate_gs_prog_key(const struct intel_device_info *devinfo, 4667ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type, 4677ec681f3Smrg bool robust_buffer_acccess, 46801e04c3fSmrg struct brw_gs_prog_key *key) 46901e04c3fSmrg{ 47001e04c3fSmrg memset(key, 0, sizeof(*key)); 47101e04c3fSmrg 4727ec681f3Smrg populate_base_prog_key(devinfo, subgroup_size_type, 4737ec681f3Smrg robust_buffer_acccess, &key->base); 4747ec681f3Smrg} 4757ec681f3Smrg 4767ec681f3Smrgstatic bool 4777ec681f3Smrgpipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline, 4787ec681f3Smrg const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info) 4797ec681f3Smrg{ 4807ec681f3Smrg if (pipeline->sample_shading_enable) 4817ec681f3Smrg return false; 4827ec681f3Smrg 4837ec681f3Smrg /* Not dynamic & not specified for the pipeline. */ 4847ec681f3Smrg if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info) 4857ec681f3Smrg return false; 4867ec681f3Smrg 4877ec681f3Smrg /* Not dynamic & pipeline has a 1x1 fragment shading rate with no 4887ec681f3Smrg * possibility for element of the pipeline to change the value. 
4897ec681f3Smrg */ 4907ec681f3Smrg if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && 4917ec681f3Smrg fsr_info->fragmentSize.width <= 1 && 4927ec681f3Smrg fsr_info->fragmentSize.height <= 1 && 4937ec681f3Smrg fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR && 4947ec681f3Smrg fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) 4957ec681f3Smrg return false; 4967ec681f3Smrg 4977ec681f3Smrg return true; 49801e04c3fSmrg} 49901e04c3fSmrg 50001e04c3fSmrgstatic void 5017ec681f3Smrgpopulate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, 5027ec681f3Smrg VkPipelineShaderStageCreateFlags flags, 5037ec681f3Smrg bool robust_buffer_acccess, 50401e04c3fSmrg const struct anv_subpass *subpass, 50501e04c3fSmrg const VkPipelineMultisampleStateCreateInfo *ms_info, 5067ec681f3Smrg const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info, 50701e04c3fSmrg struct brw_wm_prog_key *key) 50801e04c3fSmrg{ 5097ec681f3Smrg const struct anv_device *device = pipeline->base.device; 5107ec681f3Smrg const struct intel_device_info *devinfo = &device->info; 5117ec681f3Smrg 51201e04c3fSmrg memset(key, 0, sizeof(*key)); 51301e04c3fSmrg 5147ec681f3Smrg populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base); 51501e04c3fSmrg 51601e04c3fSmrg /* We set this to 0 here and set to the actual value before we call 51701e04c3fSmrg * brw_compile_fs. 
51801e04c3fSmrg */ 51901e04c3fSmrg key->input_slots_valid = 0; 52001e04c3fSmrg 52101e04c3fSmrg /* Vulkan doesn't specify a default */ 52201e04c3fSmrg key->high_quality_derivatives = false; 52301e04c3fSmrg 52401e04c3fSmrg /* XXX Vulkan doesn't appear to specify */ 52501e04c3fSmrg key->clamp_fragment_color = false; 52601e04c3fSmrg 5277ec681f3Smrg key->ignore_sample_mask_out = false; 5287ec681f3Smrg 52901e04c3fSmrg assert(subpass->color_count <= MAX_RTS); 53001e04c3fSmrg for (uint32_t i = 0; i < subpass->color_count; i++) { 53101e04c3fSmrg if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) 53201e04c3fSmrg key->color_outputs_valid |= (1 << i); 53301e04c3fSmrg } 53401e04c3fSmrg 5357ec681f3Smrg key->nr_color_regions = subpass->color_count; 53601e04c3fSmrg 5379f464c52Smaya /* To reduce possible shader recompilations we would need to know if 5389f464c52Smaya * there is a SampleMask output variable to compute if we should emit 5399f464c52Smaya * code to workaround the issue that hardware disables alpha to coverage 5409f464c52Smaya * when there is SampleMask output. 5419f464c52Smaya */ 5429f464c52Smaya key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable; 5439f464c52Smaya 5449f464c52Smaya /* Vulkan doesn't support fixed-function alpha test */ 5459f464c52Smaya key->alpha_test_replicate_alpha = false; 54601e04c3fSmrg 54701e04c3fSmrg if (ms_info) { 54801e04c3fSmrg /* We should probably pull this out of the shader, but it's fairly 54901e04c3fSmrg * harmless to compute it and then let dead-code take care of it. 
55001e04c3fSmrg */ 55101e04c3fSmrg if (ms_info->rasterizationSamples > 1) { 5529f464c52Smaya key->persample_interp = ms_info->sampleShadingEnable && 55301e04c3fSmrg (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1; 55401e04c3fSmrg key->multisample_fbo = true; 55501e04c3fSmrg } 55601e04c3fSmrg 5579f464c52Smaya key->frag_coord_adds_sample_pos = key->persample_interp; 55801e04c3fSmrg } 5597ec681f3Smrg 5607ec681f3Smrg key->coarse_pixel = 5617ec681f3Smrg device->vk.enabled_extensions.KHR_fragment_shading_rate && 5627ec681f3Smrg pipeline_has_coarse_pixel(pipeline, fsr_info); 56301e04c3fSmrg} 56401e04c3fSmrg 56501e04c3fSmrgstatic void 5667ec681f3Smrgpopulate_cs_prog_key(const struct intel_device_info *devinfo, 5677ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type, 5687ec681f3Smrg bool robust_buffer_acccess, 56901e04c3fSmrg struct brw_cs_prog_key *key) 57001e04c3fSmrg{ 57101e04c3fSmrg memset(key, 0, sizeof(*key)); 57201e04c3fSmrg 5737ec681f3Smrg populate_base_prog_key(devinfo, subgroup_size_type, 5747ec681f3Smrg robust_buffer_acccess, &key->base); 5757ec681f3Smrg} 5767ec681f3Smrg 5777ec681f3Smrgstatic void 5787ec681f3Smrgpopulate_bs_prog_key(const struct intel_device_info *devinfo, 5797ec681f3Smrg VkPipelineShaderStageCreateFlags flags, 5807ec681f3Smrg bool robust_buffer_access, 5817ec681f3Smrg struct brw_bs_prog_key *key) 5827ec681f3Smrg{ 5837ec681f3Smrg memset(key, 0, sizeof(*key)); 5847ec681f3Smrg 5857ec681f3Smrg populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base); 58601e04c3fSmrg} 58701e04c3fSmrg 58801e04c3fSmrgstruct anv_pipeline_stage { 58901e04c3fSmrg gl_shader_stage stage; 59001e04c3fSmrg 5917ec681f3Smrg const struct vk_shader_module *module; 59201e04c3fSmrg const char *entrypoint; 59301e04c3fSmrg const VkSpecializationInfo *spec_info; 59401e04c3fSmrg 5959f464c52Smaya unsigned char shader_sha1[20]; 5969f464c52Smaya 59701e04c3fSmrg union brw_any_prog_key key; 59801e04c3fSmrg 59901e04c3fSmrg struct { 60001e04c3fSmrg 
gl_shader_stage stage; 60101e04c3fSmrg unsigned char sha1[20]; 60201e04c3fSmrg } cache_key; 60301e04c3fSmrg 60401e04c3fSmrg nir_shader *nir; 60501e04c3fSmrg 60601e04c3fSmrg struct anv_pipeline_binding surface_to_descriptor[256]; 60701e04c3fSmrg struct anv_pipeline_binding sampler_to_descriptor[256]; 60801e04c3fSmrg struct anv_pipeline_bind_map bind_map; 60901e04c3fSmrg 61001e04c3fSmrg union brw_any_prog_data prog_data; 6119f464c52Smaya 6127ec681f3Smrg uint32_t num_stats; 6137ec681f3Smrg struct brw_compile_stats stats[3]; 6147ec681f3Smrg char *disasm[3]; 6157ec681f3Smrg 6169f464c52Smaya VkPipelineCreationFeedbackEXT feedback; 6177ec681f3Smrg 6187ec681f3Smrg const unsigned *code; 6197ec681f3Smrg 6207ec681f3Smrg struct anv_shader_bin *bin; 62101e04c3fSmrg}; 62201e04c3fSmrg 62301e04c3fSmrgstatic void 6247ec681f3Smrganv_pipeline_hash_shader(const struct vk_shader_module *module, 6259f464c52Smaya const char *entrypoint, 6269f464c52Smaya gl_shader_stage stage, 6279f464c52Smaya const VkSpecializationInfo *spec_info, 6289f464c52Smaya unsigned char *sha1_out) 62901e04c3fSmrg{ 6309f464c52Smaya struct mesa_sha1 ctx; 6319f464c52Smaya _mesa_sha1_init(&ctx); 6329f464c52Smaya 6339f464c52Smaya _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); 6349f464c52Smaya _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); 6359f464c52Smaya _mesa_sha1_update(&ctx, &stage, sizeof(stage)); 6369f464c52Smaya if (spec_info) { 6379f464c52Smaya _mesa_sha1_update(&ctx, spec_info->pMapEntries, 6389f464c52Smaya spec_info->mapEntryCount * 6399f464c52Smaya sizeof(*spec_info->pMapEntries)); 6409f464c52Smaya _mesa_sha1_update(&ctx, spec_info->pData, 6419f464c52Smaya spec_info->dataSize); 64201e04c3fSmrg } 6439f464c52Smaya 6449f464c52Smaya _mesa_sha1_final(&ctx, sha1_out); 64501e04c3fSmrg} 64601e04c3fSmrg 64701e04c3fSmrgstatic void 6487ec681f3Smrganv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline, 64901e04c3fSmrg struct anv_pipeline_layout *layout, 65001e04c3fSmrg struct 
anv_pipeline_stage *stages, 65101e04c3fSmrg unsigned char *sha1_out) 65201e04c3fSmrg{ 65301e04c3fSmrg struct mesa_sha1 ctx; 65401e04c3fSmrg _mesa_sha1_init(&ctx); 65501e04c3fSmrg 65601e04c3fSmrg _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask, 65701e04c3fSmrg sizeof(pipeline->subpass->view_mask)); 65801e04c3fSmrg 65901e04c3fSmrg if (layout) 66001e04c3fSmrg _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 66101e04c3fSmrg 6627ec681f3Smrg const bool rba = pipeline->base.device->robust_buffer_access; 66301e04c3fSmrg _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 66401e04c3fSmrg 6657ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 6669f464c52Smaya if (stages[s].entrypoint) { 6679f464c52Smaya _mesa_sha1_update(&ctx, stages[s].shader_sha1, 6689f464c52Smaya sizeof(stages[s].shader_sha1)); 6699f464c52Smaya _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s)); 6709f464c52Smaya } 67101e04c3fSmrg } 67201e04c3fSmrg 67301e04c3fSmrg _mesa_sha1_final(&ctx, sha1_out); 67401e04c3fSmrg} 67501e04c3fSmrg 67601e04c3fSmrgstatic void 6777ec681f3Smrganv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline, 67801e04c3fSmrg struct anv_pipeline_layout *layout, 67901e04c3fSmrg struct anv_pipeline_stage *stage, 68001e04c3fSmrg unsigned char *sha1_out) 68101e04c3fSmrg{ 68201e04c3fSmrg struct mesa_sha1 ctx; 68301e04c3fSmrg _mesa_sha1_init(&ctx); 68401e04c3fSmrg 68501e04c3fSmrg if (layout) 68601e04c3fSmrg _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 68701e04c3fSmrg 6887ec681f3Smrg const bool rba = pipeline->base.device->robust_buffer_access; 68901e04c3fSmrg _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 69001e04c3fSmrg 6919f464c52Smaya _mesa_sha1_update(&ctx, stage->shader_sha1, 6929f464c52Smaya sizeof(stage->shader_sha1)); 6939f464c52Smaya _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs)); 69401e04c3fSmrg 69501e04c3fSmrg _mesa_sha1_final(&ctx, sha1_out); 69601e04c3fSmrg} 69701e04c3fSmrg 6987ec681f3Smrgstatic void 
6997ec681f3Smrganv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline, 7007ec681f3Smrg struct anv_pipeline_layout *layout, 7017ec681f3Smrg struct anv_pipeline_stage *stage, 7027ec681f3Smrg unsigned char *sha1_out) 7037ec681f3Smrg{ 7047ec681f3Smrg struct mesa_sha1 ctx; 7057ec681f3Smrg _mesa_sha1_init(&ctx); 7067ec681f3Smrg 7077ec681f3Smrg if (layout != NULL) 7087ec681f3Smrg _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 7097ec681f3Smrg 7107ec681f3Smrg const bool rba = pipeline->base.device->robust_buffer_access; 7117ec681f3Smrg _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 7127ec681f3Smrg 7137ec681f3Smrg _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1)); 7147ec681f3Smrg _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs)); 7157ec681f3Smrg 7167ec681f3Smrg _mesa_sha1_final(&ctx, sha1_out); 7177ec681f3Smrg} 7187ec681f3Smrg 7197ec681f3Smrgstatic void 7207ec681f3Smrganv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline, 7217ec681f3Smrg struct anv_pipeline_layout *layout, 7227ec681f3Smrg struct anv_pipeline_stage *intersection, 7237ec681f3Smrg struct anv_pipeline_stage *any_hit, 7247ec681f3Smrg unsigned char *sha1_out) 7257ec681f3Smrg{ 7267ec681f3Smrg struct mesa_sha1 ctx; 7277ec681f3Smrg _mesa_sha1_init(&ctx); 7287ec681f3Smrg 7297ec681f3Smrg if (layout != NULL) 7307ec681f3Smrg _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 7317ec681f3Smrg 7327ec681f3Smrg const bool rba = pipeline->base.device->robust_buffer_access; 7337ec681f3Smrg _mesa_sha1_update(&ctx, &rba, sizeof(rba)); 7347ec681f3Smrg 7357ec681f3Smrg _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1)); 7367ec681f3Smrg _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs)); 7377ec681f3Smrg _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1)); 7387ec681f3Smrg _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs)); 
7397ec681f3Smrg 7407ec681f3Smrg _mesa_sha1_final(&ctx, sha1_out); 7417ec681f3Smrg} 7427ec681f3Smrg 7439f464c52Smayastatic nir_shader * 7449f464c52Smayaanv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, 7459f464c52Smaya struct anv_pipeline_cache *cache, 7469f464c52Smaya void *mem_ctx, 7479f464c52Smaya struct anv_pipeline_stage *stage) 7489f464c52Smaya{ 7499f464c52Smaya const struct brw_compiler *compiler = 7507ec681f3Smrg pipeline->device->physical->compiler; 7519f464c52Smaya const nir_shader_compiler_options *nir_options = 7529f464c52Smaya compiler->glsl_compiler_options[stage->stage].NirOptions; 7539f464c52Smaya nir_shader *nir; 7549f464c52Smaya 7559f464c52Smaya nir = anv_device_search_for_nir(pipeline->device, cache, 7569f464c52Smaya nir_options, 7579f464c52Smaya stage->shader_sha1, 7589f464c52Smaya mem_ctx); 7599f464c52Smaya if (nir) { 7609f464c52Smaya assert(nir->info.stage == stage->stage); 7619f464c52Smaya return nir; 7629f464c52Smaya } 7639f464c52Smaya 7649f464c52Smaya nir = anv_shader_compile_to_nir(pipeline->device, 7659f464c52Smaya mem_ctx, 7669f464c52Smaya stage->module, 7679f464c52Smaya stage->entrypoint, 7689f464c52Smaya stage->stage, 7699f464c52Smaya stage->spec_info); 7709f464c52Smaya if (nir) { 7719f464c52Smaya anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1); 7729f464c52Smaya return nir; 7739f464c52Smaya } 7749f464c52Smaya 7759f464c52Smaya return NULL; 7769f464c52Smaya} 7779f464c52Smaya 7787ec681f3Smrgstatic void 7797ec681f3Smrgshared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) 7807ec681f3Smrg{ 7817ec681f3Smrg assert(glsl_type_is_vector_or_scalar(type)); 7827ec681f3Smrg 7837ec681f3Smrg uint32_t comp_size = glsl_type_is_boolean(type) 7847ec681f3Smrg ? 4 : glsl_get_bit_size(type) / 8; 7857ec681f3Smrg unsigned length = glsl_get_vector_elements(type); 7867ec681f3Smrg *size = comp_size * length, 7877ec681f3Smrg *align = comp_size * (length == 3 ? 
                         4 : length);
}

/* Run the ANV-specific NIR lowering pipeline on stage->nir in place:
 * fragment-shader fixups, Y'CbCr and multiview lowering, storage-image and
 * explicit-I/O lowering, pipeline-layout application, push-constant layout
 * computation, and shared-memory lowering/zero-init for workgroup stages.
 * Pass order is significant: copy-prop + constant folding must run before
 * anv_nir_lower_ubo_loads so constant offsets are visible to it.
 */
static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Check if sample shading is enabled in the shader and toggle
       * it on for the pipeline independent if sampleShadingEnable is set.
       */
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
      if (nir->info.fs.uses_sample_shading)
         anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;

      NIR_PASS_V(nir, nir_lower_wpos_center,
                 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = true,
                 });
   }

   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS_V(nir, anv_nir_lower_multiview,
                 anv_pipeline_to_graphics(pipeline));
   }

   /* Re-gather info: the passes above may have changed what the shader
    * reads and writes.
    */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);

   NIR_PASS_V(nir,
              nir_lower_explicit_io, nir_var_mem_global,
              nir_address_format_64bit_global);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   anv_nir_apply_pipeline_layout(pdevice,
                                 pipeline->device->robust_buffer_access,
                                 layout, nir, &stage->bind_map);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              anv_nir_ubo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              anv_nir_ssbo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);

   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
   NIR_PASS_V(nir, nir_lower_non_uniform_access,
              &(nir_lower_non_uniform_access_options) {
                  .types = nir_lower_non_uniform_texture_access |
                           nir_lower_non_uniform_image_access,
                  .callback = NULL,
              });

   anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
                               nir, prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      /* Shaders with an explicit shared-memory layout already have offsets;
       * otherwise assign them using shared_type_info above.
       */
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the size
          * used by the shader to chunk_size -- which does simplify the logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                    shared_size, chunk_size);
      }
   }

   stage->nir = nir;
}

/* Link the VS against the next enabled stage, if any (deletes unused
 * outputs/inputs across the interface).
 */
static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
9167ec681f3Smrg anv_subpass_view_count(pipeline->subpass) : 1; 9177ec681f3Smrg 91801e04c3fSmrg brw_compute_vue_map(compiler->devinfo, 91901e04c3fSmrg &vs_stage->prog_data.vs.base.vue_map, 92001e04c3fSmrg vs_stage->nir->info.outputs_written, 9217ec681f3Smrg vs_stage->nir->info.separate_shader, 9227ec681f3Smrg pos_slots); 9237ec681f3Smrg 9247ec681f3Smrg vs_stage->num_stats = 1; 9257ec681f3Smrg 9267ec681f3Smrg struct brw_compile_vs_params params = { 9277ec681f3Smrg .nir = vs_stage->nir, 9287ec681f3Smrg .key = &vs_stage->key.vs, 9297ec681f3Smrg .prog_data = &vs_stage->prog_data.vs, 9307ec681f3Smrg .stats = vs_stage->stats, 9317ec681f3Smrg .log_data = pipeline->base.device, 9327ec681f3Smrg }; 93301e04c3fSmrg 9347ec681f3Smrg vs_stage->code = brw_compile_vs(compiler, mem_ctx, ¶ms); 93501e04c3fSmrg} 93601e04c3fSmrg 93701e04c3fSmrgstatic void 93801e04c3fSmrgmerge_tess_info(struct shader_info *tes_info, 93901e04c3fSmrg const struct shader_info *tcs_info) 94001e04c3fSmrg{ 94101e04c3fSmrg /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: 94201e04c3fSmrg * 94301e04c3fSmrg * "PointMode. Controls generation of points rather than triangles 94401e04c3fSmrg * or lines. This functionality defaults to disabled, and is 94501e04c3fSmrg * enabled if either shader stage includes the execution mode. 94601e04c3fSmrg * 94701e04c3fSmrg * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, 94801e04c3fSmrg * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, 94901e04c3fSmrg * and OutputVertices, it says: 95001e04c3fSmrg * 95101e04c3fSmrg * "One mode must be set in at least one of the tessellation 95201e04c3fSmrg * shader stages." 95301e04c3fSmrg * 95401e04c3fSmrg * So, the fields can be set in either the TCS or TES, but they must 95501e04c3fSmrg * agree if set in both. Our backend looks at TES, so bitwise-or in 95601e04c3fSmrg * the values from the TCS. 
95701e04c3fSmrg */ 95801e04c3fSmrg assert(tcs_info->tess.tcs_vertices_out == 0 || 95901e04c3fSmrg tes_info->tess.tcs_vertices_out == 0 || 96001e04c3fSmrg tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out); 96101e04c3fSmrg tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out; 96201e04c3fSmrg 96301e04c3fSmrg assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 96401e04c3fSmrg tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 96501e04c3fSmrg tcs_info->tess.spacing == tes_info->tess.spacing); 96601e04c3fSmrg tes_info->tess.spacing |= tcs_info->tess.spacing; 96701e04c3fSmrg 96801e04c3fSmrg assert(tcs_info->tess.primitive_mode == 0 || 96901e04c3fSmrg tes_info->tess.primitive_mode == 0 || 97001e04c3fSmrg tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode); 97101e04c3fSmrg tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode; 97201e04c3fSmrg tes_info->tess.ccw |= tcs_info->tess.ccw; 97301e04c3fSmrg tes_info->tess.point_mode |= tcs_info->tess.point_mode; 97401e04c3fSmrg} 97501e04c3fSmrg 97601e04c3fSmrgstatic void 97701e04c3fSmrganv_pipeline_link_tcs(const struct brw_compiler *compiler, 97801e04c3fSmrg struct anv_pipeline_stage *tcs_stage, 97901e04c3fSmrg struct anv_pipeline_stage *tes_stage) 98001e04c3fSmrg{ 98101e04c3fSmrg assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL); 98201e04c3fSmrg 9837ec681f3Smrg brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir); 98401e04c3fSmrg 98501e04c3fSmrg nir_lower_patch_vertices(tes_stage->nir, 98601e04c3fSmrg tcs_stage->nir->info.tess.tcs_vertices_out, 98701e04c3fSmrg NULL); 98801e04c3fSmrg 98901e04c3fSmrg /* Copy TCS info into the TES info */ 99001e04c3fSmrg merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info); 99101e04c3fSmrg 99201e04c3fSmrg /* Whacking the key after cache lookup is a bit sketchy, but all of 99301e04c3fSmrg * this comes from the SPIR-V, which is part of the hash used for the 99401e04c3fSmrg * pipeline cache. 
So it should be safe. 99501e04c3fSmrg */ 99601e04c3fSmrg tcs_stage->key.tcs.tes_primitive_mode = 99701e04c3fSmrg tes_stage->nir->info.tess.primitive_mode; 99801e04c3fSmrg tcs_stage->key.tcs.quads_workaround = 9997ec681f3Smrg compiler->devinfo->ver < 9 && 100001e04c3fSmrg tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ && 100101e04c3fSmrg tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL; 100201e04c3fSmrg} 100301e04c3fSmrg 10047ec681f3Smrgstatic void 100501e04c3fSmrganv_pipeline_compile_tcs(const struct brw_compiler *compiler, 100601e04c3fSmrg void *mem_ctx, 10079f464c52Smaya struct anv_device *device, 100801e04c3fSmrg struct anv_pipeline_stage *tcs_stage, 100901e04c3fSmrg struct anv_pipeline_stage *prev_stage) 101001e04c3fSmrg{ 101101e04c3fSmrg tcs_stage->key.tcs.outputs_written = 101201e04c3fSmrg tcs_stage->nir->info.outputs_written; 101301e04c3fSmrg tcs_stage->key.tcs.patch_outputs_written = 101401e04c3fSmrg tcs_stage->nir->info.patch_outputs_written; 101501e04c3fSmrg 10167ec681f3Smrg tcs_stage->num_stats = 1; 10177ec681f3Smrg tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx, 10187ec681f3Smrg &tcs_stage->key.tcs, 10197ec681f3Smrg &tcs_stage->prog_data.tcs, 10207ec681f3Smrg tcs_stage->nir, -1, 10217ec681f3Smrg tcs_stage->stats, NULL); 102201e04c3fSmrg} 102301e04c3fSmrg 102401e04c3fSmrgstatic void 102501e04c3fSmrganv_pipeline_link_tes(const struct brw_compiler *compiler, 102601e04c3fSmrg struct anv_pipeline_stage *tes_stage, 102701e04c3fSmrg struct anv_pipeline_stage *next_stage) 102801e04c3fSmrg{ 102901e04c3fSmrg if (next_stage) 10307ec681f3Smrg brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir); 103101e04c3fSmrg} 103201e04c3fSmrg 10337ec681f3Smrgstatic void 103401e04c3fSmrganv_pipeline_compile_tes(const struct brw_compiler *compiler, 103501e04c3fSmrg void *mem_ctx, 10369f464c52Smaya struct anv_device *device, 103701e04c3fSmrg struct anv_pipeline_stage *tes_stage, 103801e04c3fSmrg struct anv_pipeline_stage *tcs_stage) 
103901e04c3fSmrg{ 104001e04c3fSmrg tes_stage->key.tes.inputs_read = 104101e04c3fSmrg tcs_stage->nir->info.outputs_written; 104201e04c3fSmrg tes_stage->key.tes.patch_inputs_read = 104301e04c3fSmrg tcs_stage->nir->info.patch_outputs_written; 104401e04c3fSmrg 10457ec681f3Smrg tes_stage->num_stats = 1; 10467ec681f3Smrg tes_stage->code = brw_compile_tes(compiler, device, mem_ctx, 10477ec681f3Smrg &tes_stage->key.tes, 10487ec681f3Smrg &tcs_stage->prog_data.tcs.base.vue_map, 10497ec681f3Smrg &tes_stage->prog_data.tes, 10507ec681f3Smrg tes_stage->nir, -1, 10517ec681f3Smrg tes_stage->stats, NULL); 105201e04c3fSmrg} 105301e04c3fSmrg 105401e04c3fSmrgstatic void 105501e04c3fSmrganv_pipeline_link_gs(const struct brw_compiler *compiler, 105601e04c3fSmrg struct anv_pipeline_stage *gs_stage, 105701e04c3fSmrg struct anv_pipeline_stage *next_stage) 105801e04c3fSmrg{ 105901e04c3fSmrg if (next_stage) 10607ec681f3Smrg brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir); 106101e04c3fSmrg} 106201e04c3fSmrg 10637ec681f3Smrgstatic void 106401e04c3fSmrganv_pipeline_compile_gs(const struct brw_compiler *compiler, 106501e04c3fSmrg void *mem_ctx, 10669f464c52Smaya struct anv_device *device, 106701e04c3fSmrg struct anv_pipeline_stage *gs_stage, 106801e04c3fSmrg struct anv_pipeline_stage *prev_stage) 106901e04c3fSmrg{ 107001e04c3fSmrg brw_compute_vue_map(compiler->devinfo, 107101e04c3fSmrg &gs_stage->prog_data.gs.base.vue_map, 107201e04c3fSmrg gs_stage->nir->info.outputs_written, 10737ec681f3Smrg gs_stage->nir->info.separate_shader, 1); 10747ec681f3Smrg 10757ec681f3Smrg gs_stage->num_stats = 1; 10767ec681f3Smrg gs_stage->code = brw_compile_gs(compiler, device, mem_ctx, 10777ec681f3Smrg &gs_stage->key.gs, 10787ec681f3Smrg &gs_stage->prog_data.gs, 10797ec681f3Smrg gs_stage->nir, -1, 10807ec681f3Smrg gs_stage->stats, NULL); 108101e04c3fSmrg} 108201e04c3fSmrg 108301e04c3fSmrgstatic void 108401e04c3fSmrganv_pipeline_link_fs(const struct brw_compiler *compiler, 108501e04c3fSmrg struct 
anv_pipeline_stage *stage) 108601e04c3fSmrg{ 10877ec681f3Smrg unsigned num_rt_bindings; 10887ec681f3Smrg struct anv_pipeline_binding rt_bindings[MAX_RTS]; 10897ec681f3Smrg if (stage->key.wm.nr_color_regions > 0) { 10907ec681f3Smrg assert(stage->key.wm.nr_color_regions <= MAX_RTS); 10917ec681f3Smrg for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) { 10927ec681f3Smrg if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) { 10937ec681f3Smrg rt_bindings[rt] = (struct anv_pipeline_binding) { 10947ec681f3Smrg .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 10957ec681f3Smrg .index = rt, 10967ec681f3Smrg }; 10977ec681f3Smrg } else { 10987ec681f3Smrg /* Setup a null render target */ 10997ec681f3Smrg rt_bindings[rt] = (struct anv_pipeline_binding) { 11007ec681f3Smrg .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 11017ec681f3Smrg .index = UINT32_MAX, 11027ec681f3Smrg }; 11037ec681f3Smrg } 11049f464c52Smaya } 11057ec681f3Smrg num_rt_bindings = stage->key.wm.nr_color_regions; 11067ec681f3Smrg } else { 11077ec681f3Smrg /* Setup a null render target */ 11087ec681f3Smrg rt_bindings[0] = (struct anv_pipeline_binding) { 11097ec681f3Smrg .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, 11107ec681f3Smrg .index = UINT32_MAX, 11117ec681f3Smrg }; 11127ec681f3Smrg num_rt_bindings = 1; 111301e04c3fSmrg } 111401e04c3fSmrg 11157ec681f3Smrg assert(num_rt_bindings <= MAX_RTS); 11167ec681f3Smrg assert(stage->bind_map.surface_count == 0); 11177ec681f3Smrg typed_memcpy(stage->bind_map.surface_to_descriptor, 11187ec681f3Smrg rt_bindings, num_rt_bindings); 11197ec681f3Smrg stage->bind_map.surface_count += num_rt_bindings; 112001e04c3fSmrg 11217ec681f3Smrg /* Now that we've set up the color attachments, we can go through and 11227ec681f3Smrg * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the 11237ec681f3Smrg * hopes that dead code can clean them up in this and any earlier shader 11247ec681f3Smrg * stages. 
11257ec681f3Smrg */ 11267ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir); 112701e04c3fSmrg bool deleted_output = false; 11287ec681f3Smrg nir_foreach_shader_out_variable_safe(var, stage->nir) { 11297ec681f3Smrg /* TODO: We don't delete depth/stencil writes. We probably could if the 11307ec681f3Smrg * subpass doesn't have a depth/stencil attachment. 11317ec681f3Smrg */ 113201e04c3fSmrg if (var->data.location < FRAG_RESULT_DATA0) 113301e04c3fSmrg continue; 113401e04c3fSmrg 113501e04c3fSmrg const unsigned rt = var->data.location - FRAG_RESULT_DATA0; 11369f464c52Smaya 11377ec681f3Smrg /* If this is the RT at location 0 and we have alpha to coverage 11387ec681f3Smrg * enabled we still need that write because it will affect the coverage 11397ec681f3Smrg * mask even if it's never written to a color target. 11407ec681f3Smrg */ 11417ec681f3Smrg if (rt == 0 && stage->key.wm.alpha_to_coverage) 11427ec681f3Smrg continue; 11437ec681f3Smrg 11447ec681f3Smrg const unsigned array_len = 11457ec681f3Smrg glsl_type_is_array(var->type) ? 
glsl_get_length(var->type) : 1; 11467ec681f3Smrg assert(rt + array_len <= MAX_RTS); 11477ec681f3Smrg 11487ec681f3Smrg if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & 11497ec681f3Smrg BITFIELD_RANGE(rt, array_len))) { 115001e04c3fSmrg deleted_output = true; 11519f464c52Smaya var->data.mode = nir_var_function_temp; 115201e04c3fSmrg exec_node_remove(&var->node); 115301e04c3fSmrg exec_list_push_tail(&impl->locals, &var->node); 115401e04c3fSmrg } 115501e04c3fSmrg } 115601e04c3fSmrg 115701e04c3fSmrg if (deleted_output) 115801e04c3fSmrg nir_fixup_deref_modes(stage->nir); 115901e04c3fSmrg 11607ec681f3Smrg /* Initially the valid outputs value is based off the renderpass color 11617ec681f3Smrg * attachments (see populate_wm_prog_key()), now that we've potentially 11627ec681f3Smrg * deleted variables that map to unused attachments, we need to update the 11637ec681f3Smrg * valid outputs for the backend compiler based on what output variables 11647ec681f3Smrg * are actually used. */ 11657ec681f3Smrg stage->key.wm.color_outputs_valid = 0; 11667ec681f3Smrg nir_foreach_shader_out_variable_safe(var, stage->nir) { 11677ec681f3Smrg if (var->data.location < FRAG_RESULT_DATA0) 11687ec681f3Smrg continue; 11697ec681f3Smrg 11707ec681f3Smrg const unsigned rt = var->data.location - FRAG_RESULT_DATA0; 11717ec681f3Smrg const unsigned array_len = 11727ec681f3Smrg glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; 11737ec681f3Smrg assert(rt + array_len <= MAX_RTS); 11747ec681f3Smrg 11757ec681f3Smrg stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len); 117601e04c3fSmrg } 117701e04c3fSmrg 11787ec681f3Smrg /* We stored the number of subpass color attachments in nr_color_regions 11797ec681f3Smrg * when calculating the key for caching. Now that we've computed the bind 11807ec681f3Smrg * map, we can reduce this to the actual max before we go into the back-end 11817ec681f3Smrg * compiler. 
118201e04c3fSmrg */ 11837ec681f3Smrg stage->key.wm.nr_color_regions = 11847ec681f3Smrg util_last_bit(stage->key.wm.color_outputs_valid); 118501e04c3fSmrg} 118601e04c3fSmrg 11877ec681f3Smrgstatic void 118801e04c3fSmrganv_pipeline_compile_fs(const struct brw_compiler *compiler, 118901e04c3fSmrg void *mem_ctx, 11909f464c52Smaya struct anv_device *device, 119101e04c3fSmrg struct anv_pipeline_stage *fs_stage, 119201e04c3fSmrg struct anv_pipeline_stage *prev_stage) 119301e04c3fSmrg{ 119401e04c3fSmrg /* TODO: we could set this to 0 based on the information in nir_shader, but 119501e04c3fSmrg * we need this before we call spirv_to_nir. 119601e04c3fSmrg */ 119701e04c3fSmrg assert(prev_stage); 119801e04c3fSmrg fs_stage->key.wm.input_slots_valid = 119901e04c3fSmrg prev_stage->prog_data.vue.vue_map.slots_valid; 120001e04c3fSmrg 12017ec681f3Smrg struct brw_compile_fs_params params = { 12027ec681f3Smrg .nir = fs_stage->nir, 12037ec681f3Smrg .key = &fs_stage->key.wm, 12047ec681f3Smrg .prog_data = &fs_stage->prog_data.wm, 12057ec681f3Smrg 12067ec681f3Smrg .allow_spilling = true, 12077ec681f3Smrg .stats = fs_stage->stats, 12087ec681f3Smrg .log_data = device, 12097ec681f3Smrg }; 12107ec681f3Smrg 12117ec681f3Smrg fs_stage->code = brw_compile_fs(compiler, mem_ctx, ¶ms); 12127ec681f3Smrg 12137ec681f3Smrg fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 + 12147ec681f3Smrg (uint32_t)fs_stage->prog_data.wm.dispatch_16 + 12157ec681f3Smrg (uint32_t)fs_stage->prog_data.wm.dispatch_32; 121601e04c3fSmrg 12177ec681f3Smrg if (fs_stage->key.wm.color_outputs_valid == 0 && 121801e04c3fSmrg !fs_stage->prog_data.wm.has_side_effects && 12197ec681f3Smrg !fs_stage->prog_data.wm.uses_omask && 12207ec681f3Smrg !fs_stage->key.wm.alpha_to_coverage && 122101e04c3fSmrg !fs_stage->prog_data.wm.uses_kill && 122201e04c3fSmrg fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF && 122301e04c3fSmrg !fs_stage->prog_data.wm.computed_stencil) { 122401e04c3fSmrg /* This fragment shader has 
 no outputs and no side effects.  Go ahead
       * and return the code pointer so we don't accidentally think the
       * compile failed but zero out prog_data which will set program_size to
       * zero and disable the stage.
       */
      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
   }
}

/* Record one executable (one SIMD variant of one stage) for the
 * VK_KHR_pipeline_executable_properties queries.  When the pipeline was
 * created with CAPTURE_INTERNAL_REPRESENTATIONS, also capture the NIR as
 * text and a disassembly of the generated binary (prefixed with a dump of
 * the push-constant ranges).
 */
static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      /* NOTE(review): open_memstream() can return NULL on allocation
       * failure and the result is not checked before fprintf() — confirm
       * whether this path may run under memory pressure.
       */
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            /* Lengths/starts are in 32-byte units; report bytes. */
            fprintf(stream, "  RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks.  It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->info,
                        stage->code, code_offset, stream);

      /* fclose() finalizes stream_data/stream_size for the memstream. */
      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

/* Add one executable per SIMD variant contained in the shader binary.
 * Fragment shaders may carry up to three variants (SIMD8/16/32); all other
 * stages have exactly one.
 */
static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

      /* The stats array is ordered SIMD8, SIMD16, SIMD32; consume one entry
       * per variant that was actually compiled.
       */
      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }
}

/* Translate the VkPipelineShaderStageCreateInfo flags and the optional
 * required-subgroup-size struct into the backend's subgroup size policy.
 */
static enum brw_subgroup_size_type
anv_subgroup_size_type(gl_shader_stage stage,
                       VkPipelineShaderStageCreateFlags flags,
                       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
{
   enum brw_subgroup_size_type subgroup_size_type;

   if (rss_info) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* These enum values are expressly chosen to be equal to the subgroup
       * size that they require.
       */
      assert(rss_info->requiredSubgroupSize == 8 ||
             rss_info->requiredSubgroupSize == 16 ||
             rss_info->requiredSubgroupSize == 32);
      /* Direct assignment relies on the enum/value equality asserted above. */
      subgroup_size_type = rss_info->requiredSubgroupSize;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
      subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
      assert(stage == MESA_SHADER_COMPUTE);
      /* If the client expressly requests full subgroups and they don't
       * specify a subgroup size neither allow varying subgroups, we need to
       * pick one.  So we specify the API value of 32.  Performance will
       * likely be terrible in this case but there's nothing we can do about
       * that.  The client should have chosen a size.
       */
      subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
   } else {
      subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
   }

   return subgroup_size_type;
}

/* Recover pipeline-wide state for a graphics pipeline whose shaders all
 * came from the pipeline cache.
 */
static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
{
   /* TODO: Cache this pipeline-wide information. */

   if (anv_pipeline_is_primitive(pipeline)) {
      /* Primitive replication depends on information from all the shaders.
       * Recover this bit from the fact that we have more than one position slot
       * in the vertex shader when using it.
14057ec681f3Smrg */ 14067ec681f3Smrg assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); 14077ec681f3Smrg int pos_slots = 0; 14087ec681f3Smrg const struct brw_vue_prog_data *vue_prog_data = 14097ec681f3Smrg (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data; 14107ec681f3Smrg const struct brw_vue_map *vue_map = &vue_prog_data->vue_map; 14117ec681f3Smrg for (int i = 0; i < vue_map->num_slots; i++) { 14127ec681f3Smrg if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS) 14137ec681f3Smrg pos_slots++; 14147ec681f3Smrg } 14157ec681f3Smrg pipeline->use_primitive_replication = pos_slots > 1; 14167ec681f3Smrg } 14177ec681f3Smrg} 14187ec681f3Smrg 14197ec681f3Smrgstatic VkResult 14207ec681f3Smrganv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, 14217ec681f3Smrg struct anv_pipeline_cache *cache, 14227ec681f3Smrg const VkGraphicsPipelineCreateInfo *info) 14237ec681f3Smrg{ 14247ec681f3Smrg VkPipelineCreationFeedbackEXT pipeline_feedback = { 14257ec681f3Smrg .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 14267ec681f3Smrg }; 14277ec681f3Smrg int64_t pipeline_start = os_time_get_nano(); 14287ec681f3Smrg 14297ec681f3Smrg const struct brw_compiler *compiler = pipeline->base.device->physical->compiler; 14307ec681f3Smrg struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {}; 14317ec681f3Smrg 14327ec681f3Smrg /* Information on which states are considered dynamic. 
*/ 14337ec681f3Smrg const VkPipelineDynamicStateCreateInfo *dyn_info = 14347ec681f3Smrg info->pDynamicState; 14357ec681f3Smrg uint32_t dynamic_states = 0; 14367ec681f3Smrg if (dyn_info) { 14377ec681f3Smrg for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++) 14387ec681f3Smrg dynamic_states |= 14397ec681f3Smrg anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]); 14407ec681f3Smrg } 14417ec681f3Smrg 14427ec681f3Smrg VkResult result; 14437ec681f3Smrg for (uint32_t i = 0; i < info->stageCount; i++) { 14447ec681f3Smrg const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i]; 14457ec681f3Smrg gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); 14467ec681f3Smrg 14477ec681f3Smrg int64_t stage_start = os_time_get_nano(); 14487ec681f3Smrg 14497ec681f3Smrg stages[stage].stage = stage; 14507ec681f3Smrg stages[stage].module = vk_shader_module_from_handle(sinfo->module); 14517ec681f3Smrg stages[stage].entrypoint = sinfo->pName; 145201e04c3fSmrg stages[stage].spec_info = sinfo->pSpecializationInfo; 14539f464c52Smaya anv_pipeline_hash_shader(stages[stage].module, 14549f464c52Smaya stages[stage].entrypoint, 14559f464c52Smaya stage, 14569f464c52Smaya stages[stage].spec_info, 14579f464c52Smaya stages[stage].shader_sha1); 145801e04c3fSmrg 14597ec681f3Smrg enum brw_subgroup_size_type subgroup_size_type = 14607ec681f3Smrg anv_subgroup_size_type(stage, sinfo->flags, NULL); 14617ec681f3Smrg 14627ec681f3Smrg const struct intel_device_info *devinfo = &pipeline->base.device->info; 146301e04c3fSmrg switch (stage) { 146401e04c3fSmrg case MESA_SHADER_VERTEX: 14657ec681f3Smrg populate_vs_prog_key(devinfo, subgroup_size_type, 14667ec681f3Smrg pipeline->base.device->robust_buffer_access, 14677ec681f3Smrg &stages[stage].key.vs); 146801e04c3fSmrg break; 146901e04c3fSmrg case MESA_SHADER_TESS_CTRL: 14707ec681f3Smrg populate_tcs_prog_key(devinfo, subgroup_size_type, 14717ec681f3Smrg pipeline->base.device->robust_buffer_access, 147201e04c3fSmrg 
info->pTessellationState->patchControlPoints, 147301e04c3fSmrg &stages[stage].key.tcs); 147401e04c3fSmrg break; 147501e04c3fSmrg case MESA_SHADER_TESS_EVAL: 14767ec681f3Smrg populate_tes_prog_key(devinfo, subgroup_size_type, 14777ec681f3Smrg pipeline->base.device->robust_buffer_access, 14787ec681f3Smrg &stages[stage].key.tes); 147901e04c3fSmrg break; 148001e04c3fSmrg case MESA_SHADER_GEOMETRY: 14817ec681f3Smrg populate_gs_prog_key(devinfo, subgroup_size_type, 14827ec681f3Smrg pipeline->base.device->robust_buffer_access, 14837ec681f3Smrg &stages[stage].key.gs); 148401e04c3fSmrg break; 14857ec681f3Smrg case MESA_SHADER_FRAGMENT: { 14867ec681f3Smrg const bool raster_enabled = 14877ec681f3Smrg !info->pRasterizationState->rasterizerDiscardEnable || 14887ec681f3Smrg dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 14897ec681f3Smrg populate_wm_prog_key(pipeline, subgroup_size_type, 14907ec681f3Smrg pipeline->base.device->robust_buffer_access, 14917ec681f3Smrg pipeline->subpass, 14927ec681f3Smrg raster_enabled ? 
info->pMultisampleState : NULL, 14937ec681f3Smrg vk_find_struct_const(info->pNext, 14947ec681f3Smrg PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR), 149501e04c3fSmrg &stages[stage].key.wm); 149601e04c3fSmrg break; 14977ec681f3Smrg } 149801e04c3fSmrg default: 149901e04c3fSmrg unreachable("Invalid graphics shader stage"); 150001e04c3fSmrg } 15019f464c52Smaya 15029f464c52Smaya stages[stage].feedback.duration += os_time_get_nano() - stage_start; 15039f464c52Smaya stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; 150401e04c3fSmrg } 150501e04c3fSmrg 150601e04c3fSmrg assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); 150701e04c3fSmrg 150801e04c3fSmrg ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 150901e04c3fSmrg 151001e04c3fSmrg unsigned char sha1[20]; 151101e04c3fSmrg anv_pipeline_hash_graphics(pipeline, layout, stages, sha1); 151201e04c3fSmrg 15137ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 151401e04c3fSmrg if (!stages[s].entrypoint) 151501e04c3fSmrg continue; 151601e04c3fSmrg 151701e04c3fSmrg stages[s].cache_key.stage = s; 151801e04c3fSmrg memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1)); 15197ec681f3Smrg } 152001e04c3fSmrg 15217ec681f3Smrg const bool skip_cache_lookup = 15227ec681f3Smrg (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 15239f464c52Smaya 15247ec681f3Smrg if (!skip_cache_lookup) { 15257ec681f3Smrg unsigned found = 0; 15267ec681f3Smrg unsigned cache_hits = 0; 15277ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 15287ec681f3Smrg if (!stages[s].entrypoint) 15297ec681f3Smrg continue; 153001e04c3fSmrg 15317ec681f3Smrg int64_t stage_start = os_time_get_nano(); 15327ec681f3Smrg 15337ec681f3Smrg bool cache_hit; 15347ec681f3Smrg struct anv_shader_bin *bin = 15357ec681f3Smrg anv_device_search_for_kernel(pipeline->base.device, cache, 15367ec681f3Smrg &stages[s].cache_key, 15377ec681f3Smrg 
sizeof(stages[s].cache_key), &cache_hit); 15387ec681f3Smrg if (bin) { 15397ec681f3Smrg found++; 15407ec681f3Smrg pipeline->shaders[s] = bin; 15417ec681f3Smrg } 15427ec681f3Smrg 15437ec681f3Smrg if (cache_hit) { 15447ec681f3Smrg cache_hits++; 15457ec681f3Smrg stages[s].feedback.flags |= 15467ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 15477ec681f3Smrg } 15487ec681f3Smrg stages[s].feedback.duration += os_time_get_nano() - stage_start; 15499f464c52Smaya } 15507ec681f3Smrg 15517ec681f3Smrg if (found == __builtin_popcount(pipeline->active_stages)) { 15527ec681f3Smrg if (cache_hits == found) { 15537ec681f3Smrg pipeline_feedback.flags |= 15547ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 15557ec681f3Smrg } 15567ec681f3Smrg /* We found all our shaders in the cache. We're done. */ 15577ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 15587ec681f3Smrg if (!stages[s].entrypoint) 15597ec681f3Smrg continue; 15607ec681f3Smrg 15617ec681f3Smrg anv_pipeline_add_executables(&pipeline->base, &stages[s], 15627ec681f3Smrg pipeline->shaders[s]); 15637ec681f3Smrg } 15647ec681f3Smrg anv_pipeline_init_from_cached_graphics(pipeline); 15657ec681f3Smrg goto done; 15667ec681f3Smrg } else if (found > 0) { 15677ec681f3Smrg /* We found some but not all of our shaders. This shouldn't happen 15687ec681f3Smrg * most of the time but it can if we have a partially populated 15697ec681f3Smrg * pipeline cache. 15707ec681f3Smrg */ 15717ec681f3Smrg assert(found < __builtin_popcount(pipeline->active_stages)); 15727ec681f3Smrg 15737ec681f3Smrg vk_perf(VK_LOG_OBJS(&cache->base), 15747ec681f3Smrg "Found a partial pipeline in the cache. This is " 15757ec681f3Smrg "most likely caused by an incomplete pipeline cache " 15767ec681f3Smrg "import or export"); 15777ec681f3Smrg 15787ec681f3Smrg /* We're going to have to recompile anyway, so just throw away our 15797ec681f3Smrg * references to the shaders in the cache. 
We'll get them out of the 15807ec681f3Smrg * cache again as part of the compilation process. 15817ec681f3Smrg */ 15827ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 15837ec681f3Smrg stages[s].feedback.flags = 0; 15847ec681f3Smrg if (pipeline->shaders[s]) { 15857ec681f3Smrg anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]); 15867ec681f3Smrg pipeline->shaders[s] = NULL; 15877ec681f3Smrg } 158801e04c3fSmrg } 158901e04c3fSmrg } 159001e04c3fSmrg } 159101e04c3fSmrg 15927ec681f3Smrg if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) 15937ec681f3Smrg return VK_PIPELINE_COMPILE_REQUIRED_EXT; 15947ec681f3Smrg 159501e04c3fSmrg void *pipeline_ctx = ralloc_context(NULL); 159601e04c3fSmrg 15977ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 159801e04c3fSmrg if (!stages[s].entrypoint) 159901e04c3fSmrg continue; 160001e04c3fSmrg 16019f464c52Smaya int64_t stage_start = os_time_get_nano(); 16029f464c52Smaya 160301e04c3fSmrg assert(stages[s].stage == s); 160401e04c3fSmrg assert(pipeline->shaders[s] == NULL); 160501e04c3fSmrg 160601e04c3fSmrg stages[s].bind_map = (struct anv_pipeline_bind_map) { 160701e04c3fSmrg .surface_to_descriptor = stages[s].surface_to_descriptor, 160801e04c3fSmrg .sampler_to_descriptor = stages[s].sampler_to_descriptor 160901e04c3fSmrg }; 161001e04c3fSmrg 16117ec681f3Smrg stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, 16129f464c52Smaya pipeline_ctx, 16139f464c52Smaya &stages[s]); 161401e04c3fSmrg if (stages[s].nir == NULL) { 16157ec681f3Smrg result = vk_error(pipeline, VK_ERROR_UNKNOWN); 161601e04c3fSmrg goto fail; 161701e04c3fSmrg } 16189f464c52Smaya 16197ec681f3Smrg /* This is rather ugly. 16207ec681f3Smrg * 16217ec681f3Smrg * Any variable annotated as interpolated by sample essentially disables 16227ec681f3Smrg * coarse pixel shading. 
Unfortunately the CTS tests exercising this set 16237ec681f3Smrg * the varying value in the previous stage using a constant. Our NIR 16247ec681f3Smrg * infrastructure is clever enough to lookup variables across stages and 16257ec681f3Smrg * constant fold, removing the variable. So in order to comply with CTS 16267ec681f3Smrg * we have check variables here. 16277ec681f3Smrg */ 16287ec681f3Smrg if (s == MESA_SHADER_FRAGMENT) { 16297ec681f3Smrg nir_foreach_variable_in_list(var, &stages[s].nir->variables) { 16307ec681f3Smrg if (var->data.sample) { 16317ec681f3Smrg stages[s].key.wm.coarse_pixel = false; 16327ec681f3Smrg break; 16337ec681f3Smrg } 16347ec681f3Smrg } 16357ec681f3Smrg } 16367ec681f3Smrg 16379f464c52Smaya stages[s].feedback.duration += os_time_get_nano() - stage_start; 163801e04c3fSmrg } 163901e04c3fSmrg 164001e04c3fSmrg /* Walk backwards to link */ 164101e04c3fSmrg struct anv_pipeline_stage *next_stage = NULL; 16427ec681f3Smrg for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) { 164301e04c3fSmrg if (!stages[s].entrypoint) 164401e04c3fSmrg continue; 164501e04c3fSmrg 164601e04c3fSmrg switch (s) { 164701e04c3fSmrg case MESA_SHADER_VERTEX: 164801e04c3fSmrg anv_pipeline_link_vs(compiler, &stages[s], next_stage); 164901e04c3fSmrg break; 165001e04c3fSmrg case MESA_SHADER_TESS_CTRL: 165101e04c3fSmrg anv_pipeline_link_tcs(compiler, &stages[s], next_stage); 165201e04c3fSmrg break; 165301e04c3fSmrg case MESA_SHADER_TESS_EVAL: 165401e04c3fSmrg anv_pipeline_link_tes(compiler, &stages[s], next_stage); 165501e04c3fSmrg break; 165601e04c3fSmrg case MESA_SHADER_GEOMETRY: 165701e04c3fSmrg anv_pipeline_link_gs(compiler, &stages[s], next_stage); 165801e04c3fSmrg break; 165901e04c3fSmrg case MESA_SHADER_FRAGMENT: 166001e04c3fSmrg anv_pipeline_link_fs(compiler, &stages[s]); 166101e04c3fSmrg break; 166201e04c3fSmrg default: 166301e04c3fSmrg unreachable("Invalid graphics shader stage"); 166401e04c3fSmrg } 166501e04c3fSmrg 166601e04c3fSmrg next_stage = &stages[s]; 
166701e04c3fSmrg } 166801e04c3fSmrg 16697ec681f3Smrg if (pipeline->base.device->info.ver >= 12 && 16707ec681f3Smrg pipeline->subpass->view_mask != 0) { 16717ec681f3Smrg /* For some pipelines HW Primitive Replication can be used instead of 16727ec681f3Smrg * instancing to implement Multiview. This depend on how viewIndex is 16737ec681f3Smrg * used in all the active shaders, so this check can't be done per 16747ec681f3Smrg * individual shaders. 16757ec681f3Smrg */ 16767ec681f3Smrg nir_shader *shaders[MESA_SHADER_STAGES] = {}; 16777ec681f3Smrg for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) 16787ec681f3Smrg shaders[s] = stages[s].nir; 16797ec681f3Smrg 16807ec681f3Smrg pipeline->use_primitive_replication = 16817ec681f3Smrg anv_check_for_primitive_replication(shaders, pipeline); 16827ec681f3Smrg } else { 16837ec681f3Smrg pipeline->use_primitive_replication = false; 16847ec681f3Smrg } 16857ec681f3Smrg 168601e04c3fSmrg struct anv_pipeline_stage *prev_stage = NULL; 16877ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 16887ec681f3Smrg if (!stages[s].entrypoint) 16897ec681f3Smrg continue; 16907ec681f3Smrg 16917ec681f3Smrg int64_t stage_start = os_time_get_nano(); 16927ec681f3Smrg 16937ec681f3Smrg void *stage_ctx = ralloc_context(NULL); 16947ec681f3Smrg 16957ec681f3Smrg anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout); 16967ec681f3Smrg 16977ec681f3Smrg if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) { 16987ec681f3Smrg prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read & 16997ec681f3Smrg ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); 17007ec681f3Smrg stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written & 17017ec681f3Smrg ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); 17027ec681f3Smrg prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read; 17037ec681f3Smrg 
stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written; 17047ec681f3Smrg } 17057ec681f3Smrg 17067ec681f3Smrg ralloc_free(stage_ctx); 17077ec681f3Smrg 17087ec681f3Smrg stages[s].feedback.duration += os_time_get_nano() - stage_start; 17097ec681f3Smrg 17107ec681f3Smrg prev_stage = &stages[s]; 17117ec681f3Smrg } 17127ec681f3Smrg 17137ec681f3Smrg prev_stage = NULL; 171401e04c3fSmrg for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { 171501e04c3fSmrg if (!stages[s].entrypoint) 171601e04c3fSmrg continue; 171701e04c3fSmrg 17189f464c52Smaya int64_t stage_start = os_time_get_nano(); 17199f464c52Smaya 172001e04c3fSmrg void *stage_ctx = ralloc_context(NULL); 172101e04c3fSmrg 17229f464c52Smaya nir_xfb_info *xfb_info = NULL; 17239f464c52Smaya if (s == MESA_SHADER_VERTEX || 17249f464c52Smaya s == MESA_SHADER_TESS_EVAL || 17259f464c52Smaya s == MESA_SHADER_GEOMETRY) 17269f464c52Smaya xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx); 17279f464c52Smaya 172801e04c3fSmrg switch (s) { 172901e04c3fSmrg case MESA_SHADER_VERTEX: 17307ec681f3Smrg anv_pipeline_compile_vs(compiler, stage_ctx, pipeline, 17317ec681f3Smrg &stages[s]); 173201e04c3fSmrg break; 173301e04c3fSmrg case MESA_SHADER_TESS_CTRL: 17347ec681f3Smrg anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device, 17357ec681f3Smrg &stages[s], prev_stage); 173601e04c3fSmrg break; 173701e04c3fSmrg case MESA_SHADER_TESS_EVAL: 17387ec681f3Smrg anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device, 17397ec681f3Smrg &stages[s], prev_stage); 174001e04c3fSmrg break; 174101e04c3fSmrg case MESA_SHADER_GEOMETRY: 17427ec681f3Smrg anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device, 17437ec681f3Smrg &stages[s], prev_stage); 174401e04c3fSmrg break; 174501e04c3fSmrg case MESA_SHADER_FRAGMENT: 17467ec681f3Smrg anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device, 17477ec681f3Smrg &stages[s], prev_stage); 174801e04c3fSmrg break; 174901e04c3fSmrg default: 
175001e04c3fSmrg unreachable("Invalid graphics shader stage"); 175101e04c3fSmrg } 17527ec681f3Smrg if (stages[s].code == NULL) { 175301e04c3fSmrg ralloc_free(stage_ctx); 17547ec681f3Smrg result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); 175501e04c3fSmrg goto fail; 175601e04c3fSmrg } 175701e04c3fSmrg 17587ec681f3Smrg anv_nir_validate_push_layout(&stages[s].prog_data.base, 17597ec681f3Smrg &stages[s].bind_map); 17607ec681f3Smrg 176101e04c3fSmrg struct anv_shader_bin *bin = 17627ec681f3Smrg anv_device_upload_kernel(pipeline->base.device, cache, s, 176301e04c3fSmrg &stages[s].cache_key, 176401e04c3fSmrg sizeof(stages[s].cache_key), 17657ec681f3Smrg stages[s].code, 17667ec681f3Smrg stages[s].prog_data.base.program_size, 176701e04c3fSmrg &stages[s].prog_data.base, 176801e04c3fSmrg brw_prog_data_size(s), 17697ec681f3Smrg stages[s].stats, stages[s].num_stats, 17709f464c52Smaya xfb_info, &stages[s].bind_map); 177101e04c3fSmrg if (!bin) { 177201e04c3fSmrg ralloc_free(stage_ctx); 17737ec681f3Smrg result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 177401e04c3fSmrg goto fail; 177501e04c3fSmrg } 177601e04c3fSmrg 17777ec681f3Smrg anv_pipeline_add_executables(&pipeline->base, &stages[s], bin); 17787ec681f3Smrg 177901e04c3fSmrg pipeline->shaders[s] = bin; 178001e04c3fSmrg ralloc_free(stage_ctx); 178101e04c3fSmrg 17829f464c52Smaya stages[s].feedback.duration += os_time_get_nano() - stage_start; 17839f464c52Smaya 178401e04c3fSmrg prev_stage = &stages[s]; 178501e04c3fSmrg } 178601e04c3fSmrg 178701e04c3fSmrg ralloc_free(pipeline_ctx); 178801e04c3fSmrg 178901e04c3fSmrgdone: 179001e04c3fSmrg 179101e04c3fSmrg if (pipeline->shaders[MESA_SHADER_FRAGMENT] && 179201e04c3fSmrg pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) { 179301e04c3fSmrg /* This can happen if we decided to implicitly disable the fragment 179401e04c3fSmrg * shader. See anv_pipeline_compile_fs(). 
179501e04c3fSmrg */ 17967ec681f3Smrg anv_shader_bin_unref(pipeline->base.device, 179701e04c3fSmrg pipeline->shaders[MESA_SHADER_FRAGMENT]); 179801e04c3fSmrg pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL; 179901e04c3fSmrg pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT; 180001e04c3fSmrg } 180101e04c3fSmrg 18029f464c52Smaya pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 18039f464c52Smaya 18049f464c52Smaya const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = 18059f464c52Smaya vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 18069f464c52Smaya if (create_feedback) { 18079f464c52Smaya *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 18089f464c52Smaya 18099f464c52Smaya assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount); 18109f464c52Smaya for (uint32_t i = 0; i < info->stageCount; i++) { 18119f464c52Smaya gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); 18129f464c52Smaya create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 18139f464c52Smaya } 18149f464c52Smaya } 18159f464c52Smaya 181601e04c3fSmrg return VK_SUCCESS; 181701e04c3fSmrg 181801e04c3fSmrgfail: 181901e04c3fSmrg ralloc_free(pipeline_ctx); 182001e04c3fSmrg 18217ec681f3Smrg for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) { 182201e04c3fSmrg if (pipeline->shaders[s]) 18237ec681f3Smrg anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]); 182401e04c3fSmrg } 182501e04c3fSmrg 182601e04c3fSmrg return result; 182701e04c3fSmrg} 182801e04c3fSmrg 182901e04c3fSmrgVkResult 18307ec681f3Smrganv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, 183101e04c3fSmrg struct anv_pipeline_cache *cache, 183201e04c3fSmrg const VkComputePipelineCreateInfo *info, 18337ec681f3Smrg const struct vk_shader_module *module, 183401e04c3fSmrg const char *entrypoint, 183501e04c3fSmrg const VkSpecializationInfo *spec_info) 183601e04c3fSmrg{ 18379f464c52Smaya 
VkPipelineCreationFeedbackEXT pipeline_feedback = { 18389f464c52Smaya .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 18399f464c52Smaya }; 18409f464c52Smaya int64_t pipeline_start = os_time_get_nano(); 18419f464c52Smaya 18427ec681f3Smrg const struct brw_compiler *compiler = pipeline->base.device->physical->compiler; 184301e04c3fSmrg 184401e04c3fSmrg struct anv_pipeline_stage stage = { 184501e04c3fSmrg .stage = MESA_SHADER_COMPUTE, 184601e04c3fSmrg .module = module, 184701e04c3fSmrg .entrypoint = entrypoint, 184801e04c3fSmrg .spec_info = spec_info, 184901e04c3fSmrg .cache_key = { 185001e04c3fSmrg .stage = MESA_SHADER_COMPUTE, 18519f464c52Smaya }, 18529f464c52Smaya .feedback = { 18539f464c52Smaya .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 18549f464c52Smaya }, 185501e04c3fSmrg }; 18569f464c52Smaya anv_pipeline_hash_shader(stage.module, 18579f464c52Smaya stage.entrypoint, 18589f464c52Smaya MESA_SHADER_COMPUTE, 18599f464c52Smaya stage.spec_info, 18609f464c52Smaya stage.shader_sha1); 186101e04c3fSmrg 186201e04c3fSmrg struct anv_shader_bin *bin = NULL; 186301e04c3fSmrg 18647ec681f3Smrg const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info = 18657ec681f3Smrg vk_find_struct_const(info->stage.pNext, 18667ec681f3Smrg PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT); 18677ec681f3Smrg 18687ec681f3Smrg const enum brw_subgroup_size_type subgroup_size_type = 18697ec681f3Smrg anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info); 18707ec681f3Smrg 18717ec681f3Smrg populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type, 18727ec681f3Smrg pipeline->base.device->robust_buffer_access, 18737ec681f3Smrg &stage.key.cs); 187401e04c3fSmrg 187501e04c3fSmrg ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 187601e04c3fSmrg 18777ec681f3Smrg const bool skip_cache_lookup = 18787ec681f3Smrg (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 18797ec681f3Smrg 188001e04c3fSmrg 
anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1); 188101e04c3fSmrg 18827ec681f3Smrg bool cache_hit = false; 18837ec681f3Smrg if (!skip_cache_lookup) { 18847ec681f3Smrg bin = anv_device_search_for_kernel(pipeline->base.device, cache, 18857ec681f3Smrg &stage.cache_key, 18867ec681f3Smrg sizeof(stage.cache_key), 18877ec681f3Smrg &cache_hit); 18887ec681f3Smrg } 18897ec681f3Smrg 18907ec681f3Smrg if (bin == NULL && 18917ec681f3Smrg (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)) 18927ec681f3Smrg return VK_PIPELINE_COMPILE_REQUIRED_EXT; 18937ec681f3Smrg 18947ec681f3Smrg void *mem_ctx = ralloc_context(NULL); 189501e04c3fSmrg if (bin == NULL) { 18969f464c52Smaya int64_t stage_start = os_time_get_nano(); 18979f464c52Smaya 189801e04c3fSmrg stage.bind_map = (struct anv_pipeline_bind_map) { 189901e04c3fSmrg .surface_to_descriptor = stage.surface_to_descriptor, 190001e04c3fSmrg .sampler_to_descriptor = stage.sampler_to_descriptor 190101e04c3fSmrg }; 190201e04c3fSmrg 19039f464c52Smaya /* Set up a binding for the gl_NumWorkGroups */ 19049f464c52Smaya stage.bind_map.surface_count = 1; 19059f464c52Smaya stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) { 19069f464c52Smaya .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS, 19079f464c52Smaya }; 19089f464c52Smaya 19097ec681f3Smrg stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage); 191001e04c3fSmrg if (stage.nir == NULL) { 191101e04c3fSmrg ralloc_free(mem_ctx); 19127ec681f3Smrg return vk_error(pipeline, VK_ERROR_UNKNOWN); 191301e04c3fSmrg } 191401e04c3fSmrg 19157ec681f3Smrg NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id); 191601e04c3fSmrg 19177ec681f3Smrg anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout); 191801e04c3fSmrg 19197ec681f3Smrg NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics); 19207ec681f3Smrg 19217ec681f3Smrg stage.num_stats = 1; 19227ec681f3Smrg 19237ec681f3Smrg struct brw_compile_cs_params params = { 
19247ec681f3Smrg .nir = stage.nir, 19257ec681f3Smrg .key = &stage.key.cs, 19267ec681f3Smrg .prog_data = &stage.prog_data.cs, 19277ec681f3Smrg .stats = stage.stats, 19287ec681f3Smrg .log_data = pipeline->base.device, 19297ec681f3Smrg }; 19307ec681f3Smrg 19317ec681f3Smrg stage.code = brw_compile_cs(compiler, mem_ctx, ¶ms); 19327ec681f3Smrg if (stage.code == NULL) { 193301e04c3fSmrg ralloc_free(mem_ctx); 19347ec681f3Smrg return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 19357ec681f3Smrg } 19367ec681f3Smrg 19377ec681f3Smrg anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map); 19387ec681f3Smrg 19397ec681f3Smrg if (!stage.prog_data.cs.uses_num_work_groups) { 19407ec681f3Smrg assert(stage.bind_map.surface_to_descriptor[0].set == 19417ec681f3Smrg ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS); 19427ec681f3Smrg stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL; 194301e04c3fSmrg } 194401e04c3fSmrg 194501e04c3fSmrg const unsigned code_size = stage.prog_data.base.program_size; 19467ec681f3Smrg bin = anv_device_upload_kernel(pipeline->base.device, cache, 19477ec681f3Smrg MESA_SHADER_COMPUTE, 194801e04c3fSmrg &stage.cache_key, sizeof(stage.cache_key), 19497ec681f3Smrg stage.code, code_size, 195001e04c3fSmrg &stage.prog_data.base, 195101e04c3fSmrg sizeof(stage.prog_data.cs), 19527ec681f3Smrg stage.stats, stage.num_stats, 19539f464c52Smaya NULL, &stage.bind_map); 195401e04c3fSmrg if (!bin) { 195501e04c3fSmrg ralloc_free(mem_ctx); 19567ec681f3Smrg return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 195701e04c3fSmrg } 195801e04c3fSmrg 19599f464c52Smaya stage.feedback.duration = os_time_get_nano() - stage_start; 19609f464c52Smaya } 19619f464c52Smaya 19627ec681f3Smrg anv_pipeline_add_executables(&pipeline->base, &stage, bin); 19637ec681f3Smrg 19647ec681f3Smrg ralloc_free(mem_ctx); 19657ec681f3Smrg 19669f464c52Smaya if (cache_hit) { 19679f464c52Smaya stage.feedback.flags |= 19689f464c52Smaya 
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 19699f464c52Smaya pipeline_feedback.flags |= 19709f464c52Smaya VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 19719f464c52Smaya } 19729f464c52Smaya pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 19739f464c52Smaya 19749f464c52Smaya const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = 19759f464c52Smaya vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 19769f464c52Smaya if (create_feedback) { 19779f464c52Smaya *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 19789f464c52Smaya 19799f464c52Smaya assert(create_feedback->pipelineStageCreationFeedbackCount == 1); 19809f464c52Smaya create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback; 198101e04c3fSmrg } 198201e04c3fSmrg 19837ec681f3Smrg pipeline->cs = bin; 198401e04c3fSmrg 198501e04c3fSmrg return VK_SUCCESS; 198601e04c3fSmrg} 198701e04c3fSmrg 198801e04c3fSmrg/** 198901e04c3fSmrg * Copy pipeline state not marked as dynamic. 199001e04c3fSmrg * Dynamic state is pipeline state which hasn't been provided at pipeline 199101e04c3fSmrg * creation time, but is dynamically provided afterwards using various 199201e04c3fSmrg * vkCmdSet* functions. 199301e04c3fSmrg * 199401e04c3fSmrg * The set of state considered "non_dynamic" is determined by the pieces of 199501e04c3fSmrg * state that have their corresponding VkDynamicState enums omitted from 199601e04c3fSmrg * VkPipelineDynamicStateCreateInfo::pDynamicStates. 199701e04c3fSmrg * 199801e04c3fSmrg * @param[out] pipeline Destination non_dynamic state. 199901e04c3fSmrg * @param[in] pCreateInfo Source of non_dynamic state to be copied. 
200001e04c3fSmrg */ 200101e04c3fSmrgstatic void 20027ec681f3Smrgcopy_non_dynamic_state(struct anv_graphics_pipeline *pipeline, 200301e04c3fSmrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 200401e04c3fSmrg{ 200501e04c3fSmrg anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; 200601e04c3fSmrg struct anv_subpass *subpass = pipeline->subpass; 200701e04c3fSmrg 200801e04c3fSmrg pipeline->dynamic_state = default_dynamic_state; 200901e04c3fSmrg 20107ec681f3Smrg states &= ~pipeline->dynamic_states; 201101e04c3fSmrg 201201e04c3fSmrg struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; 201301e04c3fSmrg 20147ec681f3Smrg bool raster_discard = 20157ec681f3Smrg pCreateInfo->pRasterizationState->rasterizerDiscardEnable && 20167ec681f3Smrg !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE); 20177ec681f3Smrg 201801e04c3fSmrg /* Section 9.2 of the Vulkan 1.0.15 spec says: 201901e04c3fSmrg * 202001e04c3fSmrg * pViewportState is [...] NULL if the pipeline 202101e04c3fSmrg * has rasterization disabled. 
202201e04c3fSmrg */ 20237ec681f3Smrg if (!raster_discard) { 202401e04c3fSmrg assert(pCreateInfo->pViewportState); 202501e04c3fSmrg 202601e04c3fSmrg dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; 20277ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { 202801e04c3fSmrg typed_memcpy(dynamic->viewport.viewports, 202901e04c3fSmrg pCreateInfo->pViewportState->pViewports, 203001e04c3fSmrg pCreateInfo->pViewportState->viewportCount); 203101e04c3fSmrg } 203201e04c3fSmrg 203301e04c3fSmrg dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; 20347ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) { 203501e04c3fSmrg typed_memcpy(dynamic->scissor.scissors, 203601e04c3fSmrg pCreateInfo->pViewportState->pScissors, 203701e04c3fSmrg pCreateInfo->pViewportState->scissorCount); 203801e04c3fSmrg } 203901e04c3fSmrg } 204001e04c3fSmrg 20417ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) { 204201e04c3fSmrg assert(pCreateInfo->pRasterizationState); 204301e04c3fSmrg dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; 204401e04c3fSmrg } 204501e04c3fSmrg 20467ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) { 204701e04c3fSmrg assert(pCreateInfo->pRasterizationState); 204801e04c3fSmrg dynamic->depth_bias.bias = 204901e04c3fSmrg pCreateInfo->pRasterizationState->depthBiasConstantFactor; 205001e04c3fSmrg dynamic->depth_bias.clamp = 205101e04c3fSmrg pCreateInfo->pRasterizationState->depthBiasClamp; 205201e04c3fSmrg dynamic->depth_bias.slope = 205301e04c3fSmrg pCreateInfo->pRasterizationState->depthBiasSlopeFactor; 205401e04c3fSmrg } 205501e04c3fSmrg 20567ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) { 20577ec681f3Smrg assert(pCreateInfo->pRasterizationState); 20587ec681f3Smrg dynamic->cull_mode = 20597ec681f3Smrg pCreateInfo->pRasterizationState->cullMode; 20607ec681f3Smrg } 20617ec681f3Smrg 20627ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) { 20637ec681f3Smrg 
assert(pCreateInfo->pRasterizationState); 20647ec681f3Smrg dynamic->front_face = 20657ec681f3Smrg pCreateInfo->pRasterizationState->frontFace; 20667ec681f3Smrg } 20677ec681f3Smrg 20687ec681f3Smrg if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) && 20697ec681f3Smrg (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { 20707ec681f3Smrg assert(pCreateInfo->pInputAssemblyState); 20717ec681f3Smrg dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology; 20727ec681f3Smrg } 20737ec681f3Smrg 20747ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { 20757ec681f3Smrg assert(pCreateInfo->pRasterizationState); 20767ec681f3Smrg dynamic->raster_discard = 20777ec681f3Smrg pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 20787ec681f3Smrg } 20797ec681f3Smrg 20807ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) { 20817ec681f3Smrg assert(pCreateInfo->pRasterizationState); 20827ec681f3Smrg dynamic->depth_bias_enable = 20837ec681f3Smrg pCreateInfo->pRasterizationState->depthBiasEnable; 20847ec681f3Smrg } 20857ec681f3Smrg 20867ec681f3Smrg if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) && 20877ec681f3Smrg (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { 20887ec681f3Smrg assert(pCreateInfo->pInputAssemblyState); 20897ec681f3Smrg dynamic->primitive_restart_enable = 20907ec681f3Smrg pCreateInfo->pInputAssemblyState->primitiveRestartEnable; 20917ec681f3Smrg } 20927ec681f3Smrg 209301e04c3fSmrg /* Section 9.2 of the Vulkan 1.0.15 spec says: 209401e04c3fSmrg * 209501e04c3fSmrg * pColorBlendState is [...] NULL if the pipeline has rasterization 209601e04c3fSmrg * disabled or if the subpass of the render pass the pipeline is 209701e04c3fSmrg * created against does not use any color attachments. 
209801e04c3fSmrg */ 209901e04c3fSmrg bool uses_color_att = false; 210001e04c3fSmrg for (unsigned i = 0; i < subpass->color_count; ++i) { 210101e04c3fSmrg if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) { 210201e04c3fSmrg uses_color_att = true; 210301e04c3fSmrg break; 210401e04c3fSmrg } 210501e04c3fSmrg } 210601e04c3fSmrg 21077ec681f3Smrg if (uses_color_att && !raster_discard) { 210801e04c3fSmrg assert(pCreateInfo->pColorBlendState); 210901e04c3fSmrg 21107ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) 211101e04c3fSmrg typed_memcpy(dynamic->blend_constants, 211201e04c3fSmrg pCreateInfo->pColorBlendState->blendConstants, 4); 211301e04c3fSmrg } 211401e04c3fSmrg 211501e04c3fSmrg /* If there is no depthstencil attachment, then don't read 211601e04c3fSmrg * pDepthStencilState. The Vulkan spec states that pDepthStencilState may 211701e04c3fSmrg * be NULL in this case. Even if pDepthStencilState is non-NULL, there is 211801e04c3fSmrg * no need to override the depthstencil defaults in 211901e04c3fSmrg * anv_pipeline::dynamic_state when there is no depthstencil attachment. 212001e04c3fSmrg * 212101e04c3fSmrg * Section 9.2 of the Vulkan 1.0.15 spec says: 212201e04c3fSmrg * 212301e04c3fSmrg * pDepthStencilState is [...] NULL if the pipeline has rasterization 212401e04c3fSmrg * disabled or if the subpass of the render pass the pipeline is created 212501e04c3fSmrg * against does not use a depth/stencil attachment. 
212601e04c3fSmrg */ 21277ec681f3Smrg if (!raster_discard && subpass->depth_stencil_attachment) { 212801e04c3fSmrg assert(pCreateInfo->pDepthStencilState); 212901e04c3fSmrg 21307ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) { 213101e04c3fSmrg dynamic->depth_bounds.min = 213201e04c3fSmrg pCreateInfo->pDepthStencilState->minDepthBounds; 213301e04c3fSmrg dynamic->depth_bounds.max = 213401e04c3fSmrg pCreateInfo->pDepthStencilState->maxDepthBounds; 213501e04c3fSmrg } 213601e04c3fSmrg 21377ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) { 213801e04c3fSmrg dynamic->stencil_compare_mask.front = 213901e04c3fSmrg pCreateInfo->pDepthStencilState->front.compareMask; 214001e04c3fSmrg dynamic->stencil_compare_mask.back = 214101e04c3fSmrg pCreateInfo->pDepthStencilState->back.compareMask; 214201e04c3fSmrg } 214301e04c3fSmrg 21447ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) { 214501e04c3fSmrg dynamic->stencil_write_mask.front = 214601e04c3fSmrg pCreateInfo->pDepthStencilState->front.writeMask; 214701e04c3fSmrg dynamic->stencil_write_mask.back = 214801e04c3fSmrg pCreateInfo->pDepthStencilState->back.writeMask; 214901e04c3fSmrg } 215001e04c3fSmrg 21517ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) { 215201e04c3fSmrg dynamic->stencil_reference.front = 215301e04c3fSmrg pCreateInfo->pDepthStencilState->front.reference; 215401e04c3fSmrg dynamic->stencil_reference.back = 215501e04c3fSmrg pCreateInfo->pDepthStencilState->back.reference; 215601e04c3fSmrg } 21577ec681f3Smrg 21587ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) { 21597ec681f3Smrg dynamic->depth_test_enable = 21607ec681f3Smrg pCreateInfo->pDepthStencilState->depthTestEnable; 21617ec681f3Smrg } 21627ec681f3Smrg 21637ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) { 21647ec681f3Smrg dynamic->depth_write_enable = 21657ec681f3Smrg pCreateInfo->pDepthStencilState->depthWriteEnable; 21667ec681f3Smrg } 21677ec681f3Smrg 
21687ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) { 21697ec681f3Smrg dynamic->depth_compare_op = 21707ec681f3Smrg pCreateInfo->pDepthStencilState->depthCompareOp; 21717ec681f3Smrg } 21727ec681f3Smrg 21737ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { 21747ec681f3Smrg dynamic->depth_bounds_test_enable = 21757ec681f3Smrg pCreateInfo->pDepthStencilState->depthBoundsTestEnable; 21767ec681f3Smrg } 21777ec681f3Smrg 21787ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) { 21797ec681f3Smrg dynamic->stencil_test_enable = 21807ec681f3Smrg pCreateInfo->pDepthStencilState->stencilTestEnable; 21817ec681f3Smrg } 21827ec681f3Smrg 21837ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) { 21847ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *info = 21857ec681f3Smrg pCreateInfo->pDepthStencilState; 21867ec681f3Smrg memcpy(&dynamic->stencil_op.front, &info->front, 21877ec681f3Smrg sizeof(dynamic->stencil_op.front)); 21887ec681f3Smrg memcpy(&dynamic->stencil_op.back, &info->back, 21897ec681f3Smrg sizeof(dynamic->stencil_op.back)); 21907ec681f3Smrg } 21917ec681f3Smrg } 21927ec681f3Smrg 21937ec681f3Smrg const VkPipelineRasterizationLineStateCreateInfoEXT *line_state = 21947ec681f3Smrg vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, 21957ec681f3Smrg PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 21967ec681f3Smrg if (!raster_discard && line_state && line_state->stippledLineEnable) { 21977ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) { 21987ec681f3Smrg dynamic->line_stipple.factor = line_state->lineStippleFactor; 21997ec681f3Smrg dynamic->line_stipple.pattern = line_state->lineStipplePattern; 22007ec681f3Smrg } 22017ec681f3Smrg } 22027ec681f3Smrg 22037ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_info = 22047ec681f3Smrg pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? 
NULL : 22057ec681f3Smrg pCreateInfo->pMultisampleState; 22067ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { 22077ec681f3Smrg const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ? 22087ec681f3Smrg vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL; 22097ec681f3Smrg 22107ec681f3Smrg if (sl_info) { 22117ec681f3Smrg dynamic->sample_locations.samples = 22127ec681f3Smrg sl_info->sampleLocationsInfo.sampleLocationsCount; 22137ec681f3Smrg const VkSampleLocationEXT *positions = 22147ec681f3Smrg sl_info->sampleLocationsInfo.pSampleLocations; 22157ec681f3Smrg for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) { 22167ec681f3Smrg dynamic->sample_locations.locations[i].x = positions[i].x; 22177ec681f3Smrg dynamic->sample_locations.locations[i].y = positions[i].y; 22187ec681f3Smrg } 22197ec681f3Smrg } 22207ec681f3Smrg } 22217ec681f3Smrg /* Ensure we always have valid values for sample_locations. */ 22227ec681f3Smrg if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations && 22237ec681f3Smrg dynamic->sample_locations.samples == 0) { 22247ec681f3Smrg dynamic->sample_locations.samples = 22257ec681f3Smrg ms_info ? 
ms_info->rasterizationSamples : 1; 22267ec681f3Smrg const struct intel_sample_position *positions = 22277ec681f3Smrg intel_get_sample_positions(dynamic->sample_locations.samples); 22287ec681f3Smrg for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) { 22297ec681f3Smrg dynamic->sample_locations.locations[i].x = positions[i].x; 22307ec681f3Smrg dynamic->sample_locations.locations[i].y = positions[i].y; 22317ec681f3Smrg } 22327ec681f3Smrg } 22337ec681f3Smrg 22347ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) { 22357ec681f3Smrg if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && 22367ec681f3Smrg uses_color_att) { 22377ec681f3Smrg assert(pCreateInfo->pColorBlendState); 22387ec681f3Smrg const VkPipelineColorWriteCreateInfoEXT *color_write_info = 22397ec681f3Smrg vk_find_struct_const(pCreateInfo->pColorBlendState->pNext, 22407ec681f3Smrg PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); 22417ec681f3Smrg 22427ec681f3Smrg if (color_write_info) { 22437ec681f3Smrg dynamic->color_writes = 0; 22447ec681f3Smrg for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) { 22457ec681f3Smrg dynamic->color_writes |= 22467ec681f3Smrg color_write_info->pColorWriteEnables[i] ? (1u << i) : 0; 22477ec681f3Smrg } 22487ec681f3Smrg } 22497ec681f3Smrg } 22507ec681f3Smrg } 22517ec681f3Smrg 22527ec681f3Smrg const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state = 22537ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, 22547ec681f3Smrg PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR); 22557ec681f3Smrg if (fsr_state) { 22567ec681f3Smrg if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) 22577ec681f3Smrg dynamic->fragment_shading_rate = fsr_state->fragmentSize; 225801e04c3fSmrg } 225901e04c3fSmrg 226001e04c3fSmrg pipeline->dynamic_state_mask = states; 22617ec681f3Smrg 22627ec681f3Smrg /* Mark states that can either be dynamic or fully baked into the pipeline. 
22637ec681f3Smrg */ 22647ec681f3Smrg pipeline->static_state_mask = states & 22657ec681f3Smrg (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | 22667ec681f3Smrg ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | 22677ec681f3Smrg ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE | 22687ec681f3Smrg ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | 22697ec681f3Smrg ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP | 22707ec681f3Smrg ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY); 227101e04c3fSmrg} 227201e04c3fSmrg 227301e04c3fSmrgstatic void 227401e04c3fSmrganv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) 227501e04c3fSmrg{ 227601e04c3fSmrg#ifdef DEBUG 227701e04c3fSmrg struct anv_render_pass *renderpass = NULL; 227801e04c3fSmrg struct anv_subpass *subpass = NULL; 227901e04c3fSmrg 228001e04c3fSmrg /* Assert that all required members of VkGraphicsPipelineCreateInfo are 228101e04c3fSmrg * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines. 228201e04c3fSmrg */ 228301e04c3fSmrg assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); 228401e04c3fSmrg 228501e04c3fSmrg renderpass = anv_render_pass_from_handle(info->renderPass); 228601e04c3fSmrg assert(renderpass); 228701e04c3fSmrg 228801e04c3fSmrg assert(info->subpass < renderpass->subpass_count); 228901e04c3fSmrg subpass = &renderpass->subpasses[info->subpass]; 229001e04c3fSmrg 229101e04c3fSmrg assert(info->stageCount >= 1); 229201e04c3fSmrg assert(info->pRasterizationState); 229301e04c3fSmrg if (!info->pRasterizationState->rasterizerDiscardEnable) { 229401e04c3fSmrg assert(info->pViewportState); 229501e04c3fSmrg assert(info->pMultisampleState); 229601e04c3fSmrg 229701e04c3fSmrg if (subpass && subpass->depth_stencil_attachment) 229801e04c3fSmrg assert(info->pDepthStencilState); 229901e04c3fSmrg 230001e04c3fSmrg if (subpass && subpass->color_count > 0) { 230101e04c3fSmrg bool all_color_unused = true; 230201e04c3fSmrg for (int i = 0; i < subpass->color_count; i++) { 230301e04c3fSmrg if 
(subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) 230401e04c3fSmrg all_color_unused = false; 230501e04c3fSmrg } 230601e04c3fSmrg /* pColorBlendState is ignored if the pipeline has rasterization 230701e04c3fSmrg * disabled or if the subpass of the render pass the pipeline is 230801e04c3fSmrg * created against does not use any color attachments. 230901e04c3fSmrg */ 231001e04c3fSmrg assert(info->pColorBlendState || all_color_unused); 231101e04c3fSmrg } 231201e04c3fSmrg } 231301e04c3fSmrg 231401e04c3fSmrg for (uint32_t i = 0; i < info->stageCount; ++i) { 231501e04c3fSmrg switch (info->pStages[i].stage) { 23167ec681f3Smrg case VK_SHADER_STAGE_VERTEX_BIT: 23177ec681f3Smrg assert(info->pVertexInputState); 23187ec681f3Smrg assert(info->pInputAssemblyState); 23197ec681f3Smrg break; 232001e04c3fSmrg case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: 232101e04c3fSmrg case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: 232201e04c3fSmrg assert(info->pTessellationState); 232301e04c3fSmrg break; 232401e04c3fSmrg default: 232501e04c3fSmrg break; 232601e04c3fSmrg } 232701e04c3fSmrg } 232801e04c3fSmrg#endif 232901e04c3fSmrg} 233001e04c3fSmrg 233101e04c3fSmrg/** 233201e04c3fSmrg * Calculate the desired L3 partitioning based on the current state of the 233301e04c3fSmrg * pipeline. For now this simply returns the conservative defaults calculated 233401e04c3fSmrg * by get_default_l3_weights(), but we could probably do better by gathering 233501e04c3fSmrg * more statistics from the pipeline state (e.g. guess of expected URB usage 233601e04c3fSmrg * and bound surfaces), or by using feed-back from performance counters. 
 */
void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
{
   const struct intel_device_info *devinfo = &pipeline->device->info;

   const struct intel_l3_weights w =
      intel_get_default_l3_weights(devinfo, true, needs_slm);

   pipeline->l3_config = intel_get_l3_config(devinfo, w);
}

/* Resolve VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT to a concrete mode:
 * rectangular when multisampling with more than one sample, Bresenham
 * otherwise.  Any explicitly requested mode is returned unchanged.
 */
static VkLineRasterizationModeEXT
vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
                           const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   VkLineRasterizationModeEXT line_mode =
      line_info ? line_info->lineRasterizationMode :
                  VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;

   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
      if (ms_info && ms_info->rasterizationSamples > 1) {
         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
      } else {
         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      }
   }

   return line_mode;
}

/* Initialize an anv_graphics_pipeline from pCreateInfo: records dynamic
 * state, active stages and non-dynamic state, compiles the shaders, and
 * derives vertex-buffer and rasterization bookkeeping used at draw time.
 */
VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
                           struct anv_device *device,
                           struct anv_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *alloc)
{
   VkResult result;

   anv_pipeline_validate_create_info(pCreateInfo);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
                              alloc);
   if (result != VK_SUCCESS)
      return result;

   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
                         pipeline->batch_data, sizeof(pipeline->batch_data));

   ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
   assert(pCreateInfo->subpass < render_pass->subpass_count);
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

   assert(pCreateInfo->pRasterizationState);

   if (pCreateInfo->pDynamicState) {
      /* Remove all of the states that are marked as dynamic */
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
            pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }

   pipeline->active_stages = 0;
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      pipeline->active_stages |= pCreateInfo->pStages[i].stage;

   /* A tessellation evaluation shader is always paired with a control
    * shader, so treat both stages as active.
    */
   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;

   copy_non_dynamic_state(pipeline, pCreateInfo);

   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;

   /* Previously we enabled depth clipping when !depthClampEnable.
    * DepthClipStateCreateInfo now makes depth clipping explicit so if the
    * clipping info is available, use its enable value to determine clipping,
    * otherwise fallback to the previous !depthClampEnable logic.
    */
   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
   pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;

   pipeline->sample_shading_enable =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      pCreateInfo->pMultisampleState &&
      pCreateInfo->pMultisampleState->sampleShadingEnable;

   result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, alloc);
      return result;
   }

   anv_pipeline_setup_l3_config(&pipeline->base, false);

   if (anv_pipeline_is_primitive(pipeline)) {
      const VkPipelineVertexInputStateCreateInfo *vi_info =
         pCreateInfo->pVertexInputState;

      const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;

      /* Record which vertex buffer bindings are actually consumed by the
       * vertex shader.
       */
      for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
         const VkVertexInputAttributeDescription *desc =
            &vi_info->pVertexAttributeDescriptions[i];

         if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
            pipeline->vb_used |= 1 << desc->binding;
      }

      for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
         const VkVertexInputBindingDescription *desc =
            &vi_info->pVertexBindingDescriptions[i];

         pipeline->vb[desc->binding].stride = desc->stride;

         /* Step rate is programmed per vertex element (attribute), not
          * binding. Set up a map of which bindings step per instance, for
          * reference by vertex element setup. */
         switch (desc->inputRate) {
         default:
         case VK_VERTEX_INPUT_RATE_VERTEX:
            pipeline->vb[desc->binding].instanced = false;
            break;
         case VK_VERTEX_INPUT_RATE_INSTANCE:
            pipeline->vb[desc->binding].instanced = true;
            break;
         }

         pipeline->vb[desc->binding].instance_divisor = 1;
      }

      const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
         vk_find_struct_const(vi_info->pNext,
                              PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
      if (vi_div_state) {
         for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
            const VkVertexInputBindingDivisorDescriptionEXT *desc =
               &vi_div_state->pVertexBindingDivisors[i];

            pipeline->vb[desc->binding].instance_divisor = desc->divisor;
         }
      }

      /* Our implementation of VK_KHR_multiview uses instancing to draw the
       * different views.  If the client asks for instancing, we need to
       * multiply the instance divisor by the number of views to ensure that
       * we repeat the client's per-instance data once for each view.
       */
      if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
         const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
         for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
            if (pipeline->vb[vb].instanced)
               pipeline->vb[vb].instance_divisor *= view_count;
         }
      }

      const VkPipelineInputAssemblyStateCreateInfo *ia_info =
         pCreateInfo->pInputAssemblyState;
      const VkPipelineTessellationStateCreateInfo *tess_info =
         pCreateInfo->pTessellationState;

      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
      else
         pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
   }

   /* If rasterization is not enabled, ms_info must be ignored. */
   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
      (pipeline->dynamic_states &
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;

   const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);

   /* Store line mode, polygon mode and rasterization samples, these are used
    * for dynamic primitive topology.
    */
   pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
   pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
   pipeline->rasterization_samples =
      ms_info ? ms_info->rasterizationSamples : 1;

   return VK_SUCCESS;
}

/* Compile one ray-tracing shader (plus any resume shaders produced by
 * nir_lower_shader_calls), upload the resulting kernel through the pipeline
 * cache, and append it to the pipeline's shader list.  Returns
 * VK_ERROR_OUT_OF_HOST_MEMORY if compilation or upload fails.
 */
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      /* Split the shader at shader-call sites; the pieces after each call
       * become separate resume shaders.
       */
      NIR_PASS_V(nir, nir_lower_shader_calls,
                 nir_address_format_64bit_global,
                 BRW_BTD_STACK_ALIGN,
                 &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS_V(nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   stage->code =
      brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
                     &stage->key.bs, &stage->prog_data.bs, nir,
                     num_resume_shaders, resume_shaders,
                     stage->stats, NULL);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

/* Returns true if the create info lists
 * VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR as a dynamic state.
 */
static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

/* Compute pipeline->stack_size from the per-stage maxima in stack_max using
 * the default stack-size formula from the Vulkan ray-tracing spec, or set it
 * to 0 when the stack size is declared dynamic.
 */
static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
       *       rayGenStackMax +
       *       min(1, maxPipelineRayRecursionDepth) ×
       *       max(closestHitStackMax, missStackMax,
       *           intersectionStackMax + anyHitStackMax) +
       *       max(0, maxPipelineRayRecursionDepth-1) ×
       *       max(closestHitStackMax, missStackMax) +
       *       2 × callableStackMax"
       */
      pipeline->stack_size =
         stack_max[MESA_SHADER_RAYGEN] +
         MIN2(1, info->maxPipelineRayRecursionDepth) *
         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS],
              stack_max[MESA_SHADER_INTERSECTION],
              stack_max[MESA_SHADER_ANY_HIT]) +
         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS]) +
         2 * stack_max[MESA_SHADER_CALLABLE];

      /* This is an extremely unlikely case but we need to set it to some
       * non-zero value so that we don't accidentally think it's dynamic.
       * Our minimum stack size is 2KB anyway so we could set to any small
       * value we like.
       */
      if (pipeline->stack_size == 0)
         pipeline->stack_size = 1;
   }
}

/* Build the array of anv_pipeline_stage entries for a ray-tracing pipeline:
 * fills in module/entrypoint/key data, hashes each shader, and re-hashes
 * intersection shaders combined with their group's any-hit shader.  The
 * array is allocated from pipeline_ctx; entries whose module is
 * VK_NULL_HANDLE are left zeroed.
 */
static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus potential
    * combinations in the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (sinfo->module == VK_NULL_HANDLE)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .module = vk_shader_module_from_handle(sinfo->module),
         .entrypoint = sinfo->pName,
         .spec_info = sinfo->pSpecializationInfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
         },
      };

      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      anv_pipeline_hash_shader(stages[i].module,
                               stages[i].entrypoint,
                               stages[i].stage,
                               stages[i].spec_info,
                               stages[i].shader_sha1);

      /* Intersection shaders are hashed later, per group, because their
       * cache key may combine the group's any-hit shader (see below).
       */
      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}

/* Look up every stage's shader in the pipeline cache, recording cache-hit
 * feedback and per-stage stack maxima for found binaries.  Returns true only
 * if every stage with an entrypoint was found in the cache.
 */
static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   return cache_hits == shaders;
}

static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const
VkRayTracingPipelineCreateInfoKHR *info) 27817ec681f3Smrg{ 27827ec681f3Smrg const struct intel_device_info *devinfo = &pipeline->base.device->info; 27837ec681f3Smrg VkResult result; 27847ec681f3Smrg 27857ec681f3Smrg VkPipelineCreationFeedbackEXT pipeline_feedback = { 27867ec681f3Smrg .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 27877ec681f3Smrg }; 27887ec681f3Smrg int64_t pipeline_start = os_time_get_nano(); 27897ec681f3Smrg 27907ec681f3Smrg void *pipeline_ctx = ralloc_context(NULL); 27917ec681f3Smrg 27927ec681f3Smrg struct anv_pipeline_stage *stages = 27937ec681f3Smrg anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx); 27947ec681f3Smrg 27957ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); 27967ec681f3Smrg 27977ec681f3Smrg const bool skip_cache_lookup = 27987ec681f3Smrg (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR); 27997ec681f3Smrg 28007ec681f3Smrg uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {}; 28017ec681f3Smrg 28027ec681f3Smrg if (!skip_cache_lookup && 28037ec681f3Smrg anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) { 28047ec681f3Smrg pipeline_feedback.flags |= 28057ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; 28067ec681f3Smrg goto done; 28077ec681f3Smrg } 28087ec681f3Smrg 28097ec681f3Smrg if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) { 28107ec681f3Smrg ralloc_free(pipeline_ctx); 28117ec681f3Smrg return VK_PIPELINE_COMPILE_REQUIRED_EXT; 28127ec681f3Smrg } 28137ec681f3Smrg 28147ec681f3Smrg for (uint32_t i = 0; i < info->stageCount; i++) { 28157ec681f3Smrg if (stages[i].entrypoint == NULL) 28167ec681f3Smrg continue; 28177ec681f3Smrg 28187ec681f3Smrg int64_t stage_start = os_time_get_nano(); 28197ec681f3Smrg 28207ec681f3Smrg stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, 28217ec681f3Smrg pipeline_ctx, &stages[i]); 28227ec681f3Smrg if (stages[i].nir == NULL) { 
28237ec681f3Smrg ralloc_free(pipeline_ctx); 28247ec681f3Smrg return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY); 28257ec681f3Smrg } 28267ec681f3Smrg 28277ec681f3Smrg anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout); 28287ec681f3Smrg 28297ec681f3Smrg stages[i].feedback.duration += os_time_get_nano() - stage_start; 28307ec681f3Smrg } 28317ec681f3Smrg 28327ec681f3Smrg for (uint32_t i = 0; i < info->stageCount; i++) { 28337ec681f3Smrg if (stages[i].entrypoint == NULL) 28347ec681f3Smrg continue; 28357ec681f3Smrg 28367ec681f3Smrg /* Shader found in cache already. */ 28377ec681f3Smrg if (stages[i].bin != NULL) 28387ec681f3Smrg continue; 28397ec681f3Smrg 28407ec681f3Smrg /* We handle intersection shaders as part of the group */ 28417ec681f3Smrg if (stages[i].stage == MESA_SHADER_INTERSECTION) 28427ec681f3Smrg continue; 28437ec681f3Smrg 28447ec681f3Smrg int64_t stage_start = os_time_get_nano(); 28457ec681f3Smrg 28467ec681f3Smrg void *stage_ctx = ralloc_context(pipeline_ctx); 28477ec681f3Smrg 28487ec681f3Smrg nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir); 28497ec681f3Smrg switch (stages[i].stage) { 28507ec681f3Smrg case MESA_SHADER_RAYGEN: 28517ec681f3Smrg brw_nir_lower_raygen(nir); 28527ec681f3Smrg break; 28537ec681f3Smrg 28547ec681f3Smrg case MESA_SHADER_ANY_HIT: 28557ec681f3Smrg brw_nir_lower_any_hit(nir, devinfo); 28567ec681f3Smrg break; 28577ec681f3Smrg 28587ec681f3Smrg case MESA_SHADER_CLOSEST_HIT: 28597ec681f3Smrg brw_nir_lower_closest_hit(nir); 28607ec681f3Smrg break; 28617ec681f3Smrg 28627ec681f3Smrg case MESA_SHADER_MISS: 28637ec681f3Smrg brw_nir_lower_miss(nir); 28647ec681f3Smrg break; 28657ec681f3Smrg 28667ec681f3Smrg case MESA_SHADER_INTERSECTION: 28677ec681f3Smrg unreachable("These are handled later"); 28687ec681f3Smrg 28697ec681f3Smrg case MESA_SHADER_CALLABLE: 28707ec681f3Smrg brw_nir_lower_callable(nir); 28717ec681f3Smrg break; 28727ec681f3Smrg 28737ec681f3Smrg default: 28747ec681f3Smrg unreachable("Invalid 
ray-tracing shader stage"); 28757ec681f3Smrg } 28767ec681f3Smrg 28777ec681f3Smrg result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i], 28787ec681f3Smrg &stages[i].bin, stage_ctx); 28797ec681f3Smrg if (result != VK_SUCCESS) { 28807ec681f3Smrg ralloc_free(pipeline_ctx); 28817ec681f3Smrg return result; 28827ec681f3Smrg } 28837ec681f3Smrg 28847ec681f3Smrg uint32_t stack_size = 28857ec681f3Smrg brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size; 28867ec681f3Smrg stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size); 28877ec681f3Smrg 28887ec681f3Smrg ralloc_free(stage_ctx); 28897ec681f3Smrg 28907ec681f3Smrg stages[i].feedback.duration += os_time_get_nano() - stage_start; 28917ec681f3Smrg } 28927ec681f3Smrg 28937ec681f3Smrg for (uint32_t i = 0; i < info->groupCount; i++) { 28947ec681f3Smrg const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i]; 28957ec681f3Smrg struct anv_rt_shader_group *group = &pipeline->groups[i]; 28967ec681f3Smrg group->type = ginfo->type; 28977ec681f3Smrg switch (ginfo->type) { 28987ec681f3Smrg case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 28997ec681f3Smrg assert(ginfo->generalShader < info->stageCount); 29007ec681f3Smrg group->general = stages[ginfo->generalShader].bin; 29017ec681f3Smrg break; 29027ec681f3Smrg 29037ec681f3Smrg case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 29047ec681f3Smrg if (ginfo->anyHitShader < info->stageCount) 29057ec681f3Smrg group->any_hit = stages[ginfo->anyHitShader].bin; 29067ec681f3Smrg 29077ec681f3Smrg if (ginfo->closestHitShader < info->stageCount) 29087ec681f3Smrg group->closest_hit = stages[ginfo->closestHitShader].bin; 29097ec681f3Smrg break; 29107ec681f3Smrg 29117ec681f3Smrg case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { 29127ec681f3Smrg if (ginfo->closestHitShader < info->stageCount) 29137ec681f3Smrg group->closest_hit = stages[ginfo->closestHitShader].bin; 29147ec681f3Smrg 29157ec681f3Smrg uint32_t 
intersection_idx = info->pGroups[i].intersectionShader; 29167ec681f3Smrg assert(intersection_idx < info->stageCount); 29177ec681f3Smrg 29187ec681f3Smrg /* Only compile this stage if not already found in the cache. */ 29197ec681f3Smrg if (stages[intersection_idx].bin == NULL) { 29207ec681f3Smrg /* The any-hit and intersection shader have to be combined */ 29217ec681f3Smrg uint32_t any_hit_idx = info->pGroups[i].anyHitShader; 29227ec681f3Smrg const nir_shader *any_hit = NULL; 29237ec681f3Smrg if (any_hit_idx < info->stageCount) 29247ec681f3Smrg any_hit = stages[any_hit_idx].nir; 29257ec681f3Smrg 29267ec681f3Smrg void *group_ctx = ralloc_context(pipeline_ctx); 29277ec681f3Smrg nir_shader *intersection = 29287ec681f3Smrg nir_shader_clone(group_ctx, stages[intersection_idx].nir); 29297ec681f3Smrg 29307ec681f3Smrg brw_nir_lower_combined_intersection_any_hit(intersection, any_hit, 29317ec681f3Smrg devinfo); 29327ec681f3Smrg 29337ec681f3Smrg result = compile_upload_rt_shader(pipeline, cache, 29347ec681f3Smrg intersection, 29357ec681f3Smrg &stages[intersection_idx], 29367ec681f3Smrg &group->intersection, 29377ec681f3Smrg group_ctx); 29387ec681f3Smrg ralloc_free(group_ctx); 29397ec681f3Smrg if (result != VK_SUCCESS) 29407ec681f3Smrg return result; 29417ec681f3Smrg } else { 29427ec681f3Smrg group->intersection = stages[intersection_idx].bin; 29437ec681f3Smrg } 29447ec681f3Smrg 29457ec681f3Smrg uint32_t stack_size = 29467ec681f3Smrg brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size; 29477ec681f3Smrg stack_max[MESA_SHADER_INTERSECTION] = 29487ec681f3Smrg MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size); 29497ec681f3Smrg 29507ec681f3Smrg break; 29517ec681f3Smrg } 29527ec681f3Smrg 29537ec681f3Smrg default: 29547ec681f3Smrg unreachable("Invalid ray tracing shader group type"); 29557ec681f3Smrg } 29567ec681f3Smrg } 29577ec681f3Smrg 29587ec681f3Smrg done: 29597ec681f3Smrg ralloc_free(pipeline_ctx); 29607ec681f3Smrg 29617ec681f3Smrg 
anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max); 29627ec681f3Smrg 29637ec681f3Smrg pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 29647ec681f3Smrg 29657ec681f3Smrg const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = 29667ec681f3Smrg vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 29677ec681f3Smrg if (create_feedback) { 29687ec681f3Smrg *create_feedback->pPipelineCreationFeedback = pipeline_feedback; 29697ec681f3Smrg 29707ec681f3Smrg assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount); 29717ec681f3Smrg for (uint32_t i = 0; i < info->stageCount; i++) { 29727ec681f3Smrg gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage); 29737ec681f3Smrg create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 29747ec681f3Smrg } 29757ec681f3Smrg } 29767ec681f3Smrg 29777ec681f3Smrg return VK_SUCCESS; 29787ec681f3Smrg} 29797ec681f3Smrg 29807ec681f3SmrgVkResult 29817ec681f3Smrganv_device_init_rt_shaders(struct anv_device *device) 29827ec681f3Smrg{ 29837ec681f3Smrg if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 29847ec681f3Smrg return VK_SUCCESS; 29857ec681f3Smrg 29867ec681f3Smrg bool cache_hit; 29877ec681f3Smrg 29887ec681f3Smrg struct brw_rt_trampoline { 29897ec681f3Smrg char name[16]; 29907ec681f3Smrg struct brw_cs_prog_key key; 29917ec681f3Smrg } trampoline_key = { 29927ec681f3Smrg .name = "rt-trampoline", 29937ec681f3Smrg .key = { 29947ec681f3Smrg /* TODO: Other subgroup sizes? 
*/ 29957ec681f3Smrg .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8, 29967ec681f3Smrg }, 29977ec681f3Smrg }; 29987ec681f3Smrg device->rt_trampoline = 29997ec681f3Smrg anv_device_search_for_kernel(device, &device->default_pipeline_cache, 30007ec681f3Smrg &trampoline_key, sizeof(trampoline_key), 30017ec681f3Smrg &cache_hit); 30027ec681f3Smrg if (device->rt_trampoline == NULL) { 30037ec681f3Smrg 30047ec681f3Smrg void *tmp_ctx = ralloc_context(NULL); 30057ec681f3Smrg nir_shader *trampoline_nir = 30067ec681f3Smrg brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx); 30077ec681f3Smrg 30087ec681f3Smrg struct anv_pipeline_bind_map bind_map = { 30097ec681f3Smrg .surface_count = 0, 30107ec681f3Smrg .sampler_count = 0, 30117ec681f3Smrg }; 30127ec681f3Smrg uint32_t dummy_params[4] = { 0, }; 30137ec681f3Smrg struct brw_cs_prog_data trampoline_prog_data = { 30147ec681f3Smrg .base.nr_params = 4, 30157ec681f3Smrg .base.param = dummy_params, 30167ec681f3Smrg .uses_inline_data = true, 30177ec681f3Smrg .uses_btd_stack_ids = true, 30187ec681f3Smrg }; 30197ec681f3Smrg struct brw_compile_cs_params params = { 30207ec681f3Smrg .nir = trampoline_nir, 30217ec681f3Smrg .key = &trampoline_key.key, 30227ec681f3Smrg .prog_data = &trampoline_prog_data, 30237ec681f3Smrg .log_data = device, 30247ec681f3Smrg }; 30257ec681f3Smrg const unsigned *tramp_data = 30267ec681f3Smrg brw_compile_cs(device->physical->compiler, tmp_ctx, ¶ms); 30277ec681f3Smrg 30287ec681f3Smrg device->rt_trampoline = 30297ec681f3Smrg anv_device_upload_kernel(device, &device->default_pipeline_cache, 30307ec681f3Smrg MESA_SHADER_COMPUTE, 30317ec681f3Smrg &trampoline_key, sizeof(trampoline_key), 30327ec681f3Smrg tramp_data, 30337ec681f3Smrg trampoline_prog_data.base.program_size, 30347ec681f3Smrg &trampoline_prog_data.base, 30357ec681f3Smrg sizeof(trampoline_prog_data), 30367ec681f3Smrg NULL, 0, NULL, &bind_map); 30377ec681f3Smrg 30387ec681f3Smrg ralloc_free(tmp_ctx); 30397ec681f3Smrg 30407ec681f3Smrg if 
(device->rt_trampoline == NULL) 30417ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 30427ec681f3Smrg } 30437ec681f3Smrg 30447ec681f3Smrg struct brw_rt_trivial_return { 30457ec681f3Smrg char name[16]; 30467ec681f3Smrg struct brw_bs_prog_key key; 30477ec681f3Smrg } return_key = { 30487ec681f3Smrg .name = "rt-trivial-ret", 30497ec681f3Smrg }; 30507ec681f3Smrg device->rt_trivial_return = 30517ec681f3Smrg anv_device_search_for_kernel(device, &device->default_pipeline_cache, 30527ec681f3Smrg &return_key, sizeof(return_key), 30537ec681f3Smrg &cache_hit); 30547ec681f3Smrg if (device->rt_trivial_return == NULL) { 30557ec681f3Smrg void *tmp_ctx = ralloc_context(NULL); 30567ec681f3Smrg nir_shader *trivial_return_nir = 30577ec681f3Smrg brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx); 30587ec681f3Smrg 30597ec681f3Smrg NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info); 30607ec681f3Smrg 30617ec681f3Smrg struct anv_pipeline_bind_map bind_map = { 30627ec681f3Smrg .surface_count = 0, 30637ec681f3Smrg .sampler_count = 0, 30647ec681f3Smrg }; 30657ec681f3Smrg struct brw_bs_prog_data return_prog_data = { 0, }; 30667ec681f3Smrg const unsigned *return_data = 30677ec681f3Smrg brw_compile_bs(device->physical->compiler, device, tmp_ctx, 30687ec681f3Smrg &return_key.key, &return_prog_data, trivial_return_nir, 30697ec681f3Smrg 0, 0, NULL, NULL); 30707ec681f3Smrg 30717ec681f3Smrg device->rt_trivial_return = 30727ec681f3Smrg anv_device_upload_kernel(device, &device->default_pipeline_cache, 30737ec681f3Smrg MESA_SHADER_CALLABLE, 30747ec681f3Smrg &return_key, sizeof(return_key), 30757ec681f3Smrg return_data, return_prog_data.base.program_size, 30767ec681f3Smrg &return_prog_data.base, sizeof(return_prog_data), 30777ec681f3Smrg NULL, 0, NULL, &bind_map); 30787ec681f3Smrg 30797ec681f3Smrg ralloc_free(tmp_ctx); 30807ec681f3Smrg 30817ec681f3Smrg if (device->rt_trivial_return == NULL) { 30827ec681f3Smrg anv_shader_bin_unref(device, 
device->rt_trampoline); 30837ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 30847ec681f3Smrg } 30857ec681f3Smrg } 30867ec681f3Smrg 30877ec681f3Smrg return VK_SUCCESS; 30887ec681f3Smrg} 30897ec681f3Smrg 30907ec681f3Smrgvoid 30917ec681f3Smrganv_device_finish_rt_shaders(struct anv_device *device) 30927ec681f3Smrg{ 30937ec681f3Smrg if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) 30947ec681f3Smrg return; 30957ec681f3Smrg 30967ec681f3Smrg anv_shader_bin_unref(device, device->rt_trampoline); 30977ec681f3Smrg} 30987ec681f3Smrg 30997ec681f3SmrgVkResult 31007ec681f3Smrganv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline, 31017ec681f3Smrg struct anv_device *device, 31027ec681f3Smrg struct anv_pipeline_cache *cache, 31037ec681f3Smrg const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 31047ec681f3Smrg const VkAllocationCallbacks *alloc) 31057ec681f3Smrg{ 31067ec681f3Smrg VkResult result; 31077ec681f3Smrg 31087ec681f3Smrg util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx); 31097ec681f3Smrg 31107ec681f3Smrg result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo); 31117ec681f3Smrg if (result != VK_SUCCESS) 31127ec681f3Smrg goto fail; 31137ec681f3Smrg 31147ec681f3Smrg anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false); 31157ec681f3Smrg 31167ec681f3Smrg return VK_SUCCESS; 31177ec681f3Smrg 31187ec681f3Smrgfail: 31197ec681f3Smrg util_dynarray_foreach(&pipeline->shaders, 31207ec681f3Smrg struct anv_shader_bin *, shader) { 31217ec681f3Smrg anv_shader_bin_unref(device, *shader); 31227ec681f3Smrg } 31237ec681f3Smrg return result; 31247ec681f3Smrg} 31257ec681f3Smrg 31267ec681f3Smrg#define WRITE_STR(field, ...) 
({ \ 31277ec681f3Smrg memset(field, 0, sizeof(field)); \ 31287ec681f3Smrg UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \ 31297ec681f3Smrg assert(i > 0 && i < sizeof(field)); \ 31307ec681f3Smrg}) 31317ec681f3Smrg 31327ec681f3SmrgVkResult anv_GetPipelineExecutablePropertiesKHR( 31337ec681f3Smrg VkDevice device, 31347ec681f3Smrg const VkPipelineInfoKHR* pPipelineInfo, 31357ec681f3Smrg uint32_t* pExecutableCount, 31367ec681f3Smrg VkPipelineExecutablePropertiesKHR* pProperties) 31377ec681f3Smrg{ 31387ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline); 31397ec681f3Smrg VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount); 31407ec681f3Smrg 31417ec681f3Smrg util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) { 31427ec681f3Smrg vk_outarray_append(&out, props) { 31437ec681f3Smrg gl_shader_stage stage = exe->stage; 31447ec681f3Smrg props->stages = mesa_to_vk_shader_stage(stage); 31457ec681f3Smrg 31467ec681f3Smrg unsigned simd_width = exe->stats.dispatch_width; 31477ec681f3Smrg if (stage == MESA_SHADER_FRAGMENT) { 31487ec681f3Smrg WRITE_STR(props->name, "%s%d %s", 31497ec681f3Smrg simd_width ? "SIMD" : "vec", 31507ec681f3Smrg simd_width ? simd_width : 4, 31517ec681f3Smrg _mesa_shader_stage_to_string(stage)); 31527ec681f3Smrg } else { 31537ec681f3Smrg WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage)); 31547ec681f3Smrg } 31557ec681f3Smrg WRITE_STR(props->description, "%s%d %s shader", 31567ec681f3Smrg simd_width ? "SIMD" : "vec", 31577ec681f3Smrg simd_width ? simd_width : 4, 31587ec681f3Smrg _mesa_shader_stage_to_string(stage)); 31597ec681f3Smrg 31607ec681f3Smrg /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan 31617ec681f3Smrg * wants a subgroup size of 1. 
31627ec681f3Smrg */ 31637ec681f3Smrg props->subgroupSize = MAX2(simd_width, 1); 31647ec681f3Smrg } 31657ec681f3Smrg } 31667ec681f3Smrg 31677ec681f3Smrg return vk_outarray_status(&out); 31687ec681f3Smrg} 31697ec681f3Smrg 31707ec681f3Smrgstatic const struct anv_pipeline_executable * 31717ec681f3Smrganv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index) 31727ec681f3Smrg{ 31737ec681f3Smrg assert(index < util_dynarray_num_elements(&pipeline->executables, 31747ec681f3Smrg struct anv_pipeline_executable)); 31757ec681f3Smrg return util_dynarray_element( 31767ec681f3Smrg &pipeline->executables, struct anv_pipeline_executable, index); 31777ec681f3Smrg} 31787ec681f3Smrg 31797ec681f3SmrgVkResult anv_GetPipelineExecutableStatisticsKHR( 31807ec681f3Smrg VkDevice device, 31817ec681f3Smrg const VkPipelineExecutableInfoKHR* pExecutableInfo, 31827ec681f3Smrg uint32_t* pStatisticCount, 31837ec681f3Smrg VkPipelineExecutableStatisticKHR* pStatistics) 31847ec681f3Smrg{ 31857ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 31867ec681f3Smrg VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount); 31877ec681f3Smrg 31887ec681f3Smrg const struct anv_pipeline_executable *exe = 31897ec681f3Smrg anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 31907ec681f3Smrg 31917ec681f3Smrg const struct brw_stage_prog_data *prog_data; 31927ec681f3Smrg switch (pipeline->type) { 31937ec681f3Smrg case ANV_PIPELINE_GRAPHICS: { 31947ec681f3Smrg prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data; 31957ec681f3Smrg break; 31967ec681f3Smrg } 31977ec681f3Smrg case ANV_PIPELINE_COMPUTE: { 31987ec681f3Smrg prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data; 31997ec681f3Smrg break; 32007ec681f3Smrg } 32017ec681f3Smrg default: 32027ec681f3Smrg unreachable("invalid pipeline type"); 32037ec681f3Smrg } 32047ec681f3Smrg 32057ec681f3Smrg vk_outarray_append(&out, stat) { 32067ec681f3Smrg WRITE_STR(stat->name, 
"Instruction Count"); 32077ec681f3Smrg WRITE_STR(stat->description, 32087ec681f3Smrg "Number of GEN instructions in the final generated " 32097ec681f3Smrg "shader executable."); 32107ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32117ec681f3Smrg stat->value.u64 = exe->stats.instructions; 32127ec681f3Smrg } 32137ec681f3Smrg 32147ec681f3Smrg vk_outarray_append(&out, stat) { 32157ec681f3Smrg WRITE_STR(stat->name, "SEND Count"); 32167ec681f3Smrg WRITE_STR(stat->description, 32177ec681f3Smrg "Number of instructions in the final generated shader " 32187ec681f3Smrg "executable which access external units such as the " 32197ec681f3Smrg "constant cache or the sampler."); 32207ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32217ec681f3Smrg stat->value.u64 = exe->stats.sends; 32227ec681f3Smrg } 32237ec681f3Smrg 32247ec681f3Smrg vk_outarray_append(&out, stat) { 32257ec681f3Smrg WRITE_STR(stat->name, "Loop Count"); 32267ec681f3Smrg WRITE_STR(stat->description, 32277ec681f3Smrg "Number of loops (not unrolled) in the final generated " 32287ec681f3Smrg "shader executable."); 32297ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32307ec681f3Smrg stat->value.u64 = exe->stats.loops; 32317ec681f3Smrg } 32327ec681f3Smrg 32337ec681f3Smrg vk_outarray_append(&out, stat) { 32347ec681f3Smrg WRITE_STR(stat->name, "Cycle Count"); 32357ec681f3Smrg WRITE_STR(stat->description, 32367ec681f3Smrg "Estimate of the number of EU cycles required to execute " 32377ec681f3Smrg "the final generated executable. 
This is an estimate only " 32387ec681f3Smrg "and may vary greatly from actual run-time performance."); 32397ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32407ec681f3Smrg stat->value.u64 = exe->stats.cycles; 32417ec681f3Smrg } 32427ec681f3Smrg 32437ec681f3Smrg vk_outarray_append(&out, stat) { 32447ec681f3Smrg WRITE_STR(stat->name, "Spill Count"); 32457ec681f3Smrg WRITE_STR(stat->description, 32467ec681f3Smrg "Number of scratch spill operations. This gives a rough " 32477ec681f3Smrg "estimate of the cost incurred due to spilling temporary " 32487ec681f3Smrg "values to memory. If this is non-zero, you may want to " 32497ec681f3Smrg "adjust your shader to reduce register pressure."); 32507ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32517ec681f3Smrg stat->value.u64 = exe->stats.spills; 32527ec681f3Smrg } 32537ec681f3Smrg 32547ec681f3Smrg vk_outarray_append(&out, stat) { 32557ec681f3Smrg WRITE_STR(stat->name, "Fill Count"); 32567ec681f3Smrg WRITE_STR(stat->description, 32577ec681f3Smrg "Number of scratch fill operations. This gives a rough " 32587ec681f3Smrg "estimate of the cost incurred due to spilling temporary " 32597ec681f3Smrg "values to memory. If this is non-zero, you may want to " 32607ec681f3Smrg "adjust your shader to reduce register pressure."); 32617ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32627ec681f3Smrg stat->value.u64 = exe->stats.fills; 32637ec681f3Smrg } 32647ec681f3Smrg 32657ec681f3Smrg vk_outarray_append(&out, stat) { 32667ec681f3Smrg WRITE_STR(stat->name, "Scratch Memory Size"); 32677ec681f3Smrg WRITE_STR(stat->description, 32687ec681f3Smrg "Number of bytes of scratch memory required by the " 32697ec681f3Smrg "generated shader executable. 
If this is non-zero, you " 32707ec681f3Smrg "may want to adjust your shader to reduce register " 32717ec681f3Smrg "pressure."); 32727ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32737ec681f3Smrg stat->value.u64 = prog_data->total_scratch; 32747ec681f3Smrg } 32757ec681f3Smrg 32767ec681f3Smrg if (gl_shader_stage_uses_workgroup(exe->stage)) { 32777ec681f3Smrg vk_outarray_append(&out, stat) { 32787ec681f3Smrg WRITE_STR(stat->name, "Workgroup Memory Size"); 32797ec681f3Smrg WRITE_STR(stat->description, 32807ec681f3Smrg "Number of bytes of workgroup shared memory used by this " 32817ec681f3Smrg "shader including any padding."); 32827ec681f3Smrg stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 32837ec681f3Smrg stat->value.u64 = prog_data->total_shared; 32847ec681f3Smrg } 32857ec681f3Smrg } 32867ec681f3Smrg 32877ec681f3Smrg return vk_outarray_status(&out); 32887ec681f3Smrg} 32897ec681f3Smrg 32907ec681f3Smrgstatic bool 32917ec681f3Smrgwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, 32927ec681f3Smrg const char *data) 32937ec681f3Smrg{ 32947ec681f3Smrg ir->isText = VK_TRUE; 32957ec681f3Smrg 32967ec681f3Smrg size_t data_len = strlen(data) + 1; 32977ec681f3Smrg 32987ec681f3Smrg if (ir->pData == NULL) { 32997ec681f3Smrg ir->dataSize = data_len; 33007ec681f3Smrg return true; 33017ec681f3Smrg } 33027ec681f3Smrg 33037ec681f3Smrg strncpy(ir->pData, data, ir->dataSize); 33047ec681f3Smrg if (ir->dataSize < data_len) 33057ec681f3Smrg return false; 33067ec681f3Smrg 33077ec681f3Smrg ir->dataSize = data_len; 33087ec681f3Smrg return true; 33097ec681f3Smrg} 33107ec681f3Smrg 33117ec681f3SmrgVkResult anv_GetPipelineExecutableInternalRepresentationsKHR( 33127ec681f3Smrg VkDevice device, 33137ec681f3Smrg const VkPipelineExecutableInfoKHR* pExecutableInfo, 33147ec681f3Smrg uint32_t* pInternalRepresentationCount, 33157ec681f3Smrg VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations) 33167ec681f3Smrg{ 
33177ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline); 33187ec681f3Smrg VK_OUTARRAY_MAKE(out, pInternalRepresentations, 33197ec681f3Smrg pInternalRepresentationCount); 33207ec681f3Smrg bool incomplete_text = false; 33217ec681f3Smrg 33227ec681f3Smrg const struct anv_pipeline_executable *exe = 33237ec681f3Smrg anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 33247ec681f3Smrg 33257ec681f3Smrg if (exe->nir) { 33267ec681f3Smrg vk_outarray_append(&out, ir) { 33277ec681f3Smrg WRITE_STR(ir->name, "Final NIR"); 33287ec681f3Smrg WRITE_STR(ir->description, 33297ec681f3Smrg "Final NIR before going into the back-end compiler"); 33307ec681f3Smrg 33317ec681f3Smrg if (!write_ir_text(ir, exe->nir)) 33327ec681f3Smrg incomplete_text = true; 33337ec681f3Smrg } 33347ec681f3Smrg } 33357ec681f3Smrg 33367ec681f3Smrg if (exe->disasm) { 33377ec681f3Smrg vk_outarray_append(&out, ir) { 33387ec681f3Smrg WRITE_STR(ir->name, "GEN Assembly"); 33397ec681f3Smrg WRITE_STR(ir->description, 33407ec681f3Smrg "Final GEN assembly for the generated shader binary"); 33417ec681f3Smrg 33427ec681f3Smrg if (!write_ir_text(ir, exe->disasm)) 33437ec681f3Smrg incomplete_text = true; 33447ec681f3Smrg } 33457ec681f3Smrg } 33467ec681f3Smrg 33477ec681f3Smrg return incomplete_text ? 
VK_INCOMPLETE : vk_outarray_status(&out); 33487ec681f3Smrg} 33497ec681f3Smrg 33507ec681f3SmrgVkResult 33517ec681f3Smrganv_GetRayTracingShaderGroupHandlesKHR( 33527ec681f3Smrg VkDevice _device, 33537ec681f3Smrg VkPipeline _pipeline, 33547ec681f3Smrg uint32_t firstGroup, 33557ec681f3Smrg uint32_t groupCount, 33567ec681f3Smrg size_t dataSize, 33577ec681f3Smrg void* pData) 33587ec681f3Smrg{ 33597ec681f3Smrg ANV_FROM_HANDLE(anv_device, device, _device); 33607ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 33617ec681f3Smrg 33627ec681f3Smrg if (pipeline->type != ANV_PIPELINE_RAY_TRACING) 33637ec681f3Smrg return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 33647ec681f3Smrg 33657ec681f3Smrg struct anv_ray_tracing_pipeline *rt_pipeline = 33667ec681f3Smrg anv_pipeline_to_ray_tracing(pipeline); 33677ec681f3Smrg 33687ec681f3Smrg for (uint32_t i = 0; i < groupCount; i++) { 33697ec681f3Smrg struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; 33707ec681f3Smrg memcpy(pData, group->handle, sizeof(group->handle)); 33717ec681f3Smrg pData += sizeof(group->handle); 33727ec681f3Smrg } 33737ec681f3Smrg 33747ec681f3Smrg return VK_SUCCESS; 33757ec681f3Smrg} 33767ec681f3Smrg 33777ec681f3SmrgVkResult 33787ec681f3Smrganv_GetRayTracingCaptureReplayShaderGroupHandlesKHR( 33797ec681f3Smrg VkDevice _device, 33807ec681f3Smrg VkPipeline pipeline, 33817ec681f3Smrg uint32_t firstGroup, 33827ec681f3Smrg uint32_t groupCount, 33837ec681f3Smrg size_t dataSize, 33847ec681f3Smrg void* pData) 33857ec681f3Smrg{ 33867ec681f3Smrg ANV_FROM_HANDLE(anv_device, device, _device); 33877ec681f3Smrg unreachable("Unimplemented"); 33887ec681f3Smrg return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT); 33897ec681f3Smrg} 33907ec681f3Smrg 33917ec681f3SmrgVkDeviceSize 33927ec681f3Smrganv_GetRayTracingShaderGroupStackSizeKHR( 33937ec681f3Smrg VkDevice device, 33947ec681f3Smrg VkPipeline _pipeline, 33957ec681f3Smrg uint32_t group, 33967ec681f3Smrg VkShaderGroupShaderKHR groupShader) 
33977ec681f3Smrg{ 33987ec681f3Smrg ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); 33997ec681f3Smrg assert(pipeline->type == ANV_PIPELINE_RAY_TRACING); 34007ec681f3Smrg 34017ec681f3Smrg struct anv_ray_tracing_pipeline *rt_pipeline = 34027ec681f3Smrg anv_pipeline_to_ray_tracing(pipeline); 34037ec681f3Smrg 34047ec681f3Smrg assert(group < rt_pipeline->group_count); 34057ec681f3Smrg 34067ec681f3Smrg struct anv_shader_bin *bin; 34077ec681f3Smrg switch (groupShader) { 34087ec681f3Smrg case VK_SHADER_GROUP_SHADER_GENERAL_KHR: 34097ec681f3Smrg bin = rt_pipeline->groups[group].general; 34107ec681f3Smrg break; 34117ec681f3Smrg 34127ec681f3Smrg case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: 34137ec681f3Smrg bin = rt_pipeline->groups[group].closest_hit; 34147ec681f3Smrg break; 34157ec681f3Smrg 34167ec681f3Smrg case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: 34177ec681f3Smrg bin = rt_pipeline->groups[group].any_hit; 34187ec681f3Smrg break; 34197ec681f3Smrg 34207ec681f3Smrg case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: 34217ec681f3Smrg bin = rt_pipeline->groups[group].intersection; 34227ec681f3Smrg break; 34237ec681f3Smrg 34247ec681f3Smrg default: 34257ec681f3Smrg unreachable("Invalid VkShaderGroupShader enum"); 34267ec681f3Smrg } 34277ec681f3Smrg 34287ec681f3Smrg if (bin == NULL) 34297ec681f3Smrg return 0; 34307ec681f3Smrg 34317ec681f3Smrg return brw_bs_prog_data_const(bin->prog_data)->max_stack_size; 34327ec681f3Smrg} 3433