/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
2601e04c3fSmrg */ 2701e04c3fSmrg 287ec681f3Smrg#include "nir/nir.h" 297ec681f3Smrg#include "nir/nir_builder.h" 307ec681f3Smrg#include "nir/nir_xfb_info.h" 317ec681f3Smrg#include "spirv/nir_spirv.h" 327ec681f3Smrg#include "util/disk_cache.h" 3301e04c3fSmrg#include "util/mesa-sha1.h" 3401e04c3fSmrg#include "util/u_atomic.h" 357ec681f3Smrg#include "radv_cs.h" 3601e04c3fSmrg#include "radv_debug.h" 3701e04c3fSmrg#include "radv_private.h" 3801e04c3fSmrg#include "radv_shader.h" 3901e04c3fSmrg#include "vk_util.h" 4001e04c3fSmrg 4101e04c3fSmrg#include "util/debug.h" 427ec681f3Smrg#include "ac_binary.h" 4301e04c3fSmrg#include "ac_exp_param.h" 447ec681f3Smrg#include "ac_nir.h" 4501e04c3fSmrg#include "ac_shader_util.h" 467ec681f3Smrg#include "aco_interface.h" 477ec681f3Smrg#include "sid.h" 487ec681f3Smrg#include "vk_format.h" 4901e04c3fSmrg 5001e04c3fSmrgstruct radv_blend_state { 517ec681f3Smrg uint32_t blend_enable_4bit; 527ec681f3Smrg uint32_t need_src_alpha; 5301e04c3fSmrg 547ec681f3Smrg uint32_t cb_target_mask; 557ec681f3Smrg uint32_t cb_target_enabled_4bit; 567ec681f3Smrg uint32_t sx_mrt_blend_opt[8]; 577ec681f3Smrg uint32_t cb_blend_control[8]; 5801e04c3fSmrg 597ec681f3Smrg uint32_t spi_shader_col_format; 607ec681f3Smrg uint32_t col_format_is_int8; 617ec681f3Smrg uint32_t col_format_is_int10; 627ec681f3Smrg uint32_t cb_shader_mask; 637ec681f3Smrg uint32_t db_alpha_to_mask; 6401e04c3fSmrg 657ec681f3Smrg uint32_t commutative_4bit; 6601e04c3fSmrg 677ec681f3Smrg bool single_cb_enable; 687ec681f3Smrg bool mrt0_is_dual_src; 6901e04c3fSmrg}; 7001e04c3fSmrg 7101e04c3fSmrgstruct radv_dsa_order_invariance { 727ec681f3Smrg /* Whether the final result in Z/S buffers is guaranteed to be 737ec681f3Smrg * invariant under changes to the order in which fragments arrive. 
747ec681f3Smrg */ 757ec681f3Smrg bool zs; 767ec681f3Smrg 777ec681f3Smrg /* Whether the set of fragments that pass the combined Z/S test is 787ec681f3Smrg * guaranteed to be invariant under changes to the order in which 797ec681f3Smrg * fragments arrive. 807ec681f3Smrg */ 817ec681f3Smrg bool pass_set; 8201e04c3fSmrg}; 8301e04c3fSmrg 847ec681f3Smrgstatic bool 857ec681f3Smrgradv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo, VkDynamicState state) 867ec681f3Smrg{ 877ec681f3Smrg if (pCreateInfo->pDynamicState) { 887ec681f3Smrg uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; 897ec681f3Smrg for (uint32_t i = 0; i < count; i++) { 907ec681f3Smrg if (pCreateInfo->pDynamicState->pDynamicStates[i] == state) 917ec681f3Smrg return true; 927ec681f3Smrg } 937ec681f3Smrg } 947ec681f3Smrg 957ec681f3Smrg return false; 967ec681f3Smrg} 9701e04c3fSmrg 987ec681f3Smrgstatic const VkPipelineMultisampleStateCreateInfo * 997ec681f3Smrgradv_pipeline_get_multisample_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) 1007ec681f3Smrg{ 1017ec681f3Smrg if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable || 1027ec681f3Smrg radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) 1037ec681f3Smrg return pCreateInfo->pMultisampleState; 1047ec681f3Smrg return NULL; 1057ec681f3Smrg} 10601e04c3fSmrg 1077ec681f3Smrgstatic const VkPipelineTessellationStateCreateInfo * 1087ec681f3Smrgradv_pipeline_get_tessellation_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) 10901e04c3fSmrg{ 1107ec681f3Smrg for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 1117ec681f3Smrg if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT || 1127ec681f3Smrg pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { 1137ec681f3Smrg return pCreateInfo->pTessellationState; 1147ec681f3Smrg } 1157ec681f3Smrg } 1167ec681f3Smrg return NULL; 1177ec681f3Smrg} 1187ec681f3Smrg 1197ec681f3Smrgstatic const 
VkPipelineDepthStencilStateCreateInfo * 1207ec681f3Smrgradv_pipeline_get_depth_stencil_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) 1217ec681f3Smrg{ 1227ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 1237ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 1247ec681f3Smrg 1257ec681f3Smrg if ((!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && 1267ec681f3Smrg subpass->depth_stencil_attachment) || 1277ec681f3Smrg radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) 1287ec681f3Smrg return pCreateInfo->pDepthStencilState; 1297ec681f3Smrg return NULL; 1307ec681f3Smrg} 13101e04c3fSmrg 1327ec681f3Smrgstatic const VkPipelineColorBlendStateCreateInfo * 1337ec681f3Smrgradv_pipeline_get_color_blend_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) 1347ec681f3Smrg{ 1357ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 1367ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 13701e04c3fSmrg 1387ec681f3Smrg if ((!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && subpass->has_color_att) || 1397ec681f3Smrg radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) 1407ec681f3Smrg return pCreateInfo->pColorBlendState; 1417ec681f3Smrg return NULL; 14201e04c3fSmrg} 14301e04c3fSmrg 1447ec681f3Smrgstatic bool 1457ec681f3Smrgradv_pipeline_has_ngg(const struct radv_pipeline *pipeline) 14601e04c3fSmrg{ 1477ec681f3Smrg if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_NONE) 1487ec681f3Smrg return false; 14901e04c3fSmrg 1507ec681f3Smrg struct radv_shader_variant *variant = 1517ec681f3Smrg pipeline->shaders[pipeline->graphics.last_vgt_api_stage]; 15201e04c3fSmrg 1537ec681f3Smrg return variant->info.is_ngg; 15401e04c3fSmrg} 15501e04c3fSmrg 1567ec681f3Smrgbool 1577ec681f3Smrgradv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline) 15801e04c3fSmrg{ 
1597ec681f3Smrg if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_NONE) 1607ec681f3Smrg return false; 16101e04c3fSmrg 1627ec681f3Smrg assert(radv_pipeline_has_ngg(pipeline)); 1637ec681f3Smrg 1647ec681f3Smrg struct radv_shader_variant *variant = 1657ec681f3Smrg pipeline->shaders[pipeline->graphics.last_vgt_api_stage]; 1667ec681f3Smrg 1677ec681f3Smrg return variant->info.is_ngg_passthrough; 16801e04c3fSmrg} 16901e04c3fSmrg 1707ec681f3Smrgbool 1717ec681f3Smrgradv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline) 1727ec681f3Smrg{ 1737ec681f3Smrg return !!pipeline->gs_copy_shader; 1747ec681f3Smrg} 1757ec681f3Smrg 1767ec681f3Smrgvoid 1777ec681f3Smrgradv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline, 1787ec681f3Smrg const VkAllocationCallbacks *allocator) 1797ec681f3Smrg{ 1807ec681f3Smrg if (pipeline->type == RADV_PIPELINE_COMPUTE) { 1817ec681f3Smrg free(pipeline->compute.rt_group_handles); 1827ec681f3Smrg free(pipeline->compute.rt_stack_sizes); 1837ec681f3Smrg } else if (pipeline->type == RADV_PIPELINE_LIBRARY) { 1847ec681f3Smrg free(pipeline->library.groups); 1857ec681f3Smrg free(pipeline->library.stages); 1867ec681f3Smrg } 1877ec681f3Smrg 1887ec681f3Smrg for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) 1897ec681f3Smrg if (pipeline->shaders[i]) 1907ec681f3Smrg radv_shader_variant_destroy(device, pipeline->shaders[i]); 1917ec681f3Smrg 1927ec681f3Smrg if (pipeline->gs_copy_shader) 1937ec681f3Smrg radv_shader_variant_destroy(device, pipeline->gs_copy_shader); 1947ec681f3Smrg 1957ec681f3Smrg if (pipeline->cs.buf) 1967ec681f3Smrg free(pipeline->cs.buf); 1977ec681f3Smrg 1987ec681f3Smrg vk_object_base_finish(&pipeline->base); 1997ec681f3Smrg vk_free2(&device->vk.alloc, allocator, pipeline); 2007ec681f3Smrg} 2017ec681f3Smrg 2027ec681f3Smrgvoid 2037ec681f3Smrgradv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, 2047ec681f3Smrg const VkAllocationCallbacks *pAllocator) 2057ec681f3Smrg{ 2067ec681f3Smrg 
RADV_FROM_HANDLE(radv_device, device, _device); 2077ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); 2087ec681f3Smrg 2097ec681f3Smrg if (!_pipeline) 2107ec681f3Smrg return; 2117ec681f3Smrg 2127ec681f3Smrg radv_pipeline_destroy(device, pipeline, pAllocator); 2137ec681f3Smrg} 2147ec681f3Smrg 2157ec681f3Smrguint32_t 2167ec681f3Smrgradv_get_hash_flags(const struct radv_device *device, bool stats) 2177ec681f3Smrg{ 2187ec681f3Smrg uint32_t hash_flags = 0; 2197ec681f3Smrg 2207ec681f3Smrg if (device->physical_device->use_ngg_culling) 2217ec681f3Smrg hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING; 2227ec681f3Smrg if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT) 2237ec681f3Smrg hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT; 2247ec681f3Smrg if (device->physical_device->cs_wave_size == 32) 2257ec681f3Smrg hash_flags |= RADV_HASH_SHADER_CS_WAVE32; 2267ec681f3Smrg if (device->physical_device->ps_wave_size == 32) 2277ec681f3Smrg hash_flags |= RADV_HASH_SHADER_PS_WAVE32; 2287ec681f3Smrg if (device->physical_device->ge_wave_size == 32) 2297ec681f3Smrg hash_flags |= RADV_HASH_SHADER_GE_WAVE32; 2307ec681f3Smrg if (device->physical_device->use_llvm) 2317ec681f3Smrg hash_flags |= RADV_HASH_SHADER_LLVM; 2327ec681f3Smrg if (stats) 2337ec681f3Smrg hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS; 2347ec681f3Smrg if (device->robust_buffer_access) /* forces per-attribute vertex descriptors */ 2357ec681f3Smrg hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS; 2367ec681f3Smrg if (device->robust_buffer_access2) /* affects load/store vectorizer */ 2377ec681f3Smrg hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2; 2387ec681f3Smrg return hash_flags; 2397ec681f3Smrg} 2407ec681f3Smrg 2417ec681f3Smrgstatic void 2427ec681f3Smrgradv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipeline *pipeline) 2437ec681f3Smrg{ 2447ec681f3Smrg unsigned scratch_bytes_per_wave = 0; 2457ec681f3Smrg unsigned max_waves = 0; 2467ec681f3Smrg 2477ec681f3Smrg 
for (int i = 0; i < MESA_SHADER_STAGES; ++i) { 2487ec681f3Smrg if (pipeline->shaders[i] && pipeline->shaders[i]->config.scratch_bytes_per_wave) { 2497ec681f3Smrg unsigned max_stage_waves = device->scratch_waves; 2507ec681f3Smrg 2517ec681f3Smrg scratch_bytes_per_wave = 2527ec681f3Smrg MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave); 2537ec681f3Smrg 2547ec681f3Smrg max_stage_waves = 2557ec681f3Smrg MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_good_compute_units * 2567ec681f3Smrg radv_get_max_waves(device, pipeline->shaders[i], i)); 2577ec681f3Smrg max_waves = MAX2(max_waves, max_stage_waves); 2587ec681f3Smrg } 2597ec681f3Smrg } 2607ec681f3Smrg 2617ec681f3Smrg pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave; 2627ec681f3Smrg pipeline->max_waves = max_waves; 2637ec681f3Smrg} 2647ec681f3Smrg 2657ec681f3Smrgstatic uint32_t 2667ec681f3Smrgsi_translate_blend_function(VkBlendOp op) 2677ec681f3Smrg{ 2687ec681f3Smrg switch (op) { 2697ec681f3Smrg case VK_BLEND_OP_ADD: 2707ec681f3Smrg return V_028780_COMB_DST_PLUS_SRC; 2717ec681f3Smrg case VK_BLEND_OP_SUBTRACT: 2727ec681f3Smrg return V_028780_COMB_SRC_MINUS_DST; 2737ec681f3Smrg case VK_BLEND_OP_REVERSE_SUBTRACT: 2747ec681f3Smrg return V_028780_COMB_DST_MINUS_SRC; 2757ec681f3Smrg case VK_BLEND_OP_MIN: 2767ec681f3Smrg return V_028780_COMB_MIN_DST_SRC; 2777ec681f3Smrg case VK_BLEND_OP_MAX: 2787ec681f3Smrg return V_028780_COMB_MAX_DST_SRC; 2797ec681f3Smrg default: 2807ec681f3Smrg return 0; 2817ec681f3Smrg } 2827ec681f3Smrg} 2837ec681f3Smrg 2847ec681f3Smrgstatic uint32_t 2857ec681f3Smrgsi_translate_blend_factor(VkBlendFactor factor) 2867ec681f3Smrg{ 2877ec681f3Smrg switch (factor) { 2887ec681f3Smrg case VK_BLEND_FACTOR_ZERO: 2897ec681f3Smrg return V_028780_BLEND_ZERO; 2907ec681f3Smrg case VK_BLEND_FACTOR_ONE: 2917ec681f3Smrg return V_028780_BLEND_ONE; 2927ec681f3Smrg case VK_BLEND_FACTOR_SRC_COLOR: 2937ec681f3Smrg return V_028780_BLEND_SRC_COLOR; 2947ec681f3Smrg case 
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 2957ec681f3Smrg return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 2967ec681f3Smrg case VK_BLEND_FACTOR_DST_COLOR: 2977ec681f3Smrg return V_028780_BLEND_DST_COLOR; 2987ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: 2997ec681f3Smrg return V_028780_BLEND_ONE_MINUS_DST_COLOR; 3007ec681f3Smrg case VK_BLEND_FACTOR_SRC_ALPHA: 3017ec681f3Smrg return V_028780_BLEND_SRC_ALPHA; 3027ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: 3037ec681f3Smrg return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 3047ec681f3Smrg case VK_BLEND_FACTOR_DST_ALPHA: 3057ec681f3Smrg return V_028780_BLEND_DST_ALPHA; 3067ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 3077ec681f3Smrg return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 3087ec681f3Smrg case VK_BLEND_FACTOR_CONSTANT_COLOR: 3097ec681f3Smrg return V_028780_BLEND_CONSTANT_COLOR; 3107ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: 3117ec681f3Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 3127ec681f3Smrg case VK_BLEND_FACTOR_CONSTANT_ALPHA: 3137ec681f3Smrg return V_028780_BLEND_CONSTANT_ALPHA; 3147ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: 3157ec681f3Smrg return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 3167ec681f3Smrg case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 3177ec681f3Smrg return V_028780_BLEND_SRC_ALPHA_SATURATE; 3187ec681f3Smrg case VK_BLEND_FACTOR_SRC1_COLOR: 3197ec681f3Smrg return V_028780_BLEND_SRC1_COLOR; 3207ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 3217ec681f3Smrg return V_028780_BLEND_INV_SRC1_COLOR; 3227ec681f3Smrg case VK_BLEND_FACTOR_SRC1_ALPHA: 3237ec681f3Smrg return V_028780_BLEND_SRC1_ALPHA; 3247ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 3257ec681f3Smrg return V_028780_BLEND_INV_SRC1_ALPHA; 3267ec681f3Smrg default: 3277ec681f3Smrg return 0; 3287ec681f3Smrg } 3297ec681f3Smrg} 3307ec681f3Smrg 3317ec681f3Smrgstatic uint32_t 3327ec681f3Smrgsi_translate_blend_opt_function(VkBlendOp op) 3337ec681f3Smrg{ 3347ec681f3Smrg switch (op) { 
3357ec681f3Smrg case VK_BLEND_OP_ADD: 3367ec681f3Smrg return V_028760_OPT_COMB_ADD; 3377ec681f3Smrg case VK_BLEND_OP_SUBTRACT: 3387ec681f3Smrg return V_028760_OPT_COMB_SUBTRACT; 3397ec681f3Smrg case VK_BLEND_OP_REVERSE_SUBTRACT: 3407ec681f3Smrg return V_028760_OPT_COMB_REVSUBTRACT; 3417ec681f3Smrg case VK_BLEND_OP_MIN: 3427ec681f3Smrg return V_028760_OPT_COMB_MIN; 3437ec681f3Smrg case VK_BLEND_OP_MAX: 3447ec681f3Smrg return V_028760_OPT_COMB_MAX; 3457ec681f3Smrg default: 3467ec681f3Smrg return V_028760_OPT_COMB_BLEND_DISABLED; 3477ec681f3Smrg } 3487ec681f3Smrg} 3497ec681f3Smrg 3507ec681f3Smrgstatic uint32_t 3517ec681f3Smrgsi_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha) 3527ec681f3Smrg{ 3537ec681f3Smrg switch (factor) { 3547ec681f3Smrg case VK_BLEND_FACTOR_ZERO: 3557ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 3567ec681f3Smrg case VK_BLEND_FACTOR_ONE: 3577ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 3587ec681f3Smrg case VK_BLEND_FACTOR_SRC_COLOR: 3597ec681f3Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 3607ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 3617ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 3627ec681f3Smrg return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 3637ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 3647ec681f3Smrg case VK_BLEND_FACTOR_SRC_ALPHA: 3657ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 3667ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: 3677ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 3687ec681f3Smrg case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 3697ec681f3Smrg return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 3707ec681f3Smrg : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 3717ec681f3Smrg default: 3727ec681f3Smrg return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 3737ec681f3Smrg } 37401e04c3fSmrg} 37501e04c3fSmrg 37601e04c3fSmrg/** 37701e04c3fSmrg * Get rid of DST in the blend factors by commuting the operands: 37801e04c3fSmrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 37901e04c3fSmrg */ 3807ec681f3Smrgstatic void 3817ec681f3Smrgsi_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, 3827ec681f3Smrg VkBlendFactor expected_dst, VkBlendFactor replacement_src) 3837ec681f3Smrg{ 3847ec681f3Smrg if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) { 3857ec681f3Smrg *src_factor = VK_BLEND_FACTOR_ZERO; 3867ec681f3Smrg *dst_factor = replacement_src; 3877ec681f3Smrg 3887ec681f3Smrg /* Commuting the operands requires reversing subtractions. */ 3897ec681f3Smrg if (*func == VK_BLEND_OP_SUBTRACT) 3907ec681f3Smrg *func = VK_BLEND_OP_REVERSE_SUBTRACT; 3917ec681f3Smrg else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT) 3927ec681f3Smrg *func = VK_BLEND_OP_SUBTRACT; 3937ec681f3Smrg } 39401e04c3fSmrg} 39501e04c3fSmrg 3967ec681f3Smrgstatic bool 3977ec681f3Smrgsi_blend_factor_uses_dst(VkBlendFactor factor) 3987ec681f3Smrg{ 3997ec681f3Smrg return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA || 4007ec681f3Smrg factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || 4017ec681f3Smrg factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA || 4027ec681f3Smrg factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; 4037ec681f3Smrg} 4047ec681f3Smrg 4057ec681f3Smrgstatic bool 4067ec681f3Smrgis_dual_src(VkBlendFactor factor) 4077ec681f3Smrg{ 4087ec681f3Smrg switch (factor) { 4097ec681f3Smrg case VK_BLEND_FACTOR_SRC1_COLOR: 4107ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 4117ec681f3Smrg case VK_BLEND_FACTOR_SRC1_ALPHA: 4127ec681f3Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 
4137ec681f3Smrg return true; 4147ec681f3Smrg default: 4157ec681f3Smrg return false; 4167ec681f3Smrg } 4177ec681f3Smrg} 4187ec681f3Smrg 4197ec681f3Smrgstatic unsigned 4207ec681f3Smrgradv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, 4217ec681f3Smrg bool blend_enable, bool blend_need_alpha) 4227ec681f3Smrg{ 4237ec681f3Smrg const struct util_format_description *desc = vk_format_description(vk_format); 4247ec681f3Smrg bool use_rbplus = device->physical_device->rad_info.rbplus_allowed; 4257ec681f3Smrg struct ac_spi_color_formats formats = {0}; 4267ec681f3Smrg unsigned format, ntype, swap; 4277ec681f3Smrg 4287ec681f3Smrg format = radv_translate_colorformat(vk_format); 4297ec681f3Smrg ntype = radv_translate_color_numformat(vk_format, desc, 4307ec681f3Smrg vk_format_get_first_non_void_channel(vk_format)); 4317ec681f3Smrg swap = radv_translate_colorswap(vk_format, false); 4327ec681f3Smrg 4337ec681f3Smrg ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats); 4347ec681f3Smrg 4357ec681f3Smrg if (blend_enable && blend_need_alpha) 4367ec681f3Smrg return formats.blend_alpha; 4377ec681f3Smrg else if (blend_need_alpha) 4387ec681f3Smrg return formats.alpha; 4397ec681f3Smrg else if (blend_enable) 4407ec681f3Smrg return formats.blend; 4417ec681f3Smrg else 4427ec681f3Smrg return formats.normal; 44301e04c3fSmrg} 44401e04c3fSmrg 44501e04c3fSmrgstatic bool 44601e04c3fSmrgformat_is_int8(VkFormat format) 44701e04c3fSmrg{ 4487ec681f3Smrg const struct util_format_description *desc = vk_format_description(format); 4497ec681f3Smrg int channel = vk_format_get_first_non_void_channel(format); 45001e04c3fSmrg 4517ec681f3Smrg return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8; 45201e04c3fSmrg} 45301e04c3fSmrg 45401e04c3fSmrgstatic bool 45501e04c3fSmrgformat_is_int10(VkFormat format) 45601e04c3fSmrg{ 4577ec681f3Smrg const struct util_format_description *desc = vk_format_description(format); 
4587ec681f3Smrg 4597ec681f3Smrg if (desc->nr_channels != 4) 4607ec681f3Smrg return false; 4617ec681f3Smrg for (unsigned i = 0; i < 4; i++) { 4627ec681f3Smrg if (desc->channel[i].pure_integer && desc->channel[i].size == 10) 4637ec681f3Smrg return true; 4647ec681f3Smrg } 4657ec681f3Smrg return false; 4667ec681f3Smrg} 46701e04c3fSmrg 4687ec681f3Smrgstatic void 4697ec681f3Smrgradv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline, 4707ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 4717ec681f3Smrg struct radv_blend_state *blend) 4727ec681f3Smrg{ 4737ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 4747ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 4757ec681f3Smrg unsigned col_format = 0, is_int8 = 0, is_int10 = 0; 4767ec681f3Smrg unsigned num_targets; 4777ec681f3Smrg 4787ec681f3Smrg for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) { 4797ec681f3Smrg unsigned cf; 4807ec681f3Smrg 4817ec681f3Smrg if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED || 4827ec681f3Smrg !(blend->cb_target_mask & (0xfu << (i * 4)))) { 4837ec681f3Smrg cf = V_028714_SPI_SHADER_ZERO; 4847ec681f3Smrg } else { 4857ec681f3Smrg struct radv_render_pass_attachment *attachment = 4867ec681f3Smrg pass->attachments + subpass->color_attachments[i].attachment; 4877ec681f3Smrg bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4)); 4887ec681f3Smrg 4897ec681f3Smrg cf = radv_choose_spi_color_format(pipeline->device, attachment->format, blend_enable, 4907ec681f3Smrg blend->need_src_alpha & (1 << i)); 4917ec681f3Smrg 4927ec681f3Smrg if (format_is_int8(attachment->format)) 4937ec681f3Smrg is_int8 |= 1 << i; 4947ec681f3Smrg if (format_is_int10(attachment->format)) 4957ec681f3Smrg is_int10 |= 1 << i; 4967ec681f3Smrg } 4977ec681f3Smrg 4987ec681f3Smrg col_format |= cf << (4 * i); 4997ec681f3Smrg } 5007ec681f3Smrg 5017ec681f3Smrg if (!(col_format & 0xf) && 
blend->need_src_alpha & (1 << 0)) { 5027ec681f3Smrg /* When a subpass doesn't have any color attachments, write the 5037ec681f3Smrg * alpha channel of MRT0 when alpha coverage is enabled because 5047ec681f3Smrg * the depth attachment needs it. 5057ec681f3Smrg */ 5067ec681f3Smrg col_format |= V_028714_SPI_SHADER_32_AR; 5077ec681f3Smrg } 5087ec681f3Smrg 5097ec681f3Smrg /* If the i-th target format is set, all previous target formats must 5107ec681f3Smrg * be non-zero to avoid hangs. 5117ec681f3Smrg */ 5127ec681f3Smrg num_targets = (util_last_bit(col_format) + 3) / 4; 5137ec681f3Smrg for (unsigned i = 0; i < num_targets; i++) { 5147ec681f3Smrg if (!(col_format & (0xfu << (i * 4)))) { 5157ec681f3Smrg col_format |= V_028714_SPI_SHADER_32_R << (i * 4); 5167ec681f3Smrg } 5177ec681f3Smrg } 5187ec681f3Smrg 5197ec681f3Smrg /* The output for dual source blending should have the same format as 5207ec681f3Smrg * the first output. 5217ec681f3Smrg */ 5227ec681f3Smrg if (blend->mrt0_is_dual_src) { 5237ec681f3Smrg assert(!(col_format >> 4)); 5247ec681f3Smrg col_format |= (col_format & 0xf) << 4; 5257ec681f3Smrg } 5267ec681f3Smrg 5277ec681f3Smrg blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); 5287ec681f3Smrg blend->spi_shader_col_format = col_format; 5297ec681f3Smrg blend->col_format_is_int8 = is_int8; 5307ec681f3Smrg blend->col_format_is_int10 = is_int10; 53101e04c3fSmrg} 53201e04c3fSmrg 53301e04c3fSmrg/* 53401e04c3fSmrg * Ordered so that for each i, 53501e04c3fSmrg * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i. 
53601e04c3fSmrg */ 53701e04c3fSmrgconst VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = { 5387ec681f3Smrg VK_FORMAT_R32_SFLOAT, 5397ec681f3Smrg VK_FORMAT_R32G32_SFLOAT, 5407ec681f3Smrg VK_FORMAT_R8G8B8A8_UNORM, 5417ec681f3Smrg VK_FORMAT_R16G16B16A16_UNORM, 5427ec681f3Smrg VK_FORMAT_R16G16B16A16_SNORM, 5437ec681f3Smrg VK_FORMAT_R16G16B16A16_UINT, 5447ec681f3Smrg VK_FORMAT_R16G16B16A16_SINT, 5457ec681f3Smrg VK_FORMAT_R32G32B32A32_SFLOAT, 5467ec681f3Smrg VK_FORMAT_R8G8B8A8_UINT, 5477ec681f3Smrg VK_FORMAT_R8G8B8A8_SINT, 5487ec681f3Smrg VK_FORMAT_A2R10G10B10_UINT_PACK32, 5497ec681f3Smrg VK_FORMAT_A2R10G10B10_SINT_PACK32, 55001e04c3fSmrg}; 55101e04c3fSmrg 5527ec681f3Smrgunsigned 5537ec681f3Smrgradv_format_meta_fs_key(struct radv_device *device, VkFormat format) 55401e04c3fSmrg{ 5557ec681f3Smrg unsigned col_format = radv_choose_spi_color_format(device, format, false, false); 5567ec681f3Smrg assert(col_format != V_028714_SPI_SHADER_32_AR); 5577ec681f3Smrg 5587ec681f3Smrg bool is_int8 = format_is_int8(format); 5597ec681f3Smrg bool is_int10 = format_is_int10(format); 5607ec681f3Smrg 5617ec681f3Smrg if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8) 5627ec681f3Smrg return 8; 5637ec681f3Smrg else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8) 5647ec681f3Smrg return 9; 5657ec681f3Smrg else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10) 5667ec681f3Smrg return 10; 5677ec681f3Smrg else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10) 5687ec681f3Smrg return 11; 5697ec681f3Smrg else { 5707ec681f3Smrg if (col_format >= V_028714_SPI_SHADER_32_AR) 5717ec681f3Smrg --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */ 5727ec681f3Smrg 5737ec681f3Smrg --col_format; /* Skip V_028714_SPI_SHADER_ZERO */ 5747ec681f3Smrg return col_format; 5757ec681f3Smrg } 57601e04c3fSmrg} 57701e04c3fSmrg 57801e04c3fSmrgstatic void 5797ec681f3Smrgradv_blend_check_commutativity(struct radv_blend_state *blend, 
VkBlendOp op, VkBlendFactor src, 5807ec681f3Smrg VkBlendFactor dst, unsigned chanmask) 58101e04c3fSmrg{ 5827ec681f3Smrg /* Src factor is allowed when it does not depend on Dst. */ 5837ec681f3Smrg static const uint32_t src_allowed = 5847ec681f3Smrg (1u << VK_BLEND_FACTOR_ONE) | (1u << VK_BLEND_FACTOR_SRC_COLOR) | 5857ec681f3Smrg (1u << VK_BLEND_FACTOR_SRC_ALPHA) | (1u << VK_BLEND_FACTOR_SRC_ALPHA_SATURATE) | 5867ec681f3Smrg (1u << VK_BLEND_FACTOR_CONSTANT_COLOR) | (1u << VK_BLEND_FACTOR_CONSTANT_ALPHA) | 5877ec681f3Smrg (1u << VK_BLEND_FACTOR_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_SRC1_ALPHA) | 5887ec681f3Smrg (1u << VK_BLEND_FACTOR_ZERO) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR) | 5897ec681f3Smrg (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) | 5907ec681f3Smrg (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR) | 5917ec681f3Smrg (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA) | 5927ec681f3Smrg (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA); 5937ec681f3Smrg 5947ec681f3Smrg if (dst == VK_BLEND_FACTOR_ONE && (src_allowed & (1u << src))) { 5957ec681f3Smrg /* Addition is commutative, but floating point addition isn't 5967ec681f3Smrg * associative: subtle changes can be introduced via different 5977ec681f3Smrg * rounding. Be conservative, only enable for min and max. 
5987ec681f3Smrg */ 5997ec681f3Smrg if (op == VK_BLEND_OP_MAX || op == VK_BLEND_OP_MIN) 6007ec681f3Smrg blend->commutative_4bit |= chanmask; 6017ec681f3Smrg } 6027ec681f3Smrg} 60301e04c3fSmrg 6047ec681f3Smrgstatic struct radv_blend_state 6057ec681f3Smrgradv_pipeline_init_blend_state(struct radv_pipeline *pipeline, 6067ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 6077ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra) 6087ec681f3Smrg{ 6097ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 6107ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 6117ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *vkms = 6127ec681f3Smrg radv_pipeline_get_multisample_state(pCreateInfo); 6137ec681f3Smrg struct radv_blend_state blend = {0}; 6147ec681f3Smrg unsigned mode = V_028808_CB_NORMAL; 6157ec681f3Smrg unsigned cb_color_control = 0; 6167ec681f3Smrg int i; 6177ec681f3Smrg 6187ec681f3Smrg if (extra && extra->custom_blend_mode) { 6197ec681f3Smrg blend.single_cb_enable = true; 6207ec681f3Smrg mode = extra->custom_blend_mode; 6217ec681f3Smrg } 6227ec681f3Smrg 6237ec681f3Smrg if (vkblend) { 6247ec681f3Smrg if (vkblend->logicOpEnable) 6257ec681f3Smrg cb_color_control |= S_028808_ROP3(si_translate_blend_logic_op(vkblend->logicOp)); 6267ec681f3Smrg else 6277ec681f3Smrg cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); 6287ec681f3Smrg } 6297ec681f3Smrg 6307ec681f3Smrg if (pipeline->device->instance->debug_flags & RADV_DEBUG_NO_ATOC_DITHERING) 6317ec681f3Smrg { 6327ec681f3Smrg blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 6337ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 6347ec681f3Smrg S_028B70_OFFSET_ROUND(0); 6357ec681f3Smrg } 6367ec681f3Smrg else 6377ec681f3Smrg { 6387ec681f3Smrg blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 6397ec681f3Smrg S_028B70_ALPHA_TO_MASK_OFFSET2(0) | 
S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 6407ec681f3Smrg S_028B70_OFFSET_ROUND(1); 6417ec681f3Smrg } 6427ec681f3Smrg 6437ec681f3Smrg if (vkms && vkms->alphaToCoverageEnable) { 6447ec681f3Smrg blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); 6457ec681f3Smrg blend.need_src_alpha |= 0x1; 6467ec681f3Smrg } 6477ec681f3Smrg 6487ec681f3Smrg blend.cb_target_mask = 0; 6497ec681f3Smrg if (vkblend) { 6507ec681f3Smrg for (i = 0; i < vkblend->attachmentCount; i++) { 6517ec681f3Smrg const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i]; 6527ec681f3Smrg unsigned blend_cntl = 0; 6537ec681f3Smrg unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 6547ec681f3Smrg VkBlendOp eqRGB = att->colorBlendOp; 6557ec681f3Smrg VkBlendFactor srcRGB = att->srcColorBlendFactor; 6567ec681f3Smrg VkBlendFactor dstRGB = att->dstColorBlendFactor; 6577ec681f3Smrg VkBlendOp eqA = att->alphaBlendOp; 6587ec681f3Smrg VkBlendFactor srcA = att->srcAlphaBlendFactor; 6597ec681f3Smrg VkBlendFactor dstA = att->dstAlphaBlendFactor; 6607ec681f3Smrg 6617ec681f3Smrg blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 6627ec681f3Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 6637ec681f3Smrg 6647ec681f3Smrg if (!att->colorWriteMask) 6657ec681f3Smrg continue; 6667ec681f3Smrg 6677ec681f3Smrg /* Ignore other blend targets if dual-source blending 6687ec681f3Smrg * is enabled to prevent wrong behaviour. 
6697ec681f3Smrg */ 6707ec681f3Smrg if (blend.mrt0_is_dual_src) 6717ec681f3Smrg continue; 6727ec681f3Smrg 6737ec681f3Smrg blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i); 6747ec681f3Smrg blend.cb_target_enabled_4bit |= 0xfu << (4 * i); 6757ec681f3Smrg if (!att->blendEnable) { 6767ec681f3Smrg blend.cb_blend_control[i] = blend_cntl; 6777ec681f3Smrg continue; 6787ec681f3Smrg } 6797ec681f3Smrg 6807ec681f3Smrg if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA)) 6817ec681f3Smrg if (i == 0) 6827ec681f3Smrg blend.mrt0_is_dual_src = true; 6837ec681f3Smrg 6847ec681f3Smrg if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) { 6857ec681f3Smrg srcRGB = VK_BLEND_FACTOR_ONE; 6867ec681f3Smrg dstRGB = VK_BLEND_FACTOR_ONE; 6877ec681f3Smrg } 6887ec681f3Smrg if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) { 6897ec681f3Smrg srcA = VK_BLEND_FACTOR_ONE; 6907ec681f3Smrg dstA = VK_BLEND_FACTOR_ONE; 6917ec681f3Smrg } 6927ec681f3Smrg 6937ec681f3Smrg radv_blend_check_commutativity(&blend, eqRGB, srcRGB, dstRGB, 0x7u << (4 * i)); 6947ec681f3Smrg radv_blend_check_commutativity(&blend, eqA, srcA, dstA, 0x8u << (4 * i)); 6957ec681f3Smrg 6967ec681f3Smrg /* Blending optimizations for RB+. 6977ec681f3Smrg * These transformations don't change the behavior. 6987ec681f3Smrg * 6997ec681f3Smrg * First, get rid of DST in the blend factors: 7007ec681f3Smrg * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 7017ec681f3Smrg */ 7027ec681f3Smrg si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR, 7037ec681f3Smrg VK_BLEND_FACTOR_SRC_COLOR); 7047ec681f3Smrg 7057ec681f3Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR, 7067ec681f3Smrg VK_BLEND_FACTOR_SRC_COLOR); 7077ec681f3Smrg 7087ec681f3Smrg si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA, 7097ec681f3Smrg VK_BLEND_FACTOR_SRC_ALPHA); 7107ec681f3Smrg 7117ec681f3Smrg /* Look up the ideal settings from tables. 
*/ 7127ec681f3Smrg srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 7137ec681f3Smrg dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 7147ec681f3Smrg srcA_opt = si_translate_blend_opt_factor(srcA, true); 7157ec681f3Smrg dstA_opt = si_translate_blend_opt_factor(dstA, true); 7167ec681f3Smrg 7177ec681f3Smrg /* Handle interdependencies. */ 7187ec681f3Smrg if (si_blend_factor_uses_dst(srcRGB)) 7197ec681f3Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 7207ec681f3Smrg if (si_blend_factor_uses_dst(srcA)) 7217ec681f3Smrg dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 7227ec681f3Smrg 7237ec681f3Smrg if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE && 7247ec681f3Smrg (dstRGB == VK_BLEND_FACTOR_ZERO || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || 7257ec681f3Smrg dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE)) 7267ec681f3Smrg dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 7277ec681f3Smrg 7287ec681f3Smrg /* Set the final value. */ 7297ec681f3Smrg blend.sx_mrt_blend_opt[i] = 7307ec681f3Smrg S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) | 7317ec681f3Smrg S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 7327ec681f3Smrg S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 7337ec681f3Smrg S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 7347ec681f3Smrg blend_cntl |= S_028780_ENABLE(1); 7357ec681f3Smrg 7367ec681f3Smrg blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 7377ec681f3Smrg blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 7387ec681f3Smrg blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 7397ec681f3Smrg if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 7407ec681f3Smrg blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 7417ec681f3Smrg blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 7427ec681f3Smrg blend_cntl |= 
S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 7437ec681f3Smrg blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 7447ec681f3Smrg } 7457ec681f3Smrg blend.cb_blend_control[i] = blend_cntl; 7467ec681f3Smrg 7477ec681f3Smrg blend.blend_enable_4bit |= 0xfu << (i * 4); 7487ec681f3Smrg 7497ec681f3Smrg if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA || 7507ec681f3Smrg srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || 7517ec681f3Smrg dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || 7527ec681f3Smrg srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || 7537ec681f3Smrg dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) 7547ec681f3Smrg blend.need_src_alpha |= 1 << i; 7557ec681f3Smrg } 7567ec681f3Smrg for (i = vkblend->attachmentCount; i < 8; i++) { 7577ec681f3Smrg blend.cb_blend_control[i] = 0; 7587ec681f3Smrg blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 7597ec681f3Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 7607ec681f3Smrg } 7617ec681f3Smrg } 7627ec681f3Smrg 7637ec681f3Smrg if (pipeline->device->physical_device->rad_info.has_rbplus) { 7647ec681f3Smrg /* Disable RB+ blend optimizations for dual source blending. */ 7657ec681f3Smrg if (blend.mrt0_is_dual_src) { 7667ec681f3Smrg for (i = 0; i < 8; i++) { 7677ec681f3Smrg blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 7687ec681f3Smrg S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 7697ec681f3Smrg } 7707ec681f3Smrg } 7717ec681f3Smrg 7727ec681f3Smrg /* RB+ doesn't work with dual source blending, logic op and 7737ec681f3Smrg * RESOLVE. 
7747ec681f3Smrg */ 7757ec681f3Smrg if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable) || 7767ec681f3Smrg mode == V_028808_CB_RESOLVE) 7777ec681f3Smrg cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); 7787ec681f3Smrg } 7797ec681f3Smrg 7807ec681f3Smrg if (blend.cb_target_mask) 7817ec681f3Smrg cb_color_control |= S_028808_MODE(mode); 7827ec681f3Smrg else 7837ec681f3Smrg cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE); 7847ec681f3Smrg 7857ec681f3Smrg radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend); 7867ec681f3Smrg 7877ec681f3Smrg pipeline->graphics.cb_color_control = cb_color_control; 7887ec681f3Smrg 7897ec681f3Smrg return blend; 79001e04c3fSmrg} 79101e04c3fSmrg 7927ec681f3Smrgstatic uint32_t 7937ec681f3Smrgsi_translate_fill(VkPolygonMode func) 7947ec681f3Smrg{ 7957ec681f3Smrg switch (func) { 7967ec681f3Smrg case VK_POLYGON_MODE_FILL: 7977ec681f3Smrg return V_028814_X_DRAW_TRIANGLES; 7987ec681f3Smrg case VK_POLYGON_MODE_LINE: 7997ec681f3Smrg return V_028814_X_DRAW_LINES; 8007ec681f3Smrg case VK_POLYGON_MODE_POINT: 8017ec681f3Smrg return V_028814_X_DRAW_POINTS; 8027ec681f3Smrg default: 8037ec681f3Smrg assert(0); 8047ec681f3Smrg return V_028814_X_DRAW_POINTS; 8057ec681f3Smrg } 80601e04c3fSmrg} 80701e04c3fSmrg 8087ec681f3Smrgstatic uint8_t 8097ec681f3Smrgradv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateInfo *pCreateInfo) 8107ec681f3Smrg{ 8117ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState; 8127ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 8137ec681f3Smrg struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 8147ec681f3Smrg uint32_t ps_iter_samples = 1; 8157ec681f3Smrg uint32_t num_samples; 8167ec681f3Smrg 8177ec681f3Smrg /* From the Vulkan 1.1.129 spec, 26.7. 
Sample Shading: 8187ec681f3Smrg * 8197ec681f3Smrg * "If the VK_AMD_mixed_attachment_samples extension is enabled and the 8207ec681f3Smrg * subpass uses color attachments, totalSamples is the number of 8217ec681f3Smrg * samples of the color attachments. Otherwise, totalSamples is the 8227ec681f3Smrg * value of VkPipelineMultisampleStateCreateInfo::rasterizationSamples 8237ec681f3Smrg * specified at pipeline creation time." 8247ec681f3Smrg */ 8257ec681f3Smrg if (subpass->has_color_att) { 8267ec681f3Smrg num_samples = subpass->color_sample_count; 8277ec681f3Smrg } else { 8287ec681f3Smrg num_samples = vkms->rasterizationSamples; 8297ec681f3Smrg } 8307ec681f3Smrg 8317ec681f3Smrg if (vkms->sampleShadingEnable) { 8327ec681f3Smrg ps_iter_samples = ceilf(vkms->minSampleShading * num_samples); 8337ec681f3Smrg ps_iter_samples = util_next_power_of_two(ps_iter_samples); 8347ec681f3Smrg } 8357ec681f3Smrg return ps_iter_samples; 83601e04c3fSmrg} 83701e04c3fSmrg 83801e04c3fSmrgstatic bool 83901e04c3fSmrgradv_is_depth_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo) 84001e04c3fSmrg{ 8417ec681f3Smrg return pCreateInfo->depthTestEnable && pCreateInfo->depthWriteEnable && 8427ec681f3Smrg pCreateInfo->depthCompareOp != VK_COMPARE_OP_NEVER; 84301e04c3fSmrg} 84401e04c3fSmrg 84501e04c3fSmrgstatic bool 84601e04c3fSmrgradv_writes_stencil(const VkStencilOpState *state) 84701e04c3fSmrg{ 8487ec681f3Smrg return state->writeMask && 8497ec681f3Smrg (state->failOp != VK_STENCIL_OP_KEEP || state->passOp != VK_STENCIL_OP_KEEP || 8507ec681f3Smrg state->depthFailOp != VK_STENCIL_OP_KEEP); 85101e04c3fSmrg} 85201e04c3fSmrg 85301e04c3fSmrgstatic bool 85401e04c3fSmrgradv_is_stencil_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo) 85501e04c3fSmrg{ 8567ec681f3Smrg return pCreateInfo->stencilTestEnable && 8577ec681f3Smrg (radv_writes_stencil(&pCreateInfo->front) || radv_writes_stencil(&pCreateInfo->back)); 85801e04c3fSmrg} 85901e04c3fSmrg 86001e04c3fSmrgstatic 
bool 86101e04c3fSmrgradv_is_ds_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo) 86201e04c3fSmrg{ 8637ec681f3Smrg return radv_is_depth_write_enabled(pCreateInfo) || radv_is_stencil_write_enabled(pCreateInfo); 86401e04c3fSmrg} 86501e04c3fSmrg 86601e04c3fSmrgstatic bool 86701e04c3fSmrgradv_order_invariant_stencil_op(VkStencilOp op) 86801e04c3fSmrg{ 8697ec681f3Smrg /* REPLACE is normally order invariant, except when the stencil 8707ec681f3Smrg * reference value is written by the fragment shader. Tracking this 8717ec681f3Smrg * interaction does not seem worth the effort, so be conservative. 8727ec681f3Smrg */ 8737ec681f3Smrg return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP && op != VK_STENCIL_OP_DECREMENT_AND_CLAMP && 8747ec681f3Smrg op != VK_STENCIL_OP_REPLACE; 87501e04c3fSmrg} 87601e04c3fSmrg 87701e04c3fSmrgstatic bool 87801e04c3fSmrgradv_order_invariant_stencil_state(const VkStencilOpState *state) 87901e04c3fSmrg{ 8807ec681f3Smrg /* Compute whether, assuming Z writes are disabled, this stencil state 8817ec681f3Smrg * is order invariant in the sense that the set of passing fragments as 8827ec681f3Smrg * well as the final stencil buffer result does not depend on the order 8837ec681f3Smrg * of fragments. 8847ec681f3Smrg */ 8857ec681f3Smrg return !state->writeMask || 8867ec681f3Smrg /* The following assumes that Z writes are disabled. 
*/ 8877ec681f3Smrg (state->compareOp == VK_COMPARE_OP_ALWAYS && 8887ec681f3Smrg radv_order_invariant_stencil_op(state->passOp) && 8897ec681f3Smrg radv_order_invariant_stencil_op(state->depthFailOp)) || 8907ec681f3Smrg (state->compareOp == VK_COMPARE_OP_NEVER && 8917ec681f3Smrg radv_order_invariant_stencil_op(state->failOp)); 8927ec681f3Smrg} 8937ec681f3Smrg 8947ec681f3Smrgstatic bool 8957ec681f3Smrgradv_pipeline_has_dynamic_ds_states(const VkGraphicsPipelineCreateInfo *pCreateInfo) 8967ec681f3Smrg{ 8977ec681f3Smrg VkDynamicState ds_states[] = { 8987ec681f3Smrg VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, 8997ec681f3Smrg VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, 9007ec681f3Smrg VK_DYNAMIC_STATE_STENCIL_OP_EXT, 9017ec681f3Smrg }; 9027ec681f3Smrg 9037ec681f3Smrg for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) { 9047ec681f3Smrg if (radv_is_state_dynamic(pCreateInfo, ds_states[i])) 9057ec681f3Smrg return true; 9067ec681f3Smrg } 9077ec681f3Smrg 9087ec681f3Smrg return false; 90901e04c3fSmrg} 91001e04c3fSmrg 91101e04c3fSmrgstatic bool 91201e04c3fSmrgradv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline, 9137ec681f3Smrg const struct radv_blend_state *blend, 9147ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 9157ec681f3Smrg{ 9167ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 9177ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 9187ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *vkds = 9197ec681f3Smrg radv_pipeline_get_depth_stencil_state(pCreateInfo); 9207ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 9217ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 9227ec681f3Smrg unsigned colormask = blend->cb_target_enabled_4bit; 9237ec681f3Smrg 9247ec681f3Smrg if (!pipeline->device->physical_device->out_of_order_rast_allowed) 9257ec681f3Smrg return false; 9267ec681f3Smrg 
9277ec681f3Smrg /* Be conservative if a logic operation is enabled with color buffers. */ 9287ec681f3Smrg if (colormask && vkblend && vkblend->logicOpEnable) 9297ec681f3Smrg return false; 9307ec681f3Smrg 9317ec681f3Smrg /* Be conservative if an extended dynamic depth/stencil state is 9327ec681f3Smrg * enabled because the driver can't update out-of-order rasterization 9337ec681f3Smrg * dynamically. 9347ec681f3Smrg */ 9357ec681f3Smrg if (radv_pipeline_has_dynamic_ds_states(pCreateInfo)) 9367ec681f3Smrg return false; 9377ec681f3Smrg 9387ec681f3Smrg /* Default depth/stencil invariance when no attachment is bound. */ 9397ec681f3Smrg struct radv_dsa_order_invariance dsa_order_invariant = {.zs = true, .pass_set = true}; 9407ec681f3Smrg 9417ec681f3Smrg if (vkds) { 9427ec681f3Smrg struct radv_render_pass_attachment *attachment = 9437ec681f3Smrg pass->attachments + subpass->depth_stencil_attachment->attachment; 9447ec681f3Smrg bool has_stencil = vk_format_has_stencil(attachment->format); 9457ec681f3Smrg struct radv_dsa_order_invariance order_invariance[2]; 9467ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 9477ec681f3Smrg 9487ec681f3Smrg /* Compute depth/stencil order invariance in order to know if 9497ec681f3Smrg * it's safe to enable out-of-order. 
9507ec681f3Smrg */ 9517ec681f3Smrg bool zfunc_is_ordered = vkds->depthCompareOp == VK_COMPARE_OP_NEVER || 9527ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_LESS || 9537ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_LESS_OR_EQUAL || 9547ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_GREATER || 9557ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_GREATER_OR_EQUAL; 9567ec681f3Smrg 9577ec681f3Smrg bool nozwrite_and_order_invariant_stencil = 9587ec681f3Smrg !radv_is_ds_write_enabled(vkds) || 9597ec681f3Smrg (!radv_is_depth_write_enabled(vkds) && radv_order_invariant_stencil_state(&vkds->front) && 9607ec681f3Smrg radv_order_invariant_stencil_state(&vkds->back)); 9617ec681f3Smrg 9627ec681f3Smrg order_invariance[1].zs = nozwrite_and_order_invariant_stencil || 9637ec681f3Smrg (!radv_is_stencil_write_enabled(vkds) && zfunc_is_ordered); 9647ec681f3Smrg order_invariance[0].zs = !radv_is_depth_write_enabled(vkds) || zfunc_is_ordered; 9657ec681f3Smrg 9667ec681f3Smrg order_invariance[1].pass_set = 9677ec681f3Smrg nozwrite_and_order_invariant_stencil || 9687ec681f3Smrg (!radv_is_stencil_write_enabled(vkds) && (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS || 9697ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_NEVER)); 9707ec681f3Smrg order_invariance[0].pass_set = 9717ec681f3Smrg !radv_is_depth_write_enabled(vkds) || (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS || 9727ec681f3Smrg vkds->depthCompareOp == VK_COMPARE_OP_NEVER); 9737ec681f3Smrg 9747ec681f3Smrg dsa_order_invariant = order_invariance[has_stencil]; 9757ec681f3Smrg if (!dsa_order_invariant.zs) 9767ec681f3Smrg return false; 9777ec681f3Smrg 9787ec681f3Smrg /* The set of PS invocations is always order invariant, 9797ec681f3Smrg * except when early Z/S tests are requested. 
9807ec681f3Smrg */ 9817ec681f3Smrg if (ps && ps->info.ps.writes_memory && ps->info.ps.early_fragment_test && 9827ec681f3Smrg !dsa_order_invariant.pass_set) 9837ec681f3Smrg return false; 9847ec681f3Smrg 9857ec681f3Smrg /* Determine if out-of-order rasterization should be disabled 9867ec681f3Smrg * when occlusion queries are used. 9877ec681f3Smrg */ 9887ec681f3Smrg pipeline->graphics.disable_out_of_order_rast_for_occlusion = !dsa_order_invariant.pass_set; 9897ec681f3Smrg } 9907ec681f3Smrg 9917ec681f3Smrg /* No color buffers are enabled for writing. */ 9927ec681f3Smrg if (!colormask) 9937ec681f3Smrg return true; 9947ec681f3Smrg 9957ec681f3Smrg unsigned blendmask = colormask & blend->blend_enable_4bit; 9967ec681f3Smrg 9977ec681f3Smrg if (blendmask) { 9987ec681f3Smrg /* Only commutative blending. */ 9997ec681f3Smrg if (blendmask & ~blend->commutative_4bit) 10007ec681f3Smrg return false; 10017ec681f3Smrg 10027ec681f3Smrg if (!dsa_order_invariant.pass_set) 10037ec681f3Smrg return false; 10047ec681f3Smrg } 10057ec681f3Smrg 10067ec681f3Smrg if (colormask & ~blendmask) 10077ec681f3Smrg return false; 10087ec681f3Smrg 10097ec681f3Smrg return true; 10107ec681f3Smrg} 10117ec681f3Smrg 10127ec681f3Smrgstatic const VkConservativeRasterizationModeEXT 10137ec681f3Smrgradv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo) 10147ec681f3Smrg{ 10157ec681f3Smrg const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster = 10167ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, 10177ec681f3Smrg PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT); 10187ec681f3Smrg 10197ec681f3Smrg if (!conservative_raster) 10207ec681f3Smrg return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; 10217ec681f3Smrg return conservative_raster->conservativeRasterizationMode; 102201e04c3fSmrg} 102301e04c3fSmrg 102401e04c3fSmrgstatic void 102501e04c3fSmrgradv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, 10267ec681f3Smrg const 
struct radv_blend_state *blend, 10277ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 10287ec681f3Smrg{ 10297ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *vkms = 10307ec681f3Smrg radv_pipeline_get_multisample_state(pCreateInfo); 10317ec681f3Smrg struct radv_multisample_state *ms = &pipeline->graphics.ms; 10327ec681f3Smrg unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes; 10337ec681f3Smrg const VkConservativeRasterizationModeEXT mode = 10347ec681f3Smrg radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState); 10357ec681f3Smrg bool out_of_order_rast = false; 10367ec681f3Smrg int ps_iter_samples = 1; 10377ec681f3Smrg uint32_t mask = 0xffff; 10387ec681f3Smrg 10397ec681f3Smrg if (vkms) { 10407ec681f3Smrg ms->num_samples = vkms->rasterizationSamples; 10417ec681f3Smrg 10427ec681f3Smrg /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading: 10437ec681f3Smrg * 10447ec681f3Smrg * "Sample shading is enabled for a graphics pipeline: 10457ec681f3Smrg * 10467ec681f3Smrg * - If the interface of the fragment shader entry point of the 10477ec681f3Smrg * graphics pipeline includes an input variable decorated 10487ec681f3Smrg * with SampleId or SamplePosition. In this case 10497ec681f3Smrg * minSampleShadingFactor takes the value 1.0. 10507ec681f3Smrg * - Else if the sampleShadingEnable member of the 10517ec681f3Smrg * VkPipelineMultisampleStateCreateInfo structure specified 10527ec681f3Smrg * when creating the graphics pipeline is set to VK_TRUE. In 10537ec681f3Smrg * this case minSampleShadingFactor takes the value of 10547ec681f3Smrg * VkPipelineMultisampleStateCreateInfo::minSampleShading. 10557ec681f3Smrg * 10567ec681f3Smrg * Otherwise, sample shading is considered disabled." 
10577ec681f3Smrg */ 10587ec681f3Smrg if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) { 10597ec681f3Smrg ps_iter_samples = ms->num_samples; 10607ec681f3Smrg } else { 10617ec681f3Smrg ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo); 10627ec681f3Smrg } 10637ec681f3Smrg } else { 10647ec681f3Smrg ms->num_samples = 1; 10657ec681f3Smrg } 10667ec681f3Smrg 10677ec681f3Smrg const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order = 10687ec681f3Smrg vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, 10697ec681f3Smrg PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD); 10707ec681f3Smrg if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) { 10717ec681f3Smrg /* Out-of-order rasterization is explicitly enabled by the 10727ec681f3Smrg * application. 10737ec681f3Smrg */ 10747ec681f3Smrg out_of_order_rast = true; 10757ec681f3Smrg } else { 10767ec681f3Smrg /* Determine if the driver can enable out-of-order 10777ec681f3Smrg * rasterization internally. 10787ec681f3Smrg */ 10797ec681f3Smrg out_of_order_rast = radv_pipeline_out_of_order_rast(pipeline, blend, pCreateInfo); 10807ec681f3Smrg } 10817ec681f3Smrg 10827ec681f3Smrg ms->pa_sc_aa_config = 0; 10837ec681f3Smrg ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 10847ec681f3Smrg S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 10857ec681f3Smrg 10867ec681f3Smrg /* Adjust MSAA state if conservative rasterization is enabled. 
*/ 10877ec681f3Smrg if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { 10887ec681f3Smrg ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1); 10897ec681f3Smrg 10907ec681f3Smrg ms->db_eqaa |= 10917ec681f3Smrg S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4); 10927ec681f3Smrg } 10937ec681f3Smrg 10947ec681f3Smrg ms->pa_sc_mode_cntl_1 = 10957ec681f3Smrg S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes 10967ec681f3Smrg S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 10977ec681f3Smrg S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 10987ec681f3Smrg S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 10997ec681f3Smrg /* always 1: */ 11007ec681f3Smrg S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 11017ec681f3Smrg S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 11027ec681f3Smrg S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 11037ec681f3Smrg ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE( 11047ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class >= GFX9) | 11057ec681f3Smrg S_028A48_VPORT_SCISSOR_ENABLE(1); 11067ec681f3Smrg 11077ec681f3Smrg const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line = vk_find_struct_const( 11087ec681f3Smrg pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 11097ec681f3Smrg if (rast_line) { 11107ec681f3Smrg ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable); 11117ec681f3Smrg if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { 11127ec681f3Smrg /* From the Vulkan spec 1.1.129: 11137ec681f3Smrg * 11147ec681f3Smrg * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines 11157ec681f3Smrg * are being rasterized, sample locations may all be 11167ec681f3Smrg * treated as being at the pixel center (this may 11177ec681f3Smrg * affect attribute and depth 
interpolation)." 11187ec681f3Smrg */ 11197ec681f3Smrg ms->num_samples = 1; 11207ec681f3Smrg } 11217ec681f3Smrg } 11227ec681f3Smrg 11237ec681f3Smrg if (ms->num_samples > 1) { 11247ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 11257ec681f3Smrg struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 11267ec681f3Smrg uint32_t z_samples = 11277ec681f3Smrg subpass->depth_stencil_attachment ? subpass->depth_sample_count : ms->num_samples; 11287ec681f3Smrg unsigned log_samples = util_logbase2(ms->num_samples); 11297ec681f3Smrg unsigned log_z_samples = util_logbase2(z_samples); 11307ec681f3Smrg unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 11317ec681f3Smrg ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1); 11327ec681f3Smrg ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 11337ec681f3Smrg S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 11347ec681f3Smrg S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 11357ec681f3Smrg S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 11367ec681f3Smrg ms->pa_sc_aa_config |= 11377ec681f3Smrg S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 11387ec681f3Smrg S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) | 11397ec681f3Smrg S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */ 11407ec681f3Smrg S_028BE0_COVERED_CENTROID_IS_CENTER( 11417ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class >= GFX10_3); 11427ec681f3Smrg ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 11437ec681f3Smrg if (ps_iter_samples > 1) 11447ec681f3Smrg pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); 11457ec681f3Smrg } 11467ec681f3Smrg 11477ec681f3Smrg if (vkms && vkms->pSampleMask) { 11487ec681f3Smrg mask = vkms->pSampleMask[0] & 0xffff; 11497ec681f3Smrg } 11507ec681f3Smrg 11517ec681f3Smrg ms->pa_sc_aa_mask[0] = mask | (mask << 16); 11527ec681f3Smrg ms->pa_sc_aa_mask[1] = mask | (mask << 16); 
11537ec681f3Smrg} 11547ec681f3Smrg 11557ec681f3Smrgstatic void 11567ec681f3Smrggfx103_pipeline_init_vrs_state(struct radv_pipeline *pipeline, 11577ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 11587ec681f3Smrg{ 11597ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *vkms = 11607ec681f3Smrg radv_pipeline_get_multisample_state(pCreateInfo); 11617ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 11627ec681f3Smrg struct radv_multisample_state *ms = &pipeline->graphics.ms; 11637ec681f3Smrg struct radv_vrs_state *vrs = &pipeline->graphics.vrs; 11647ec681f3Smrg 11657ec681f3Smrg if (vkms && (vkms->sampleShadingEnable || ps->info.ps.uses_sample_shading || 11667ec681f3Smrg ps->info.ps.reads_sample_mask_in)) { 11677ec681f3Smrg /* Disable VRS and use the rates from PS_ITER_SAMPLES if: 11687ec681f3Smrg * 11697ec681f3Smrg * 1) sample shading is enabled or per-sample interpolation is 11707ec681f3Smrg * used by the fragment shader 11717ec681f3Smrg * 2) the fragment shader reads gl_SampleMaskIn because the 11727ec681f3Smrg * 16-bit sample coverage mask isn't enough for MSAA8x and 11737ec681f3Smrg * 2x2 coarse shading isn't enough. 11747ec681f3Smrg */ 11757ec681f3Smrg vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE); 11767ec681f3Smrg 11777ec681f3Smrg /* Make sure sample shading is enabled even if only MSAA1x is 11787ec681f3Smrg * used because the SAMPLE_ITER combiner is in passthrough 11797ec681f3Smrg * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate. 11807ec681f3Smrg * The default VRS rate when sample shading is enabled is 1x1. 
11817ec681f3Smrg */ 11827ec681f3Smrg if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1)) 11837ec681f3Smrg ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1); 11847ec681f3Smrg } else { 11857ec681f3Smrg vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); 11867ec681f3Smrg } 11877ec681f3Smrg 11887ec681f3Smrg /* The primitive combiner is always passthrough. */ 11897ec681f3Smrg vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); 119001e04c3fSmrg} 119101e04c3fSmrg 119201e04c3fSmrgstatic bool 119301e04c3fSmrgradv_prim_can_use_guardband(enum VkPrimitiveTopology topology) 119401e04c3fSmrg{ 11957ec681f3Smrg switch (topology) { 11967ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: 11977ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: 11987ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: 11997ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 12007ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 12017ec681f3Smrg return false; 12027ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: 12037ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: 12047ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: 12057ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 12067ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 12077ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: 12087ec681f3Smrg return true; 12097ec681f3Smrg default: 12107ec681f3Smrg unreachable("unhandled primitive type"); 12117ec681f3Smrg } 121201e04c3fSmrg} 121301e04c3fSmrg 121401e04c3fSmrgstatic uint32_t 12157ec681f3Smrgsi_conv_gl_prim_to_gs_out(unsigned gl_prim) 12167ec681f3Smrg{ 12177ec681f3Smrg switch (gl_prim) { 12187ec681f3Smrg case 0: /* GL_POINTS */ 12197ec681f3Smrg return V_028A6C_POINTLIST; 12207ec681f3Smrg case 1: /* GL_LINES */ 12217ec681f3Smrg case 3: /* GL_LINE_STRIP */ 12227ec681f3Smrg case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */ 12237ec681f3Smrg case 
0x8E7A: /* GL_ISOLINES */ 12247ec681f3Smrg return V_028A6C_LINESTRIP; 12257ec681f3Smrg 12267ec681f3Smrg case 4: /* GL_TRIANGLES */ 12277ec681f3Smrg case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */ 12287ec681f3Smrg case 5: /* GL_TRIANGLE_STRIP */ 12297ec681f3Smrg case 7: /* GL_QUADS */ 12307ec681f3Smrg return V_028A6C_TRISTRIP; 12317ec681f3Smrg default: 12327ec681f3Smrg assert(0); 12337ec681f3Smrg return 0; 12347ec681f3Smrg } 123501e04c3fSmrg} 123601e04c3fSmrg 12377ec681f3Smrgstatic uint64_t 12387ec681f3Smrgradv_dynamic_state_mask(VkDynamicState state) 123901e04c3fSmrg{ 12407ec681f3Smrg switch (state) { 12417ec681f3Smrg case VK_DYNAMIC_STATE_VIEWPORT: 12427ec681f3Smrg case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT: 12437ec681f3Smrg return RADV_DYNAMIC_VIEWPORT; 12447ec681f3Smrg case VK_DYNAMIC_STATE_SCISSOR: 12457ec681f3Smrg case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT: 12467ec681f3Smrg return RADV_DYNAMIC_SCISSOR; 12477ec681f3Smrg case VK_DYNAMIC_STATE_LINE_WIDTH: 12487ec681f3Smrg return RADV_DYNAMIC_LINE_WIDTH; 12497ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BIAS: 12507ec681f3Smrg return RADV_DYNAMIC_DEPTH_BIAS; 12517ec681f3Smrg case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 12527ec681f3Smrg return RADV_DYNAMIC_BLEND_CONSTANTS; 12537ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 12547ec681f3Smrg return RADV_DYNAMIC_DEPTH_BOUNDS; 12557ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 12567ec681f3Smrg return RADV_DYNAMIC_STENCIL_COMPARE_MASK; 12577ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 12587ec681f3Smrg return RADV_DYNAMIC_STENCIL_WRITE_MASK; 12597ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 12607ec681f3Smrg return RADV_DYNAMIC_STENCIL_REFERENCE; 12617ec681f3Smrg case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT: 12627ec681f3Smrg return RADV_DYNAMIC_DISCARD_RECTANGLE; 12637ec681f3Smrg case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: 12647ec681f3Smrg return RADV_DYNAMIC_SAMPLE_LOCATIONS; 12657ec681f3Smrg case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: 12667ec681f3Smrg return 
RADV_DYNAMIC_LINE_STIPPLE; 12677ec681f3Smrg case VK_DYNAMIC_STATE_CULL_MODE_EXT: 12687ec681f3Smrg return RADV_DYNAMIC_CULL_MODE; 12697ec681f3Smrg case VK_DYNAMIC_STATE_FRONT_FACE_EXT: 12707ec681f3Smrg return RADV_DYNAMIC_FRONT_FACE; 12717ec681f3Smrg case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT: 12727ec681f3Smrg return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; 12737ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT: 12747ec681f3Smrg return RADV_DYNAMIC_DEPTH_TEST_ENABLE; 12757ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT: 12767ec681f3Smrg return RADV_DYNAMIC_DEPTH_WRITE_ENABLE; 12777ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT: 12787ec681f3Smrg return RADV_DYNAMIC_DEPTH_COMPARE_OP; 12797ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT: 12807ec681f3Smrg return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; 12817ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT: 12827ec681f3Smrg return RADV_DYNAMIC_STENCIL_TEST_ENABLE; 12837ec681f3Smrg case VK_DYNAMIC_STATE_STENCIL_OP_EXT: 12847ec681f3Smrg return RADV_DYNAMIC_STENCIL_OP; 12857ec681f3Smrg case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT: 12867ec681f3Smrg return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE; 12877ec681f3Smrg case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: 12887ec681f3Smrg return RADV_DYNAMIC_FRAGMENT_SHADING_RATE; 12897ec681f3Smrg case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: 12907ec681f3Smrg return RADV_DYNAMIC_PATCH_CONTROL_POINTS; 12917ec681f3Smrg case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT: 12927ec681f3Smrg return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 12937ec681f3Smrg case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT: 12947ec681f3Smrg return RADV_DYNAMIC_DEPTH_BIAS_ENABLE; 12957ec681f3Smrg case VK_DYNAMIC_STATE_LOGIC_OP_EXT: 12967ec681f3Smrg return RADV_DYNAMIC_LOGIC_OP; 12977ec681f3Smrg case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT: 12987ec681f3Smrg return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE; 12997ec681f3Smrg case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 
13007ec681f3Smrg return RADV_DYNAMIC_COLOR_WRITE_ENABLE; 13017ec681f3Smrg case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT: 13027ec681f3Smrg return RADV_DYNAMIC_VERTEX_INPUT; 13037ec681f3Smrg default: 13047ec681f3Smrg unreachable("Unhandled dynamic state"); 13057ec681f3Smrg } 130601e04c3fSmrg} 130701e04c3fSmrg 13087ec681f3Smrgstatic bool 13097ec681f3Smrgradv_pipeline_is_blend_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo) 13107ec681f3Smrg{ 13117ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 13127ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 13137ec681f3Smrg 13147ec681f3Smrg assert(vkblend); 13157ec681f3Smrg 13167ec681f3Smrg for (uint32_t i = 0; i < vkblend->attachmentCount; i++) { 13177ec681f3Smrg const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i]; 13187ec681f3Smrg if (att->colorWriteMask && att->blendEnable) 13197ec681f3Smrg return true; 13207ec681f3Smrg } 13217ec681f3Smrg return false; 132201e04c3fSmrg} 132301e04c3fSmrg 13247ec681f3Smrgstatic uint64_t 13257ec681f3Smrgradv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo) 13267ec681f3Smrg{ 13277ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 13287ec681f3Smrg struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 13297ec681f3Smrg uint64_t states = RADV_DYNAMIC_ALL; 13307ec681f3Smrg 13317ec681f3Smrg /* If rasterization is disabled we do not care about any of the 13327ec681f3Smrg * dynamic states, since they are all rasterization related only, 13337ec681f3Smrg * except primitive topology, primitive restart enable, vertex 13347ec681f3Smrg * binding stride and rasterization discard itself. 
13357ec681f3Smrg */ 13367ec681f3Smrg if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable && 13377ec681f3Smrg !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) { 13387ec681f3Smrg return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | 13397ec681f3Smrg RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE | 13407ec681f3Smrg RADV_DYNAMIC_VERTEX_INPUT; 13417ec681f3Smrg } 13427ec681f3Smrg 13437ec681f3Smrg if (!pCreateInfo->pRasterizationState->depthBiasEnable && 13447ec681f3Smrg !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT)) 13457ec681f3Smrg states &= ~RADV_DYNAMIC_DEPTH_BIAS; 13467ec681f3Smrg 13477ec681f3Smrg if (!pCreateInfo->pDepthStencilState || 13487ec681f3Smrg (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable && 13497ec681f3Smrg !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT))) 13507ec681f3Smrg states &= ~RADV_DYNAMIC_DEPTH_BOUNDS; 13517ec681f3Smrg 13527ec681f3Smrg if (!pCreateInfo->pDepthStencilState || 13537ec681f3Smrg (!pCreateInfo->pDepthStencilState->stencilTestEnable && 13547ec681f3Smrg !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT))) 13557ec681f3Smrg states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | 13567ec681f3Smrg RADV_DYNAMIC_STENCIL_REFERENCE); 13577ec681f3Smrg 13587ec681f3Smrg if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT)) 13597ec681f3Smrg states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE; 13607ec681f3Smrg 13617ec681f3Smrg if (!pCreateInfo->pMultisampleState || 13627ec681f3Smrg !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext, 13637ec681f3Smrg PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) 13647ec681f3Smrg states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS; 13657ec681f3Smrg 13667ec681f3Smrg if (!pCreateInfo->pRasterizationState) 13677ec681f3Smrg states &= 
~RADV_DYNAMIC_LINE_STIPPLE; 13687ec681f3Smrg else { 13697ec681f3Smrg const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, 13707ec681f3Smrg PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 13717ec681f3Smrg if (!rast_line_info || !rast_line_info->stippledLineEnable) 13727ec681f3Smrg states &= ~RADV_DYNAMIC_LINE_STIPPLE; 13737ec681f3Smrg } 13747ec681f3Smrg 13757ec681f3Smrg if (!vk_find_struct_const(pCreateInfo->pNext, 13767ec681f3Smrg PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) && 13777ec681f3Smrg !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) 13787ec681f3Smrg states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE; 13797ec681f3Smrg 13807ec681f3Smrg if (!subpass->has_color_att || 13817ec681f3Smrg !radv_pipeline_is_blend_enabled(pCreateInfo)) 13827ec681f3Smrg states &= ~RADV_DYNAMIC_BLEND_CONSTANTS; 13837ec681f3Smrg 13847ec681f3Smrg if (!subpass->has_color_att) 13857ec681f3Smrg states &= ~RADV_DYNAMIC_COLOR_WRITE_ENABLE; 13867ec681f3Smrg 13877ec681f3Smrg return states; 13887ec681f3Smrg} 13897ec681f3Smrg 13907ec681f3Smrgstatic struct radv_ia_multi_vgt_param_helpers 13917ec681f3Smrgradv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline) 13927ec681f3Smrg{ 13937ec681f3Smrg struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0}; 13947ec681f3Smrg const struct radv_device *device = pipeline->device; 13957ec681f3Smrg 13967ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) 13977ec681f3Smrg ia_multi_vgt_param.primgroup_size = 13987ec681f3Smrg pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 13997ec681f3Smrg else if (radv_pipeline_has_gs(pipeline)) 14007ec681f3Smrg ia_multi_vgt_param.primgroup_size = 64; 14017ec681f3Smrg else 14027ec681f3Smrg ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */ 14037ec681f3Smrg 14047ec681f3Smrg /* GS requirement. 
*/ 14057ec681f3Smrg ia_multi_vgt_param.partial_es_wave = false; 14067ec681f3Smrg if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8) 14077ec681f3Smrg if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3) 14087ec681f3Smrg ia_multi_vgt_param.partial_es_wave = true; 14097ec681f3Smrg 14107ec681f3Smrg ia_multi_vgt_param.ia_switch_on_eoi = false; 14117ec681f3Smrg if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input) 14127ec681f3Smrg ia_multi_vgt_param.ia_switch_on_eoi = true; 14137ec681f3Smrg if (radv_pipeline_has_gs(pipeline) && pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id) 14147ec681f3Smrg ia_multi_vgt_param.ia_switch_on_eoi = true; 14157ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 14167ec681f3Smrg /* SWITCH_ON_EOI must be set if PrimID is used. */ 14177ec681f3Smrg if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || 14187ec681f3Smrg radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id) 14197ec681f3Smrg ia_multi_vgt_param.ia_switch_on_eoi = true; 14207ec681f3Smrg } 14217ec681f3Smrg 14227ec681f3Smrg ia_multi_vgt_param.partial_vs_wave = false; 14237ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 14247ec681f3Smrg /* Bug with tessellation and GS on Bonaire and older 2 SE chips. 
*/ 14257ec681f3Smrg if ((device->physical_device->rad_info.family == CHIP_TAHITI || 14267ec681f3Smrg device->physical_device->rad_info.family == CHIP_PITCAIRN || 14277ec681f3Smrg device->physical_device->rad_info.family == CHIP_BONAIRE) && 14287ec681f3Smrg radv_pipeline_has_gs(pipeline)) 14297ec681f3Smrg ia_multi_vgt_param.partial_vs_wave = true; 14307ec681f3Smrg /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ 14317ec681f3Smrg if (device->physical_device->rad_info.has_distributed_tess) { 14327ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) { 14337ec681f3Smrg if (device->physical_device->rad_info.chip_class <= GFX8) 14347ec681f3Smrg ia_multi_vgt_param.partial_es_wave = true; 14357ec681f3Smrg } else { 14367ec681f3Smrg ia_multi_vgt_param.partial_vs_wave = true; 14377ec681f3Smrg } 14387ec681f3Smrg } 14397ec681f3Smrg } 14407ec681f3Smrg 14417ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) { 14427ec681f3Smrg /* On these chips there is the possibility of a hang if the 14437ec681f3Smrg * pipeline uses a GS and partial_vs_wave is not set. 14447ec681f3Smrg * 14457ec681f3Smrg * This mostly does not hit 4-SE chips, as those typically set 14467ec681f3Smrg * ia_switch_on_eoi and then partial_vs_wave is set for pipelines 14477ec681f3Smrg * with GS due to another workaround. 
14487ec681f3Smrg * 14497ec681f3Smrg * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242 14507ec681f3Smrg */ 14517ec681f3Smrg if (device->physical_device->rad_info.family == CHIP_TONGA || 14527ec681f3Smrg device->physical_device->rad_info.family == CHIP_FIJI || 14537ec681f3Smrg device->physical_device->rad_info.family == CHIP_POLARIS10 || 14547ec681f3Smrg device->physical_device->rad_info.family == CHIP_POLARIS11 || 14557ec681f3Smrg device->physical_device->rad_info.family == CHIP_POLARIS12 || 14567ec681f3Smrg device->physical_device->rad_info.family == CHIP_VEGAM) { 14577ec681f3Smrg ia_multi_vgt_param.partial_vs_wave = true; 14587ec681f3Smrg } 14597ec681f3Smrg } 14607ec681f3Smrg 14617ec681f3Smrg ia_multi_vgt_param.base = 14627ec681f3Smrg S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) | 14637ec681f3Smrg /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ 14647ec681f3Smrg S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 
2 : 0) | 14657ec681f3Smrg S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) | 14667ec681f3Smrg S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9); 14677ec681f3Smrg 14687ec681f3Smrg return ia_multi_vgt_param; 14697ec681f3Smrg} 14707ec681f3Smrg 14717ec681f3Smrgstatic void 14727ec681f3Smrgradv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline, 14737ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 14747ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra) 14757ec681f3Smrg{ 14767ec681f3Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_state = pCreateInfo->pInputAssemblyState; 14777ec681f3Smrg struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; 14787ec681f3Smrg struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 14797ec681f3Smrg 14807ec681f3Smrg pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(ia_state->topology); 14817ec681f3Smrg 14827ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) { 14837ec681f3Smrg if (si_conv_gl_prim_to_gs_out(gs->info.gs.output_prim) == V_028A6C_TRISTRIP) 14847ec681f3Smrg pipeline->graphics.can_use_guardband = true; 14857ec681f3Smrg } else if (radv_pipeline_has_tess(pipeline)) { 14867ec681f3Smrg if (!tes->info.tes.point_mode && 14877ec681f3Smrg si_conv_gl_prim_to_gs_out(tes->info.tes.primitive_mode) == V_028A6C_TRISTRIP) 14887ec681f3Smrg pipeline->graphics.can_use_guardband = true; 14897ec681f3Smrg } 14907ec681f3Smrg 14917ec681f3Smrg if (extra && extra->use_rectlist) { 14927ec681f3Smrg pipeline->graphics.can_use_guardband = true; 14937ec681f3Smrg } 14947ec681f3Smrg 14957ec681f3Smrg pipeline->graphics.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline); 14967ec681f3Smrg} 149701e04c3fSmrg 149801e04c3fSmrgstatic void 149901e04c3fSmrgradv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, 15007ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 
15017ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra) 15027ec681f3Smrg{ 15037ec681f3Smrg uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo); 15047ec681f3Smrg uint64_t states = needed_states; 15057ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 15067ec681f3Smrg struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 15077ec681f3Smrg 15087ec681f3Smrg pipeline->dynamic_state = default_dynamic_state; 15097ec681f3Smrg pipeline->graphics.needed_dynamic_state = needed_states; 15107ec681f3Smrg 15117ec681f3Smrg if (pCreateInfo->pDynamicState) { 15127ec681f3Smrg /* Remove all of the states that are marked as dynamic */ 15137ec681f3Smrg uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; 15147ec681f3Smrg for (uint32_t s = 0; s < count; s++) 15157ec681f3Smrg states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]); 15167ec681f3Smrg } 15177ec681f3Smrg 15187ec681f3Smrg struct radv_dynamic_state *dynamic = &pipeline->dynamic_state; 15197ec681f3Smrg 15207ec681f3Smrg if (needed_states & RADV_DYNAMIC_VIEWPORT) { 15217ec681f3Smrg assert(pCreateInfo->pViewportState); 15227ec681f3Smrg 15237ec681f3Smrg dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; 15247ec681f3Smrg if (states & RADV_DYNAMIC_VIEWPORT) { 15257ec681f3Smrg typed_memcpy(dynamic->viewport.viewports, pCreateInfo->pViewportState->pViewports, 15267ec681f3Smrg pCreateInfo->pViewportState->viewportCount); 15277ec681f3Smrg for (unsigned i = 0; i < dynamic->viewport.count; i++) 15287ec681f3Smrg radv_get_viewport_xform(&dynamic->viewport.viewports[i], 15297ec681f3Smrg dynamic->viewport.xform[i].scale, dynamic->viewport.xform[i].translate); 15307ec681f3Smrg } 15317ec681f3Smrg } 15327ec681f3Smrg 15337ec681f3Smrg if (needed_states & RADV_DYNAMIC_SCISSOR) { 15347ec681f3Smrg dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; 15357ec681f3Smrg if (states & RADV_DYNAMIC_SCISSOR) { 
15367ec681f3Smrg typed_memcpy(dynamic->scissor.scissors, pCreateInfo->pViewportState->pScissors, 15377ec681f3Smrg pCreateInfo->pViewportState->scissorCount); 15387ec681f3Smrg } 15397ec681f3Smrg } 15407ec681f3Smrg 15417ec681f3Smrg if (states & RADV_DYNAMIC_LINE_WIDTH) { 15427ec681f3Smrg assert(pCreateInfo->pRasterizationState); 15437ec681f3Smrg dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; 15447ec681f3Smrg } 15457ec681f3Smrg 15467ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_BIAS) { 15477ec681f3Smrg assert(pCreateInfo->pRasterizationState); 15487ec681f3Smrg dynamic->depth_bias.bias = pCreateInfo->pRasterizationState->depthBiasConstantFactor; 15497ec681f3Smrg dynamic->depth_bias.clamp = pCreateInfo->pRasterizationState->depthBiasClamp; 15507ec681f3Smrg dynamic->depth_bias.slope = pCreateInfo->pRasterizationState->depthBiasSlopeFactor; 15517ec681f3Smrg } 15527ec681f3Smrg 15537ec681f3Smrg /* Section 9.2 of the Vulkan 1.0.15 spec says: 15547ec681f3Smrg * 15557ec681f3Smrg * pColorBlendState is [...] NULL if the pipeline has rasterization 15567ec681f3Smrg * disabled or if the subpass of the render pass the pipeline is 15577ec681f3Smrg * created against does not use any color attachments. 
15587ec681f3Smrg */ 15597ec681f3Smrg if (states & RADV_DYNAMIC_BLEND_CONSTANTS) { 15607ec681f3Smrg assert(pCreateInfo->pColorBlendState); 15617ec681f3Smrg typed_memcpy(dynamic->blend_constants, pCreateInfo->pColorBlendState->blendConstants, 4); 15627ec681f3Smrg } 15637ec681f3Smrg 15647ec681f3Smrg if (states & RADV_DYNAMIC_CULL_MODE) { 15657ec681f3Smrg dynamic->cull_mode = pCreateInfo->pRasterizationState->cullMode; 15667ec681f3Smrg } 15677ec681f3Smrg 15687ec681f3Smrg if (states & RADV_DYNAMIC_FRONT_FACE) { 15697ec681f3Smrg dynamic->front_face = pCreateInfo->pRasterizationState->frontFace; 15707ec681f3Smrg } 15717ec681f3Smrg 15727ec681f3Smrg if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { 15737ec681f3Smrg dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); 15747ec681f3Smrg if (extra && extra->use_rectlist) { 15757ec681f3Smrg dynamic->primitive_topology = V_008958_DI_PT_RECTLIST; 15767ec681f3Smrg } 15777ec681f3Smrg } 15787ec681f3Smrg 15797ec681f3Smrg /* If there is no depthstencil attachment, then don't read 15807ec681f3Smrg * pDepthStencilState. The Vulkan spec states that pDepthStencilState may 15817ec681f3Smrg * be NULL in this case. Even if pDepthStencilState is non-NULL, there is 15827ec681f3Smrg * no need to override the depthstencil defaults in 15837ec681f3Smrg * radv_pipeline::dynamic_state when there is no depthstencil attachment. 15847ec681f3Smrg * 15857ec681f3Smrg * Section 9.2 of the Vulkan 1.0.15 spec says: 15867ec681f3Smrg * 15877ec681f3Smrg * pDepthStencilState is [...] NULL if the pipeline has rasterization 15887ec681f3Smrg * disabled or if the subpass of the render pass the pipeline is created 15897ec681f3Smrg * against does not use a depth/stencil attachment. 
15907ec681f3Smrg */ 15917ec681f3Smrg if (needed_states && subpass->depth_stencil_attachment) { 15927ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_BOUNDS) { 15937ec681f3Smrg dynamic->depth_bounds.min = pCreateInfo->pDepthStencilState->minDepthBounds; 15947ec681f3Smrg dynamic->depth_bounds.max = pCreateInfo->pDepthStencilState->maxDepthBounds; 15957ec681f3Smrg } 15967ec681f3Smrg 15977ec681f3Smrg if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { 15987ec681f3Smrg dynamic->stencil_compare_mask.front = pCreateInfo->pDepthStencilState->front.compareMask; 15997ec681f3Smrg dynamic->stencil_compare_mask.back = pCreateInfo->pDepthStencilState->back.compareMask; 16007ec681f3Smrg } 16017ec681f3Smrg 16027ec681f3Smrg if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) { 16037ec681f3Smrg dynamic->stencil_write_mask.front = pCreateInfo->pDepthStencilState->front.writeMask; 16047ec681f3Smrg dynamic->stencil_write_mask.back = pCreateInfo->pDepthStencilState->back.writeMask; 16057ec681f3Smrg } 16067ec681f3Smrg 16077ec681f3Smrg if (states & RADV_DYNAMIC_STENCIL_REFERENCE) { 16087ec681f3Smrg dynamic->stencil_reference.front = pCreateInfo->pDepthStencilState->front.reference; 16097ec681f3Smrg dynamic->stencil_reference.back = pCreateInfo->pDepthStencilState->back.reference; 16107ec681f3Smrg } 16117ec681f3Smrg 16127ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) { 16137ec681f3Smrg dynamic->depth_test_enable = pCreateInfo->pDepthStencilState->depthTestEnable; 16147ec681f3Smrg } 16157ec681f3Smrg 16167ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) { 16177ec681f3Smrg dynamic->depth_write_enable = pCreateInfo->pDepthStencilState->depthWriteEnable; 16187ec681f3Smrg } 16197ec681f3Smrg 16207ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) { 16217ec681f3Smrg dynamic->depth_compare_op = pCreateInfo->pDepthStencilState->depthCompareOp; 16227ec681f3Smrg } 16237ec681f3Smrg 16247ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { 16257ec681f3Smrg 
dynamic->depth_bounds_test_enable = pCreateInfo->pDepthStencilState->depthBoundsTestEnable; 16267ec681f3Smrg } 16277ec681f3Smrg 16287ec681f3Smrg if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) { 16297ec681f3Smrg dynamic->stencil_test_enable = pCreateInfo->pDepthStencilState->stencilTestEnable; 16307ec681f3Smrg } 16317ec681f3Smrg 16327ec681f3Smrg if (states & RADV_DYNAMIC_STENCIL_OP) { 16337ec681f3Smrg dynamic->stencil_op.front.compare_op = pCreateInfo->pDepthStencilState->front.compareOp; 16347ec681f3Smrg dynamic->stencil_op.front.fail_op = pCreateInfo->pDepthStencilState->front.failOp; 16357ec681f3Smrg dynamic->stencil_op.front.pass_op = pCreateInfo->pDepthStencilState->front.passOp; 16367ec681f3Smrg dynamic->stencil_op.front.depth_fail_op = 16377ec681f3Smrg pCreateInfo->pDepthStencilState->front.depthFailOp; 16387ec681f3Smrg 16397ec681f3Smrg dynamic->stencil_op.back.compare_op = pCreateInfo->pDepthStencilState->back.compareOp; 16407ec681f3Smrg dynamic->stencil_op.back.fail_op = pCreateInfo->pDepthStencilState->back.failOp; 16417ec681f3Smrg dynamic->stencil_op.back.pass_op = pCreateInfo->pDepthStencilState->back.passOp; 16427ec681f3Smrg dynamic->stencil_op.back.depth_fail_op = pCreateInfo->pDepthStencilState->back.depthFailOp; 16437ec681f3Smrg } 16447ec681f3Smrg } 16457ec681f3Smrg 16467ec681f3Smrg const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info = 16477ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT); 16487ec681f3Smrg if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) { 16497ec681f3Smrg dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount; 16507ec681f3Smrg if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { 16517ec681f3Smrg typed_memcpy(dynamic->discard_rectangle.rectangles, 16527ec681f3Smrg discard_rectangle_info->pDiscardRectangles, 16537ec681f3Smrg discard_rectangle_info->discardRectangleCount); 16547ec681f3Smrg } 16557ec681f3Smrg } 16567ec681f3Smrg 
16577ec681f3Smrg if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) { 16587ec681f3Smrg const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info = 16597ec681f3Smrg vk_find_struct_const(pCreateInfo->pMultisampleState->pNext, 16607ec681f3Smrg PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); 16617ec681f3Smrg /* If sampleLocationsEnable is VK_FALSE, the default sample 16627ec681f3Smrg * locations are used and the values specified in 16637ec681f3Smrg * sampleLocationsInfo are ignored. 16647ec681f3Smrg */ 16657ec681f3Smrg if (sample_location_info->sampleLocationsEnable) { 16667ec681f3Smrg const VkSampleLocationsInfoEXT *pSampleLocationsInfo = 16677ec681f3Smrg &sample_location_info->sampleLocationsInfo; 16687ec681f3Smrg 16697ec681f3Smrg assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); 16707ec681f3Smrg 16717ec681f3Smrg dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; 16727ec681f3Smrg dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; 16737ec681f3Smrg dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount; 16747ec681f3Smrg typed_memcpy(&dynamic->sample_location.locations[0], 16757ec681f3Smrg pSampleLocationsInfo->pSampleLocations, 16767ec681f3Smrg pSampleLocationsInfo->sampleLocationsCount); 16777ec681f3Smrg } 16787ec681f3Smrg } 16797ec681f3Smrg 16807ec681f3Smrg const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = vk_find_struct_const( 16817ec681f3Smrg pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 16827ec681f3Smrg if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) { 16837ec681f3Smrg dynamic->line_stipple.factor = rast_line_info->lineStippleFactor; 16847ec681f3Smrg dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern; 16857ec681f3Smrg } 16867ec681f3Smrg 16877ec681f3Smrg if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE) || 16887ec681f3Smrg !(states & 
RADV_DYNAMIC_VERTEX_INPUT)) 16897ec681f3Smrg pipeline->graphics.uses_dynamic_stride = true; 16907ec681f3Smrg 16917ec681f3Smrg const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate = vk_find_struct_const( 16927ec681f3Smrg pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR); 16937ec681f3Smrg if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) { 16947ec681f3Smrg dynamic->fragment_shading_rate.size = shading_rate->fragmentSize; 16957ec681f3Smrg for (int i = 0; i < 2; i++) 16967ec681f3Smrg dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i]; 16977ec681f3Smrg } 16987ec681f3Smrg 16997ec681f3Smrg if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) { 17007ec681f3Smrg dynamic->depth_bias_enable = pCreateInfo->pRasterizationState->depthBiasEnable; 17017ec681f3Smrg } 17027ec681f3Smrg 17037ec681f3Smrg if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) { 17047ec681f3Smrg dynamic->primitive_restart_enable = 17057ec681f3Smrg !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable; 17067ec681f3Smrg } 17077ec681f3Smrg 17087ec681f3Smrg if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { 17097ec681f3Smrg dynamic->rasterizer_discard_enable = 17107ec681f3Smrg pCreateInfo->pRasterizationState->rasterizerDiscardEnable; 17117ec681f3Smrg } 17127ec681f3Smrg 17137ec681f3Smrg if (subpass->has_color_att && states & RADV_DYNAMIC_LOGIC_OP) { 17147ec681f3Smrg if (pCreateInfo->pColorBlendState->logicOpEnable) { 17157ec681f3Smrg dynamic->logic_op = si_translate_blend_logic_op(pCreateInfo->pColorBlendState->logicOp); 17167ec681f3Smrg } else { 17177ec681f3Smrg dynamic->logic_op = V_028808_ROP3_COPY; 17187ec681f3Smrg } 17197ec681f3Smrg } 17207ec681f3Smrg 17217ec681f3Smrg if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) { 17227ec681f3Smrg const VkPipelineColorWriteCreateInfoEXT *color_write_info = vk_find_struct_const( 17237ec681f3Smrg pCreateInfo->pColorBlendState->pNext, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); 17247ec681f3Smrg if 
(color_write_info) { 17257ec681f3Smrg dynamic->color_write_enable = 0; 17267ec681f3Smrg for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) { 17277ec681f3Smrg dynamic->color_write_enable |= 17287ec681f3Smrg color_write_info->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 17297ec681f3Smrg } 17307ec681f3Smrg } 17317ec681f3Smrg } 17327ec681f3Smrg 17337ec681f3Smrg pipeline->dynamic_state.mask = states; 173401e04c3fSmrg} 173501e04c3fSmrg 173601e04c3fSmrgstatic void 17377ec681f3Smrgradv_pipeline_init_raster_state(struct radv_pipeline *pipeline, 17387ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 17397ec681f3Smrg{ 17407ec681f3Smrg const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState; 17417ec681f3Smrg const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info = 17427ec681f3Smrg vk_find_struct_const(raster_info->pNext, 17437ec681f3Smrg PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); 17447ec681f3Smrg bool provoking_vtx_last = false; 17457ec681f3Smrg 17467ec681f3Smrg if (provoking_vtx_info && 17477ec681f3Smrg provoking_vtx_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { 17487ec681f3Smrg provoking_vtx_last = true; 17497ec681f3Smrg } 17507ec681f3Smrg 17517ec681f3Smrg pipeline->graphics.pa_su_sc_mode_cntl = 17527ec681f3Smrg S_028814_FACE(raster_info->frontFace) | 17537ec681f3Smrg S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) | 17547ec681f3Smrg S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) | 17557ec681f3Smrg S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) | 17567ec681f3Smrg S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) | 17577ec681f3Smrg S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) | 17587ec681f3Smrg S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 
1 : 0) | 17597ec681f3Smrg S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) | 17607ec681f3Smrg S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0) | 17617ec681f3Smrg S_028814_PROVOKING_VTX_LAST(provoking_vtx_last); 17627ec681f3Smrg 17637ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 17647ec681f3Smrg /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */ 17657ec681f3Smrg pipeline->graphics.pa_su_sc_mode_cntl |= 17667ec681f3Smrg S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL); 17677ec681f3Smrg } 17687ec681f3Smrg 17697ec681f3Smrg bool depth_clip_disable = raster_info->depthClampEnable; 17707ec681f3Smrg const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state = 17717ec681f3Smrg vk_find_struct_const(raster_info->pNext, 17727ec681f3Smrg PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT); 17737ec681f3Smrg if (depth_clip_state) { 17747ec681f3Smrg depth_clip_disable = !depth_clip_state->depthClipEnable; 17757ec681f3Smrg } 17767ec681f3Smrg 17777ec681f3Smrg pipeline->graphics.pa_cl_clip_cntl = 17787ec681f3Smrg S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. 17797ec681f3Smrg S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) | 17807ec681f3Smrg S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) | 17817ec681f3Smrg S_028810_DX_RASTERIZATION_KILL(raster_info->rasterizerDiscardEnable ? 
1 : 0) | 17827ec681f3Smrg S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 17837ec681f3Smrg 17847ec681f3Smrg pipeline->graphics.uses_conservative_overestimate = 17857ec681f3Smrg radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState) == 17867ec681f3Smrg VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT; 17877ec681f3Smrg} 17887ec681f3Smrg 17897ec681f3Smrgstatic void 17907ec681f3Smrgradv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline, 17917ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 17927ec681f3Smrg{ 17937ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info = 17947ec681f3Smrg radv_pipeline_get_depth_stencil_state(pCreateInfo); 17957ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 17967ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 17977ec681f3Smrg struct radv_render_pass_attachment *attachment = NULL; 17987ec681f3Smrg uint32_t db_depth_control = 0; 17997ec681f3Smrg 18007ec681f3Smrg if (subpass->depth_stencil_attachment) 18017ec681f3Smrg attachment = pass->attachments + subpass->depth_stencil_attachment->attachment; 18027ec681f3Smrg 18037ec681f3Smrg bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format); 18047ec681f3Smrg bool has_stencil_attachment = attachment && vk_format_has_stencil(attachment->format); 18057ec681f3Smrg 18067ec681f3Smrg if (ds_info) { 18077ec681f3Smrg if (has_depth_attachment) { 18087ec681f3Smrg db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) | 18097ec681f3Smrg S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) | 18107ec681f3Smrg S_028800_ZFUNC(ds_info->depthCompareOp) | 18117ec681f3Smrg S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 
1 : 0); 18127ec681f3Smrg } 18137ec681f3Smrg 18147ec681f3Smrg if (has_stencil_attachment && ds_info->stencilTestEnable) { 18157ec681f3Smrg db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1); 18167ec681f3Smrg db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp); 18177ec681f3Smrg db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp); 18187ec681f3Smrg } 18197ec681f3Smrg } 18207ec681f3Smrg 18217ec681f3Smrg pipeline->graphics.db_depth_control = db_depth_control; 18227ec681f3Smrg} 18237ec681f3Smrg 18247ec681f3Smrgstatic void 18257ec681f3Smrggfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline *pipeline, 18267ec681f3Smrg nir_shader **nir, struct radv_shader_info *infos, struct gfx9_gs_info *out) 18277ec681f3Smrg{ 18287ec681f3Smrg struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY]; 18297ec681f3Smrg struct radv_es_output_info *es_info; 18307ec681f3Smrg bool has_tess = !!nir[MESA_SHADER_TESS_CTRL]; 18317ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) 18327ec681f3Smrg es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info; 18337ec681f3Smrg else 18347ec681f3Smrg es_info = has_tess ? 
&infos[MESA_SHADER_TESS_EVAL].tes.es_info 18357ec681f3Smrg : &infos[MESA_SHADER_VERTEX].vs.es_info; 18367ec681f3Smrg 18377ec681f3Smrg unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1); 18387ec681f3Smrg bool uses_adjacency; 18397ec681f3Smrg switch (key->vs.topology) { 18407ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 18417ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 18427ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 18437ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 18447ec681f3Smrg uses_adjacency = true; 18457ec681f3Smrg break; 18467ec681f3Smrg default: 18477ec681f3Smrg uses_adjacency = false; 18487ec681f3Smrg break; 18497ec681f3Smrg } 18507ec681f3Smrg 18517ec681f3Smrg /* All these are in dwords: */ 18527ec681f3Smrg /* We can't allow using the whole LDS, because GS waves compete with 18537ec681f3Smrg * other shader stages for LDS space. */ 18547ec681f3Smrg const unsigned max_lds_size = 8 * 1024; 18557ec681f3Smrg const unsigned esgs_itemsize = es_info->esgs_itemsize / 4; 18567ec681f3Smrg unsigned esgs_lds_size; 18577ec681f3Smrg 18587ec681f3Smrg /* All these are per subgroup: */ 18597ec681f3Smrg const unsigned max_out_prims = 32 * 1024; 18607ec681f3Smrg const unsigned max_es_verts = 255; 18617ec681f3Smrg const unsigned ideal_gs_prims = 64; 18627ec681f3Smrg unsigned max_gs_prims, gs_prims; 18637ec681f3Smrg unsigned min_es_verts, es_verts, worst_case_es_verts; 18647ec681f3Smrg 18657ec681f3Smrg if (uses_adjacency || gs_num_invocations > 1) 18667ec681f3Smrg max_gs_prims = 127 / gs_num_invocations; 18677ec681f3Smrg else 18687ec681f3Smrg max_gs_prims = 255; 18697ec681f3Smrg 18707ec681f3Smrg /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations. 18717ec681f3Smrg * Make sure we don't go over the maximum value. 
18727ec681f3Smrg */ 18737ec681f3Smrg if (gs_info->gs.vertices_out > 0) { 18747ec681f3Smrg max_gs_prims = 18757ec681f3Smrg MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations)); 18767ec681f3Smrg } 18777ec681f3Smrg assert(max_gs_prims > 0); 18787ec681f3Smrg 18797ec681f3Smrg /* If the primitive has adjacency, halve the number of vertices 18807ec681f3Smrg * that will be reused in multiple primitives. 18817ec681f3Smrg */ 18827ec681f3Smrg min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1); 18837ec681f3Smrg 18847ec681f3Smrg gs_prims = MIN2(ideal_gs_prims, max_gs_prims); 18857ec681f3Smrg worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts); 18867ec681f3Smrg 18877ec681f3Smrg /* Compute ESGS LDS size based on the worst case number of ES vertices 18887ec681f3Smrg * needed to create the target number of GS prims per subgroup. 18897ec681f3Smrg */ 18907ec681f3Smrg esgs_lds_size = esgs_itemsize * worst_case_es_verts; 18917ec681f3Smrg 18927ec681f3Smrg /* If total LDS usage is too big, refactor partitions based on ratio 18937ec681f3Smrg * of ESGS item sizes. 18947ec681f3Smrg */ 18957ec681f3Smrg if (esgs_lds_size > max_lds_size) { 18967ec681f3Smrg /* Our target GS Prims Per Subgroup was too large. Calculate 18977ec681f3Smrg * the maximum number of GS Prims Per Subgroup that will fit 18987ec681f3Smrg * into LDS, capped by the maximum that the hardware can support. 18997ec681f3Smrg */ 19007ec681f3Smrg gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)), max_gs_prims); 19017ec681f3Smrg assert(gs_prims > 0); 19027ec681f3Smrg worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts); 19037ec681f3Smrg 19047ec681f3Smrg esgs_lds_size = esgs_itemsize * worst_case_es_verts; 19057ec681f3Smrg assert(esgs_lds_size <= max_lds_size); 19067ec681f3Smrg } 19077ec681f3Smrg 19087ec681f3Smrg /* Now calculate remaining ESGS information. 
*/ 19097ec681f3Smrg if (esgs_lds_size) 19107ec681f3Smrg es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts); 19117ec681f3Smrg else 19127ec681f3Smrg es_verts = max_es_verts; 19137ec681f3Smrg 19147ec681f3Smrg /* Vertices for adjacency primitives are not always reused, so restore 19157ec681f3Smrg * it for ES_VERTS_PER_SUBGRP. 19167ec681f3Smrg */ 19177ec681f3Smrg min_es_verts = gs_info->gs.vertices_in; 19187ec681f3Smrg 19197ec681f3Smrg /* For normal primitives, the VGT only checks if they are past the ES 19207ec681f3Smrg * verts per subgroup after allocating a full GS primitive and if they 19217ec681f3Smrg * are, kick off a new subgroup. But if those additional ES verts are 19227ec681f3Smrg * unique (e.g. not reused) we need to make sure there is enough LDS 19237ec681f3Smrg * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP. 19247ec681f3Smrg */ 19257ec681f3Smrg es_verts -= min_es_verts - 1; 19267ec681f3Smrg 19277ec681f3Smrg uint32_t es_verts_per_subgroup = es_verts; 19287ec681f3Smrg uint32_t gs_prims_per_subgroup = gs_prims; 19297ec681f3Smrg uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations; 19307ec681f3Smrg uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out; 19317ec681f3Smrg out->lds_size = align(esgs_lds_size, 128) / 128; 19327ec681f3Smrg out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) | 19337ec681f3Smrg S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) | 19347ec681f3Smrg S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup); 19357ec681f3Smrg out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup); 19367ec681f3Smrg out->vgt_esgs_ring_itemsize = esgs_itemsize; 19377ec681f3Smrg assert(max_prims_per_subgroup <= max_out_prims); 19387ec681f3Smrg 19397ec681f3Smrg gl_shader_stage es_stage = has_tess ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 19407ec681f3Smrg unsigned workgroup_size = 19417ec681f3Smrg ac_compute_esgs_workgroup_size( 19427ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class, infos[es_stage].wave_size, 19437ec681f3Smrg es_verts_per_subgroup, gs_inst_prims_in_subgroup); 19447ec681f3Smrg infos[es_stage].workgroup_size = workgroup_size; 19457ec681f3Smrg infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size; 19467ec681f3Smrg} 19477ec681f3Smrg 19487ec681f3Smrgstatic void 19497ec681f3Smrgclamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim, 19507ec681f3Smrg bool use_adjacency) 19517ec681f3Smrg{ 19527ec681f3Smrg unsigned max_reuse = max_esverts - min_verts_per_prim; 19537ec681f3Smrg if (use_adjacency) 19547ec681f3Smrg max_reuse /= 2; 19557ec681f3Smrg *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse); 19567ec681f3Smrg} 19577ec681f3Smrg 19587ec681f3Smrgstatic unsigned 19597ec681f3Smrgradv_get_num_input_vertices(nir_shader **nir) 19607ec681f3Smrg{ 19617ec681f3Smrg if (nir[MESA_SHADER_GEOMETRY]) { 19627ec681f3Smrg nir_shader *gs = nir[MESA_SHADER_GEOMETRY]; 19637ec681f3Smrg 19647ec681f3Smrg return gs->info.gs.vertices_in; 19657ec681f3Smrg } 19667ec681f3Smrg 19677ec681f3Smrg if (nir[MESA_SHADER_TESS_CTRL]) { 19687ec681f3Smrg nir_shader *tes = nir[MESA_SHADER_TESS_EVAL]; 19697ec681f3Smrg 19707ec681f3Smrg if (tes->info.tess.point_mode) 19717ec681f3Smrg return 1; 19727ec681f3Smrg if (tes->info.tess.primitive_mode == GL_ISOLINES) 19737ec681f3Smrg return 2; 19747ec681f3Smrg return 3; 19757ec681f3Smrg } 19767ec681f3Smrg 19777ec681f3Smrg return 3; 19787ec681f3Smrg} 19797ec681f3Smrg 19807ec681f3Smrgstatic void 19817ec681f3Smrggfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t oversub_pc_lines) 19827ec681f3Smrg{ 19837ec681f3Smrg radeon_set_uconfig_reg( 19847ec681f3Smrg cs, R_030980_GE_PC_ALLOC, 19857ec681f3Smrg S_030980_OVERSUB_EN(oversub_pc_lines > 0) | 
S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); 19867ec681f3Smrg} 19877ec681f3Smrg 19887ec681f3Smrgstatic void 19897ec681f3Smrggfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline, 19907ec681f3Smrg nir_shader **nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg) 19917ec681f3Smrg{ 19927ec681f3Smrg struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY]; 19937ec681f3Smrg struct radv_es_output_info *es_info = 19947ec681f3Smrg nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info; 19957ec681f3Smrg unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX; 19967ec681f3Smrg unsigned max_verts_per_prim = radv_get_num_input_vertices(nir); 19977ec681f3Smrg unsigned min_verts_per_prim = gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1; 19987ec681f3Smrg unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1; 19997ec681f3Smrg bool uses_adjacency; 20007ec681f3Smrg switch (key->vs.topology) { 20017ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 20027ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 20037ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 20047ec681f3Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 20057ec681f3Smrg uses_adjacency = true; 20067ec681f3Smrg break; 20077ec681f3Smrg default: 20087ec681f3Smrg uses_adjacency = false; 20097ec681f3Smrg break; 20107ec681f3Smrg } 20117ec681f3Smrg 20127ec681f3Smrg /* All these are in dwords: */ 20137ec681f3Smrg /* We can't allow using the whole LDS, because GS waves compete with 20147ec681f3Smrg * other shader stages for LDS space. 20157ec681f3Smrg * 20167ec681f3Smrg * TODO: We should really take the shader's internal LDS use into 20177ec681f3Smrg * account. The linker will fail if the size is greater than 20187ec681f3Smrg * 8K dwords. 
20197ec681f3Smrg */ 20207ec681f3Smrg const unsigned max_lds_size = 8 * 1024 - 768; 20217ec681f3Smrg const unsigned target_lds_size = max_lds_size; 20227ec681f3Smrg unsigned esvert_lds_size = 0; 20237ec681f3Smrg unsigned gsprim_lds_size = 0; 20247ec681f3Smrg 20257ec681f3Smrg /* All these are per subgroup: */ 20267ec681f3Smrg const unsigned min_esverts = 20277ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24; 20287ec681f3Smrg bool max_vert_out_per_gs_instance = false; 20297ec681f3Smrg unsigned max_esverts_base = 128; 20307ec681f3Smrg unsigned max_gsprims_base = 128; /* default prim group size clamp */ 20317ec681f3Smrg 20327ec681f3Smrg /* Hardware has the following non-natural restrictions on the value 20337ec681f3Smrg * of GE_CNTL.VERT_GRP_SIZE based on based on the primitive type of 20347ec681f3Smrg * the draw: 20357ec681f3Smrg * - at most 252 for any line input primitive type 20367ec681f3Smrg * - at most 251 for any quad input primitive type 20377ec681f3Smrg * - at most 251 for triangle strips with adjacency (this happens to 20387ec681f3Smrg * be the natural limit for triangle *lists* with adjacency) 20397ec681f3Smrg */ 20407ec681f3Smrg max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1); 20417ec681f3Smrg 20427ec681f3Smrg if (gs_type == MESA_SHADER_GEOMETRY) { 20437ec681f3Smrg unsigned max_out_verts_per_gsprim = gs_info->gs.vertices_out * gs_num_invocations; 20447ec681f3Smrg 20457ec681f3Smrg if (max_out_verts_per_gsprim <= 256) { 20467ec681f3Smrg if (max_out_verts_per_gsprim) { 20477ec681f3Smrg max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim); 20487ec681f3Smrg } 20497ec681f3Smrg } else { 20507ec681f3Smrg /* Use special multi-cycling mode in which each GS 20517ec681f3Smrg * instance gets its own subgroup. Does not work with 20527ec681f3Smrg * tessellation. 
*/ 20537ec681f3Smrg max_vert_out_per_gs_instance = true; 20547ec681f3Smrg max_gsprims_base = 1; 20557ec681f3Smrg max_out_verts_per_gsprim = gs_info->gs.vertices_out; 20567ec681f3Smrg } 20577ec681f3Smrg 20587ec681f3Smrg esvert_lds_size = es_info->esgs_itemsize / 4; 20597ec681f3Smrg gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim; 20607ec681f3Smrg } else { 20617ec681f3Smrg /* VS and TES. */ 20627ec681f3Smrg /* LDS size for passing data from GS to ES. */ 20637ec681f3Smrg struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL] 20647ec681f3Smrg ? &infos[MESA_SHADER_TESS_EVAL].so 20657ec681f3Smrg : &infos[MESA_SHADER_VERTEX].so; 20667ec681f3Smrg 20677ec681f3Smrg if (so_info->num_outputs) 20687ec681f3Smrg esvert_lds_size = 4 * so_info->num_outputs + 1; 20697ec681f3Smrg 20707ec681f3Smrg /* GS stores Primitive IDs (one DWORD) into LDS at the address 20717ec681f3Smrg * corresponding to the ES thread of the provoking vertex. All 20727ec681f3Smrg * ES threads load and export PrimitiveID for their thread. 
20737ec681f3Smrg */ 20747ec681f3Smrg if (!nir[MESA_SHADER_TESS_CTRL] && infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id) 20757ec681f3Smrg esvert_lds_size = MAX2(esvert_lds_size, 1); 20767ec681f3Smrg } 20777ec681f3Smrg 20787ec681f3Smrg unsigned max_gsprims = max_gsprims_base; 20797ec681f3Smrg unsigned max_esverts = max_esverts_base; 20807ec681f3Smrg 20817ec681f3Smrg if (esvert_lds_size) 20827ec681f3Smrg max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size); 20837ec681f3Smrg if (gsprim_lds_size) 20847ec681f3Smrg max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size); 20857ec681f3Smrg 20867ec681f3Smrg max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); 20877ec681f3Smrg clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); 20887ec681f3Smrg assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); 20897ec681f3Smrg 20907ec681f3Smrg if (esvert_lds_size || gsprim_lds_size) { 20917ec681f3Smrg /* Now that we have a rough proportionality between esverts 20927ec681f3Smrg * and gsprims based on the primitive type, scale both of them 20937ec681f3Smrg * down simultaneously based on required LDS space. 20947ec681f3Smrg * 20957ec681f3Smrg * We could be smarter about this if we knew how much vertex 20967ec681f3Smrg * reuse to expect. 
20977ec681f3Smrg */ 20987ec681f3Smrg unsigned lds_total = max_esverts * esvert_lds_size + max_gsprims * gsprim_lds_size; 20997ec681f3Smrg if (lds_total > target_lds_size) { 21007ec681f3Smrg max_esverts = max_esverts * target_lds_size / lds_total; 21017ec681f3Smrg max_gsprims = max_gsprims * target_lds_size / lds_total; 21027ec681f3Smrg 21037ec681f3Smrg max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); 21047ec681f3Smrg clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); 21057ec681f3Smrg assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); 21067ec681f3Smrg } 21077ec681f3Smrg } 21087ec681f3Smrg 21097ec681f3Smrg /* Round up towards full wave sizes for better ALU utilization. */ 21107ec681f3Smrg if (!max_vert_out_per_gs_instance) { 21117ec681f3Smrg unsigned orig_max_esverts; 21127ec681f3Smrg unsigned orig_max_gsprims; 21137ec681f3Smrg unsigned wavesize; 21147ec681f3Smrg 21157ec681f3Smrg if (gs_type == MESA_SHADER_GEOMETRY) { 21167ec681f3Smrg wavesize = gs_info->wave_size; 21177ec681f3Smrg } else { 21187ec681f3Smrg wavesize = nir[MESA_SHADER_TESS_CTRL] ? 
infos[MESA_SHADER_TESS_EVAL].wave_size 21197ec681f3Smrg : infos[MESA_SHADER_VERTEX].wave_size; 21207ec681f3Smrg } 21217ec681f3Smrg 21227ec681f3Smrg do { 21237ec681f3Smrg orig_max_esverts = max_esverts; 21247ec681f3Smrg orig_max_gsprims = max_gsprims; 21257ec681f3Smrg 21267ec681f3Smrg max_esverts = align(max_esverts, wavesize); 21277ec681f3Smrg max_esverts = MIN2(max_esverts, max_esverts_base); 21287ec681f3Smrg if (esvert_lds_size) 21297ec681f3Smrg max_esverts = 21307ec681f3Smrg MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); 21317ec681f3Smrg max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); 21327ec681f3Smrg 21337ec681f3Smrg /* Hardware restriction: minimum value of max_esverts */ 21347ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX10) 21357ec681f3Smrg max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); 21367ec681f3Smrg else 21377ec681f3Smrg max_esverts = MAX2(max_esverts, min_esverts); 21387ec681f3Smrg 21397ec681f3Smrg max_gsprims = align(max_gsprims, wavesize); 21407ec681f3Smrg max_gsprims = MIN2(max_gsprims, max_gsprims_base); 21417ec681f3Smrg if (gsprim_lds_size) { 21427ec681f3Smrg /* Don't count unusable vertices to the LDS 21437ec681f3Smrg * size. Those are vertices above the maximum 21447ec681f3Smrg * number of vertices that can occur in the 21457ec681f3Smrg * workgroup, which is e.g. max_gsprims * 3 21467ec681f3Smrg * for triangles. 
21477ec681f3Smrg */ 21487ec681f3Smrg unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); 21497ec681f3Smrg max_gsprims = MIN2(max_gsprims, 21507ec681f3Smrg (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); 21517ec681f3Smrg } 21527ec681f3Smrg clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency); 21537ec681f3Smrg assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); 21547ec681f3Smrg } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims); 21557ec681f3Smrg 21567ec681f3Smrg /* Verify the restriction. */ 21577ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX10) 21587ec681f3Smrg assert(max_esverts >= min_esverts - 1 + max_verts_per_prim); 21597ec681f3Smrg else 21607ec681f3Smrg assert(max_esverts >= min_esverts); 21617ec681f3Smrg } else { 21627ec681f3Smrg /* Hardware restriction: minimum value of max_esverts */ 21637ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX10) 21647ec681f3Smrg max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); 21657ec681f3Smrg else 21667ec681f3Smrg max_esverts = MAX2(max_esverts, min_esverts); 21677ec681f3Smrg } 21687ec681f3Smrg 21697ec681f3Smrg unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out 21707ec681f3Smrg : gs_type == MESA_SHADER_GEOMETRY 21717ec681f3Smrg ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out 21727ec681f3Smrg : max_esverts; 21737ec681f3Smrg assert(max_out_vertices <= 256); 21747ec681f3Smrg 21757ec681f3Smrg unsigned prim_amp_factor = 1; 21767ec681f3Smrg if (gs_type == MESA_SHADER_GEOMETRY) { 21777ec681f3Smrg /* Number of output primitives per GS input primitive after 21787ec681f3Smrg * GS instancing. 
*/ 21797ec681f3Smrg prim_amp_factor = gs_info->gs.vertices_out; 21807ec681f3Smrg } 21817ec681f3Smrg 21827ec681f3Smrg /* On Gfx10, the GE only checks against the maximum number of ES verts 21837ec681f3Smrg * after allocating a full GS primitive. So we need to ensure that 21847ec681f3Smrg * whenever this check passes, there is enough space for a full 21857ec681f3Smrg * primitive without vertex reuse. 21867ec681f3Smrg */ 21877ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX10) 21887ec681f3Smrg ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1; 21897ec681f3Smrg else 21907ec681f3Smrg ngg->hw_max_esverts = max_esverts; 21917ec681f3Smrg 21927ec681f3Smrg ngg->max_gsprims = max_gsprims; 21937ec681f3Smrg ngg->max_out_verts = max_out_vertices; 21947ec681f3Smrg ngg->prim_amp_factor = prim_amp_factor; 21957ec681f3Smrg ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance; 21967ec681f3Smrg ngg->ngg_emit_size = max_gsprims * gsprim_lds_size; 21977ec681f3Smrg ngg->enable_vertex_grouping = true; 21987ec681f3Smrg 21997ec681f3Smrg /* Don't count unusable vertices. */ 22007ec681f3Smrg ngg->esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4; 22017ec681f3Smrg 22027ec681f3Smrg if (gs_type == MESA_SHADER_GEOMETRY) { 22037ec681f3Smrg ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4; 22047ec681f3Smrg } else { 22057ec681f3Smrg ngg->vgt_esgs_ring_itemsize = 1; 22067ec681f3Smrg } 22077ec681f3Smrg 22087ec681f3Smrg assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */ 22097ec681f3Smrg 22107ec681f3Smrg gl_shader_stage es_stage = nir[MESA_SHADER_TESS_CTRL] ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 22117ec681f3Smrg unsigned workgroup_size = 22127ec681f3Smrg ac_compute_ngg_workgroup_size( 22137ec681f3Smrg max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor); 22147ec681f3Smrg infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size; 22157ec681f3Smrg infos[es_stage].workgroup_size = workgroup_size; 22167ec681f3Smrg} 22177ec681f3Smrg 22187ec681f3Smrgstatic void 22197ec681f3Smrgradv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline, const struct gfx9_gs_info *gs) 22207ec681f3Smrg{ 22217ec681f3Smrg struct radv_device *device = pipeline->device; 22227ec681f3Smrg unsigned num_se = device->physical_device->rad_info.max_se; 22237ec681f3Smrg unsigned wave_size = 64; 22247ec681f3Smrg unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ 22257ec681f3Smrg /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. 22267ec681f3Smrg * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). 22277ec681f3Smrg */ 22287ec681f3Smrg unsigned gs_vertex_reuse = 22297ec681f3Smrg (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se; 22307ec681f3Smrg unsigned alignment = 256 * num_se; 22317ec681f3Smrg /* The maximum size is 63.999 MB per SE. */ 22327ec681f3Smrg unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; 22337ec681f3Smrg struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info; 22347ec681f3Smrg 22357ec681f3Smrg /* Calculate the minimum size. */ 22367ec681f3Smrg unsigned min_esgs_ring_size = 22377ec681f3Smrg align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment); 22387ec681f3Smrg /* These are recommended sizes, not minimum sizes. 
*/ 22397ec681f3Smrg unsigned esgs_ring_size = 22407ec681f3Smrg max_gs_waves * 2 * wave_size * gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in; 22417ec681f3Smrg unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size; 22427ec681f3Smrg 22437ec681f3Smrg min_esgs_ring_size = align(min_esgs_ring_size, alignment); 22447ec681f3Smrg esgs_ring_size = align(esgs_ring_size, alignment); 22457ec681f3Smrg gsvs_ring_size = align(gsvs_ring_size, alignment); 22467ec681f3Smrg 22477ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) 22487ec681f3Smrg pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); 22497ec681f3Smrg 22507ec681f3Smrg pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size); 225101e04c3fSmrg} 225201e04c3fSmrg 225301e04c3fSmrgstruct radv_shader_variant * 22547ec681f3Smrgradv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage) 22557ec681f3Smrg{ 22567ec681f3Smrg if (stage == MESA_SHADER_VERTEX) { 22577ec681f3Smrg if (pipeline->shaders[MESA_SHADER_VERTEX]) 22587ec681f3Smrg return pipeline->shaders[MESA_SHADER_VERTEX]; 22597ec681f3Smrg if (pipeline->shaders[MESA_SHADER_TESS_CTRL]) 22607ec681f3Smrg return pipeline->shaders[MESA_SHADER_TESS_CTRL]; 22617ec681f3Smrg if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 22627ec681f3Smrg return pipeline->shaders[MESA_SHADER_GEOMETRY]; 22637ec681f3Smrg } else if (stage == MESA_SHADER_TESS_EVAL) { 22647ec681f3Smrg if (!radv_pipeline_has_tess(pipeline)) 22657ec681f3Smrg return NULL; 22667ec681f3Smrg if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) 22677ec681f3Smrg return pipeline->shaders[MESA_SHADER_TESS_EVAL]; 22687ec681f3Smrg if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 22697ec681f3Smrg return pipeline->shaders[MESA_SHADER_GEOMETRY]; 22707ec681f3Smrg } 22717ec681f3Smrg return pipeline->shaders[stage]; 22727ec681f3Smrg} 227301e04c3fSmrg 22747ec681f3Smrgstatic const struct radv_vs_output_info * 
22757ec681f3Smrgget_vs_output_info(const struct radv_pipeline *pipeline) 227601e04c3fSmrg{ 22777ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) 22787ec681f3Smrg if (radv_pipeline_has_ngg(pipeline)) 22797ec681f3Smrg return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo; 22807ec681f3Smrg else 22817ec681f3Smrg return &pipeline->gs_copy_shader->info.vs.outinfo; 22827ec681f3Smrg else if (radv_pipeline_has_tess(pipeline)) 22837ec681f3Smrg return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo; 22847ec681f3Smrg else 22857ec681f3Smrg return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo; 22867ec681f3Smrg} 22877ec681f3Smrg 22887ec681f3Smrgstatic bool 22897ec681f3Smrgradv_nir_stage_uses_xfb(const nir_shader *nir) 22907ec681f3Smrg{ 22917ec681f3Smrg nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); 22927ec681f3Smrg bool uses_xfb = !!xfb; 22937ec681f3Smrg 22947ec681f3Smrg ralloc_free(xfb); 22957ec681f3Smrg return uses_xfb; 229601e04c3fSmrg} 229701e04c3fSmrg 229801e04c3fSmrgstatic void 22997ec681f3Smrgradv_link_shaders(struct radv_pipeline *pipeline, 23007ec681f3Smrg const struct radv_pipeline_key *pipeline_key, 23017ec681f3Smrg nir_shader **shaders, 23027ec681f3Smrg bool optimize_conservatively) 23037ec681f3Smrg{ 23047ec681f3Smrg nir_shader *ordered_shaders[MESA_SHADER_STAGES]; 23057ec681f3Smrg int shader_count = 0; 23067ec681f3Smrg 23077ec681f3Smrg if (shaders[MESA_SHADER_FRAGMENT]) { 23087ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT]; 23097ec681f3Smrg } 23107ec681f3Smrg if (shaders[MESA_SHADER_GEOMETRY]) { 23117ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY]; 23127ec681f3Smrg } 23137ec681f3Smrg if (shaders[MESA_SHADER_TESS_EVAL]) { 23147ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL]; 23157ec681f3Smrg } 23167ec681f3Smrg if (shaders[MESA_SHADER_TESS_CTRL]) { 23177ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL]; 23187ec681f3Smrg } 
23197ec681f3Smrg if (shaders[MESA_SHADER_VERTEX]) { 23207ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX]; 23217ec681f3Smrg } 23227ec681f3Smrg if (shaders[MESA_SHADER_COMPUTE]) { 23237ec681f3Smrg ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE]; 23247ec681f3Smrg } 23257ec681f3Smrg 23267ec681f3Smrg bool has_geom_tess = shaders[MESA_SHADER_GEOMETRY] || shaders[MESA_SHADER_TESS_CTRL]; 23277ec681f3Smrg bool merged_gs = shaders[MESA_SHADER_GEOMETRY] && 23287ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class >= GFX9; 23297ec681f3Smrg 23307ec681f3Smrg if (!optimize_conservatively && shader_count > 1) { 23317ec681f3Smrg unsigned first = ordered_shaders[shader_count - 1]->info.stage; 23327ec681f3Smrg unsigned last = ordered_shaders[0]->info.stage; 23337ec681f3Smrg 23347ec681f3Smrg if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT && 23357ec681f3Smrg ordered_shaders[1]->info.has_transform_feedback_varyings) 23367ec681f3Smrg nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]); 23377ec681f3Smrg 23387ec681f3Smrg for (int i = 1; i < shader_count; ++i) { 23397ec681f3Smrg nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]); 23407ec681f3Smrg } 23417ec681f3Smrg 23427ec681f3Smrg for (int i = 0; i < shader_count; ++i) { 23437ec681f3Smrg nir_variable_mode mask = 0; 23447ec681f3Smrg 23457ec681f3Smrg if (ordered_shaders[i]->info.stage != first) 23467ec681f3Smrg mask = mask | nir_var_shader_in; 23477ec681f3Smrg 23487ec681f3Smrg if (ordered_shaders[i]->info.stage != last) 23497ec681f3Smrg mask = mask | nir_var_shader_out; 23507ec681f3Smrg 23517ec681f3Smrg if (nir_lower_io_to_scalar_early(ordered_shaders[i], mask)) { 23527ec681f3Smrg /* Optimize the new vector code and then remove dead vars */ 23537ec681f3Smrg nir_copy_prop(ordered_shaders[i]); 23547ec681f3Smrg nir_opt_shrink_vectors(ordered_shaders[i], 23557ec681f3Smrg !pipeline->device->instance->disable_shrink_image_store); 23567ec681f3Smrg 
23577ec681f3Smrg if (ordered_shaders[i]->info.stage != last) { 23587ec681f3Smrg /* Optimize swizzled movs of load_const for 23597ec681f3Smrg * nir_link_opt_varyings's constant propagation 23607ec681f3Smrg */ 23617ec681f3Smrg nir_opt_constant_folding(ordered_shaders[i]); 23627ec681f3Smrg /* For nir_link_opt_varyings's duplicate input opt */ 23637ec681f3Smrg nir_opt_cse(ordered_shaders[i]); 23647ec681f3Smrg } 23657ec681f3Smrg 23667ec681f3Smrg /* Run copy-propagation to help remove dead 23677ec681f3Smrg * output variables (some shaders have useless 23687ec681f3Smrg * copies to/from an output), so compaction 23697ec681f3Smrg * later will be more effective. 23707ec681f3Smrg * 23717ec681f3Smrg * This will have been done earlier but it might 23727ec681f3Smrg * not have worked because the outputs were vector. 23737ec681f3Smrg */ 23747ec681f3Smrg if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) 23757ec681f3Smrg nir_opt_copy_prop_vars(ordered_shaders[i]); 23767ec681f3Smrg 23777ec681f3Smrg nir_opt_dce(ordered_shaders[i]); 23787ec681f3Smrg nir_remove_dead_variables( 23797ec681f3Smrg ordered_shaders[i], nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, 23807ec681f3Smrg NULL); 23817ec681f3Smrg } 23827ec681f3Smrg } 23837ec681f3Smrg } 23847ec681f3Smrg 23857ec681f3Smrg bool uses_xfb = pipeline->graphics.last_vgt_api_stage != -1 && 23867ec681f3Smrg radv_nir_stage_uses_xfb(shaders[pipeline->graphics.last_vgt_api_stage]); 23877ec681f3Smrg if (!uses_xfb && !optimize_conservatively) { 23887ec681f3Smrg /* Remove PSIZ from shaders when it's not needed. 23897ec681f3Smrg * This is typically produced by translation layers like Zink or D9VK. 
23907ec681f3Smrg */ 23917ec681f3Smrg for (unsigned i = 0; i < shader_count; ++i) { 23927ec681f3Smrg shader_info *info = &ordered_shaders[i]->info; 23937ec681f3Smrg if (!(info->outputs_written & VARYING_BIT_PSIZ)) 23947ec681f3Smrg continue; 23957ec681f3Smrg 23967ec681f3Smrg bool next_stage_needs_psiz = 23977ec681f3Smrg i != 0 && /* ordered_shaders is backwards, so next stage is: i - 1 */ 23987ec681f3Smrg ordered_shaders[i - 1]->info.inputs_read & VARYING_BIT_PSIZ; 23997ec681f3Smrg bool topology_uses_psiz = 24007ec681f3Smrg info->stage == pipeline->graphics.last_vgt_api_stage && 24017ec681f3Smrg ((info->stage == MESA_SHADER_VERTEX && pipeline_key->vs.topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) || 24027ec681f3Smrg (info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) || 24037ec681f3Smrg (info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS)); 24047ec681f3Smrg 24057ec681f3Smrg nir_variable *psiz_var = 24067ec681f3Smrg nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ); 24077ec681f3Smrg 24087ec681f3Smrg if (!next_stage_needs_psiz && !topology_uses_psiz && psiz_var) { 24097ec681f3Smrg /* Change PSIZ to a global variable which allows it to be DCE'd. 
*/ 24107ec681f3Smrg psiz_var->data.location = 0; 24117ec681f3Smrg psiz_var->data.mode = nir_var_shader_temp; 24127ec681f3Smrg 24137ec681f3Smrg info->outputs_written &= ~VARYING_BIT_PSIZ; 24147ec681f3Smrg nir_fixup_deref_modes(ordered_shaders[i]); 24157ec681f3Smrg nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_temp, NULL); 24167ec681f3Smrg nir_opt_dce(ordered_shaders[i]); 24177ec681f3Smrg } 24187ec681f3Smrg } 24197ec681f3Smrg } 24207ec681f3Smrg 24217ec681f3Smrg for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) { 24227ec681f3Smrg if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) { 24237ec681f3Smrg nir_opt_constant_folding(ordered_shaders[i - 1]); 24247ec681f3Smrg nir_opt_algebraic(ordered_shaders[i - 1]); 24257ec681f3Smrg nir_opt_dce(ordered_shaders[i - 1]); 24267ec681f3Smrg } 24277ec681f3Smrg 24287ec681f3Smrg nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_out, NULL); 24297ec681f3Smrg nir_remove_dead_variables(ordered_shaders[i - 1], nir_var_shader_in, NULL); 24307ec681f3Smrg 24317ec681f3Smrg bool progress = nir_remove_unused_varyings(ordered_shaders[i], ordered_shaders[i - 1]); 24327ec681f3Smrg 24337ec681f3Smrg nir_compact_varyings(ordered_shaders[i], ordered_shaders[i - 1], true); 24347ec681f3Smrg 24357ec681f3Smrg if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL || 24367ec681f3Smrg (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) || 24377ec681f3Smrg (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) { 24387ec681f3Smrg nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out); 24397ec681f3Smrg if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) 24407ec681f3Smrg nir_vectorize_tess_levels(ordered_shaders[i]); 24417ec681f3Smrg nir_opt_combine_stores(ordered_shaders[i], nir_var_shader_out); 24427ec681f3Smrg } 24437ec681f3Smrg if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY || 24447ec681f3Smrg ordered_shaders[i - 
/* NOTE(review): this chunk opens mid-expression; the head of the enclosing
 * shader-linking helper (iterating ordered_shaders) is above this chunk.
 * The fragment below is reproduced verbatim. */
1]->info.stage == MESA_SHADER_TESS_CTRL ||
          ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
         nir_lower_io_to_vector(ordered_shaders[i - 1], nir_var_shader_in);
      }

      if (progress) {
         if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
            ac_nir_lower_indirect_derefs(ordered_shaders[i],
                                         pipeline->device->physical_device->rad_info.chip_class);
            /* remove dead writes, which can remove input loads */
            nir_lower_vars_to_ssa(ordered_shaders[i]);
            nir_opt_dce(ordered_shaders[i]);
         }

         if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
            ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
                                         pipeline->device->physical_device->rad_info.chip_class);
         }
      }
   }
}

/* Assign driver_location for every shader I/O variable of the pipeline and
 * record how many I/O slots are linked between consecutive stages in
 * infos[]. FS outputs use location + index; other stages use the raw
 * location. On GFX9+ the per-stage linked-I/O counts are duplicated into the
 * merged hardware stage (VS -> TCS/GS, TES -> GS) so the merged-stage info is
 * self-contained. */
static void
radv_set_driver_locations(struct radv_pipeline *pipeline, nir_shader **shaders,
                          struct radv_shader_info infos[MESA_SHADER_STAGES])
{
   if (shaders[MESA_SHADER_FRAGMENT]) {
      nir_foreach_shader_out_variable(var, shaders[MESA_SHADER_FRAGMENT])
      {
         /* data.index is folded in for FS outputs (presumably the
          * dual-source blend index — TODO confirm). */
         var->data.driver_location = var->data.location + var->data.index;
      }
   }

   /* Nothing else to do for compute-only pipelines. */
   if (!shaders[MESA_SHADER_VERTEX])
      return;

   bool has_tess = shaders[MESA_SHADER_TESS_CTRL];
   bool has_gs = shaders[MESA_SHADER_GEOMETRY];

   /* Merged stage for VS and TES */
   unsigned vs_info_idx = MESA_SHADER_VERTEX;
   unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;

   if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
      /* These are merged into the next stage */
      vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
      tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
   }

   nir_foreach_shader_in_variable (var, shaders[MESA_SHADER_VERTEX]) {
      var->data.driver_location = var->data.location;
   }

   if (has_tess) {
      /* Link VS->TCS and TCS->TES; the returned counts size the linked
       * I/O arrays used by the backend. */
      nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(
         shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]);
      nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(
         shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]);

      infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
      infos[MESA_SHADER_TESS_EVAL].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
      infos[MESA_SHADER_TESS_EVAL].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;

      /* Copy data to merged stage */
      infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
      infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
      infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;

      if (has_gs) {
         nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(
            shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]);

         infos[MESA_SHADER_TESS_EVAL].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
         infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars;

         /* Copy data to merged stage */
         infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
      }
   } else if (has_gs) {
      /* No tessellation: link VS directly to GS. */
      nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(
         shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]);

      infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
      infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars;

      /* Copy data to merged stage */
      infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
   }

   /* The last pre-rasterization stage keeps plain location-based slots. */
   assert(pipeline->graphics.last_vgt_api_stage != MESA_SHADER_NONE);
   nir_foreach_shader_out_variable(var, shaders[pipeline->graphics.last_vgt_api_stage])
   {
      var->data.driver_location = var->data.location;
   }
}

/* Return the stride of the vertex binding that feeds attribute binding
 * 'attrib_binding', or 0 when no matching binding description exists. */
static uint32_t
radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state,
                       uint32_t attrib_binding)
{
   for (uint32_t i = 0; i < input_state->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *input_binding =
         &input_state->pVertexBindingDescriptions[i];

      if (input_binding->binding == attrib_binding)
         return input_binding->stride;
   }

   return 0;
}

/* Build the radv_pipeline_key used to compile (and cache) the shaders of a
 * graphics pipeline. Gathers everything shader compilation depends on from
 * the create info: vertex input layout (unless fully dynamic), tessellation
 * patch size, multisample state, color export formats from the blend state,
 * and assorted device/debug toggles. The key starts zeroed, so every field
 * not written here defaults to 0/false. */
static struct radv_pipeline_key
radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                    const struct radv_blend_state *blend)
{
   RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
   struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
   bool uses_dynamic_stride = false;

   struct radv_pipeline_key key;
   memset(&key, 0, sizeof(key));

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
      key.optimisations_disabled = 1;

   key.has_multiview_view_index = !!subpass->view_mask;

   /* Scan dynamic state: fully dynamic vertex input supersedes the
    * dynamic-stride-only case. */
   if (pCreateInfo->pDynamicState) {
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t i = 0; i < count; i++) {
         if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_VERTEX_INPUT_EXT) {
            key.vs.dynamic_input_state = true;
            /* we don't care about use_dynamic_stride in this case */
            break;
         } else if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
                    VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) {
            uses_dynamic_stride = true;
         }
      }
   }

   /* Bake the static vertex input layout into the key. */
   if (!key.vs.dynamic_input_state) {
      const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState;
      const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = vk_find_struct_const(
         input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

      /* Bitmask of bindings with per-instance rate; divisors default to 1
       * and are only initialized (and later read) for bindings in the mask. */
      uint32_t binding_input_rate = 0;
      uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
      for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
         if (input_state->pVertexBindingDescriptions[i].inputRate) {
            unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
            binding_input_rate |= 1u << binding;
            instance_rate_divisors[binding] = 1;
         }
      }
      if (divisor_state) {
         for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
            instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
               divisor_state->pVertexBindingDivisors[i].divisor;
         }
      }

      for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
         const VkVertexInputAttributeDescription *desc =
            &input_state->pVertexAttributeDescriptions[i];
         const struct util_format_description *format_desc;
         unsigned location = desc->location;
         unsigned binding = desc->binding;
         unsigned num_format, data_format;
         bool post_shuffle;

         if (binding_input_rate & (1u << binding)) {
            key.vs.instance_rate_inputs |= 1u << location;
            key.vs.instance_rate_divisors[location] = instance_rate_divisors[binding];
         }

         /* Translate the Vulkan format into the hw data/num formats plus
          * alpha-adjust and post-shuffle fixups. */
         format_desc = vk_format_description(desc->format);
         radv_translate_vertex_format(pipeline->device->physical_device, desc->format, format_desc,
                                      &data_format, &num_format, &post_shuffle,
                                      &key.vs.vertex_alpha_adjust[location]);

         /* Packed as data format in the low nibble, num format above it. */
         key.vs.vertex_attribute_formats[location] = data_format | (num_format << 4);
         key.vs.vertex_attribute_bindings[location] = desc->binding;
         key.vs.vertex_attribute_offsets[location] = desc->offset;

         const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format);
         unsigned attrib_align =
            dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size;

         /* If desc->offset is misaligned, then the buffer offset must be too. Just
          * skip updating vertex_binding_align in this case.
          */
         if (desc->offset % attrib_align == 0)
            key.vs.vertex_binding_align[desc->binding] =
               MAX2(key.vs.vertex_binding_align[desc->binding], attrib_align);

         if (!uses_dynamic_stride) {
            /* From the Vulkan spec 1.2.157:
             *
             * "If the bound pipeline state object was created
             * with the
             * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT
             * dynamic state enabled then pStrides[i] specifies
             * the distance in bytes between two consecutive
             * elements within the corresponding buffer. In this
             * case the VkVertexInputBindingDescription::stride
             * state from the pipeline state object is ignored."
             *
             * Make sure the vertex attribute stride is zero to
             * avoid computing a wrong offset if it's initialized
             * to something else than zero.
             */
            key.vs.vertex_attribute_strides[location] =
               radv_get_attrib_stride(input_state, desc->binding);
         }

         if (post_shuffle)
            key.vs.vertex_post_shuffle |= 1 << location;
      }
   }

   const VkPipelineTessellationStateCreateInfo *tess =
      radv_pipeline_get_tessellation_state(pCreateInfo);
   if (tess)
      key.tcs.tess_input_vertices = tess->patchControlPoints;

   const VkPipelineMultisampleStateCreateInfo *vkms =
      radv_pipeline_get_multisample_state(pCreateInfo);
   if (vkms && vkms->rasterizationSamples > 1) {
      uint32_t num_samples = vkms->rasterizationSamples;
      uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
      key.ps.num_samples = num_samples;
      key.ps.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
   }

   key.ps.col_format = blend->spi_shader_col_format;
   /* int8/int10 export info is only needed by the compiler pre-GFX8. */
   if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
      key.ps.is_int8 = blend->col_format_is_int8;
      key.ps.is_int10 = blend->col_format_is_int10;
   }

   if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
      key.vs.topology = pCreateInfo->pInputAssemblyState->topology;

      const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState;
      const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info =
         vk_find_struct_const(raster_info->pNext,
                              PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
      if (provoking_vtx_info &&
          provoking_vtx_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
         key.vs.provoking_vtx_last = true;
      }
   }

   if (pipeline->device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
      key.ps.lower_discard_to_demote = true;

   if (pipeline->device->instance->enable_mrt_output_nan_fixup)
      key.ps.enable_mrt_output_nan_fixup = true;

   key.ps.force_vrs = pipeline->device->force_vrs;

   if (pipeline->device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
      key.invariant_geom = true;

   key.use_ngg = pipeline->device->physical_device->use_ngg;

   return key;
}

/* Return the hardware wave size (in lanes) for a shader stage: legacy GS is
 * always wave64, compute uses its selected subgroup size, FS and geometry
 * stages use the device-wide PS/GE wave size. */
static uint8_t
radv_get_wave_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
                   gl_shader_stage stage, const struct radv_shader_info *info)
{
   if (stage == MESA_SHADER_GEOMETRY && !info->is_ngg)
      return 64;
   else if (stage == MESA_SHADER_COMPUTE) {
      return info->cs.subgroup_size;
   } else if (stage == MESA_SHADER_FRAGMENT)
      return device->physical_device->ps_wave_size;
   else
      return device->physical_device->ge_wave_size;
}

/* Return the bit width used for ballot operations: the compute subgroup size
 * when one was explicitly chosen, 64 otherwise. */
static uint8_t
radv_get_ballot_bit_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
                         gl_shader_stage stage, const struct radv_shader_info *info)
{
   if (stage == MESA_SHADER_COMPUTE && info->cs.subgroup_size)
      return info->cs.subgroup_size;
   return 64;
}

/* Decide the final NGG configuration (culling, early primitive export,
 * passthrough, LDS reservation) for the last VGT stage of a non-GS NGG
 * pipeline. Only runs when there is no geometry shader and a last VGT API
 * stage exists; otherwise leaves infos[] untouched. */
static void
radv_determine_ngg_settings(struct radv_pipeline *pipeline,
                            const struct radv_pipeline_key *pipeline_key,
                            struct radv_shader_info *infos, nir_shader **nir)
{
   struct radv_device *device = pipeline->device;

   if (!nir[MESA_SHADER_GEOMETRY] && pipeline->graphics.last_vgt_api_stage != MESA_SHADER_NONE) {
      uint64_t ps_inputs_read =
         nir[MESA_SHADER_FRAGMENT] ? nir[MESA_SHADER_FRAGMENT]->info.inputs_read : 0;
      gl_shader_stage es_stage = pipeline->graphics.last_vgt_api_stage;

      /* For VS the output primitive comes from the key topology; for TES it
       * is derived from the tessellation mode (points/isolines/triangles). */
      unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
      if (es_stage == MESA_SHADER_TESS_EVAL)
         num_vertices_per_prim = nir[es_stage]->info.tess.point_mode ? 1
                                 : nir[es_stage]->info.tess.primitive_mode == GL_ISOLINES ? 2
                                                                                          : 3;

      infos[es_stage].has_ngg_culling = radv_consider_culling(
         device, nir[es_stage], ps_inputs_read, num_vertices_per_prim, &infos[es_stage]);

      /* Early primitive export is only used when the entrypoint is a single
       * block (no control flow). */
      nir_function_impl *impl = nir_shader_get_entrypoint(nir[es_stage]);
      infos[es_stage].has_ngg_early_prim_export = exec_list_is_singular(&impl->body);

      /* Invocations that process an input vertex */
      const struct gfx10_ngg_info *ngg_info = &infos[es_stage].ngg_info;
      unsigned max_vtx_in = MIN2(256, ngg_info->enable_vertex_grouping ? ngg_info->hw_max_esverts : num_vertices_per_prim * ngg_info->max_gsprims);

      unsigned lds_bytes_if_culling_off = 0;
      /* We need LDS space when VS needs to export the primitive ID. */
      if (es_stage == MESA_SHADER_VERTEX && infos[es_stage].vs.outinfo.export_prim_id)
         lds_bytes_if_culling_off = max_vtx_in * 4u;
      infos[es_stage].num_lds_blocks_when_not_culling =
         DIV_ROUND_UP(lds_bytes_if_culling_off,
                      device->physical_device->rad_info.lds_encode_granularity);

      /* NGG passthrough mode should be disabled when culling and when the vertex shader exports the
       * primitive ID.
       */
      infos[es_stage].is_ngg_passthrough = infos[es_stage].is_ngg_passthrough &&
                                           !infos[es_stage].has_ngg_culling &&
                                           !(es_stage == MESA_SHADER_VERTEX &&
                                             infos[es_stage].vs.outinfo.export_prim_id);
   }
}

/* Populate radv_shader_info for every active stage of the pipeline: marks
 * merged-stage roles (LS/ES), selects NGG and NGG-passthrough, runs the NIR
 * info-gathering pass (merging VS+TCS and (VS|TES)+GS on GFX9+), picks the
 * compute subgroup size, and finally fills wave/ballot/workgroup sizes. */
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
                      struct radv_pipeline_layout *pipeline_layout,
                      const VkPipelineShaderStageCreateInfo **pStages,
                      const struct radv_pipeline_key *pipeline_key,
                      struct radv_shader_info *infos, nir_shader **nir)
{
   struct radv_device *device = pipeline->device;
   unsigned active_stages = 0;
   unsigned filled_stages = 0;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (nir[i])
         active_stages |= (1 << i);
   }

   /* With tessellation, the hardware VS runs as LS. */
   if (nir[MESA_SHADER_TESS_CTRL]) {
      infos[MESA_SHADER_VERTEX].vs.as_ls = true;
   }

   /* With a GS, the stage feeding it (TES or VS) runs as ES. */
   if (nir[MESA_SHADER_GEOMETRY]) {
      if (nir[MESA_SHADER_TESS_CTRL])
         infos[MESA_SHADER_TESS_EVAL].tes.as_es = true;
      else
         infos[MESA_SHADER_VERTEX].vs.as_es = true;
   }

   /* Enable NGG on the last vertex-processing stage, then back off in the
    * cases the hardware/driver cannot handle. */
   if (pipeline_key->use_ngg) {
      if (nir[MESA_SHADER_TESS_CTRL]) {
         infos[MESA_SHADER_TESS_EVAL].is_ngg = true;
      } else {
         infos[MESA_SHADER_VERTEX].is_ngg = true;
      }

      if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_GEOMETRY] &&
          nir[MESA_SHADER_GEOMETRY]->info.gs.invocations *
                nir[MESA_SHADER_GEOMETRY]->info.gs.vertices_out >
             256) {
         /* Fallback to the legacy path if tessellation is
          * enabled with extreme geometry because
          * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
          * might hang.
          */
         infos[MESA_SHADER_TESS_EVAL].is_ngg = false;
      }

      /* The last active geometry stage is the one that can write
       * transform-feedback outputs. */
      gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;

      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         if (nir[i])
            last_xfb_stage = i;
      }

      bool uses_xfb = nir[last_xfb_stage] && radv_nir_stage_uses_xfb(nir[last_xfb_stage]);

      if (!device->physical_device->use_ngg_streamout && uses_xfb) {
         if (nir[MESA_SHADER_TESS_CTRL])
            infos[MESA_SHADER_TESS_EVAL].is_ngg = false;
         else
            infos[MESA_SHADER_VERTEX].is_ngg = false;
      }

      /* Determine if the pipeline is eligible for the NGG passthrough
       * mode. It can't be enabled for geometry shaders, for NGG
       * streamout or for vertex shaders that export the primitive ID
       * (this is checked later because we don't have the info here.)
       */
      if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
         if (nir[MESA_SHADER_TESS_CTRL] && infos[MESA_SHADER_TESS_EVAL].is_ngg) {
            infos[MESA_SHADER_TESS_EVAL].is_ngg_passthrough = true;
         } else if (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) {
            infos[MESA_SHADER_VERTEX].is_ngg_passthrough = true;
         }
      }
   }

   /* Gather FS info first: the last VGT stage needs to know whether the FS
    * reads the primitive ID or clip/cull distances. */
   if (nir[MESA_SHADER_FRAGMENT]) {
      radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
      radv_nir_shader_info_pass(pipeline->device, nir[MESA_SHADER_FRAGMENT], pipeline_layout,
                                pipeline_key, &infos[MESA_SHADER_FRAGMENT]);

      assert(pipeline->graphics.last_vgt_api_stage != MESA_SHADER_NONE);
      if (infos[MESA_SHADER_FRAGMENT].ps.prim_id_input) {
         if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX) {
            infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id = true;
         } else if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
            infos[MESA_SHADER_TESS_EVAL].tes.outinfo.export_prim_id = true;
         } else {
            /* A GS always has the primitive ID available; no flag needed. */
            assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
         }
      }

      if (!!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls) {
         if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX) {
            infos[MESA_SHADER_VERTEX].vs.outinfo.export_clip_dists = true;
         } else if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
            infos[MESA_SHADER_TESS_EVAL].tes.outinfo.export_clip_dists = true;
         } else {
            assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
            /* NOTE(review): the GS deliberately reuses the vs.outinfo slot
             * here (merged-stage union layout) — matches usage elsewhere. */
            infos[MESA_SHADER_GEOMETRY].vs.outinfo.export_clip_dists = true;
         }
      }

      filled_stages |= (1 << MESA_SHADER_FRAGMENT);
   }

   /* GFX9+: VS and TCS are merged into one hardware stage; gather info for
    * both shaders into the TCS slot. */
   if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
       nir[MESA_SHADER_TESS_CTRL]) {
      struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};

      radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);

      /* Copy data to merged stage. */
      infos[MESA_SHADER_TESS_CTRL].vs.as_ls = true;

      for (int i = 0; i < 2; i++) {
         radv_nir_shader_info_pass(pipeline->device, combined_nir[i], pipeline_layout, pipeline_key,
                                   &infos[MESA_SHADER_TESS_CTRL]);
      }

      filled_stages |= (1 << MESA_SHADER_VERTEX);
      filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
   }

   /* GFX9+: (VS or TES) and GS are merged; gather both into the GS slot. */
   if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
       nir[MESA_SHADER_GEOMETRY]) {
      gl_shader_stage pre_stage =
         nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};

      radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);

      /* Copy data to merged stage.
       */
      if (pre_stage == MESA_SHADER_VERTEX) {
         infos[MESA_SHADER_GEOMETRY].vs.as_es = infos[MESA_SHADER_VERTEX].vs.as_es;
      } else {
         infos[MESA_SHADER_GEOMETRY].tes.as_es = infos[MESA_SHADER_TESS_EVAL].tes.as_es;
      }
      infos[MESA_SHADER_GEOMETRY].is_ngg = infos[pre_stage].is_ngg;
      infos[MESA_SHADER_GEOMETRY].gs.es_type = pre_stage;

      for (int i = 0; i < 2; i++) {
         radv_nir_shader_info_pass(pipeline->device, combined_nir[i], pipeline_layout, pipeline_key,
                                   &infos[MESA_SHADER_GEOMETRY]);
      }

      filled_stages |= (1 << pre_stage);
      filled_stages |= (1 << MESA_SHADER_GEOMETRY);
   }

   /* Gather info for all remaining (non-merged) stages. */
   active_stages ^= filled_stages;
   while (active_stages) {
      int i = u_bit_scan(&active_stages);
      radv_nir_shader_info_init(&infos[i]);
      radv_nir_shader_info_pass(pipeline->device, nir[i], pipeline_layout, pipeline_key, &infos[i]);
   }

   if (nir[MESA_SHADER_COMPUTE]) {
      unsigned subgroup_size = pipeline_key->cs.compute_subgroup_size;
      unsigned req_subgroup_size = subgroup_size;
      bool require_full_subgroups = pipeline_key->cs.require_full_subgroups;

      if (!subgroup_size)
         subgroup_size = device->physical_device->cs_wave_size;

      unsigned local_size = nir[MESA_SHADER_COMPUTE]->info.workgroup_size[0] *
                            nir[MESA_SHADER_COMPUTE]->info.workgroup_size[1] *
                            nir[MESA_SHADER_COMPUTE]->info.workgroup_size[2];

      /* Games don't always request full subgroups when they should,
       * which can cause bugs if cswave32 is
       * enabled.
       */
      if (device->physical_device->cs_wave_size == 32 &&
          nir[MESA_SHADER_COMPUTE]->info.cs.uses_wide_subgroup_intrinsics && !req_subgroup_size &&
          local_size % RADV_SUBGROUP_SIZE == 0)
         require_full_subgroups = true;

      if (require_full_subgroups && !req_subgroup_size) {
         /* don't use wave32 pretending to be wave64 */
         subgroup_size = RADV_SUBGROUP_SIZE;
      }

      infos[MESA_SHADER_COMPUTE].cs.subgroup_size = subgroup_size;
   }

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (nir[i]) {
         infos[i].wave_size = radv_get_wave_size(pipeline->device, pStages[i], i, &infos[i]);
         infos[i].ballot_bit_size =
            radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &infos[i]);
      }
   }

   /* PS always operates without workgroups. */
   if (nir[MESA_SHADER_FRAGMENT])
      infos[MESA_SHADER_FRAGMENT].workgroup_size = infos[MESA_SHADER_FRAGMENT].wave_size;

   if (nir[MESA_SHADER_COMPUTE]) {
      /* Variable workgroup size is not supported by Vulkan.
       */
      assert(!nir[MESA_SHADER_COMPUTE]->info.workgroup_size_variable);

      infos[MESA_SHADER_COMPUTE].workgroup_size =
         ac_compute_cs_workgroup_size(
            nir[MESA_SHADER_COMPUTE]->info.workgroup_size, false, UINT32_MAX);
   }
}

/* Merge the tessellation execution modes declared by the TCS and TES so that
 * both shader_info structs end up with the same, combined values. Fields may
 * be set in either stage but must agree when set in both (asserted below). */
static void
merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    * "PointMode. Controls generation of points rather than triangles
    * or lines. This functionality defaults to disabled, and is
    * enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    * "One mode must be set in at least one of the tessellation
    * shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both. Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess.primitive_mode == 0 || tes_info->tess.primitive_mode == 0 ||
          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;

   /* Copy the merged info back to the TCS */
   tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
   tcs_info->tess.spacing = tes_info->tess.spacing;
   tcs_info->tess.primitive_mode = tes_info->tess.primitive_mode;
   tcs_info->tess.ccw = tes_info->tess.ccw;
   tcs_info->tess.point_mode = tes_info->tess.point_mode;
}

/* Compute tessellation-derived shader info: merges TCS/TES execution modes,
 * sizes the per-workgroup patch count and the VS+TCS LDS usage, propagates
 * patch counts to all affected stages, and (for the non-LLVM backend) the
 * tcs_in_out_eq optimization data. */
static void
gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shader_info *infos,
                 const struct radv_pipeline_key *pipeline_key)
{
   merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);

   unsigned tess_in_patch_size = pipeline_key->tcs.tess_input_vertices;
   unsigned tess_out_patch_size =
nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out;

   /* Number of tessellation patches per workgroup processed by the current pipeline. */
   unsigned num_patches = get_tcs_num_patches(
      tess_in_patch_size, tess_out_patch_size,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
      device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);

   /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
   unsigned tcs_lds_size = calculate_tess_lds_size(
      device->physical_device->rad_info.chip_class, tess_in_patch_size, tess_out_patch_size,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
      infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);

   infos[MESA_SHADER_TESS_CTRL].num_tess_patches = num_patches;
   infos[MESA_SHADER_TESS_CTRL].tcs.num_lds_blocks = tcs_lds_size;
   infos[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors =
      !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read &
         (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
   infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.inputs_read;
   infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read =
      nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read;

   /* Every stage of the pipeline needs to know the patch count. */
   infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
   infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
   infos[MESA_SHADER_VERTEX].num_tess_patches = num_patches;
   infos[MESA_SHADER_TESS_CTRL].tcs.tcs_vertices_out = tess_out_patch_size;
   infos[MESA_SHADER_VERTEX].tcs.tcs_vertices_out = tess_out_patch_size;

   if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
      /* When the number of TCS input and output vertices are the same (typically 3):
       * - There is an equal amount of LS and HS invocations
       * - In case of merged LSHS shaders, the LS and HS halves of the shader
       * always process the exact same vertex. We can use this knowledge to optimize them.
       *
       * We don't set tcs_in_out_eq if the float controls differ because that might
       * involve different float modes for the same block and our optimizer
       * doesn't handle a instruction dominating another with a different mode.
       */
      infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
         device->physical_device->rad_info.chip_class >= GFX9 &&
         tess_in_patch_size == tess_out_patch_size &&
         nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode ==
            nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;

      /* Inputs that are read only by the same invocation, directly, and
       * written directly by the VS can stay in temporaries. */
      if (infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq)
         infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask =
            nir[MESA_SHADER_TESS_CTRL]->info.inputs_read &
            nir[MESA_SHADER_VERTEX]->info.outputs_written &
            ~nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_cross_invocation_inputs_read &
            ~nir[MESA_SHADER_TESS_CTRL]->info.inputs_read_indirectly &
            ~nir[MESA_SHADER_VERTEX]->info.outputs_accessed_indirectly;

      /* Copy data to TCS so it can be accessed by the backend if they are merged.
       */
      infos[MESA_SHADER_TESS_CTRL].vs.tcs_in_out_eq = infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq;
      infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask =
         infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
   }

   for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s)
      infos[s].workgroup_size =
         ac_compute_lshs_workgroup_size(
            device->physical_device->rad_info.chip_class, s,
            num_patches, tess_in_patch_size, tess_out_patch_size);
}

/* Reset all VK_EXT_pipeline_creation_feedback structures (pipeline-level and
 * per-stage) to "no data"; no-op when the extension struct is absent. */
static void
radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
{
   if (!ext)
      return;

   if (ext->pPipelineCreationFeedback) {
      ext->pPipelineCreationFeedback->flags = 0;
      ext->pPipelineCreationFeedback->duration = 0;
   }

   for (unsigned i = 0; i < ext->pipelineStageCreationFeedbackCount; ++i) {
      ext->pPipelineStageCreationFeedbacks[i].flags = 0;
      ext->pPipelineStageCreationFeedbacks[i].duration = 0;
   }
}

/* Start timing a creation-feedback interval: subtract the current time now;
 * radv_stop_feedback() later adds the end time, leaving the elapsed span. */
static void
radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
{
   if (!feedback)
      return;

   feedback->duration -= radv_get_current_time();
   feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
}

/* Finish a creation-feedback interval and record whether the result came
 * from the application pipeline cache. */
static void
radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
{
   if (!feedback)
      return;

   feedback->duration +=
radv_get_current_time(); 31477ec681f3Smrg feedback->flags = 31487ec681f3Smrg VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT | 31497ec681f3Smrg (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0); 31507ec681f3Smrg} 31517ec681f3Smrg 31527ec681f3Smrgstatic bool 31537ec681f3Smrgmem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, 31547ec681f3Smrg unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, 31557ec681f3Smrg void *data) 31567ec681f3Smrg{ 31577ec681f3Smrg if (num_components > 4) 31587ec681f3Smrg return false; 31597ec681f3Smrg 31607ec681f3Smrg /* >128 bit loads are split except with SMEM */ 31617ec681f3Smrg if (bit_size * num_components > 128) 31627ec681f3Smrg return false; 31637ec681f3Smrg 31647ec681f3Smrg uint32_t align; 31657ec681f3Smrg if (align_offset) 31667ec681f3Smrg align = 1 << (ffs(align_offset) - 1); 31677ec681f3Smrg else 31687ec681f3Smrg align = align_mul; 31697ec681f3Smrg 31707ec681f3Smrg switch (low->intrinsic) { 31717ec681f3Smrg case nir_intrinsic_load_global: 31727ec681f3Smrg case nir_intrinsic_store_global: 31737ec681f3Smrg case nir_intrinsic_store_ssbo: 31747ec681f3Smrg case nir_intrinsic_load_ssbo: 31757ec681f3Smrg case nir_intrinsic_load_ubo: 31767ec681f3Smrg case nir_intrinsic_load_push_constant: { 31777ec681f3Smrg unsigned max_components; 31787ec681f3Smrg if (align % 4 == 0) 31797ec681f3Smrg max_components = NIR_MAX_VEC_COMPONENTS; 31807ec681f3Smrg else if (align % 2 == 0) 31817ec681f3Smrg max_components = 16u / bit_size; 31827ec681f3Smrg else 31837ec681f3Smrg max_components = 8u / bit_size; 31847ec681f3Smrg return (align % (bit_size / 8u)) == 0 && num_components <= max_components; 31857ec681f3Smrg } 31867ec681f3Smrg case nir_intrinsic_load_deref: 31877ec681f3Smrg case nir_intrinsic_store_deref: 31887ec681f3Smrg assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared)); 31897ec681f3Smrg FALLTHROUGH; 31907ec681f3Smrg case 
nir_intrinsic_load_shared: 31917ec681f3Smrg case nir_intrinsic_store_shared: 31927ec681f3Smrg if (bit_size * num_components == 31937ec681f3Smrg 96) { /* 96 bit loads require 128 bit alignment and are split otherwise */ 31947ec681f3Smrg return align % 16 == 0; 31957ec681f3Smrg } else if (bit_size == 16 && (align % 4)) { 31967ec681f3Smrg /* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU 31977ec681f3Smrg * vectorization, because our vectorizer requires the scalar IR to already contain vectors. 31987ec681f3Smrg */ 31997ec681f3Smrg return (align % 2 == 0) && num_components <= 2; 32007ec681f3Smrg } else { 32017ec681f3Smrg if (num_components == 3) { 32027ec681f3Smrg /* AMD hardware can't do 3-component loads except for 96-bit loads, handled above. */ 32037ec681f3Smrg return false; 32047ec681f3Smrg } 32057ec681f3Smrg unsigned req = bit_size * num_components; 32067ec681f3Smrg if (req == 64 || req == 128) /* 64-bit and 128-bit loads can use ds_read2_b{32,64} */ 32077ec681f3Smrg req /= 2u; 32087ec681f3Smrg return align % (req / 8u) == 0; 32097ec681f3Smrg } 32107ec681f3Smrg default: 32117ec681f3Smrg return false; 32127ec681f3Smrg } 32137ec681f3Smrg return false; 32147ec681f3Smrg} 32157ec681f3Smrg 32167ec681f3Smrgstatic unsigned 32177ec681f3Smrglower_bit_size_callback(const nir_instr *instr, void *_) 32187ec681f3Smrg{ 32197ec681f3Smrg struct radv_device *device = _; 32207ec681f3Smrg enum chip_class chip = device->physical_device->rad_info.chip_class; 32217ec681f3Smrg 32227ec681f3Smrg if (instr->type != nir_instr_type_alu) 32237ec681f3Smrg return 0; 32247ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 32257ec681f3Smrg 32267ec681f3Smrg if (alu->dest.dest.ssa.bit_size & (8 | 16)) { 32277ec681f3Smrg unsigned bit_size = alu->dest.dest.ssa.bit_size; 32287ec681f3Smrg switch (alu->op) { 32297ec681f3Smrg case nir_op_iabs: 32307ec681f3Smrg case nir_op_bitfield_select: 32317ec681f3Smrg case nir_op_imul_high: 32327ec681f3Smrg case 
nir_op_umul_high: 32337ec681f3Smrg case nir_op_ineg: 32347ec681f3Smrg case nir_op_isign: 32357ec681f3Smrg return 32; 32367ec681f3Smrg case nir_op_imax: 32377ec681f3Smrg case nir_op_umax: 32387ec681f3Smrg case nir_op_imin: 32397ec681f3Smrg case nir_op_umin: 32407ec681f3Smrg case nir_op_ishr: 32417ec681f3Smrg case nir_op_ushr: 32427ec681f3Smrg case nir_op_ishl: 32437ec681f3Smrg case nir_op_uadd_sat: 32447ec681f3Smrg return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 32457ec681f3Smrg : 0; 32467ec681f3Smrg case nir_op_iadd_sat: 32477ec681f3Smrg return bit_size == 8 || !nir_dest_is_divergent(alu->dest.dest) ? 32 : 0; 32487ec681f3Smrg 32497ec681f3Smrg default: 32507ec681f3Smrg return 0; 32517ec681f3Smrg } 32527ec681f3Smrg } 32537ec681f3Smrg 32547ec681f3Smrg if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) { 32557ec681f3Smrg unsigned bit_size = nir_src_bit_size(alu->src[0].src); 32567ec681f3Smrg switch (alu->op) { 32577ec681f3Smrg case nir_op_bit_count: 32587ec681f3Smrg case nir_op_find_lsb: 32597ec681f3Smrg case nir_op_ufind_msb: 32607ec681f3Smrg case nir_op_i2b1: 32617ec681f3Smrg return 32; 32627ec681f3Smrg case nir_op_ilt: 32637ec681f3Smrg case nir_op_ige: 32647ec681f3Smrg case nir_op_ieq: 32657ec681f3Smrg case nir_op_ine: 32667ec681f3Smrg case nir_op_ult: 32677ec681f3Smrg case nir_op_uge: 32687ec681f3Smrg return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 
32 32697ec681f3Smrg : 0; 32707ec681f3Smrg default: 32717ec681f3Smrg return 0; 32727ec681f3Smrg } 32737ec681f3Smrg } 32747ec681f3Smrg 32757ec681f3Smrg return 0; 32767ec681f3Smrg} 32777ec681f3Smrg 32787ec681f3Smrgstatic bool 32797ec681f3Smrgopt_vectorize_callback(const nir_instr *instr, void *_) 32807ec681f3Smrg{ 32817ec681f3Smrg assert(instr->type == nir_instr_type_alu); 32827ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 32837ec681f3Smrg unsigned bit_size = alu->dest.dest.ssa.bit_size; 32847ec681f3Smrg if (bit_size != 16) 32857ec681f3Smrg return false; 32867ec681f3Smrg 32877ec681f3Smrg switch (alu->op) { 32887ec681f3Smrg case nir_op_fadd: 32897ec681f3Smrg case nir_op_fsub: 32907ec681f3Smrg case nir_op_fmul: 32917ec681f3Smrg case nir_op_fneg: 32927ec681f3Smrg case nir_op_fsat: 32937ec681f3Smrg case nir_op_fmin: 32947ec681f3Smrg case nir_op_fmax: 32957ec681f3Smrg case nir_op_iadd: 32967ec681f3Smrg case nir_op_isub: 32977ec681f3Smrg case nir_op_imul: 32987ec681f3Smrg case nir_op_imin: 32997ec681f3Smrg case nir_op_imax: 33007ec681f3Smrg case nir_op_umin: 33017ec681f3Smrg case nir_op_umax: 33027ec681f3Smrg return true; 33037ec681f3Smrg case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */ 33047ec681f3Smrg case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */ 33057ec681f3Smrg case nir_op_ushr: 33067ec681f3Smrg default: 33077ec681f3Smrg return false; 33087ec681f3Smrg } 33097ec681f3Smrg} 33107ec681f3Smrg 33117ec681f3Smrgstatic nir_component_mask_t 33127ec681f3Smrgnon_uniform_access_callback(const nir_src *src, void *_) 33137ec681f3Smrg{ 33147ec681f3Smrg if (src->ssa->num_components == 1) 33157ec681f3Smrg return 0x1; 33167ec681f3Smrg return nir_chase_binding(*src).success ? 
                                              0x2 : 0x3;
}

/* Compile (or load from the pipeline cache) every shader of a pipeline.
 *
 * High-level flow:
 *  1. Hash the stages (or use custom_hash) and try the pipeline cache; on a
 *     full hit, return VK_SUCCESS immediately.
 *  2. Translate each stage's SPIR-V module to NIR, link, and run the RADV
 *     lowering/optimization pass pipeline (order-sensitive).
 *  3. Compile the NIR into shader variants — merging VS+TCS and ES+GS on
 *     GFX9+ — plus a separate GS copy shader when not using NGG.
 *  4. Insert the results into the cache and free temporary NIR/binaries.
 *
 * Creation-feedback timing (VK_EXT_pipeline_creation_feedback) is recorded
 * per pipeline and per stage via radv_start/stop_feedback().
 */
VkResult
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
                    struct radv_device *device, struct radv_pipeline_cache *cache,
                    const struct radv_pipeline_key *pipeline_key,
                    const VkPipelineShaderStageCreateInfo **pStages,
                    const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
                    VkPipelineCreationFeedbackEXT *pipeline_feedback,
                    VkPipelineCreationFeedbackEXT **stage_feedbacks)
{
   struct vk_shader_module fs_m = {0};
   struct vk_shader_module *modules[MESA_SHADER_STAGES] = {
      0,
   };
   nir_shader *nir[MESA_SHADER_STAGES] = {0};
   struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
   struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
   unsigned char hash[20], gs_copy_hash[20];
   bool keep_executable_info =
      (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
      device->keep_shader_info;
   bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
                              (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
                              device->keep_shader_info;
   /* Ray-tracing stack sizes only exist for compute pipelines. */
   struct radv_pipeline_shader_stack_size **stack_sizes =
      pipeline->type == RADV_PIPELINE_COMPUTE ? &pipeline->compute.rt_stack_sizes : NULL;
   uint32_t *num_stack_sizes = stack_sizes ? &pipeline->compute.group_count : NULL;

   radv_start_feedback(pipeline_feedback);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (pStages[i]) {
         modules[i] = vk_shader_module_from_handle(pStages[i]->module);
         /* Internal (meta) modules carry pre-built NIR; derive their SHA1
          * from the shader name instead of SPIR-V words.
          */
         if (modules[i]->nir)
            _mesa_sha1_compute(modules[i]->nir->info.name, strlen(modules[i]->nir->info.name),
                               modules[i]->sha1);

         pipeline->active_stages |= mesa_to_vk_shader_stage(i);
         if (i < MESA_SHADER_FRAGMENT)
            pipeline->graphics.last_vgt_api_stage = i;
      }
   }

   if (custom_hash)
      memcpy(hash, custom_hash, 20);
   else {
      radv_hash_shaders(hash, pStages, pipeline_layout, pipeline_key,
                        radv_get_hash_flags(device, keep_statistic_info));
   }
   /* The GS copy shader is cached under a hash derived from the pipeline
    * hash by flipping one bit.
    */
   memcpy(gs_copy_hash, hash, 20);
   gs_copy_hash[0] ^= 1;

   pipeline->pipeline_hash = *(uint64_t *)hash;

   bool found_in_application_cache = true;
   if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
      struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
      radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants, NULL,
                                                      NULL, &found_in_application_cache);
      pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
   }

   /* Full cache hit: all variants found, and either no GS, a GS copy shader,
    * or an NGG GS (which needs no copy shader).
    */
   if (!keep_executable_info &&
       radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
                                                       stack_sizes, num_stack_sizes,
                                                       &found_in_application_cache) &&
       (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader ||
        pipeline->shaders[MESA_SHADER_GEOMETRY]->info.is_ngg)) {
      radv_stop_feedback(pipeline_feedback, found_in_application_cache);
      return VK_SUCCESS;
   }

   if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
      radv_stop_feedback(pipeline_feedback, found_in_application_cache);
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
   }

   /* Graphics pipelines without a fragment shader get a no-op one. */
   if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
      nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "noop_fs");
      fs_m = vk_shader_module_from_nir(fs_b.shader);
      modules[MESA_SHADER_FRAGMENT] = &fs_m;
   }

   for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
      const VkPipelineShaderStageCreateInfo *stage = pStages[i];

      if (!modules[i])
         continue;

      radv_start_feedback(stage_feedbacks[i]);

      nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i,
                                          stage ? stage->pSpecializationInfo : NULL,
                                          pipeline_layout, pipeline_key);

      /* We don't want to alter meta shaders IR directly so clone it
       * first.
       */
      if (nir[i]->info.name) {
         nir[i] = nir_shader_clone(NULL, nir[i]);
      }

      radv_stop_feedback(stage_feedbacks[i], false);
   }

   bool optimize_conservatively = pipeline_key->optimisations_disabled;

   radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
   radv_set_driver_locations(pipeline, nir, infos);

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (nir[i]) {
         radv_start_feedback(stage_feedbacks[i]);
         radv_optimize_nir(device, nir[i], optimize_conservatively, false);

         /* Gather info again, information such as outputs_read can be out-of-date. */
         nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
         radv_lower_io(device, nir[i]);

         radv_stop_feedback(stage_feedbacks[i], false);
      }
   }

   if (nir[MESA_SHADER_TESS_CTRL]) {
      nir_lower_patch_vertices(nir[MESA_SHADER_TESS_EVAL],
                               nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
      gather_tess_info(device, nir, infos, pipeline_key);
   }

   radv_fill_shader_info(pipeline, pipeline_layout, pStages, pipeline_key, infos, nir);

   bool pipeline_has_ngg = (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) ||
                           (nir[MESA_SHADER_TESS_EVAL] && infos[MESA_SHADER_TESS_EVAL].is_ngg);

   if (pipeline_has_ngg) {
      struct gfx10_ngg_info *ngg_info;

      /* With tessellation, NGG runs on the TES, hence TESS_EVAL info even
       * though the presence test is on TESS_CTRL.
       */
      if (nir[MESA_SHADER_GEOMETRY])
         ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
      else if (nir[MESA_SHADER_TESS_CTRL])
         ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
      else
         ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;

      gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
   } else if (nir[MESA_SHADER_GEOMETRY]) {
      struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;

      gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
   } else {
      gl_shader_stage hw_vs_api_stage =
         nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      infos[hw_vs_api_stage].workgroup_size = infos[hw_vs_api_stage].wave_size;
   }

   radv_determine_ngg_settings(pipeline, pipeline_key, infos, nir);

   /* Late, backend-oriented NIR passes.  The ordering below is significant:
    * vectorization must precede scalarization cleanup, I/O lowering must
    * precede NGG lowering, and divergence analysis must be fresh (LCSSA)
    * before nir_lower_bit_size.
    */
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (nir[i]) {
         radv_start_feedback(stage_feedbacks[i]);

         /* Wave and workgroup size should already be filled.
          */
         assert(infos[i].wave_size && infos[i].workgroup_size);

         if (!radv_use_llvm_for_stage(device, i)) {
            nir_lower_non_uniform_access_options options = {
               .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
                        nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
               .callback = &non_uniform_access_callback,
               .callback_data = NULL,
            };
            NIR_PASS_V(nir[i], nir_lower_non_uniform_access, &options);
         }
         NIR_PASS_V(nir[i], nir_lower_memory_model);

         bool lower_to_scalar = false;

         nir_load_store_vectorize_options vectorize_opts = {
            .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
                     nir_var_mem_shared | nir_var_mem_global,
            .callback = mem_vectorize_callback,
            .robust_modes = 0,
         };

         if (device->robust_buffer_access2) {
            vectorize_opts.robust_modes =
               nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global | nir_var_mem_push_const;
         }

         if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
            NIR_PASS_V(nir[i], nir_copy_prop);
            lower_to_scalar = true;

            /* Gather info again, to update whether 8/16-bit are used.
             */
            nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
         }

         lower_to_scalar |=
            nir_opt_shrink_vectors(nir[i], !device->instance->disable_shrink_image_store);

         if (lower_to_scalar)
            nir_lower_alu_to_scalar(nir[i], NULL, NULL);

         /* lower ALU operations */
         nir_lower_int64(nir[i]);

         nir_opt_idiv_const(nir[i], 8);

         nir_lower_idiv(nir[i],
                        &(nir_lower_idiv_options){
                           .imprecise_32bit_lowering = false,
                           .allow_fp16 = device->physical_device->rad_info.chip_class >= GFX9,
                        });

         nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
         nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);

         /* Lower I/O intrinsics to memory instructions.
          */
         bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key);
         bool lowered_ngg = pipeline_has_ngg && i == pipeline->graphics.last_vgt_api_stage &&
                            !radv_use_llvm_for_stage(device, i);
         if (lowered_ngg)
            radv_lower_ngg(device, nir[i], &infos[i], pipeline_key);

         radv_optimize_nir_algebraic(nir[i], io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE);

         if (nir[i]->info.bit_sizes_int & (8 | 16)) {
            /* lower_bit_size_callback queries divergence on GFX8+, so make
             * the analysis valid first (LCSSA keeps it valid across passes).
             */
            if (device->physical_device->rad_info.chip_class >= GFX8) {
               nir_convert_to_lcssa(nir[i], true, true);
               nir_divergence_analysis(nir[i]);
            }

            if (nir_lower_bit_size(nir[i], lower_bit_size_callback, device)) {
               NIR_PASS_V(nir[i], nir_opt_constant_folding);
               NIR_PASS_V(nir[i], nir_opt_dce);
            }

            if (device->physical_device->rad_info.chip_class >= GFX8)
               nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
         }
         if (((nir[i]->info.bit_sizes_int | nir[i]->info.bit_sizes_float) & 16) &&
             device->physical_device->rad_info.chip_class >= GFX9)
            NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);

         /* cleanup passes */
         nir_lower_load_const_to_scalar(nir[i]);
         nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo |
                                      nir_move_load_input | nir_move_comparisons | nir_move_copies;
         nir_opt_sink(nir[i], move_opts | nir_move_load_ssbo);
         nir_opt_move(nir[i], move_opts);

         radv_stop_feedback(stage_feedbacks[i], false);
      }
   }

   /* Debug: dump the final NIR when requested. */
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (radv_can_dump_shader(device, modules[i], false))
         nir_print_shader(nir[i], stderr);
   }

   if (modules[MESA_SHADER_GEOMETRY]) {
      struct radv_shader_binary *gs_copy_binary = NULL;
      /* Legacy (non-NGG) GS needs a separate copy shader that runs as a HW
       * VS to read the GS ring and export vertex data.
       */
      if (!pipeline_has_ngg) {
         struct radv_shader_info info = {0};

         if (infos[MESA_SHADER_GEOMETRY].vs.outinfo.export_clip_dists)
            info.vs.outinfo.export_clip_dists = true;

         radv_nir_shader_info_pass(device, nir[MESA_SHADER_GEOMETRY], pipeline_layout, pipeline_key,
                                   &info);
         info.wave_size = 64; /* Wave32 not supported. */
         info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
         info.ballot_bit_size = 64;

         pipeline->gs_copy_shader = radv_create_gs_copy_shader(
            device, nir[MESA_SHADER_GEOMETRY], &info, &gs_copy_binary, keep_executable_info,
            keep_statistic_info, pipeline_key->has_multiview_view_index,
            pipeline_key->optimisations_disabled);
      }

      if (!keep_executable_info && pipeline->gs_copy_shader) {
         struct radv_shader_binary *gs_binaries[MESA_SHADER_STAGES] = {NULL};
         struct radv_shader_variant *gs_variants[MESA_SHADER_STAGES] = {0};

         gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
         gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;

         radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries,
                                            NULL, 0);

         /* The cache may return an already-cached variant instead. */
         pipeline->gs_copy_shader = gs_variants[MESA_SHADER_GEOMETRY];
      }
      free(gs_copy_binary);
   }

   if (nir[MESA_SHADER_FRAGMENT]) {
      if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
         radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);

         pipeline->shaders[MESA_SHADER_FRAGMENT] = radv_shader_variant_compile(
            device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline_layout,
            pipeline_key, infos + MESA_SHADER_FRAGMENT, keep_executable_info,
            keep_statistic_info, &binaries[MESA_SHADER_FRAGMENT]);

         radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
      }
   }

   /* GFX9+: merge VS into TCS (LS-HS) and compile them together. */
   if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
      if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
         struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};

         radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);

         pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(
            device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline_layout, pipeline_key,
            &infos[MESA_SHADER_TESS_CTRL], keep_executable_info, keep_statistic_info,
            &binaries[MESA_SHADER_TESS_CTRL]);

         radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
      }
      /* Consumed by the merged shader; don't compile it standalone below. */
      modules[MESA_SHADER_VERTEX] = NULL;
   }

   /* GFX9+: merge the preceding stage (VS or TES) into GS (ES-GS). */
   if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
      gl_shader_stage pre_stage =
         modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
         struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};

         radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);

         pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(
            device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline_layout, pipeline_key,
            &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
            keep_statistic_info, &binaries[MESA_SHADER_GEOMETRY]);

         radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
      }
      modules[pre_stage] = NULL;
   }

   /* Compile the remaining (unmerged) stages individually. */
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (modules[i] && !pipeline->shaders[i]) {
         radv_start_feedback(stage_feedbacks[i]);

         pipeline->shaders[i] = radv_shader_variant_compile(
            device, modules[i], &nir[i], 1, pipeline_layout, pipeline_key, infos + i,
            keep_executable_info, keep_statistic_info, &binaries[i]);

         radv_stop_feedback(stage_feedbacks[i], false);
      }
   }

   if (!keep_executable_info) {
      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries,
                                         stack_sizes ? *stack_sizes : NULL,
                                         num_stack_sizes ? *num_stack_sizes : 0);
   }

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      free(binaries[i]);
      if (nir[i]) {
         ralloc_free(nir[i]);

         if (radv_can_dump_shader_stats(device, modules[i])) {
            radv_dump_shader_stats(device, pipeline, i, stderr);
         }
      }
   }

   if (fs_m.nir)
      ralloc_free(fs_m.nir);

   radv_stop_feedback(pipeline_feedback, false);
   return VK_SUCCESS;
}

/* Return the SPI_SHADER_USER_DATA_*_0 register offset where the given API
 * stage's user SGPRs live.  The mapping depends on which hardware stage the
 * API stage runs on, which in turn depends on tess/GS/NGG usage and the chip
 * class (stages are merged on GFX9+).
 */
static uint32_t
radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_stage stage,
                                   enum chip_class chip_class)
{
   bool has_gs = radv_pipeline_has_gs(pipeline);
   bool has_tess = radv_pipeline_has_tess(pipeline);
   bool has_ngg = radv_pipeline_has_ngg(pipeline);

   switch (stage) {
   case MESA_SHADER_FRAGMENT:
      return R_00B030_SPI_SHADER_USER_DATA_PS_0;
   case MESA_SHADER_VERTEX:
      if (has_tess) {
         /* VS runs as LS (merged into HS on GFX9+). */
         if (chip_class >= GFX10) {
            return R_00B430_SPI_SHADER_USER_DATA_HS_0;
         } else if (chip_class == GFX9) {
            return R_00B430_SPI_SHADER_USER_DATA_LS_0;
         } else {
            return R_00B530_SPI_SHADER_USER_DATA_LS_0;
         }
      }

      if (has_gs) {
         /* VS runs as ES (merged into GS on GFX9+). */
         if (chip_class >= GFX10) {
            return R_00B230_SPI_SHADER_USER_DATA_GS_0;
         } else {
            return R_00B330_SPI_SHADER_USER_DATA_ES_0;
         }
      }

      if (has_ngg)
         return
            R_00B230_SPI_SHADER_USER_DATA_GS_0;

      return R_00B130_SPI_SHADER_USER_DATA_VS_0;
   case MESA_SHADER_GEOMETRY:
      /* On GFX9 the merged ES-GS uses the ES register range. */
      return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
                                : R_00B230_SPI_SHADER_USER_DATA_GS_0;
   case MESA_SHADER_COMPUTE:
      return R_00B900_COMPUTE_USER_DATA_0;
   case MESA_SHADER_TESS_CTRL:
      /* On GFX9 the merged LS-HS uses the LS register range. */
      return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
                                : R_00B430_SPI_SHADER_USER_DATA_HS_0;
   case MESA_SHADER_TESS_EVAL:
      if (has_gs) {
         return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
                                    : R_00B330_SPI_SHADER_USER_DATA_ES_0;
      } else if (has_ngg) {
         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
      } else {
         return R_00B130_SPI_SHADER_USER_DATA_VS_0;
      }
   default:
      unreachable("unknown shader");
   }
}

/* One row of the GFX9 bin-size tables: a bytes-per-pixel key paired with the
 * bin extent to use.  NOTE(review): the exact search semantics (threshold vs.
 * exact match) live in the lookup code outside this chunk — confirm there.
 */
struct radv_bin_size_entry {
   unsigned bpp;
   VkExtent2D extent;
};

/* Compute the GFX9 primitive binning bin size for a graphics pipeline from
 * hardware layout (RBs per SE, SE count) and per-pixel color/DS bandwidth.
 */
static VkExtent2D
radv_gfx9_compute_bin_size(const struct radv_pipeline *pipeline,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Tables indexed by [log2(RBs per SE)][log2(SE count)], each a list of
    * {bpp, extent} entries terminated by UINT_MAX sentinels.
    */
   static const struct radv_bin_size_entry color_size_table[][3][9] = {
      {
         /* One RB / SE */
         {
            /* One shader engine */
            {0, {128, 128}},
            {1, {64, 128}},
            {2, {32, 128}},
            {3, {16, 128}},
            {17, {0, 0}},
            {UINT_MAX, {0, 0}},
         },
         {
            /* Two shader engines */
            {0, {128, 128}},
            {2,
{64, 128}}, 37777ec681f3Smrg {3, {32, 128}}, 37787ec681f3Smrg {5, {16, 128}}, 37797ec681f3Smrg {17, {0, 0}}, 37807ec681f3Smrg {UINT_MAX, {0, 0}}, 37817ec681f3Smrg }, 37827ec681f3Smrg { 37837ec681f3Smrg /* Four shader engines */ 37847ec681f3Smrg {0, {128, 128}}, 37857ec681f3Smrg {3, {64, 128}}, 37867ec681f3Smrg {5, {16, 128}}, 37877ec681f3Smrg {17, {0, 0}}, 37887ec681f3Smrg {UINT_MAX, {0, 0}}, 37897ec681f3Smrg }, 37907ec681f3Smrg }, 37917ec681f3Smrg { 37927ec681f3Smrg /* Two RB / SE */ 37937ec681f3Smrg { 37947ec681f3Smrg /* One shader engine */ 37957ec681f3Smrg {0, {128, 128}}, 37967ec681f3Smrg {2, {64, 128}}, 37977ec681f3Smrg {3, {32, 128}}, 37987ec681f3Smrg {5, {16, 128}}, 37997ec681f3Smrg {33, {0, 0}}, 38007ec681f3Smrg {UINT_MAX, {0, 0}}, 38017ec681f3Smrg }, 38027ec681f3Smrg { 38037ec681f3Smrg /* Two shader engines */ 38047ec681f3Smrg {0, {128, 128}}, 38057ec681f3Smrg {3, {64, 128}}, 38067ec681f3Smrg {5, {32, 128}}, 38077ec681f3Smrg {9, {16, 128}}, 38087ec681f3Smrg {33, {0, 0}}, 38097ec681f3Smrg {UINT_MAX, {0, 0}}, 38107ec681f3Smrg }, 38117ec681f3Smrg { 38127ec681f3Smrg /* Four shader engines */ 38137ec681f3Smrg {0, {256, 256}}, 38147ec681f3Smrg {2, {128, 256}}, 38157ec681f3Smrg {3, {128, 128}}, 38167ec681f3Smrg {5, {64, 128}}, 38177ec681f3Smrg {9, {16, 128}}, 38187ec681f3Smrg {33, {0, 0}}, 38197ec681f3Smrg {UINT_MAX, {0, 0}}, 38207ec681f3Smrg }, 38217ec681f3Smrg }, 38227ec681f3Smrg { 38237ec681f3Smrg /* Four RB / SE */ 38247ec681f3Smrg { 38257ec681f3Smrg /* One shader engine */ 38267ec681f3Smrg {0, {128, 256}}, 38277ec681f3Smrg {2, {128, 128}}, 38287ec681f3Smrg {3, {64, 128}}, 38297ec681f3Smrg {5, {32, 128}}, 38307ec681f3Smrg {9, {16, 128}}, 38317ec681f3Smrg {33, {0, 0}}, 38327ec681f3Smrg {UINT_MAX, {0, 0}}, 38337ec681f3Smrg }, 38347ec681f3Smrg { 38357ec681f3Smrg /* Two shader engines */ 38367ec681f3Smrg {0, {256, 256}}, 38377ec681f3Smrg {2, {128, 256}}, 38387ec681f3Smrg {3, {128, 128}}, 38397ec681f3Smrg {5, {64, 128}}, 38407ec681f3Smrg {9, {32, 128}}, 
38417ec681f3Smrg {17, {16, 128}}, 38427ec681f3Smrg {33, {0, 0}}, 38437ec681f3Smrg {UINT_MAX, {0, 0}}, 38447ec681f3Smrg }, 38457ec681f3Smrg { 38467ec681f3Smrg /* Four shader engines */ 38477ec681f3Smrg {0, {256, 512}}, 38487ec681f3Smrg {2, {256, 256}}, 38497ec681f3Smrg {3, {128, 256}}, 38507ec681f3Smrg {5, {128, 128}}, 38517ec681f3Smrg {9, {64, 128}}, 38527ec681f3Smrg {17, {16, 128}}, 38537ec681f3Smrg {33, {0, 0}}, 38547ec681f3Smrg {UINT_MAX, {0, 0}}, 38557ec681f3Smrg }, 38567ec681f3Smrg }, 38577ec681f3Smrg }; 38587ec681f3Smrg static const struct radv_bin_size_entry ds_size_table[][3][9] = { 38597ec681f3Smrg { 38607ec681f3Smrg // One RB / SE 38617ec681f3Smrg { 38627ec681f3Smrg // One shader engine 38637ec681f3Smrg {0, {128, 256}}, 38647ec681f3Smrg {2, {128, 128}}, 38657ec681f3Smrg {4, {64, 128}}, 38667ec681f3Smrg {7, {32, 128}}, 38677ec681f3Smrg {13, {16, 128}}, 38687ec681f3Smrg {49, {0, 0}}, 38697ec681f3Smrg {UINT_MAX, {0, 0}}, 38707ec681f3Smrg }, 38717ec681f3Smrg { 38727ec681f3Smrg // Two shader engines 38737ec681f3Smrg {0, {256, 256}}, 38747ec681f3Smrg {2, {128, 256}}, 38757ec681f3Smrg {4, {128, 128}}, 38767ec681f3Smrg {7, {64, 128}}, 38777ec681f3Smrg {13, {32, 128}}, 38787ec681f3Smrg {25, {16, 128}}, 38797ec681f3Smrg {49, {0, 0}}, 38807ec681f3Smrg {UINT_MAX, {0, 0}}, 38817ec681f3Smrg }, 38827ec681f3Smrg { 38837ec681f3Smrg // Four shader engines 38847ec681f3Smrg {0, {256, 512}}, 38857ec681f3Smrg {2, {256, 256}}, 38867ec681f3Smrg {4, {128, 256}}, 38877ec681f3Smrg {7, {128, 128}}, 38887ec681f3Smrg {13, {64, 128}}, 38897ec681f3Smrg {25, {16, 128}}, 38907ec681f3Smrg {49, {0, 0}}, 38917ec681f3Smrg {UINT_MAX, {0, 0}}, 38927ec681f3Smrg }, 38937ec681f3Smrg }, 38947ec681f3Smrg { 38957ec681f3Smrg // Two RB / SE 38967ec681f3Smrg { 38977ec681f3Smrg // One shader engine 38987ec681f3Smrg {0, {256, 256}}, 38997ec681f3Smrg {2, {128, 256}}, 39007ec681f3Smrg {4, {128, 128}}, 39017ec681f3Smrg {7, {64, 128}}, 39027ec681f3Smrg {13, {32, 128}}, 39037ec681f3Smrg {25, {16, 128}}, 
39047ec681f3Smrg {97, {0, 0}}, 39057ec681f3Smrg {UINT_MAX, {0, 0}}, 39067ec681f3Smrg }, 39077ec681f3Smrg { 39087ec681f3Smrg // Two shader engines 39097ec681f3Smrg {0, {256, 512}}, 39107ec681f3Smrg {2, {256, 256}}, 39117ec681f3Smrg {4, {128, 256}}, 39127ec681f3Smrg {7, {128, 128}}, 39137ec681f3Smrg {13, {64, 128}}, 39147ec681f3Smrg {25, {32, 128}}, 39157ec681f3Smrg {49, {16, 128}}, 39167ec681f3Smrg {97, {0, 0}}, 39177ec681f3Smrg {UINT_MAX, {0, 0}}, 39187ec681f3Smrg }, 39197ec681f3Smrg { 39207ec681f3Smrg // Four shader engines 39217ec681f3Smrg {0, {512, 512}}, 39227ec681f3Smrg {2, {256, 512}}, 39237ec681f3Smrg {4, {256, 256}}, 39247ec681f3Smrg {7, {128, 256}}, 39257ec681f3Smrg {13, {128, 128}}, 39267ec681f3Smrg {25, {64, 128}}, 39277ec681f3Smrg {49, {16, 128}}, 39287ec681f3Smrg {97, {0, 0}}, 39297ec681f3Smrg {UINT_MAX, {0, 0}}, 39307ec681f3Smrg }, 39317ec681f3Smrg }, 39327ec681f3Smrg { 39337ec681f3Smrg // Four RB / SE 39347ec681f3Smrg { 39357ec681f3Smrg // One shader engine 39367ec681f3Smrg {0, {256, 512}}, 39377ec681f3Smrg {2, {256, 256}}, 39387ec681f3Smrg {4, {128, 256}}, 39397ec681f3Smrg {7, {128, 128}}, 39407ec681f3Smrg {13, {64, 128}}, 39417ec681f3Smrg {25, {32, 128}}, 39427ec681f3Smrg {49, {16, 128}}, 39437ec681f3Smrg {UINT_MAX, {0, 0}}, 39447ec681f3Smrg }, 39457ec681f3Smrg { 39467ec681f3Smrg // Two shader engines 39477ec681f3Smrg {0, {512, 512}}, 39487ec681f3Smrg {2, {256, 512}}, 39497ec681f3Smrg {4, {256, 256}}, 39507ec681f3Smrg {7, {128, 256}}, 39517ec681f3Smrg {13, {128, 128}}, 39527ec681f3Smrg {25, {64, 128}}, 39537ec681f3Smrg {49, {32, 128}}, 39547ec681f3Smrg {97, {16, 128}}, 39557ec681f3Smrg {UINT_MAX, {0, 0}}, 39567ec681f3Smrg }, 39577ec681f3Smrg { 39587ec681f3Smrg // Four shader engines 39597ec681f3Smrg {0, {512, 512}}, 39607ec681f3Smrg {4, {256, 512}}, 39617ec681f3Smrg {7, {256, 256}}, 39627ec681f3Smrg {13, {128, 256}}, 39637ec681f3Smrg {25, {128, 128}}, 39647ec681f3Smrg {49, {64, 128}}, 39657ec681f3Smrg {97, {16, 128}}, 39667ec681f3Smrg {UINT_MAX, 
{0, 0}}, 39677ec681f3Smrg }, 39687ec681f3Smrg }, 39697ec681f3Smrg }; 39707ec681f3Smrg 39717ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 39727ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 39737ec681f3Smrg VkExtent2D extent = {512, 512}; 39747ec681f3Smrg 39757ec681f3Smrg unsigned log_num_rb_per_se = 39767ec681f3Smrg util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_render_backends / 39777ec681f3Smrg pipeline->device->physical_device->rad_info.max_se); 39787ec681f3Smrg unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se); 39797ec681f3Smrg 39807ec681f3Smrg unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config); 39817ec681f3Smrg unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa); 39827ec681f3Smrg unsigned effective_samples = total_samples; 39837ec681f3Smrg unsigned color_bytes_per_pixel = 0; 39847ec681f3Smrg 39857ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 39867ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 39877ec681f3Smrg if (vkblend) { 39887ec681f3Smrg for (unsigned i = 0; i < subpass->color_count; i++) { 39897ec681f3Smrg if (!vkblend->pAttachments[i].colorWriteMask) 39907ec681f3Smrg continue; 39917ec681f3Smrg 39927ec681f3Smrg if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) 39937ec681f3Smrg continue; 39947ec681f3Smrg 39957ec681f3Smrg VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format; 39967ec681f3Smrg color_bytes_per_pixel += vk_format_get_blocksize(format); 39977ec681f3Smrg } 39987ec681f3Smrg 39997ec681f3Smrg /* MSAA images typically don't use all samples all the time. 
*/ 40007ec681f3Smrg if (effective_samples >= 2 && ps_iter_samples <= 1) 40017ec681f3Smrg effective_samples = 2; 40027ec681f3Smrg color_bytes_per_pixel *= effective_samples; 40037ec681f3Smrg } 40047ec681f3Smrg 40057ec681f3Smrg const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se]; 40067ec681f3Smrg while (color_entry[1].bpp <= color_bytes_per_pixel) 40077ec681f3Smrg ++color_entry; 40087ec681f3Smrg 40097ec681f3Smrg extent = color_entry->extent; 40107ec681f3Smrg 40117ec681f3Smrg if (subpass->depth_stencil_attachment) { 40127ec681f3Smrg struct radv_render_pass_attachment *attachment = 40137ec681f3Smrg pass->attachments + subpass->depth_stencil_attachment->attachment; 40147ec681f3Smrg 40157ec681f3Smrg /* Coefficients taken from AMDVLK */ 40167ec681f3Smrg unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0; 40177ec681f3Smrg unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0; 40187ec681f3Smrg unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples; 40197ec681f3Smrg 40207ec681f3Smrg const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se]; 40217ec681f3Smrg while (ds_entry[1].bpp <= ds_bytes_per_pixel) 40227ec681f3Smrg ++ds_entry; 40237ec681f3Smrg 40247ec681f3Smrg if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height) 40257ec681f3Smrg extent = ds_entry->extent; 40267ec681f3Smrg } 40277ec681f3Smrg 40287ec681f3Smrg return extent; 40297ec681f3Smrg} 40307ec681f3Smrg 40317ec681f3Smrgstatic VkExtent2D 40327ec681f3Smrgradv_gfx10_compute_bin_size(const struct radv_pipeline *pipeline, 40337ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 40347ec681f3Smrg{ 40357ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 40367ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 40377ec681f3Smrg VkExtent2D extent = {512, 512}; 40387ec681f3Smrg 
40397ec681f3Smrg const unsigned db_tag_size = 64; 40407ec681f3Smrg const unsigned db_tag_count = 312; 40417ec681f3Smrg const unsigned color_tag_size = 1024; 40427ec681f3Smrg const unsigned color_tag_count = 31; 40437ec681f3Smrg const unsigned fmask_tag_size = 256; 40447ec681f3Smrg const unsigned fmask_tag_count = 44; 40457ec681f3Smrg 40467ec681f3Smrg const unsigned rb_count = pipeline->device->physical_device->rad_info.max_render_backends; 40477ec681f3Smrg const unsigned pipe_count = 40487ec681f3Smrg MAX2(rb_count, pipeline->device->physical_device->rad_info.num_tcc_blocks); 40497ec681f3Smrg 40507ec681f3Smrg const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count; 40517ec681f3Smrg const unsigned color_tag_part = 40527ec681f3Smrg (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count; 40537ec681f3Smrg const unsigned fmask_tag_part = 40547ec681f3Smrg (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count; 40557ec681f3Smrg 40567ec681f3Smrg const unsigned total_samples = 40577ec681f3Smrg 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config); 40587ec681f3Smrg const unsigned samples_log = util_logbase2_ceil(total_samples); 40597ec681f3Smrg 40607ec681f3Smrg unsigned color_bytes_per_pixel = 0; 40617ec681f3Smrg unsigned fmask_bytes_per_pixel = 0; 40627ec681f3Smrg 40637ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 40647ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 40657ec681f3Smrg if (vkblend) { 40667ec681f3Smrg for (unsigned i = 0; i < subpass->color_count; i++) { 40677ec681f3Smrg if (!vkblend->pAttachments[i].colorWriteMask) 40687ec681f3Smrg continue; 40697ec681f3Smrg 40707ec681f3Smrg if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) 40717ec681f3Smrg continue; 40727ec681f3Smrg 40737ec681f3Smrg VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format; 40747ec681f3Smrg color_bytes_per_pixel += 
vk_format_get_blocksize(format); 40757ec681f3Smrg 40767ec681f3Smrg if (total_samples > 1) { 40777ec681f3Smrg assert(samples_log <= 3); 40787ec681f3Smrg const unsigned fmask_array[] = {0, 1, 1, 4}; 40797ec681f3Smrg fmask_bytes_per_pixel += fmask_array[samples_log]; 40807ec681f3Smrg } 40817ec681f3Smrg } 40827ec681f3Smrg 40837ec681f3Smrg color_bytes_per_pixel *= total_samples; 40847ec681f3Smrg } 40857ec681f3Smrg color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1); 40867ec681f3Smrg 40877ec681f3Smrg const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel); 40887ec681f3Smrg extent.width = 1ull << ((color_pixel_count_log + 1) / 2); 40897ec681f3Smrg extent.height = 1ull << (color_pixel_count_log / 2); 40907ec681f3Smrg 40917ec681f3Smrg if (fmask_bytes_per_pixel) { 40927ec681f3Smrg const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel); 40937ec681f3Smrg 40947ec681f3Smrg const VkExtent2D fmask_extent = 40957ec681f3Smrg (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2), 40967ec681f3Smrg .height = 1ull << (color_pixel_count_log / 2)}; 40977ec681f3Smrg 40987ec681f3Smrg if (fmask_extent.width * fmask_extent.height < extent.width * extent.height) 40997ec681f3Smrg extent = fmask_extent; 41007ec681f3Smrg } 41017ec681f3Smrg 41027ec681f3Smrg if (subpass->depth_stencil_attachment) { 41037ec681f3Smrg struct radv_render_pass_attachment *attachment = 41047ec681f3Smrg pass->attachments + subpass->depth_stencil_attachment->attachment; 41057ec681f3Smrg 41067ec681f3Smrg /* Coefficients taken from AMDVLK */ 41077ec681f3Smrg unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0; 41087ec681f3Smrg unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 
1 : 0; 41097ec681f3Smrg unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples; 41107ec681f3Smrg 41117ec681f3Smrg const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel); 41127ec681f3Smrg 41137ec681f3Smrg const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), 41147ec681f3Smrg .height = 1ull << (color_pixel_count_log / 2)}; 41157ec681f3Smrg 41167ec681f3Smrg if (db_extent.width * db_extent.height < extent.width * extent.height) 41177ec681f3Smrg extent = db_extent; 41187ec681f3Smrg } 41197ec681f3Smrg 41207ec681f3Smrg extent.width = MAX2(extent.width, 128); 41217ec681f3Smrg extent.height = MAX2(extent.width, 64); 41227ec681f3Smrg 41237ec681f3Smrg return extent; 412401e04c3fSmrg} 412501e04c3fSmrg 412601e04c3fSmrgstatic void 41277ec681f3Smrgradv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline, 41287ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 41297ec681f3Smrg{ 41307ec681f3Smrg uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | 41317ec681f3Smrg S_028C44_DISABLE_START_OF_PRIM(1); 41327ec681f3Smrg 41337ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 41347ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 41357ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 41367ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *vkblend = 41377ec681f3Smrg radv_pipeline_get_color_blend_state(pCreateInfo); 41387ec681f3Smrg unsigned min_bytes_per_pixel = 0; 41397ec681f3Smrg 41407ec681f3Smrg if (vkblend) { 41417ec681f3Smrg for (unsigned i = 0; i < subpass->color_count; i++) { 41427ec681f3Smrg if (!vkblend->pAttachments[i].colorWriteMask) 41437ec681f3Smrg continue; 41447ec681f3Smrg 41457ec681f3Smrg if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) 41467ec681f3Smrg continue; 41477ec681f3Smrg 41487ec681f3Smrg 
VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format; 41497ec681f3Smrg unsigned bytes = vk_format_get_blocksize(format); 41507ec681f3Smrg if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel) 41517ec681f3Smrg min_bytes_per_pixel = bytes; 41527ec681f3Smrg } 41537ec681f3Smrg } 41547ec681f3Smrg 41557ec681f3Smrg pa_sc_binner_cntl_0 = 41567ec681f3Smrg S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) | 41577ec681f3Smrg S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */ 41587ec681f3Smrg S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */ 41597ec681f3Smrg S_028C44_DISABLE_START_OF_PRIM(1); 41607ec681f3Smrg } 41617ec681f3Smrg 41627ec681f3Smrg pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0; 416301e04c3fSmrg} 416401e04c3fSmrg 41657ec681f3Smrgstruct radv_binning_settings 41667ec681f3Smrgradv_get_binning_settings(const struct radv_physical_device *pdev) 41677ec681f3Smrg{ 41687ec681f3Smrg struct radv_binning_settings settings; 41697ec681f3Smrg if (pdev->rad_info.has_dedicated_vram) { 41707ec681f3Smrg if (pdev->rad_info.max_render_backends > 4) { 41717ec681f3Smrg settings.context_states_per_bin = 1; 41727ec681f3Smrg settings.persistent_states_per_bin = 1; 41737ec681f3Smrg } else { 41747ec681f3Smrg settings.context_states_per_bin = 3; 41757ec681f3Smrg settings.persistent_states_per_bin = 8; 41767ec681f3Smrg } 41777ec681f3Smrg settings.fpovs_per_batch = 63; 41787ec681f3Smrg } else { 41797ec681f3Smrg /* The context states are affected by the scissor bug. */ 41807ec681f3Smrg settings.context_states_per_bin = 6; 41817ec681f3Smrg /* 32 causes hangs for RAVEN. 
*/ 41827ec681f3Smrg settings.persistent_states_per_bin = 16; 41837ec681f3Smrg settings.fpovs_per_batch = 63; 41847ec681f3Smrg } 41857ec681f3Smrg 41867ec681f3Smrg if (pdev->rad_info.has_gfx9_scissor_bug) 41877ec681f3Smrg settings.context_states_per_bin = 1; 41887ec681f3Smrg 41897ec681f3Smrg return settings; 41907ec681f3Smrg} 41917ec681f3Smrg 41927ec681f3Smrgstatic void 41937ec681f3Smrgradv_pipeline_init_binning_state(struct radv_pipeline *pipeline, 41947ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 41957ec681f3Smrg const struct radv_blend_state *blend) 41967ec681f3Smrg{ 41977ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class < GFX9) 41987ec681f3Smrg return; 41997ec681f3Smrg 42007ec681f3Smrg VkExtent2D bin_size; 42017ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 42027ec681f3Smrg bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo); 42037ec681f3Smrg } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) { 42047ec681f3Smrg bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo); 42057ec681f3Smrg } else 42067ec681f3Smrg unreachable("Unhandled generation for binning bin size calculation"); 42077ec681f3Smrg 42087ec681f3Smrg if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) { 42097ec681f3Smrg struct radv_binning_settings settings = 42107ec681f3Smrg radv_get_binning_settings(pipeline->device->physical_device); 42117ec681f3Smrg 42127ec681f3Smrg const uint32_t pa_sc_binner_cntl_0 = 42137ec681f3Smrg S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | 42147ec681f3Smrg S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) | 42157ec681f3Smrg S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | 42167ec681f3Smrg S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | 42177ec681f3Smrg S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) | 42187ec681f3Smrg 
S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) | 42197ec681f3Smrg S_028C44_DISABLE_START_OF_PRIM(1) | 42207ec681f3Smrg S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1); 42217ec681f3Smrg 42227ec681f3Smrg pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0; 42237ec681f3Smrg } else 42247ec681f3Smrg radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo); 42257ec681f3Smrg} 422601e04c3fSmrg 422701e04c3fSmrgstatic void 4228ed98bd31Smayaradv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, 42297ec681f3Smrg const struct radv_pipeline *pipeline, 423001e04c3fSmrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 423101e04c3fSmrg const struct radv_graphics_pipeline_create_info *extra) 423201e04c3fSmrg{ 42337ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *vkds = 42347ec681f3Smrg radv_pipeline_get_depth_stencil_state(pCreateInfo); 42357ec681f3Smrg RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 42367ec681f3Smrg struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 42377ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 42387ec681f3Smrg struct radv_render_pass_attachment *attachment = NULL; 42397ec681f3Smrg uint32_t db_render_control = 0, db_render_override2 = 0; 42407ec681f3Smrg uint32_t db_render_override = 0; 42417ec681f3Smrg 42427ec681f3Smrg if (subpass->depth_stencil_attachment) 42437ec681f3Smrg attachment = pass->attachments + subpass->depth_stencil_attachment->attachment; 42447ec681f3Smrg 42457ec681f3Smrg bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format); 42467ec681f3Smrg 42477ec681f3Smrg if (vkds && has_depth_attachment) { 42487ec681f3Smrg /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */ 42497ec681f3Smrg db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2); 42507ec681f3Smrg 
42517ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) 42527ec681f3Smrg db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1); 42537ec681f3Smrg } 42547ec681f3Smrg 42557ec681f3Smrg if (attachment && extra) { 42567ec681f3Smrg db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear); 42577ec681f3Smrg db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); 42587ec681f3Smrg 42597ec681f3Smrg db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable); 42607ec681f3Smrg db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable); 42617ec681f3Smrg db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable); 42627ec681f3Smrg } 42637ec681f3Smrg 42647ec681f3Smrg db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 42657ec681f3Smrg S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); 42667ec681f3Smrg 42677ec681f3Smrg if (!pCreateInfo->pRasterizationState->depthClampEnable && ps->info.ps.writes_z) { 42687ec681f3Smrg /* From VK_EXT_depth_range_unrestricted spec: 42697ec681f3Smrg * 42707ec681f3Smrg * "The behavior described in Primitive Clipping still applies. 42717ec681f3Smrg * If depth clamping is disabled the depth values are still 42727ec681f3Smrg * clipped to 0 ≤ zc ≤ wc before the viewport transform. If 42737ec681f3Smrg * depth clamping is enabled the above equation is ignored and 42747ec681f3Smrg * the depth values are instead clamped to the VkViewport 42757ec681f3Smrg * minDepth and maxDepth values, which in the case of this 42767ec681f3Smrg * extension can be outside of the 0.0 to 1.0 range." 
42777ec681f3Smrg */ 42787ec681f3Smrg db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1); 42797ec681f3Smrg } 42807ec681f3Smrg 42817ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control); 42827ec681f3Smrg 42837ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, 2); 42847ec681f3Smrg radeon_emit(ctx_cs, db_render_override); 42857ec681f3Smrg radeon_emit(ctx_cs, db_render_override2); 428601e04c3fSmrg} 428701e04c3fSmrg 428801e04c3fSmrgstatic void 4289ed98bd31Smayaradv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs, 42907ec681f3Smrg const struct radv_pipeline *pipeline, 429101e04c3fSmrg const struct radv_blend_state *blend) 429201e04c3fSmrg{ 42937ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); 42947ec681f3Smrg radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); 42957ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); 429601e04c3fSmrg 42977ec681f3Smrg if (pipeline->device->physical_device->rad_info.has_rbplus) { 429801e04c3fSmrg 42997ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8); 43007ec681f3Smrg radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); 43017ec681f3Smrg } 430201e04c3fSmrg 43037ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); 430401e04c3fSmrg 43057ec681f3Smrg radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); 430601e04c3fSmrg} 430701e04c3fSmrg 430801e04c3fSmrgstatic void 4309ed98bd31Smayaradv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs, 43107ec681f3Smrg const struct radv_pipeline *pipeline, 431101e04c3fSmrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 431201e04c3fSmrg{ 43137ec681f3Smrg const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState; 43147ec681f3Smrg const VkConservativeRasterizationModeEXT mode = 
radv_get_conservative_raster_mode(vkraster); 43157ec681f3Smrg uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); 43167ec681f3Smrg 43177ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { 43187ec681f3Smrg /* Conservative rasterization. */ 43197ec681f3Smrg if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { 43207ec681f3Smrg pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | 43217ec681f3Smrg S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); 43227ec681f3Smrg 43237ec681f3Smrg if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { 43247ec681f3Smrg pa_sc_conservative_rast |= 43257ec681f3Smrg S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | 43267ec681f3Smrg S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | 43277ec681f3Smrg S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); 43287ec681f3Smrg } else { 43297ec681f3Smrg assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); 43307ec681f3Smrg pa_sc_conservative_rast |= 43317ec681f3Smrg S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | 43327ec681f3Smrg S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | 43337ec681f3Smrg S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); 43347ec681f3Smrg } 43357ec681f3Smrg } 43367ec681f3Smrg 43377ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 43387ec681f3Smrg pa_sc_conservative_rast); 43397ec681f3Smrg } 434001e04c3fSmrg} 434101e04c3fSmrg 434201e04c3fSmrgstatic void 4343ed98bd31Smayaradv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, 43447ec681f3Smrg const struct radv_pipeline *pipeline) 434501e04c3fSmrg{ 43467ec681f3Smrg const struct radv_multisample_state *ms = &pipeline->graphics.ms; 43477ec681f3Smrg 43487ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 43497ec681f3Smrg radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); 
43507ec681f3Smrg radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); 43517ec681f3Smrg 43527ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); 43537ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config); 43547ec681f3Smrg 43557ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, 2); 43567ec681f3Smrg radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_0); 43577ec681f3Smrg radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_1); 43587ec681f3Smrg 43597ec681f3Smrg /* The exclusion bits can be set to improve rasterization efficiency 43607ec681f3Smrg * if no sample lies on the pixel boundary (-8 sample offset). It's 43617ec681f3Smrg * currently always TRUE because the driver doesn't support 16 samples. 43627ec681f3Smrg */ 43637ec681f3Smrg bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7; 43647ec681f3Smrg radeon_set_context_reg( 43657ec681f3Smrg ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 43667ec681f3Smrg S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 436701e04c3fSmrg} 436801e04c3fSmrg 436901e04c3fSmrgstatic void 4370ed98bd31Smayaradv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, 43717ec681f3Smrg const struct radv_pipeline *pipeline) 437201e04c3fSmrg{ 43737ec681f3Smrg const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 43747ec681f3Smrg const struct radv_shader_variant *vs = pipeline->shaders[MESA_SHADER_TESS_EVAL] 43757ec681f3Smrg ? 
pipeline->shaders[MESA_SHADER_TESS_EVAL] 43767ec681f3Smrg : pipeline->shaders[MESA_SHADER_VERTEX]; 43777ec681f3Smrg unsigned vgt_primitiveid_en = 0; 43787ec681f3Smrg uint32_t vgt_gs_mode = 0; 43797ec681f3Smrg 43807ec681f3Smrg if (radv_pipeline_has_ngg(pipeline)) 43817ec681f3Smrg return; 43827ec681f3Smrg 43837ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) { 43847ec681f3Smrg const struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 43857ec681f3Smrg 43867ec681f3Smrg vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out, 43877ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class); 43887ec681f3Smrg } else if (outinfo->export_prim_id || vs->info.uses_prim_id) { 43897ec681f3Smrg vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A); 43907ec681f3Smrg vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1); 43917ec681f3Smrg } 43927ec681f3Smrg 43937ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); 43947ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); 439501e04c3fSmrg} 439601e04c3fSmrg 439701e04c3fSmrgstatic void 43987ec681f3Smrgradv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 43997ec681f3Smrg const struct radv_pipeline *pipeline, 44007ec681f3Smrg const struct radv_shader_variant *shader) 44017ec681f3Smrg{ 44027ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 44037ec681f3Smrg 44047ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); 44057ec681f3Smrg radeon_emit(cs, va >> 8); 44067ec681f3Smrg radeon_emit(cs, S_00B124_MEM_BASE(va >> 40)); 44077ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 44087ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 44097ec681f3Smrg 44107ec681f3Smrg const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 44117ec681f3Smrg unsigned clip_dist_mask, cull_dist_mask, total_mask; 44127ec681f3Smrg clip_dist_mask = outinfo->clip_dist_mask; 44137ec681f3Smrg cull_dist_mask 
= outinfo->cull_dist_mask; 44147ec681f3Smrg total_mask = clip_dist_mask | cull_dist_mask; 44157ec681f3Smrg 44167ec681f3Smrg bool writes_primitive_shading_rate = 44177ec681f3Smrg outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE; 44187ec681f3Smrg bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || 44197ec681f3Smrg outinfo->writes_viewport_index || writes_primitive_shading_rate; 44207ec681f3Smrg unsigned spi_vs_out_config, nparams; 44217ec681f3Smrg 44227ec681f3Smrg /* VS is required to export at least one param. */ 44237ec681f3Smrg nparams = MAX2(outinfo->param_exports, 1); 44247ec681f3Smrg spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1); 44257ec681f3Smrg 44267ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 44277ec681f3Smrg spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0); 44287ec681f3Smrg } 44297ec681f3Smrg 44307ec681f3Smrg radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config); 44317ec681f3Smrg 44327ec681f3Smrg radeon_set_context_reg( 44337ec681f3Smrg ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, 44347ec681f3Smrg S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | 44357ec681f3Smrg S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP 44367ec681f3Smrg : V_02870C_SPI_SHADER_NONE) | 44377ec681f3Smrg S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP 44387ec681f3Smrg : V_02870C_SPI_SHADER_NONE) | 44397ec681f3Smrg S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP 44407ec681f3Smrg : V_02870C_SPI_SHADER_NONE)); 44417ec681f3Smrg 44427ec681f3Smrg radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, 44437ec681f3Smrg S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | 44447ec681f3Smrg S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | 44457ec681f3Smrg S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | 44467ec681f3Smrg S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) | 44477ec681f3Smrg S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 44487ec681f3Smrg S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 44497ec681f3Smrg S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | 44507ec681f3Smrg S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | 44517ec681f3Smrg total_mask << 8 | clip_dist_mask); 44527ec681f3Smrg 44537ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) 44547ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); 44557ec681f3Smrg 44567ec681f3Smrg unsigned late_alloc_wave64, cu_mask; 44577ec681f3Smrg ac_compute_late_alloc(&pipeline->device->physical_device->rad_info, false, false, 44587ec681f3Smrg shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); 44597ec681f3Smrg 44607ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { 44617ec681f3Smrg radeon_set_sh_reg_idx(pipeline->device->physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, 44627ec681f3Smrg S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F)); 44637ec681f3Smrg radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); 44647ec681f3Smrg } 44657ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 44667ec681f3Smrg uint32_t oversub_pc_lines = late_alloc_wave64 ? 
pipeline->device->physical_device->rad_info.pc_lines / 4 : 0; 44677ec681f3Smrg gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.chip_class, oversub_pc_lines); 44687ec681f3Smrg } 446901e04c3fSmrg} 447001e04c3fSmrg 447101e04c3fSmrgstatic void 44727ec681f3Smrgradv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline, 44737ec681f3Smrg const struct radv_shader_variant *shader) 447401e04c3fSmrg{ 44757ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 447601e04c3fSmrg 44777ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); 44787ec681f3Smrg radeon_emit(cs, va >> 8); 44797ec681f3Smrg radeon_emit(cs, S_00B324_MEM_BASE(va >> 40)); 44807ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 44817ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 448201e04c3fSmrg} 448301e04c3fSmrg 448401e04c3fSmrgstatic void 44857ec681f3Smrgradv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline, 44867ec681f3Smrg const struct radv_shader_variant *shader) 448701e04c3fSmrg{ 44887ec681f3Smrg unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks; 44897ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 44907ec681f3Smrg uint32_t rsrc2 = shader->config.rsrc2; 449101e04c3fSmrg 44927ec681f3Smrg radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); 449301e04c3fSmrg 44947ec681f3Smrg rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks); 44957ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX7 && 44967ec681f3Smrg pipeline->device->physical_device->rad_info.family != CHIP_HAWAII) 44977ec681f3Smrg radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); 449801e04c3fSmrg 44997ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); 45007ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 45017ec681f3Smrg radeon_emit(cs, rsrc2); 450201e04c3fSmrg} 450301e04c3fSmrg 450401e04c3fSmrgstatic void 
45057ec681f3Smrgradv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 45067ec681f3Smrg const struct radv_pipeline *pipeline, 45077ec681f3Smrg const struct radv_shader_variant *shader) 45087ec681f3Smrg{ 45097ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 45107ec681f3Smrg gl_shader_stage es_type = 45117ec681f3Smrg radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 45127ec681f3Smrg struct radv_shader_variant *es = es_type == MESA_SHADER_TESS_EVAL 45137ec681f3Smrg ? pipeline->shaders[MESA_SHADER_TESS_EVAL] 45147ec681f3Smrg : pipeline->shaders[MESA_SHADER_VERTEX]; 45157ec681f3Smrg const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; 45167ec681f3Smrg 45177ec681f3Smrg radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); 45187ec681f3Smrg 45197ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); 45207ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 45217ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 45227ec681f3Smrg 45237ec681f3Smrg const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 45247ec681f3Smrg unsigned clip_dist_mask, cull_dist_mask, total_mask; 45257ec681f3Smrg clip_dist_mask = outinfo->clip_dist_mask; 45267ec681f3Smrg cull_dist_mask = outinfo->cull_dist_mask; 45277ec681f3Smrg total_mask = clip_dist_mask | cull_dist_mask; 45287ec681f3Smrg 45297ec681f3Smrg bool writes_primitive_shading_rate = 45307ec681f3Smrg outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE; 45317ec681f3Smrg bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || 45327ec681f3Smrg outinfo->writes_viewport_index || writes_primitive_shading_rate; 45337ec681f3Smrg bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id); 45347ec681f3Smrg bool break_wave_at_eoi = false; 45357ec681f3Smrg unsigned ge_cntl; 45367ec681f3Smrg unsigned nparams; 45377ec681f3Smrg 45387ec681f3Smrg if 
(es_type == MESA_SHADER_TESS_EVAL) { 45397ec681f3Smrg struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 45407ec681f3Smrg 45417ec681f3Smrg if (es_enable_prim_id || (gs && gs->info.uses_prim_id)) 45427ec681f3Smrg break_wave_at_eoi = true; 45437ec681f3Smrg } 45447ec681f3Smrg 45457ec681f3Smrg nparams = MAX2(outinfo->param_exports, 1); 45467ec681f3Smrg radeon_set_context_reg( 45477ec681f3Smrg ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, 45487ec681f3Smrg S_0286C4_VS_EXPORT_COUNT(nparams - 1) | S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0)); 45497ec681f3Smrg 45507ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT, 45517ec681f3Smrg S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP)); 45527ec681f3Smrg radeon_set_context_reg( 45537ec681f3Smrg ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, 45547ec681f3Smrg S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | 45557ec681f3Smrg S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP 45567ec681f3Smrg : V_02870C_SPI_SHADER_NONE) | 45577ec681f3Smrg S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP 45587ec681f3Smrg : V_02870C_SPI_SHADER_NONE) | 45597ec681f3Smrg S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP 45607ec681f3Smrg : V_02870C_SPI_SHADER_NONE)); 45617ec681f3Smrg 45627ec681f3Smrg radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, 45637ec681f3Smrg S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | 45647ec681f3Smrg S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | 45657ec681f3Smrg S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | 45667ec681f3Smrg S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) | 45677ec681f3Smrg S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 45687ec681f3Smrg S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 45697ec681f3Smrg S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | 45707ec681f3Smrg S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | 45717ec681f3Smrg total_mask << 8 | clip_dist_mask); 45727ec681f3Smrg 45737ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, 45747ec681f3Smrg S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | 45757ec681f3Smrg S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id)); 45767ec681f3Smrg 45777ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 45787ec681f3Smrg ngg_state->vgt_esgs_ring_itemsize); 45797ec681f3Smrg 45807ec681f3Smrg /* NGG specific registers. */ 45817ec681f3Smrg struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 45827ec681f3Smrg uint32_t gs_num_invocations = gs ? 
gs->info.gs.invocations : 1; 45837ec681f3Smrg 45847ec681f3Smrg radeon_set_context_reg( 45857ec681f3Smrg ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, 45867ec681f3Smrg S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | 45877ec681f3Smrg S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | 45887ec681f3Smrg S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); 45897ec681f3Smrg radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, 45907ec681f3Smrg S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts)); 45917ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL, 45927ec681f3Smrg S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) | 45937ec681f3Smrg S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */ 45947ec681f3Smrg radeon_set_context_reg( 45957ec681f3Smrg ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, 45967ec681f3Smrg S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) | 45977ec681f3Smrg S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance)); 45987ec681f3Smrg 45997ec681f3Smrg ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) | 46007ec681f3Smrg S_03096C_VERT_GRP_SIZE(ngg_state->enable_vertex_grouping ? ngg_state->hw_max_esverts : 256) | /* 256 = disable vertex grouping */ 46017ec681f3Smrg S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); 46027ec681f3Smrg 46037ec681f3Smrg /* Bug workaround for a possible hang with non-tessellation cases. 
46047ec681f3Smrg * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0 46057ec681f3Smrg * 46067ec681f3Smrg * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5 46077ec681f3Smrg */ 46087ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class == GFX10 && 46097ec681f3Smrg !radv_pipeline_has_tess(pipeline) && ngg_state->hw_max_esverts != 256) { 46107ec681f3Smrg ge_cntl &= C_03096C_VERT_GRP_SIZE; 46117ec681f3Smrg 46127ec681f3Smrg if (ngg_state->hw_max_esverts > 5) { 46137ec681f3Smrg ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5); 46147ec681f3Smrg } 46157ec681f3Smrg } 46167ec681f3Smrg 46177ec681f3Smrg radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl); 46187ec681f3Smrg 46197ec681f3Smrg unsigned late_alloc_wave64, cu_mask; 46207ec681f3Smrg ac_compute_late_alloc(&pipeline->device->physical_device->rad_info, true, shader->info.has_ngg_culling, 46217ec681f3Smrg shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); 46227ec681f3Smrg 46237ec681f3Smrg radeon_set_sh_reg_idx( 46247ec681f3Smrg pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, 46257ec681f3Smrg S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F)); 46267ec681f3Smrg radeon_set_sh_reg_idx( 46277ec681f3Smrg pipeline->device->physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, 46287ec681f3Smrg S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64)); 46297ec681f3Smrg 46307ec681f3Smrg uint32_t oversub_pc_lines = late_alloc_wave64 ? 
pipeline->device->physical_device->rad_info.pc_lines / 4 : 0; 46317ec681f3Smrg if (shader->info.has_ngg_culling) { 46327ec681f3Smrg unsigned oversub_factor = 2; 46337ec681f3Smrg 46347ec681f3Smrg if (outinfo->param_exports > 4) 46357ec681f3Smrg oversub_factor = 4; 46367ec681f3Smrg else if (outinfo->param_exports > 2) 46377ec681f3Smrg oversub_factor = 3; 46387ec681f3Smrg 46397ec681f3Smrg oversub_pc_lines *= oversub_factor; 46407ec681f3Smrg } 46417ec681f3Smrg 46427ec681f3Smrg gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.chip_class, oversub_pc_lines); 464301e04c3fSmrg} 464401e04c3fSmrg 464501e04c3fSmrgstatic void 46467ec681f3Smrgradv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline, 46477ec681f3Smrg const struct radv_shader_variant *shader) 464801e04c3fSmrg{ 46497ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 46507ec681f3Smrg 46517ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { 46527ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 46537ec681f3Smrg radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); 46547ec681f3Smrg } else { 46557ec681f3Smrg radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); 46567ec681f3Smrg } 46577ec681f3Smrg 46587ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2); 46597ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 46607ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 46617ec681f3Smrg } else { 46627ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); 46637ec681f3Smrg radeon_emit(cs, va >> 8); 46647ec681f3Smrg radeon_emit(cs, S_00B424_MEM_BASE(va >> 40)); 46657ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 46667ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 46677ec681f3Smrg } 46687ec681f3Smrg} 466901e04c3fSmrg 46707ec681f3Smrgstatic void 46717ec681f3Smrgradv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, struct 
radeon_cmdbuf *cs, 46727ec681f3Smrg const struct radv_pipeline *pipeline) 46737ec681f3Smrg{ 46747ec681f3Smrg struct radv_shader_variant *vs; 46757ec681f3Smrg 46767ec681f3Smrg /* Skip shaders merged into HS/GS */ 46777ec681f3Smrg vs = pipeline->shaders[MESA_SHADER_VERTEX]; 46787ec681f3Smrg if (!vs) 46797ec681f3Smrg return; 46807ec681f3Smrg 46817ec681f3Smrg if (vs->info.vs.as_ls) 46827ec681f3Smrg radv_pipeline_generate_hw_ls(cs, pipeline, vs); 46837ec681f3Smrg else if (vs->info.vs.as_es) 46847ec681f3Smrg radv_pipeline_generate_hw_es(cs, pipeline, vs); 46857ec681f3Smrg else if (vs->info.is_ngg) 46867ec681f3Smrg radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs); 46877ec681f3Smrg else 46887ec681f3Smrg radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs); 468901e04c3fSmrg} 469001e04c3fSmrg 469101e04c3fSmrgstatic void 46927ec681f3Smrgradv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 46937ec681f3Smrg const struct radv_pipeline *pipeline) 469401e04c3fSmrg{ 46957ec681f3Smrg struct radv_shader_variant *tes, *tcs; 46967ec681f3Smrg 46977ec681f3Smrg tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL]; 46987ec681f3Smrg tes = pipeline->shaders[MESA_SHADER_TESS_EVAL]; 46997ec681f3Smrg 47007ec681f3Smrg if (tes) { 47017ec681f3Smrg if (tes->info.is_ngg) { 47027ec681f3Smrg radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes); 47037ec681f3Smrg } else if (tes->info.tes.as_es) 47047ec681f3Smrg radv_pipeline_generate_hw_es(cs, pipeline, tes); 47057ec681f3Smrg else 47067ec681f3Smrg radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes); 47077ec681f3Smrg } 47087ec681f3Smrg 47097ec681f3Smrg radv_pipeline_generate_hw_hs(cs, pipeline, tcs); 47107ec681f3Smrg 47117ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && 47127ec681f3Smrg !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) { 47137ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, 47147ec681f3Smrg 
S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) | 47157ec681f3Smrg S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); 47167ec681f3Smrg } 47177ec681f3Smrg} 471801e04c3fSmrg 47197ec681f3Smrgstatic void 47207ec681f3Smrgradv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs, 47217ec681f3Smrg const struct radv_pipeline *pipeline, 47227ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 47237ec681f3Smrg{ 47247ec681f3Smrg struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL); 47257ec681f3Smrg unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0; 47267ec681f3Smrg unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches; 47277ec681f3Smrg unsigned ls_hs_config; 47287ec681f3Smrg 47297ec681f3Smrg num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints; 47307ec681f3Smrg num_tcs_output_cp = 47317ec681f3Smrg pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // TCS VERTICES OUT 47327ec681f3Smrg num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 47337ec681f3Smrg 47347ec681f3Smrg ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | 47357ec681f3Smrg S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); 47367ec681f3Smrg 47377ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { 47387ec681f3Smrg radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); 47397ec681f3Smrg } else { 47407ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); 47417ec681f3Smrg } 47427ec681f3Smrg 47437ec681f3Smrg switch (tes->info.tes.primitive_mode) { 47447ec681f3Smrg case GL_TRIANGLES: 47457ec681f3Smrg type = V_028B6C_TESS_TRIANGLE; 47467ec681f3Smrg break; 47477ec681f3Smrg case GL_QUADS: 47487ec681f3Smrg type = V_028B6C_TESS_QUAD; 47497ec681f3Smrg break; 47507ec681f3Smrg case GL_ISOLINES: 47517ec681f3Smrg type = V_028B6C_TESS_ISOLINE; 47527ec681f3Smrg break; 
47537ec681f3Smrg } 47547ec681f3Smrg 47557ec681f3Smrg switch (tes->info.tes.spacing) { 47567ec681f3Smrg case TESS_SPACING_EQUAL: 47577ec681f3Smrg partitioning = V_028B6C_PART_INTEGER; 47587ec681f3Smrg break; 47597ec681f3Smrg case TESS_SPACING_FRACTIONAL_ODD: 47607ec681f3Smrg partitioning = V_028B6C_PART_FRAC_ODD; 47617ec681f3Smrg break; 47627ec681f3Smrg case TESS_SPACING_FRACTIONAL_EVEN: 47637ec681f3Smrg partitioning = V_028B6C_PART_FRAC_EVEN; 47647ec681f3Smrg break; 47657ec681f3Smrg default: 47667ec681f3Smrg break; 47677ec681f3Smrg } 47687ec681f3Smrg 47697ec681f3Smrg bool ccw = tes->info.tes.ccw; 47707ec681f3Smrg const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = 47717ec681f3Smrg vk_find_struct_const(pCreateInfo->pTessellationState, 47727ec681f3Smrg PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); 47737ec681f3Smrg 47747ec681f3Smrg if (domain_origin_state && 47757ec681f3Smrg domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) 47767ec681f3Smrg ccw = !ccw; 47777ec681f3Smrg 47787ec681f3Smrg if (tes->info.tes.point_mode) 47797ec681f3Smrg topology = V_028B6C_OUTPUT_POINT; 47807ec681f3Smrg else if (tes->info.tes.primitive_mode == GL_ISOLINES) 47817ec681f3Smrg topology = V_028B6C_OUTPUT_LINE; 47827ec681f3Smrg else if (ccw) 47837ec681f3Smrg topology = V_028B6C_OUTPUT_TRIANGLE_CCW; 47847ec681f3Smrg else 47857ec681f3Smrg topology = V_028B6C_OUTPUT_TRIANGLE_CW; 47867ec681f3Smrg 47877ec681f3Smrg if (pipeline->device->physical_device->rad_info.has_distributed_tess) { 47887ec681f3Smrg if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI || 47897ec681f3Smrg pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10) 47907ec681f3Smrg distribution_mode = V_028B6C_TRAPEZOIDS; 47917ec681f3Smrg else 47927ec681f3Smrg distribution_mode = V_028B6C_DONUTS; 47937ec681f3Smrg } else 47947ec681f3Smrg distribution_mode = V_028B6C_NO_DIST; 47957ec681f3Smrg 47967ec681f3Smrg radeon_set_context_reg(ctx_cs, 
R_028B6C_VGT_TF_PARAM, 47977ec681f3Smrg S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | 47987ec681f3Smrg S_028B6C_TOPOLOGY(topology) | 47997ec681f3Smrg S_028B6C_DISTRIBUTION_MODE(distribution_mode)); 48007ec681f3Smrg} 480101e04c3fSmrg 48027ec681f3Smrgstatic void 48037ec681f3Smrgradv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 48047ec681f3Smrg const struct radv_pipeline *pipeline, 48057ec681f3Smrg const struct radv_shader_variant *gs) 48067ec681f3Smrg{ 48077ec681f3Smrg const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info; 48087ec681f3Smrg unsigned gs_max_out_vertices; 48097ec681f3Smrg const uint8_t *num_components; 48107ec681f3Smrg uint8_t max_stream; 48117ec681f3Smrg unsigned offset; 48127ec681f3Smrg uint64_t va; 48137ec681f3Smrg 48147ec681f3Smrg gs_max_out_vertices = gs->info.gs.vertices_out; 48157ec681f3Smrg max_stream = gs->info.gs.max_stream; 48167ec681f3Smrg num_components = gs->info.gs.num_stream_output_components; 48177ec681f3Smrg 48187ec681f3Smrg offset = num_components[0] * gs_max_out_vertices; 48197ec681f3Smrg 48207ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); 48217ec681f3Smrg radeon_emit(ctx_cs, offset); 48227ec681f3Smrg if (max_stream >= 1) 48237ec681f3Smrg offset += num_components[1] * gs_max_out_vertices; 48247ec681f3Smrg radeon_emit(ctx_cs, offset); 48257ec681f3Smrg if (max_stream >= 2) 48267ec681f3Smrg offset += num_components[2] * gs_max_out_vertices; 48277ec681f3Smrg radeon_emit(ctx_cs, offset); 48287ec681f3Smrg if (max_stream >= 3) 48297ec681f3Smrg offset += num_components[3] * gs_max_out_vertices; 48307ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); 48317ec681f3Smrg 48327ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); 48337ec681f3Smrg radeon_emit(ctx_cs, num_components[0]); 48347ec681f3Smrg radeon_emit(ctx_cs, (max_stream >= 1) ? 
num_components[1] : 0); 48357ec681f3Smrg radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); 48367ec681f3Smrg radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); 48377ec681f3Smrg 48387ec681f3Smrg uint32_t gs_num_invocations = gs->info.gs.invocations; 48397ec681f3Smrg radeon_set_context_reg( 48407ec681f3Smrg ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, 48417ec681f3Smrg S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); 48427ec681f3Smrg 48437ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 48447ec681f3Smrg gs_state->vgt_esgs_ring_itemsize); 48457ec681f3Smrg 48467ec681f3Smrg va = radv_shader_variant_get_va(gs); 48477ec681f3Smrg 48487ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { 48497ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 48507ec681f3Smrg radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); 48517ec681f3Smrg } else { 48527ec681f3Smrg radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); 48537ec681f3Smrg } 48547ec681f3Smrg 48557ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); 48567ec681f3Smrg radeon_emit(cs, gs->config.rsrc1); 48577ec681f3Smrg radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); 48587ec681f3Smrg 48597ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); 48607ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, 48617ec681f3Smrg gs_state->vgt_gs_max_prims_per_subgroup); 48627ec681f3Smrg } else { 48637ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); 48647ec681f3Smrg radeon_emit(cs, va >> 8); 48657ec681f3Smrg radeon_emit(cs, S_00B224_MEM_BASE(va >> 40)); 48667ec681f3Smrg radeon_emit(cs, gs->config.rsrc1); 48677ec681f3Smrg radeon_emit(cs, gs->config.rsrc2); 48687ec681f3Smrg } 48697ec681f3Smrg 48707ec681f3Smrg if 
(pipeline->device->physical_device->rad_info.chip_class >= GFX7) { 48717ec681f3Smrg radeon_set_sh_reg_idx( 48727ec681f3Smrg pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, 48737ec681f3Smrg S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); 48747ec681f3Smrg 48757ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 48767ec681f3Smrg radeon_set_sh_reg_idx( 48777ec681f3Smrg pipeline->device->physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, 48787ec681f3Smrg S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0)); 48797ec681f3Smrg } 48807ec681f3Smrg } 48817ec681f3Smrg 48827ec681f3Smrg radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader); 48837ec681f3Smrg} 488401e04c3fSmrg 48857ec681f3Smrgstatic void 48867ec681f3Smrgradv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 48877ec681f3Smrg const struct radv_pipeline *pipeline) 48887ec681f3Smrg{ 48897ec681f3Smrg struct radv_shader_variant *gs; 489001e04c3fSmrg 48917ec681f3Smrg gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 48927ec681f3Smrg if (!gs) 48937ec681f3Smrg return; 489401e04c3fSmrg 48957ec681f3Smrg if (gs->info.is_ngg) 48967ec681f3Smrg radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs); 48977ec681f3Smrg else 48987ec681f3Smrg radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs); 489901e04c3fSmrg 49007ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); 490101e04c3fSmrg} 490201e04c3fSmrg 49037ec681f3Smrgstatic uint32_t 49047ec681f3Smrgoffset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16) 49057ec681f3Smrg{ 49067ec681f3Smrg uint32_t ps_input_cntl; 49077ec681f3Smrg if (offset <= AC_EXP_PARAM_OFFSET_31) { 49087ec681f3Smrg ps_input_cntl = S_028644_OFFSET(offset); 49097ec681f3Smrg if (flat_shade || explicit) 49107ec681f3Smrg ps_input_cntl |= S_028644_FLAT_SHADE(1); 49117ec681f3Smrg if (explicit) { 
49127ec681f3Smrg /* Force parameter cache to be read in passthrough 49137ec681f3Smrg * mode. 49147ec681f3Smrg */ 49157ec681f3Smrg ps_input_cntl |= S_028644_OFFSET(1 << 5); 49167ec681f3Smrg } 49177ec681f3Smrg if (float16) { 49187ec681f3Smrg ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1); 49197ec681f3Smrg } 49207ec681f3Smrg } else { 49217ec681f3Smrg /* The input is a DEFAULT_VAL constant. */ 49227ec681f3Smrg assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); 49237ec681f3Smrg offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; 49247ec681f3Smrg ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); 49257ec681f3Smrg } 49267ec681f3Smrg return ps_input_cntl; 492701e04c3fSmrg} 492801e04c3fSmrg 492901e04c3fSmrgstatic void 49307ec681f3Smrgradv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, const struct radv_pipeline *pipeline) 49317ec681f3Smrg{ 49327ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 49337ec681f3Smrg const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 49347ec681f3Smrg uint32_t ps_input_cntl[32]; 49357ec681f3Smrg 49367ec681f3Smrg unsigned ps_offset = 0; 49377ec681f3Smrg 49387ec681f3Smrg if (ps->info.ps.prim_id_input) { 49397ec681f3Smrg unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; 49407ec681f3Smrg if (vs_offset != AC_EXP_PARAM_UNDEFINED) { 49417ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); 49427ec681f3Smrg ++ps_offset; 49437ec681f3Smrg } 49447ec681f3Smrg } 49457ec681f3Smrg 49467ec681f3Smrg if (ps->info.ps.layer_input) { 49477ec681f3Smrg unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; 49487ec681f3Smrg if (vs_offset != AC_EXP_PARAM_UNDEFINED) 49497ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); 49507ec681f3Smrg else 49517ec681f3Smrg ps_input_cntl[ps_offset] = 49527ec681f3Smrg 
offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false); 49537ec681f3Smrg ++ps_offset; 49547ec681f3Smrg } 49557ec681f3Smrg 49567ec681f3Smrg if (ps->info.ps.viewport_index_input) { 49577ec681f3Smrg unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT]; 49587ec681f3Smrg if (vs_offset != AC_EXP_PARAM_UNDEFINED) 49597ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); 49607ec681f3Smrg else 49617ec681f3Smrg ps_input_cntl[ps_offset] = 49627ec681f3Smrg offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false); 49637ec681f3Smrg ++ps_offset; 49647ec681f3Smrg } 49657ec681f3Smrg 49667ec681f3Smrg if (ps->info.ps.has_pcoord) { 49677ec681f3Smrg unsigned val; 49687ec681f3Smrg val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); 49697ec681f3Smrg ps_input_cntl[ps_offset] = val; 49707ec681f3Smrg ps_offset++; 49717ec681f3Smrg } 49727ec681f3Smrg 49737ec681f3Smrg if (ps->info.ps.num_input_clips_culls) { 49747ec681f3Smrg unsigned vs_offset; 49757ec681f3Smrg 49767ec681f3Smrg vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; 49777ec681f3Smrg if (vs_offset != AC_EXP_PARAM_UNDEFINED) { 49787ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false); 49797ec681f3Smrg ++ps_offset; 49807ec681f3Smrg } 49817ec681f3Smrg 49827ec681f3Smrg vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; 49837ec681f3Smrg if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.ps.num_input_clips_culls > 4) { 49847ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false); 49857ec681f3Smrg ++ps_offset; 49867ec681f3Smrg } 49877ec681f3Smrg } 49887ec681f3Smrg 49897ec681f3Smrg for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) { 49907ec681f3Smrg unsigned vs_offset; 49917ec681f3Smrg bool flat_shade; 49927ec681f3Smrg bool explicit; 49937ec681f3Smrg bool float16; 49947ec681f3Smrg if (!(ps->info.ps.input_mask & (1u << 
i))) 49957ec681f3Smrg continue; 49967ec681f3Smrg 49977ec681f3Smrg vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i]; 49987ec681f3Smrg if (vs_offset == AC_EXP_PARAM_UNDEFINED) { 49997ec681f3Smrg ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20); 50007ec681f3Smrg ++ps_offset; 50017ec681f3Smrg continue; 50027ec681f3Smrg } 50037ec681f3Smrg 50047ec681f3Smrg flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset)); 50057ec681f3Smrg explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset)); 50067ec681f3Smrg float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset)); 50077ec681f3Smrg 50087ec681f3Smrg ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16); 50097ec681f3Smrg ++ps_offset; 50107ec681f3Smrg } 50117ec681f3Smrg 50127ec681f3Smrg if (ps_offset) { 50137ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); 50147ec681f3Smrg for (unsigned i = 0; i < ps_offset; i++) { 50157ec681f3Smrg radeon_emit(ctx_cs, ps_input_cntl[i]); 50167ec681f3Smrg } 50177ec681f3Smrg } 501801e04c3fSmrg} 501901e04c3fSmrg 502001e04c3fSmrgstatic uint32_t 502101e04c3fSmrgradv_compute_db_shader_control(const struct radv_device *device, 50227ec681f3Smrg const struct radv_pipeline *pipeline, 502301e04c3fSmrg const struct radv_shader_variant *ps) 502401e04c3fSmrg{ 50257ec681f3Smrg unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z; 50267ec681f3Smrg unsigned z_order; 50277ec681f3Smrg if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory) 50287ec681f3Smrg z_order = V_02880C_EARLY_Z_THEN_LATE_Z; 50297ec681f3Smrg else 50307ec681f3Smrg z_order = V_02880C_LATE_Z; 50317ec681f3Smrg 50327ec681f3Smrg if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER) 50337ec681f3Smrg conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z; 50347ec681f3Smrg else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS) 50357ec681f3Smrg conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z; 
50367ec681f3Smrg 50377ec681f3Smrg bool disable_rbplus = device->physical_device->rad_info.has_rbplus && 50387ec681f3Smrg !device->physical_device->rad_info.rbplus_allowed; 50397ec681f3Smrg 50407ec681f3Smrg /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled 50417ec681f3Smrg * but this appears to break Project Cars (DXVK). See 50427ec681f3Smrg * https://bugs.freedesktop.org/show_bug.cgi?id=109401 50437ec681f3Smrg */ 50447ec681f3Smrg bool mask_export_enable = ps->info.ps.writes_sample_mask; 50457ec681f3Smrg 50467ec681f3Smrg return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) | 50477ec681f3Smrg S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) | 50487ec681f3Smrg S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) | 50497ec681f3Smrg S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) | 50507ec681f3Smrg S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) | 50517ec681f3Smrg S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) | 50527ec681f3Smrg S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) | 50537ec681f3Smrg S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) | 50547ec681f3Smrg S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) | 50557ec681f3Smrg S_02880C_DUAL_QUAD_DISABLE(disable_rbplus); 505601e04c3fSmrg} 505701e04c3fSmrg 505801e04c3fSmrgstatic void 50597ec681f3Smrgradv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 50607ec681f3Smrg struct radv_pipeline *pipeline) 506101e04c3fSmrg{ 50627ec681f3Smrg struct radv_shader_variant *ps; 50637ec681f3Smrg uint64_t va; 50647ec681f3Smrg assert(pipeline->shaders[MESA_SHADER_FRAGMENT]); 506501e04c3fSmrg 50667ec681f3Smrg ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 50677ec681f3Smrg va = radv_shader_variant_get_va(ps); 506801e04c3fSmrg 50697ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); 50707ec681f3Smrg radeon_emit(cs, va >> 8); 50717ec681f3Smrg radeon_emit(cs, 
S_00B024_MEM_BASE(va >> 40)); 50727ec681f3Smrg radeon_emit(cs, ps->config.rsrc1); 50737ec681f3Smrg radeon_emit(cs, ps->config.rsrc2); 507401e04c3fSmrg 50757ec681f3Smrg radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, 50767ec681f3Smrg radv_compute_db_shader_control(pipeline->device, pipeline, ps)); 507701e04c3fSmrg 50787ec681f3Smrg radeon_set_context_reg_seq(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, 2); 50797ec681f3Smrg radeon_emit(ctx_cs, ps->config.spi_ps_input_ena); 50807ec681f3Smrg radeon_emit(ctx_cs, ps->config.spi_ps_input_addr); 508101e04c3fSmrg 50827ec681f3Smrg radeon_set_context_reg( 50837ec681f3Smrg ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, 50847ec681f3Smrg S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | S_0286D8_PS_W32_EN(ps->info.wave_size == 32)); 508501e04c3fSmrg 50867ec681f3Smrg radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); 508701e04c3fSmrg 50887ec681f3Smrg radeon_set_context_reg( 50897ec681f3Smrg ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, 50907ec681f3Smrg ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil, 50917ec681f3Smrg ps->info.ps.writes_sample_mask)); 50927ec681f3Smrg} 509301e04c3fSmrg 50947ec681f3Smrgstatic void 50957ec681f3Smrgradv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, 50967ec681f3Smrg const struct radv_pipeline *pipeline) 50977ec681f3Smrg{ 50987ec681f3Smrg if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 || 50997ec681f3Smrg pipeline->device->physical_device->rad_info.chip_class >= GFX10) 51007ec681f3Smrg return; 51017ec681f3Smrg 51027ec681f3Smrg unsigned vtx_reuse_depth = 30; 51037ec681f3Smrg if (radv_pipeline_has_tess(pipeline) && 51047ec681f3Smrg radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == 51057ec681f3Smrg TESS_SPACING_FRACTIONAL_ODD) { 51067ec681f3Smrg vtx_reuse_depth = 14; 51077ec681f3Smrg } 51087ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 51097ec681f3Smrg 
S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); 51107ec681f3Smrg} 511101e04c3fSmrg 51127ec681f3Smrgstatic void 51137ec681f3Smrgradv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs, 51147ec681f3Smrg const struct radv_pipeline *pipeline) 51157ec681f3Smrg{ 51167ec681f3Smrg uint32_t stages = 0; 51177ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 51187ec681f3Smrg stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); 51197ec681f3Smrg 51207ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) 51217ec681f3Smrg stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1); 51227ec681f3Smrg else if (radv_pipeline_has_ngg(pipeline)) 51237ec681f3Smrg stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS); 51247ec681f3Smrg else 51257ec681f3Smrg stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); 51267ec681f3Smrg } else if (radv_pipeline_has_gs(pipeline)) { 51277ec681f3Smrg stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); 51287ec681f3Smrg } else if (radv_pipeline_has_ngg(pipeline)) { 51297ec681f3Smrg stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); 51307ec681f3Smrg } 51317ec681f3Smrg 51327ec681f3Smrg if (radv_pipeline_has_ngg(pipeline)) { 51337ec681f3Smrg stages |= S_028B54_PRIMGEN_EN(1); 51347ec681f3Smrg if (pipeline->streamout_shader) 51357ec681f3Smrg stages |= S_028B54_NGG_WAVE_ID_EN(1); 51367ec681f3Smrg if (radv_pipeline_has_ngg_passthrough(pipeline)) 51377ec681f3Smrg stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1); 51387ec681f3Smrg } else if (radv_pipeline_has_gs(pipeline)) { 51397ec681f3Smrg stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); 51407ec681f3Smrg } 51417ec681f3Smrg 51427ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) 51437ec681f3Smrg stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); 51447ec681f3Smrg 51457ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { 51467ec681f3Smrg uint8_t hs_size = 64, gs_size = 64, vs_size = 64; 51477ec681f3Smrg 
51487ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) 51497ec681f3Smrg hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size; 51507ec681f3Smrg 51517ec681f3Smrg if (pipeline->shaders[MESA_SHADER_GEOMETRY]) { 51527ec681f3Smrg vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size; 51537ec681f3Smrg if (radv_pipeline_has_gs_copy_shader(pipeline)) 51547ec681f3Smrg vs_size = pipeline->gs_copy_shader->info.wave_size; 51557ec681f3Smrg } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) 51567ec681f3Smrg vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size; 51577ec681f3Smrg else if (pipeline->shaders[MESA_SHADER_VERTEX]) 51587ec681f3Smrg vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size; 51597ec681f3Smrg 51607ec681f3Smrg if (radv_pipeline_has_ngg(pipeline)) { 51617ec681f3Smrg assert(!radv_pipeline_has_gs_copy_shader(pipeline)); 51627ec681f3Smrg gs_size = vs_size; 51637ec681f3Smrg } 51647ec681f3Smrg 51657ec681f3Smrg /* legacy GS only supports Wave64 */ 51667ec681f3Smrg stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) | 51677ec681f3Smrg S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) | 51687ec681f3Smrg S_028B54_VS_W32_EN(vs_size == 32 ? 
1 : 0); 51697ec681f3Smrg } 51707ec681f3Smrg 51717ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages); 517201e04c3fSmrg} 517301e04c3fSmrg 517401e04c3fSmrgstatic void 51757ec681f3Smrgradv_pipeline_generate_cliprect_rule(struct radeon_cmdbuf *ctx_cs, 51767ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 517701e04c3fSmrg{ 51787ec681f3Smrg const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info = 51797ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT); 51807ec681f3Smrg uint32_t cliprect_rule = 0; 51817ec681f3Smrg 51827ec681f3Smrg if (!discard_rectangle_info) { 51837ec681f3Smrg cliprect_rule = 0xffff; 51847ec681f3Smrg } else { 51857ec681f3Smrg for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) { 51867ec681f3Smrg /* Interpret i as a bitmask, and then set the bit in 51877ec681f3Smrg * the mask if that combination of rectangles in which 51887ec681f3Smrg * the pixel is contained should pass the cliprect 51897ec681f3Smrg * test. 
51907ec681f3Smrg */ 51917ec681f3Smrg unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1); 51927ec681f3Smrg 51937ec681f3Smrg if (discard_rectangle_info->discardRectangleMode == 51947ec681f3Smrg VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT && 51957ec681f3Smrg !relevant_subset) 51967ec681f3Smrg continue; 51977ec681f3Smrg 51987ec681f3Smrg if (discard_rectangle_info->discardRectangleMode == 51997ec681f3Smrg VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT && 52007ec681f3Smrg relevant_subset) 52017ec681f3Smrg continue; 52027ec681f3Smrg 52037ec681f3Smrg cliprect_rule |= 1u << i; 52047ec681f3Smrg } 52057ec681f3Smrg } 52067ec681f3Smrg 52077ec681f3Smrg radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule); 52087ec681f3Smrg} 520901e04c3fSmrg 52107ec681f3Smrgstatic void 52117ec681f3Smrggfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) 52127ec681f3Smrg{ 52137ec681f3Smrg bool break_wave_at_eoi = false; 52147ec681f3Smrg unsigned primgroup_size; 52157ec681f3Smrg unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */ 52167ec681f3Smrg 52177ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 52187ec681f3Smrg primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 52197ec681f3Smrg } else if (radv_pipeline_has_gs(pipeline)) { 52207ec681f3Smrg const struct gfx9_gs_info *gs_state = 52217ec681f3Smrg &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; 52227ec681f3Smrg unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl; 52237ec681f3Smrg primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); 52247ec681f3Smrg } else { 52257ec681f3Smrg primgroup_size = 128; /* recommended without a GS and tess */ 52267ec681f3Smrg } 52277ec681f3Smrg 52287ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 52297ec681f3Smrg if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || 52307ec681f3Smrg radv_get_shader(pipeline, 
MESA_SHADER_TESS_EVAL)->info.uses_prim_id) 52317ec681f3Smrg break_wave_at_eoi = true; 52327ec681f3Smrg } 52337ec681f3Smrg 52347ec681f3Smrg radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, 52357ec681f3Smrg S_03096C_PRIM_GRP_SIZE(primgroup_size) | 52367ec681f3Smrg S_03096C_VERT_GRP_SIZE(vertgroup_size) | 52377ec681f3Smrg S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ | 52387ec681f3Smrg S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi)); 523901e04c3fSmrg} 524001e04c3fSmrg 52417ec681f3Smrgstatic void 52427ec681f3Smrgradv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, 52437ec681f3Smrg const struct radv_pipeline *pipeline, 52447ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 52457ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra) 524601e04c3fSmrg{ 52477ec681f3Smrg uint32_t gs_out; 52487ec681f3Smrg 52497ec681f3Smrg if (radv_pipeline_has_gs(pipeline)) { 52507ec681f3Smrg gs_out = 52517ec681f3Smrg si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim); 52527ec681f3Smrg } else if (radv_pipeline_has_tess(pipeline)) { 52537ec681f3Smrg if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) { 52547ec681f3Smrg gs_out = V_028A6C_POINTLIST; 52557ec681f3Smrg } else { 52567ec681f3Smrg gs_out = si_conv_gl_prim_to_gs_out( 52577ec681f3Smrg pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode); 52587ec681f3Smrg } 52597ec681f3Smrg } else { 52607ec681f3Smrg gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology); 52617ec681f3Smrg } 52627ec681f3Smrg 52637ec681f3Smrg if (extra && extra->use_rectlist) { 52647ec681f3Smrg gs_out = V_028A6C_TRISTRIP; 52657ec681f3Smrg if (radv_pipeline_has_ngg(pipeline)) 52667ec681f3Smrg gs_out = V_028A6C_RECTLIST; 52677ec681f3Smrg } 52687ec681f3Smrg 52697ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); 52707ec681f3Smrg} 527101e04c3fSmrg 52727ec681f3Smrgstatic bool 
52737ec681f3Smrggfx103_pipeline_vrs_coarse_shading(const struct radv_pipeline *pipeline) 52747ec681f3Smrg{ 52757ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 52767ec681f3Smrg struct radv_device *device = pipeline->device; 527701e04c3fSmrg 52787ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING) 52797ec681f3Smrg return false; 528001e04c3fSmrg 52817ec681f3Smrg if (!ps->info.ps.allow_flat_shading) 52827ec681f3Smrg return false; 528301e04c3fSmrg 52847ec681f3Smrg return true; 528501e04c3fSmrg} 528601e04c3fSmrg 52877ec681f3Smrgstatic void 52887ec681f3Smrggfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs, 52897ec681f3Smrg const struct radv_pipeline *pipeline, 52907ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo) 529101e04c3fSmrg{ 52927ec681f3Smrg uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU; 52937ec681f3Smrg uint8_t rate_x = 0, rate_y = 0; 52947ec681f3Smrg bool enable_vrs = false; 52957ec681f3Smrg 52967ec681f3Smrg if (vk_find_struct_const(pCreateInfo->pNext, 52977ec681f3Smrg PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) || 52987ec681f3Smrg radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) { 52997ec681f3Smrg /* Enable draw call VRS because it's explicitly requested. */ 53007ec681f3Smrg enable_vrs = true; 53017ec681f3Smrg } else if (gfx103_pipeline_vrs_coarse_shading(pipeline)) { 53027ec681f3Smrg /* Enable VRS coarse shading 2x2 if the driver determined that 53037ec681f3Smrg * it's safe to enable. 53047ec681f3Smrg */ 53057ec681f3Smrg mode = V_028064_VRS_COMB_MODE_OVERRIDE; 53067ec681f3Smrg rate_x = rate_y = 1; 53077ec681f3Smrg } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) { 53087ec681f3Smrg /* Force enable vertex VRS if requested by the user. 
*/ 53097ec681f3Smrg radeon_set_context_reg( 53107ec681f3Smrg ctx_cs, R_028848_PA_CL_VRS_CNTL, 53117ec681f3Smrg S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 53127ec681f3Smrg S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 53137ec681f3Smrg 53147ec681f3Smrg /* If the shader is using discard, turn off coarse shading 53157ec681f3Smrg * because discard at 2x2 pixel granularity degrades quality 53167ec681f3Smrg * too much. MIN allows sample shading but not coarse shading. 53177ec681f3Smrg */ 53187ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 53197ec681f3Smrg 53207ec681f3Smrg mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 53217ec681f3Smrg } 53227ec681f3Smrg 53237ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, S_028A98_EN_VRS_RATE(enable_vrs)); 53247ec681f3Smrg 53257ec681f3Smrg radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL, 53267ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 53277ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_X(rate_x) | 53287ec681f3Smrg S_028064_VRS_OVERRIDE_RATE_Y(rate_y)); 53297ec681f3Smrg} 533001e04c3fSmrg 53317ec681f3Smrgstatic void 53327ec681f3Smrgradv_pipeline_generate_pm4(struct radv_pipeline *pipeline, 53337ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 53347ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra, 53357ec681f3Smrg const struct radv_blend_state *blend) 53367ec681f3Smrg{ 53377ec681f3Smrg struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs; 53387ec681f3Smrg struct radeon_cmdbuf *cs = &pipeline->cs; 53397ec681f3Smrg 53407ec681f3Smrg cs->max_dw = 64; 53417ec681f3Smrg ctx_cs->max_dw = 256; 53427ec681f3Smrg cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); 53437ec681f3Smrg ctx_cs->buf = cs->buf + cs->max_dw; 53447ec681f3Smrg 53457ec681f3Smrg radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra); 
53467ec681f3Smrg radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend); 53477ec681f3Smrg radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo); 53487ec681f3Smrg radv_pipeline_generate_multisample_state(ctx_cs, pipeline); 53497ec681f3Smrg radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); 53507ec681f3Smrg radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline); 53517ec681f3Smrg 53527ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 53537ec681f3Smrg radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline); 53547ec681f3Smrg radv_pipeline_generate_tess_state(ctx_cs, pipeline, pCreateInfo); 53557ec681f3Smrg } 53567ec681f3Smrg 53577ec681f3Smrg radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline); 53587ec681f3Smrg radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline); 53597ec681f3Smrg radv_pipeline_generate_ps_inputs(ctx_cs, pipeline); 53607ec681f3Smrg radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline); 53617ec681f3Smrg radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline); 53627ec681f3Smrg radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo); 53637ec681f3Smrg radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra); 53647ec681f3Smrg 53657ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && 53667ec681f3Smrg !radv_pipeline_has_ngg(pipeline)) 53677ec681f3Smrg gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline); 53687ec681f3Smrg 53697ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) 53707ec681f3Smrg gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo); 53717ec681f3Smrg 53727ec681f3Smrg pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4); 53737ec681f3Smrg 53747ec681f3Smrg assert(ctx_cs->cdw <= ctx_cs->max_dw); 53757ec681f3Smrg assert(cs->cdw <= cs->max_dw); 53767ec681f3Smrg} 537701e04c3fSmrg 53787ec681f3Smrgstatic void 53797ec681f3Smrgradv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline, 
53807ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 53817ec681f3Smrg const struct radv_pipeline_key *key) 53827ec681f3Smrg{ 53837ec681f3Smrg const struct radv_shader_info *info = &radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info; 53847ec681f3Smrg if (!key->vs.dynamic_input_state) { 53857ec681f3Smrg const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; 53867ec681f3Smrg 53877ec681f3Smrg for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { 53887ec681f3Smrg const VkVertexInputBindingDescription *desc = &vi_info->pVertexBindingDescriptions[i]; 53897ec681f3Smrg 53907ec681f3Smrg pipeline->binding_stride[desc->binding] = desc->stride; 53917ec681f3Smrg } 53927ec681f3Smrg 53937ec681f3Smrg for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 53947ec681f3Smrg const VkVertexInputAttributeDescription *desc = &vi_info->pVertexAttributeDescriptions[i]; 53957ec681f3Smrg 53967ec681f3Smrg uint32_t end = desc->offset + vk_format_get_blocksize(desc->format); 53977ec681f3Smrg pipeline->attrib_ends[desc->location] = end; 53987ec681f3Smrg if (pipeline->binding_stride[desc->binding]) 53997ec681f3Smrg pipeline->attrib_index_offset[desc->location] = 54007ec681f3Smrg desc->offset / pipeline->binding_stride[desc->binding]; 54017ec681f3Smrg pipeline->attrib_bindings[desc->location] = desc->binding; 54027ec681f3Smrg } 54037ec681f3Smrg } 54047ec681f3Smrg 54057ec681f3Smrg pipeline->use_per_attribute_vb_descs = info->vs.use_per_attribute_vb_descs; 54067ec681f3Smrg pipeline->last_vertex_attrib_bit = util_last_bit(info->vs.vb_desc_usage_mask); 54077ec681f3Smrg if (pipeline->shaders[MESA_SHADER_VERTEX]) 54087ec681f3Smrg pipeline->next_vertex_stage = MESA_SHADER_VERTEX; 54097ec681f3Smrg else if (pipeline->shaders[MESA_SHADER_TESS_CTRL]) 54107ec681f3Smrg pipeline->next_vertex_stage = MESA_SHADER_TESS_CTRL; 54117ec681f3Smrg else 54127ec681f3Smrg pipeline->next_vertex_stage = MESA_SHADER_GEOMETRY; 54137ec681f3Smrg if 
(pipeline->next_vertex_stage == MESA_SHADER_VERTEX) { 54147ec681f3Smrg const struct radv_shader_variant *vs_shader = pipeline->shaders[MESA_SHADER_VERTEX]; 54157ec681f3Smrg pipeline->can_use_simple_input = vs_shader->info.is_ngg == pipeline->device->physical_device->use_ngg && 54167ec681f3Smrg vs_shader->info.wave_size == pipeline->device->physical_device->ge_wave_size; 54177ec681f3Smrg } else { 54187ec681f3Smrg pipeline->can_use_simple_input = false; 54197ec681f3Smrg } 54207ec681f3Smrg if (info->vs.dynamic_inputs) 54217ec681f3Smrg pipeline->vb_desc_usage_mask = BITFIELD_MASK(pipeline->last_vertex_attrib_bit); 54227ec681f3Smrg else 54237ec681f3Smrg pipeline->vb_desc_usage_mask = info->vs.vb_desc_usage_mask; 54247ec681f3Smrg pipeline->vb_desc_alloc_size = util_bitcount(pipeline->vb_desc_usage_mask) * 16; 54257ec681f3Smrg} 542601e04c3fSmrg 54277ec681f3Smrgstatic struct radv_shader_variant * 54287ec681f3Smrgradv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline) 54297ec681f3Smrg{ 54307ec681f3Smrg int i; 543101e04c3fSmrg 54327ec681f3Smrg for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) { 54337ec681f3Smrg struct radv_shader_variant *shader = radv_get_shader(pipeline, i); 543401e04c3fSmrg 54357ec681f3Smrg if (shader && shader->info.so.num_outputs > 0) 54367ec681f3Smrg return shader; 54377ec681f3Smrg } 543801e04c3fSmrg 54397ec681f3Smrg return NULL; 54407ec681f3Smrg} 544101e04c3fSmrg 54427ec681f3Smrgstatic bool 54437ec681f3Smrgradv_shader_need_indirect_descriptor_sets(struct radv_pipeline *pipeline, gl_shader_stage stage) 54447ec681f3Smrg{ 54457ec681f3Smrg struct radv_userdata_info *loc = 54467ec681f3Smrg radv_lookup_user_sgpr(pipeline, stage, AC_UD_INDIRECT_DESCRIPTOR_SETS); 54477ec681f3Smrg return loc->sgpr_idx != -1; 544801e04c3fSmrg} 544901e04c3fSmrg 545001e04c3fSmrgstatic void 54517ec681f3Smrgradv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline) 545201e04c3fSmrg{ 54537ec681f3Smrg struct radv_device *device = pipeline->device; 
54547ec681f3Smrg 54557ec681f3Smrg for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 54567ec681f3Smrg pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0( 54577ec681f3Smrg pipeline, i, device->physical_device->rad_info.chip_class); 54587ec681f3Smrg 54597ec681f3Smrg if (pipeline->shaders[i]) { 54607ec681f3Smrg pipeline->need_indirect_descriptor_sets |= 54617ec681f3Smrg radv_shader_need_indirect_descriptor_sets(pipeline, i); 54627ec681f3Smrg } 54637ec681f3Smrg } 54647ec681f3Smrg 54657ec681f3Smrg struct radv_userdata_info *loc = 54667ec681f3Smrg radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE); 54677ec681f3Smrg if (loc->sgpr_idx != -1) { 54687ec681f3Smrg pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX]; 54697ec681f3Smrg pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4; 54707ec681f3Smrg pipeline->graphics.vtx_emit_num = loc->num_sgprs; 54717ec681f3Smrg pipeline->graphics.uses_drawid = 54727ec681f3Smrg radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id; 54737ec681f3Smrg pipeline->graphics.uses_baseinstance = 54747ec681f3Smrg radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance; 54757ec681f3Smrg } 54767ec681f3Smrg} 5477ed98bd31Smaya 54787ec681f3Smrgstatic VkResult 54797ec681f3Smrgradv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, 54807ec681f3Smrg struct radv_pipeline_cache *cache, 54817ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 54827ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra) 54837ec681f3Smrg{ 54847ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); 54857ec681f3Smrg VkResult result; 54867ec681f3Smrg 54877ec681f3Smrg pipeline->device = device; 54887ec681f3Smrg pipeline->graphics.last_vgt_api_stage = MESA_SHADER_NONE; 54897ec681f3Smrg 54907ec681f3Smrg struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra); 
54917ec681f3Smrg 54927ec681f3Smrg const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback = 54937ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); 54947ec681f3Smrg radv_init_feedback(creation_feedback); 54957ec681f3Smrg 54967ec681f3Smrg VkPipelineCreationFeedbackEXT *pipeline_feedback = 54977ec681f3Smrg creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL; 54987ec681f3Smrg 54997ec681f3Smrg const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 55007ec681f3Smrg 0, 55017ec681f3Smrg }; 55027ec681f3Smrg VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0}; 55037ec681f3Smrg for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 55047ec681f3Smrg gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; 55057ec681f3Smrg pStages[stage] = &pCreateInfo->pStages[i]; 55067ec681f3Smrg if (creation_feedback) 55077ec681f3Smrg stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i]; 55087ec681f3Smrg } 55097ec681f3Smrg 55107ec681f3Smrg struct radv_pipeline_key key = 55117ec681f3Smrg radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend); 55127ec681f3Smrg 55137ec681f3Smrg result = radv_create_shaders(pipeline, pipeline_layout, device, cache, &key, pStages, 55147ec681f3Smrg pCreateInfo->flags, NULL, pipeline_feedback, stage_feedbacks); 55157ec681f3Smrg if (result != VK_SUCCESS) 55167ec681f3Smrg return result; 55177ec681f3Smrg 55187ec681f3Smrg pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); 55197ec681f3Smrg radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo); 55207ec681f3Smrg radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra); 55217ec681f3Smrg radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra); 55227ec681f3Smrg radv_pipeline_init_raster_state(pipeline, pCreateInfo); 55237ec681f3Smrg radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo); 55247ec681f3Smrg 
55257ec681f3Smrg if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) 55267ec681f3Smrg gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo); 55277ec681f3Smrg 55287ec681f3Smrg /* Ensure that some export memory is always allocated, for two reasons: 55297ec681f3Smrg * 55307ec681f3Smrg * 1) Correctness: The hardware ignores the EXEC mask if no export 55317ec681f3Smrg * memory is allocated, so KILL and alpha test do not work correctly 55327ec681f3Smrg * without this. 55337ec681f3Smrg * 2) Performance: Every shader needs at least a NULL export, even when 55347ec681f3Smrg * it writes no color/depth output. The NULL export instruction 55357ec681f3Smrg * stalls without this setting. 55367ec681f3Smrg * 55377ec681f3Smrg * Don't add this to CB_SHADER_MASK. 55387ec681f3Smrg * 55397ec681f3Smrg * GFX10 supports pixel shaders without exports by setting both the 55407ec681f3Smrg * color and Z formats to SPI_SHADER_ZERO. The hw will skip export 55417ec681f3Smrg * instructions if any are present. 55427ec681f3Smrg */ 55437ec681f3Smrg struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; 55447ec681f3Smrg if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 || 55457ec681f3Smrg ps->info.ps.can_discard) && 55467ec681f3Smrg !blend.spi_shader_col_format) { 55477ec681f3Smrg if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask) 55487ec681f3Smrg blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; 55497ec681f3Smrg } 55507ec681f3Smrg 55517ec681f3Smrg if (extra && (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR || 55527ec681f3Smrg extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS || 55537ec681f3Smrg extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS || 55547ec681f3Smrg extra->custom_blend_mode == V_028808_CB_RESOLVE)) { 55557ec681f3Smrg /* According to the CB spec states, CB_SHADER_MASK should be 55567ec681f3Smrg * set to enable writes to all four channels of MRT0. 
55577ec681f3Smrg */ 55587ec681f3Smrg blend.cb_shader_mask = 0xf; 55597ec681f3Smrg } 55607ec681f3Smrg 55617ec681f3Smrg pipeline->graphics.col_format = blend.spi_shader_col_format; 55627ec681f3Smrg pipeline->graphics.cb_target_mask = blend.cb_target_mask; 55637ec681f3Smrg 55647ec681f3Smrg if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) { 55657ec681f3Smrg struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; 55667ec681f3Smrg 55677ec681f3Smrg radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info); 55687ec681f3Smrg } 55697ec681f3Smrg 55707ec681f3Smrg if (radv_pipeline_has_tess(pipeline)) { 55717ec681f3Smrg pipeline->graphics.tess_patch_control_points = 55727ec681f3Smrg pCreateInfo->pTessellationState->patchControlPoints; 55737ec681f3Smrg } 55747ec681f3Smrg 55757ec681f3Smrg radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo, &key); 55767ec681f3Smrg radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend); 55777ec681f3Smrg radv_pipeline_init_shader_stages_state(pipeline); 55787ec681f3Smrg radv_pipeline_init_scratch(device, pipeline); 55797ec681f3Smrg 55807ec681f3Smrg /* Find the last vertex shader stage that eventually uses streamout. 
*/ 55817ec681f3Smrg pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline); 55827ec681f3Smrg 55837ec681f3Smrg pipeline->graphics.is_ngg = radv_pipeline_has_ngg(pipeline); 55847ec681f3Smrg pipeline->graphics.has_ngg_culling = 55857ec681f3Smrg pipeline->graphics.is_ngg && 55867ec681f3Smrg pipeline->shaders[pipeline->graphics.last_vgt_api_stage]->info.has_ngg_culling; 55877ec681f3Smrg 55887ec681f3Smrg pipeline->push_constant_size = pipeline_layout->push_constant_size; 55897ec681f3Smrg pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count; 55907ec681f3Smrg 55917ec681f3Smrg radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend); 55927ec681f3Smrg 55937ec681f3Smrg return result; 55947ec681f3Smrg} 5595ed98bd31Smaya 55967ec681f3SmrgVkResult 55977ec681f3Smrgradv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, 55987ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfo, 55997ec681f3Smrg const struct radv_graphics_pipeline_create_info *extra, 56007ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) 56017ec681f3Smrg{ 56027ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 56037ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); 56047ec681f3Smrg struct radv_pipeline *pipeline; 56057ec681f3Smrg VkResult result; 5606ed98bd31Smaya 56077ec681f3Smrg pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 56087ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 56097ec681f3Smrg if (pipeline == NULL) 56107ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 561101e04c3fSmrg 56127ec681f3Smrg vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); 56137ec681f3Smrg pipeline->type = RADV_PIPELINE_GRAPHICS; 561401e04c3fSmrg 56157ec681f3Smrg result = radv_pipeline_init(pipeline, device, cache, pCreateInfo, extra); 56167ec681f3Smrg if (result != VK_SUCCESS) { 56177ec681f3Smrg radv_pipeline_destroy(device, pipeline, pAllocator); 
56187ec681f3Smrg return result; 56197ec681f3Smrg } 562001e04c3fSmrg 56217ec681f3Smrg *pPipeline = radv_pipeline_to_handle(pipeline); 562201e04c3fSmrg 56237ec681f3Smrg return VK_SUCCESS; 56247ec681f3Smrg} 5625ed98bd31Smaya 56267ec681f3SmrgVkResult 56277ec681f3Smrgradv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, 56287ec681f3Smrg const VkGraphicsPipelineCreateInfo *pCreateInfos, 56297ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 56307ec681f3Smrg{ 56317ec681f3Smrg VkResult result = VK_SUCCESS; 56327ec681f3Smrg unsigned i = 0; 56337ec681f3Smrg 56347ec681f3Smrg for (; i < count; i++) { 56357ec681f3Smrg VkResult r; 56367ec681f3Smrg r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, 56377ec681f3Smrg &pPipelines[i]); 56387ec681f3Smrg if (r != VK_SUCCESS) { 56397ec681f3Smrg result = r; 56407ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 56417ec681f3Smrg 56427ec681f3Smrg if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 56437ec681f3Smrg break; 56447ec681f3Smrg } 56457ec681f3Smrg } 56467ec681f3Smrg 56477ec681f3Smrg for (; i < count; ++i) 56487ec681f3Smrg pPipelines[i] = VK_NULL_HANDLE; 56497ec681f3Smrg 56507ec681f3Smrg return result; 565101e04c3fSmrg} 565201e04c3fSmrg 56537ec681f3Smrgstatic void 56547ec681f3Smrgradv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline) 56557ec681f3Smrg{ 56567ec681f3Smrg struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 56577ec681f3Smrg uint64_t va = radv_shader_variant_get_va(shader); 56587ec681f3Smrg struct radv_device *device = pipeline->device; 56597ec681f3Smrg 56607ec681f3Smrg radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8); 56617ec681f3Smrg 56627ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); 56637ec681f3Smrg radeon_emit(cs, shader->config.rsrc1); 56647ec681f3Smrg radeon_emit(cs, shader->config.rsrc2); 
56657ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10) { 56667ec681f3Smrg radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); 56677ec681f3Smrg } 566801e04c3fSmrg} 566901e04c3fSmrg 56707ec681f3Smrgstatic void 56717ec681f3Smrgradv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline) 56727ec681f3Smrg{ 56737ec681f3Smrg struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 56747ec681f3Smrg struct radv_device *device = pipeline->device; 56757ec681f3Smrg unsigned threads_per_threadgroup; 56767ec681f3Smrg unsigned threadgroups_per_cu = 1; 56777ec681f3Smrg unsigned waves_per_threadgroup; 56787ec681f3Smrg unsigned max_waves_per_sh = 0; 56797ec681f3Smrg 56807ec681f3Smrg /* Calculate best compute resource limits. */ 56817ec681f3Smrg threads_per_threadgroup = 56827ec681f3Smrg shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2]; 56837ec681f3Smrg waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size); 56847ec681f3Smrg 56857ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10 && waves_per_threadgroup == 1) 56867ec681f3Smrg threadgroups_per_cu = 2; 56877ec681f3Smrg 56887ec681f3Smrg radeon_set_sh_reg( 56897ec681f3Smrg cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 56907ec681f3Smrg ac_get_compute_resource_limits(&device->physical_device->rad_info, waves_per_threadgroup, 56917ec681f3Smrg max_waves_per_sh, threadgroups_per_cu)); 56927ec681f3Smrg 56937ec681f3Smrg radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 56947ec681f3Smrg radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); 56957ec681f3Smrg radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); 56967ec681f3Smrg radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); 56977ec681f3Smrg} 569801e04c3fSmrg 569901e04c3fSmrgstatic void 57007ec681f3Smrgradv_compute_generate_pm4(struct 
radv_pipeline *pipeline) 570101e04c3fSmrg{ 57027ec681f3Smrg struct radv_device *device = pipeline->device; 57037ec681f3Smrg struct radeon_cmdbuf *cs = &pipeline->cs; 570401e04c3fSmrg 57057ec681f3Smrg cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16; 57067ec681f3Smrg cs->buf = malloc(cs->max_dw * 4); 570701e04c3fSmrg 57087ec681f3Smrg radv_pipeline_generate_hw_cs(cs, pipeline); 57097ec681f3Smrg radv_pipeline_generate_compute_state(cs, pipeline); 571001e04c3fSmrg 57117ec681f3Smrg assert(pipeline->cs.cdw <= pipeline->cs.max_dw); 57127ec681f3Smrg} 571301e04c3fSmrg 57147ec681f3Smrgstatic struct radv_pipeline_key 57157ec681f3Smrgradv_generate_compute_pipeline_key(struct radv_pipeline *pipeline, 57167ec681f3Smrg const VkComputePipelineCreateInfo *pCreateInfo) 57177ec681f3Smrg{ 57187ec681f3Smrg const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage; 57197ec681f3Smrg struct radv_pipeline_key key; 57207ec681f3Smrg memset(&key, 0, sizeof(key)); 57217ec681f3Smrg 57227ec681f3Smrg if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) 57237ec681f3Smrg key.optimisations_disabled = 1; 57247ec681f3Smrg 57257ec681f3Smrg const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size = 57267ec681f3Smrg vk_find_struct_const(stage->pNext, 57277ec681f3Smrg PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT); 57287ec681f3Smrg 57297ec681f3Smrg if (subgroup_size) { 57307ec681f3Smrg assert(subgroup_size->requiredSubgroupSize == 32 || 57317ec681f3Smrg subgroup_size->requiredSubgroupSize == 64); 57327ec681f3Smrg key.cs.compute_subgroup_size = subgroup_size->requiredSubgroupSize; 57337ec681f3Smrg } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) { 57347ec681f3Smrg key.cs.require_full_subgroups = true; 57357ec681f3Smrg } 57367ec681f3Smrg 57377ec681f3Smrg return key; 57387ec681f3Smrg} 573901e04c3fSmrg 57407ec681f3SmrgVkResult 57417ec681f3Smrgradv_compute_pipeline_create(VkDevice 
                             _device, VkPipelineCache _cache,
                             const VkComputePipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
                             struct radv_pipeline_shader_stack_size *rt_stack_sizes,
                             uint32_t rt_group_count, VkPipeline *pPipeline)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
   RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
   /* Only the compute slot of these per-stage arrays is ever filled in. */
   const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {
      0,
   };
   VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0};
   struct radv_pipeline *pipeline;
   VkResult result;

   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL) {
      /* This function takes ownership of rt_stack_sizes; release it on the
       * failure path so the caller does not leak it. */
      free(rt_stack_sizes);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
   pipeline->type = RADV_PIPELINE_COMPUTE;

   pipeline->device = device;
   pipeline->graphics.last_vgt_api_stage = MESA_SHADER_NONE;
   /* Ray-tracing callers pass per-group stack sizes; plain compute
    * pipelines pass NULL/0 (see radv_CreateComputePipelines below). */
   pipeline->compute.rt_stack_sizes = rt_stack_sizes;
   pipeline->compute.group_count = rt_group_count;

   /* Optional VK_EXT_pipeline_creation_feedback plumbing. */
   const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   radv_init_feedback(creation_feedback);

   VkPipelineCreationFeedbackEXT *pipeline_feedback =
      creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
   if (creation_feedback)
      stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];

   pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;

   struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);

   result = radv_create_shaders(pipeline, pipeline_layout, device, cache, &key, pStages,
                                pCreateInfo->flags, custom_hash, pipeline_feedback, stage_feedbacks);
   if (result != VK_SUCCESS) {
      /* NOTE(review): presumably radv_pipeline_destroy also releases
       * rt_stack_sizes via pipeline->compute — confirm before changing
       * ownership here. */
      radv_pipeline_destroy(device, pipeline, pAllocator);
      return result;
   }

   pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(
      pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
   pipeline->need_indirect_descriptor_sets |=
      radv_shader_need_indirect_descriptor_sets(pipeline, MESA_SHADER_COMPUTE);
   radv_pipeline_init_scratch(device, pipeline);

   pipeline->push_constant_size = pipeline_layout->push_constant_size;
   pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count;

   radv_compute_generate_pm4(pipeline);

   *pPipeline = radv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

/* Entry point for vkCreateComputePipelines: create each pipeline in turn.
 * A failing entry is set to VK_NULL_HANDLE and the first failing result is
 * returned; creation stops early only when the create-info requested
 * VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT. */
VkResult
radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{

   VkResult result = VK_SUCCESS;

   unsigned i = 0;
   for (; i < count; i++) {
      VkResult r;
      /* No custom hash and no RT stack sizes for plain compute pipelines. */
      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL,
                                       NULL, 0, &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   /* Null out any handles we never reached after an early return. */
   for (; i < count; ++i)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

/* Number of executables exposed through
 * VK_KHR_pipeline_executable_properties: one per compiled shader, plus one
 * extra for the GS copy shader when a geometry shader exists without NGG. */
static uint32_t
radv_get_executable_count(const struct radv_pipeline *pipeline)
{
   uint32_t ret = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;

      if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
         /* Legacy (non-NGG) GS also carries a separate GS copy shader. */
         ret += 2u;
      } else {
         ret += 1u;
      }
   }
   return ret;
}

/* Map a flat executable index back to its shader variant and stage.  The
 * walk order must match radv_get_executable_count exactly. */
static struct radv_shader_variant *
radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index,
                                      gl_shader_stage *stage)
{
   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;
      if (!index) {
         *stage = i;
         return pipeline->shaders[i];
      }

      --index;

      if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
         /* Legacy GS: the following executable slot is the GS copy shader. */
         if (!index) {
            *stage = i;
            return pipeline->gs_copy_shader;
         }
         --index;
      }
   }

   /* Index out of range: no matching executable. */
   *stage = -1;
   return NULL;
}

/* Basically strlcpy (which does not exist on linux) specialized for
 * descriptions: copies src into desc and zero-pads the remainder of the
 * fixed-size VK_MAX_DESCRIPTION_SIZE buffer.  src must be shorter than
 * VK_MAX_DESCRIPTION_SIZE (asserted). */
static void
desc_copy(char *desc, const char *src)
{
   int len = strlen(src); /* NOTE(review): size_t would avoid implicit narrowing. */
   assert(len < VK_MAX_DESCRIPTION_SIZE);
   memcpy(desc, src, len);
   memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
}

/* vkGetPipelineExecutablePropertiesKHR: one property entry per executable
 * (see radv_get_executable_count).  When an earlier stage's binary is
 * absent because it was merged into this one, the combined stage bits and a
 * combined name are reported. */
VkResult
radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
                                        uint32_t *pExecutableCount,
                                        VkPipelineExecutablePropertiesKHR *pProperties)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
   const uint32_t total_count = radv_get_executable_count(pipeline);

   /* First call of the two-call idiom: report the count only. */
   if (!pProperties) {
      *pExecutableCount = total_count;
      return VK_SUCCESS;
   }

   const uint32_t count = MIN2(total_count, *pExecutableCount);
   for (unsigned i = 0, executable_idx = 0; i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
      if (!pipeline->shaders[i])
         continue;
      pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
      const char *name = NULL;
      const char *description = NULL;
      switch (i) {
      case MESA_SHADER_VERTEX:
         name = "Vertex Shader";
         description = "Vulkan Vertex Shader";
         break;
      case MESA_SHADER_TESS_CTRL:
         /* No separate VS binary means VS was merged into the TCS. */
         if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "Vertex + Tessellation Control Shaders";
            description = "Combined Vulkan Vertex and Tessellation Control Shaders";
         } else {
            name = "Tessellation Control Shader";
            description = "Vulkan Tessellation Control Shader";
         }
         break;
      case MESA_SHADER_TESS_EVAL:
         name = "Tessellation Evaluation Shader";
         description = "Vulkan Tessellation Evaluation Shader";
         break;
      case MESA_SHADER_GEOMETRY:
         /* GS may have absorbed TES (with tessellation) or VS (without). */
         if (radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
            name = "Tessellation Evaluation + Geometry Shaders";
            description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
         } else if (!radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_VERTEX]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "Vertex + Geometry Shader";
            description = "Combined Vulkan Vertex and Geometry Shaders";
         } else {
            name = "Geometry Shader";
            description = "Vulkan Geometry Shader";
         }
         break;
      case MESA_SHADER_FRAGMENT:
         name = "Fragment Shader";
         description = "Vulkan Fragment Shader";
         break;
      case MESA_SHADER_COMPUTE:
         name = "Compute Shader";
         description = "Vulkan Compute Shader";
         break;
      }

      pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
      desc_copy(pProperties[executable_idx].name, name);
      desc_copy(pProperties[executable_idx].description, description);

      ++executable_idx;
      if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
         /* Legacy GS gets a second entry describing the GS copy shader. */
         assert(pipeline->gs_copy_shader);
         if (executable_idx >= count)
            break;

         pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
         pProperties[executable_idx].subgroupSize = 64;
         desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
         desc_copy(pProperties[executable_idx].description,
                   "Extra shader stage that loads the GS output ringbuffer into the rasterizer");

         ++executable_idx;
      }
   }

   /* VK_INCOMPLETE when the caller's array was too small for everything. */
   VkResult result = *pExecutableCount < total_count ?
                        VK_INCOMPLETE : VK_SUCCESS;
   *pExecutableCount = count;
   return result;
}

/* vkGetPipelineExecutableStatisticsKHR: report a fixed set of hardware
 * statistics for the selected executable, followed by any extra statistics
 * the ACO compiler recorded for this shader. */
VkResult
radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
                                        const VkPipelineExecutableInfoKHR *pExecutableInfo,
                                        uint32_t *pStatisticCount,
                                        VkPipelineExecutableStatisticKHR *pStatistics)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
   gl_shader_stage stage;
   struct radv_shader_variant *shader =
      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);

   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   /* config.lds_size is in allocation granules: 512 bytes on GFX7+,
    * 256 bytes on earlier chips. */
   unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
   unsigned max_waves = radv_get_max_waves(device, shader, stage);

   /* 's' keeps advancing even past 'end' so that (s - pStatistics) at the
    * bottom equals the total number of statistics available. */
   VkPipelineExecutableStatisticKHR *s = pStatistics;
   VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
   VkResult result = VK_SUCCESS;

   if (s < end) {
      desc_copy(s->name, "SGPRs");
      desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.num_sgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "VGPRs");
      desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.num_vgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Spilled SGPRs");
      desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.spilled_sgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Spilled VGPRs");
      desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.spilled_vgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Code size");
      desc_copy(s->description, "Code size in bytes");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->exec_size;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "LDS size");
      desc_copy(s->description, "LDS size in bytes per workgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      /* Convert from allocation granules to bytes. */
      s->value.u64 = shader->config.lds_size * lds_increment;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Scratch size");
      desc_copy(s->description, "Private memory in bytes per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.scratch_bytes_per_wave;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Subgroups per SIMD");
      desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = max_waves;
   }
   ++s;

   /* Optional per-shader statistics emitted by the ACO compiler. */
   if (shader->statistics) {
      for (unsigned i = 0; i < aco_num_statistics; i++) {
         const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
         if (s < end) {
            desc_copy(s->name, info->name);
            desc_copy(s->description, info->desc);
            s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
            s->value.u64 = shader->statistics[i];
         }
         ++s;
      }
   }

   /* Two-call idiom: report total count, or how many were written and
    * VK_INCOMPLETE when the caller's array was too small. */
   if (!pStatistics)
      *pStatisticCount = s - pStatistics;
   else if (s > end) {
      *pStatisticCount = end - pStatistics;
      result = VK_INCOMPLETE;
   } else {
      *pStatisticCount = s - pStatistics;
   }

   return result;
608501e04c3fSmrg} 608601e04c3fSmrg 60877ec681f3Smrgstatic VkResult 60887ec681f3Smrgradv_copy_representation(void *data, size_t *data_size, const char *src) 60897ec681f3Smrg{ 60907ec681f3Smrg size_t total_size = strlen(src) + 1; 609101e04c3fSmrg 60927ec681f3Smrg if (!data) { 60937ec681f3Smrg *data_size = total_size; 60947ec681f3Smrg return VK_SUCCESS; 60957ec681f3Smrg } 60967ec681f3Smrg 60977ec681f3Smrg size_t size = MIN2(total_size, *data_size); 60987ec681f3Smrg 60997ec681f3Smrg memcpy(data, src, size); 61007ec681f3Smrg if (size) 61017ec681f3Smrg *((char *)data + size - 1) = 0; 61027ec681f3Smrg return size < total_size ? VK_INCOMPLETE : VK_SUCCESS; 61037ec681f3Smrg} 61047ec681f3Smrg 61057ec681f3SmrgVkResult 61067ec681f3Smrgradv_GetPipelineExecutableInternalRepresentationsKHR( 61077ec681f3Smrg VkDevice device, const VkPipelineExecutableInfoKHR *pExecutableInfo, 61087ec681f3Smrg uint32_t *pInternalRepresentationCount, 61097ec681f3Smrg VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) 611001e04c3fSmrg{ 61117ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline); 61127ec681f3Smrg gl_shader_stage stage; 61137ec681f3Smrg struct radv_shader_variant *shader = 61147ec681f3Smrg radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); 61157ec681f3Smrg 61167ec681f3Smrg VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations; 61177ec681f3Smrg VkPipelineExecutableInternalRepresentationKHR *end = 61187ec681f3Smrg p + (pInternalRepresentations ? 
*pInternalRepresentationCount : 0); 61197ec681f3Smrg VkResult result = VK_SUCCESS; 61207ec681f3Smrg /* optimized NIR */ 61217ec681f3Smrg if (p < end) { 61227ec681f3Smrg p->isText = true; 61237ec681f3Smrg desc_copy(p->name, "NIR Shader(s)"); 61247ec681f3Smrg desc_copy(p->description, "The optimized NIR shader(s)"); 61257ec681f3Smrg if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS) 61267ec681f3Smrg result = VK_INCOMPLETE; 61277ec681f3Smrg } 61287ec681f3Smrg ++p; 61297ec681f3Smrg 61307ec681f3Smrg /* backend IR */ 61317ec681f3Smrg if (p < end) { 61327ec681f3Smrg p->isText = true; 61337ec681f3Smrg if (radv_use_llvm_for_stage(pipeline->device, stage)) { 61347ec681f3Smrg desc_copy(p->name, "LLVM IR"); 61357ec681f3Smrg desc_copy(p->description, "The LLVM IR after some optimizations"); 61367ec681f3Smrg } else { 61377ec681f3Smrg desc_copy(p->name, "ACO IR"); 61387ec681f3Smrg desc_copy(p->description, "The ACO IR after some optimizations"); 61397ec681f3Smrg } 61407ec681f3Smrg if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS) 61417ec681f3Smrg result = VK_INCOMPLETE; 61427ec681f3Smrg } 61437ec681f3Smrg ++p; 61447ec681f3Smrg 61457ec681f3Smrg /* Disassembler */ 61467ec681f3Smrg if (p < end && shader->disasm_string) { 61477ec681f3Smrg p->isText = true; 61487ec681f3Smrg desc_copy(p->name, "Assembly"); 61497ec681f3Smrg desc_copy(p->description, "Final Assembly"); 61507ec681f3Smrg if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS) 61517ec681f3Smrg result = VK_INCOMPLETE; 61527ec681f3Smrg } 61537ec681f3Smrg ++p; 61547ec681f3Smrg 61557ec681f3Smrg if (!pInternalRepresentations) 61567ec681f3Smrg *pInternalRepresentationCount = p - pInternalRepresentations; 61577ec681f3Smrg else if (p > end) { 61587ec681f3Smrg result = VK_INCOMPLETE; 61597ec681f3Smrg *pInternalRepresentationCount = end - pInternalRepresentations; 61607ec681f3Smrg } else { 61617ec681f3Smrg 
*pInternalRepresentationCount = p - pInternalRepresentations; 61627ec681f3Smrg } 61637ec681f3Smrg 61647ec681f3Smrg return result; 616501e04c3fSmrg} 6166