/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
2301e04c3fSmrg
2401e04c3fSmrg#include <assert.h>
2501e04c3fSmrg#include <stdbool.h>
2601e04c3fSmrg#include <string.h>
2701e04c3fSmrg#include <unistd.h>
2801e04c3fSmrg#include <fcntl.h>
2901e04c3fSmrg
3001e04c3fSmrg#include "util/mesa-sha1.h"
319f464c52Smaya#include "util/os_time.h"
327ec681f3Smrg#include "common/intel_l3_config.h"
337ec681f3Smrg#include "common/intel_disasm.h"
347ec681f3Smrg#include "common/intel_sample_positions.h"
3501e04c3fSmrg#include "anv_private.h"
3601e04c3fSmrg#include "compiler/brw_nir.h"
377ec681f3Smrg#include "compiler/brw_nir_rt.h"
3801e04c3fSmrg#include "anv_nir.h"
399f464c52Smaya#include "nir/nir_xfb_info.h"
4001e04c3fSmrg#include "spirv/nir_spirv.h"
4101e04c3fSmrg#include "vk_util.h"
4201e04c3fSmrg
4301e04c3fSmrg/* Needed for SWIZZLE macros */
4401e04c3fSmrg#include "program/prog_instruction.h"
4501e04c3fSmrg
4601e04c3fSmrg// Shader functions
477ec681f3Smrg#define SPIR_V_MAGIC_NUMBER 0x07230203
4801e04c3fSmrg
/* Callback context handed to spirv_to_nir's debug hook (see
 * anv_spirv_nir_debug); bundles the objects needed to attribute log
 * messages to the right device and shader module.
 */
struct anv_spirv_debug_data {
   struct anv_device *device;              /* device doing the compile */
   const struct vk_shader_module *module;  /* module being translated */
};
5301e04c3fSmrg
547ec681f3Smrgstatic void anv_spirv_nir_debug(void *private_data,
557ec681f3Smrg                                enum nir_spirv_debug_level level,
567ec681f3Smrg                                size_t spirv_offset,
577ec681f3Smrg                                const char *message)
5801e04c3fSmrg{
597ec681f3Smrg   struct anv_spirv_debug_data *debug_data = private_data;
607ec681f3Smrg
617ec681f3Smrg   switch (level) {
627ec681f3Smrg   case NIR_SPIRV_DEBUG_LEVEL_INFO:
637ec681f3Smrg      vk_logi(VK_LOG_OBJS(&debug_data->module->base),
647ec681f3Smrg              "SPIR-V offset %lu: %s",
657ec681f3Smrg              (unsigned long) spirv_offset, message);
667ec681f3Smrg      break;
677ec681f3Smrg   case NIR_SPIRV_DEBUG_LEVEL_WARNING:
687ec681f3Smrg      vk_logw(VK_LOG_OBJS(&debug_data->module->base),
697ec681f3Smrg              "SPIR-V offset %lu: %s",
707ec681f3Smrg              (unsigned long) spirv_offset, message);
717ec681f3Smrg      break;
727ec681f3Smrg   case NIR_SPIRV_DEBUG_LEVEL_ERROR:
737ec681f3Smrg      vk_loge(VK_LOG_OBJS(&debug_data->module->base),
747ec681f3Smrg              "SPIR-V offset %lu: %s",
757ec681f3Smrg              (unsigned long) spirv_offset, message);
767ec681f3Smrg      break;
777ec681f3Smrg   default:
787ec681f3Smrg      break;
797ec681f3Smrg   }
8001e04c3fSmrg}
8101e04c3fSmrg
/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
/* Translate a SPIR-V module into NIR for the given stage/entrypoint.
 *
 * Returns a NIR shader owned by mem_ctx (via ralloc_steal) that has
 * already been inlined, stripped of dead variables and run through
 * brw_preprocess_nir, or NULL if spirv_to_nir fails.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          void *mem_ctx,
                          const struct vk_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   /* Convert the VkSpecializationInfo into spirv_to_nir's representation;
    * spec_entries is malloc'd and must be freed on every path below.
    */
   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);

   struct anv_spirv_debug_data spirv_debug_data = {
      .device = device,
      .module = module,
   };
   /* SPIR-V capabilities we advertise, mostly gated on hardware
    * generation (devinfo->ver) and device features.
    */
   struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When KHR_format_feature_flags2 is enabled, the read/write without
          * format is per format, so just report true. It's up to the
          * application to check.
          */
         .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      /* Address formats depend on whether robustness requires bounds
       * checking on UBO/SSBO access.
       */
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
          anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format that has the NULL
       * pointer equals to 0.  That might be a better format to play nice
       * with certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
      .debug = {
         .func = anv_spirv_nir_debug,
         .private_data = &spirv_debug_data,
      },
   };


   nir_shader *nir =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, &spirv_options, nir_options);
   if (!nir) {
      free(spec_entries);
      return NULL;
   }

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   /* Tie the shader's lifetime to the caller-provided memory context. */
   ralloc_steal(mem_ctx, nir);

   free(spec_entries);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   /* Dump the freshly-translated NIR when the stage's INTEL_DEBUG flag is
    * set.
    */
   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
              nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}
26001e04c3fSmrg
/* Initialize the fields common to all anv_pipeline subclasses (graphics,
 * compute, ray tracing).  Zeroes the struct, sets up the batch allocator,
 * reloc list, ralloc memory context and executables array.
 *
 * Returns VK_SUCCESS, or the error from anv_reloc_list_init (in which
 * case nothing needs to be torn down by the caller).
 */
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   /* Everything hung off mem_ctx is freed in one shot by
    * anv_pipeline_finish.
    */
   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}
2977ec681f3Smrg
2987ec681f3Smrgvoid
2997ec681f3Smrganv_pipeline_finish(struct anv_pipeline *pipeline,
3007ec681f3Smrg                    struct anv_device *device,
3017ec681f3Smrg                    const VkAllocationCallbacks *pAllocator)
3027ec681f3Smrg{
3037ec681f3Smrg   anv_reloc_list_finish(&pipeline->batch_relocs,
3047ec681f3Smrg                         pAllocator ? pAllocator : &device->vk.alloc);
3057ec681f3Smrg   ralloc_free(pipeline->mem_ctx);
3067ec681f3Smrg   vk_object_base_finish(&pipeline->base);
3077ec681f3Smrg}
3087ec681f3Smrg
30901e04c3fSmrgvoid anv_DestroyPipeline(
31001e04c3fSmrg    VkDevice                                    _device,
31101e04c3fSmrg    VkPipeline                                  _pipeline,
31201e04c3fSmrg    const VkAllocationCallbacks*                pAllocator)
31301e04c3fSmrg{
31401e04c3fSmrg   ANV_FROM_HANDLE(anv_device, device, _device);
31501e04c3fSmrg   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
31601e04c3fSmrg
31701e04c3fSmrg   if (!pipeline)
31801e04c3fSmrg      return;
31901e04c3fSmrg
3207ec681f3Smrg   switch (pipeline->type) {
3217ec681f3Smrg   case ANV_PIPELINE_GRAPHICS: {
3227ec681f3Smrg      struct anv_graphics_pipeline *gfx_pipeline =
3237ec681f3Smrg         anv_pipeline_to_graphics(pipeline);
32401e04c3fSmrg
3257ec681f3Smrg      if (gfx_pipeline->blend_state.map)
3267ec681f3Smrg         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
3277ec681f3Smrg      if (gfx_pipeline->cps_state.map)
3287ec681f3Smrg         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
3297ec681f3Smrg
3307ec681f3Smrg      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
3317ec681f3Smrg         if (gfx_pipeline->shaders[s])
3327ec681f3Smrg            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
3337ec681f3Smrg      }
3347ec681f3Smrg      break;
3357ec681f3Smrg   }
3367ec681f3Smrg
3377ec681f3Smrg   case ANV_PIPELINE_COMPUTE: {
3387ec681f3Smrg      struct anv_compute_pipeline *compute_pipeline =
3397ec681f3Smrg         anv_pipeline_to_compute(pipeline);
3407ec681f3Smrg
3417ec681f3Smrg      if (compute_pipeline->cs)
3427ec681f3Smrg         anv_shader_bin_unref(device, compute_pipeline->cs);
3437ec681f3Smrg
3447ec681f3Smrg      break;
3457ec681f3Smrg   }
3467ec681f3Smrg
3477ec681f3Smrg   case ANV_PIPELINE_RAY_TRACING: {
3487ec681f3Smrg      struct anv_ray_tracing_pipeline *rt_pipeline =
3497ec681f3Smrg         anv_pipeline_to_ray_tracing(pipeline);
3507ec681f3Smrg
3517ec681f3Smrg      util_dynarray_foreach(&rt_pipeline->shaders,
3527ec681f3Smrg                            struct anv_shader_bin *, shader) {
3537ec681f3Smrg         anv_shader_bin_unref(device, *shader);
3547ec681f3Smrg      }
3557ec681f3Smrg      break;
3567ec681f3Smrg   }
3577ec681f3Smrg
3587ec681f3Smrg   default:
3597ec681f3Smrg      unreachable("invalid pipeline type");
36001e04c3fSmrg   }
36101e04c3fSmrg
3627ec681f3Smrg   anv_pipeline_finish(pipeline, device, pAllocator);
3637ec681f3Smrg   vk_free2(&device->vk.alloc, pAllocator, pipeline);
36401e04c3fSmrg}
36501e04c3fSmrg
/* Maps VkPrimitiveTopology values to the hardware 3DPRIM_* topology
 * encoding.  NOTE(review): VK_PRIMITIVE_TOPOLOGY_PATCH_LIST has no entry
 * here — presumably patch topologies are handled elsewhere; confirm at
 * the use site.
 */
static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
37801e04c3fSmrg
37901e04c3fSmrgstatic void
3807ec681f3Smrgpopulate_sampler_prog_key(const struct intel_device_info *devinfo,
38101e04c3fSmrg                          struct brw_sampler_prog_key_data *key)
38201e04c3fSmrg{
38301e04c3fSmrg   /* Almost all multisampled textures are compressed.  The only time when we
38401e04c3fSmrg    * don't compress a multisampled texture is for 16x MSAA with a surface
38501e04c3fSmrg    * width greater than 8k which is a bit of an edge case.  Since the sampler
38601e04c3fSmrg    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
38701e04c3fSmrg    * to tell the compiler to always assume compression.
38801e04c3fSmrg    */
38901e04c3fSmrg   key->compressed_multisample_layout_mask = ~0;
39001e04c3fSmrg
39101e04c3fSmrg   /* SkyLake added support for 16x MSAA.  With this came a new message for
39201e04c3fSmrg    * reading from a 16x MSAA surface with compression.  The new message was
39301e04c3fSmrg    * needed because now the MCS data is 64 bits instead of 32 or lower as is
39401e04c3fSmrg    * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
39501e04c3fSmrg    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
39601e04c3fSmrg    * so we can just use it unconditionally.  This may not be quite as
39701e04c3fSmrg    * efficient but it saves us from recompiling.
39801e04c3fSmrg    */
3997ec681f3Smrg   if (devinfo->ver >= 9)
40001e04c3fSmrg      key->msaa_16 = ~0;
40101e04c3fSmrg
40201e04c3fSmrg   /* XXX: Handle texture swizzle on HSW- */
40301e04c3fSmrg   for (int i = 0; i < MAX_SAMPLERS; i++) {
40401e04c3fSmrg      /* Assume color sampler, no swizzling. (Works for BDW+) */
40501e04c3fSmrg      key->swizzles[i] = SWIZZLE_XYZW;
40601e04c3fSmrg   }
40701e04c3fSmrg}
40801e04c3fSmrg
40901e04c3fSmrgstatic void
4107ec681f3Smrgpopulate_base_prog_key(const struct intel_device_info *devinfo,
4117ec681f3Smrg                       enum brw_subgroup_size_type subgroup_size_type,
4127ec681f3Smrg                       bool robust_buffer_acccess,
4137ec681f3Smrg                       struct brw_base_prog_key *key)
4147ec681f3Smrg{
4157ec681f3Smrg   key->subgroup_size_type = subgroup_size_type;
4167ec681f3Smrg   key->robust_buffer_access = robust_buffer_acccess;
4177ec681f3Smrg
4187ec681f3Smrg   populate_sampler_prog_key(devinfo, &key->tex);
4197ec681f3Smrg}
4207ec681f3Smrg
4217ec681f3Smrgstatic void
4227ec681f3Smrgpopulate_vs_prog_key(const struct intel_device_info *devinfo,
4237ec681f3Smrg                     enum brw_subgroup_size_type subgroup_size_type,
4247ec681f3Smrg                     bool robust_buffer_acccess,
42501e04c3fSmrg                     struct brw_vs_prog_key *key)
42601e04c3fSmrg{
42701e04c3fSmrg   memset(key, 0, sizeof(*key));
42801e04c3fSmrg
4297ec681f3Smrg   populate_base_prog_key(devinfo, subgroup_size_type,
4307ec681f3Smrg                          robust_buffer_acccess, &key->base);
43101e04c3fSmrg
43201e04c3fSmrg   /* XXX: Handle vertex input work-arounds */
43301e04c3fSmrg
43401e04c3fSmrg   /* XXX: Handle sampler_prog_key */
43501e04c3fSmrg}
43601e04c3fSmrg
43701e04c3fSmrgstatic void
4387ec681f3Smrgpopulate_tcs_prog_key(const struct intel_device_info *devinfo,
4397ec681f3Smrg                      enum brw_subgroup_size_type subgroup_size_type,
4407ec681f3Smrg                      bool robust_buffer_acccess,
44101e04c3fSmrg                      unsigned input_vertices,
44201e04c3fSmrg                      struct brw_tcs_prog_key *key)
44301e04c3fSmrg{
44401e04c3fSmrg   memset(key, 0, sizeof(*key));
44501e04c3fSmrg
4467ec681f3Smrg   populate_base_prog_key(devinfo, subgroup_size_type,
4477ec681f3Smrg                          robust_buffer_acccess, &key->base);
44801e04c3fSmrg
44901e04c3fSmrg   key->input_vertices = input_vertices;
45001e04c3fSmrg}
45101e04c3fSmrg
45201e04c3fSmrgstatic void
4537ec681f3Smrgpopulate_tes_prog_key(const struct intel_device_info *devinfo,
4547ec681f3Smrg                      enum brw_subgroup_size_type subgroup_size_type,
4557ec681f3Smrg                      bool robust_buffer_acccess,
45601e04c3fSmrg                      struct brw_tes_prog_key *key)
45701e04c3fSmrg{
45801e04c3fSmrg   memset(key, 0, sizeof(*key));
45901e04c3fSmrg
4607ec681f3Smrg   populate_base_prog_key(devinfo, subgroup_size_type,
4617ec681f3Smrg                          robust_buffer_acccess, &key->base);
46201e04c3fSmrg}
46301e04c3fSmrg
46401e04c3fSmrgstatic void
4657ec681f3Smrgpopulate_gs_prog_key(const struct intel_device_info *devinfo,
4667ec681f3Smrg                     enum brw_subgroup_size_type subgroup_size_type,
4677ec681f3Smrg                     bool robust_buffer_acccess,
46801e04c3fSmrg                     struct brw_gs_prog_key *key)
46901e04c3fSmrg{
47001e04c3fSmrg   memset(key, 0, sizeof(*key));
47101e04c3fSmrg
4727ec681f3Smrg   populate_base_prog_key(devinfo, subgroup_size_type,
4737ec681f3Smrg                          robust_buffer_acccess, &key->base);
4747ec681f3Smrg}
4757ec681f3Smrg
4767ec681f3Smrgstatic bool
4777ec681f3Smrgpipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
4787ec681f3Smrg                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
4797ec681f3Smrg{
4807ec681f3Smrg   if (pipeline->sample_shading_enable)
4817ec681f3Smrg      return false;
4827ec681f3Smrg
4837ec681f3Smrg   /* Not dynamic & not specified for the pipeline. */
4847ec681f3Smrg   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
4857ec681f3Smrg      return false;
4867ec681f3Smrg
4877ec681f3Smrg   /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
4887ec681f3Smrg    * possibility for element of the pipeline to change the value.
4897ec681f3Smrg    */
4907ec681f3Smrg   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
4917ec681f3Smrg       fsr_info->fragmentSize.width <= 1 &&
4927ec681f3Smrg       fsr_info->fragmentSize.height <= 1 &&
4937ec681f3Smrg       fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
4947ec681f3Smrg       fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
4957ec681f3Smrg      return false;
4967ec681f3Smrg
4977ec681f3Smrg   return true;
49801e04c3fSmrg}
49901e04c3fSmrg
/* Build the key used to compile and cache a fragment shader.  Most
 * fields come from pipeline create info; input_slots_valid is patched to
 * its real value just before brw_compile_fs.
 */
static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_acccess,
                     const struct anv_subpass *subpass,
                     const VkPipelineMultisampleStateCreateInfo *ms_info,
                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = &device->info;

   memset(key, 0, sizeof(*key));

   /* NOTE(review): `flags` (VkPipelineShaderStageCreateFlags) is passed
    * where populate_base_prog_key() expects an enum
    * brw_subgroup_size_type; the other stages pass a real subgroup size
    * type here.  Presumably the encodings are made to line up by the
    * caller — confirm.
    */
   populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);

   /* We set this to 0 here and set to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   /* Only attachments actually wired up in the subpass count as valid
    * color outputs. */
   assert(subpass->color_count <= MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
         key->color_outputs_valid |= (1 << i);
   }

   key->nr_color_regions = subpass->color_count;

   /* To reduce possible shader recompilations we would need to know if
    * there is a SampleMask output variable to compute if we should emit
    * code to workaround the issue that hardware disables alpha to coverage
    * when there is SampleMask output.
    */
   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms_info) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms_info->rasterizationSamples > 1) {
         key->persample_interp = ms_info->sampleShadingEnable &&
            (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
         key->multisample_fbo = true;
      }

      key->frag_coord_adds_sample_pos = key->persample_interp;
   }

   /* Coarse pixel shading only matters when the fragment-shading-rate
    * extension is enabled and the pipeline state can actually produce a
    * rate other than 1x1.
    */
   key->coarse_pixel =
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, fsr_info);
}
56401e04c3fSmrg
56501e04c3fSmrgstatic void
5667ec681f3Smrgpopulate_cs_prog_key(const struct intel_device_info *devinfo,
5677ec681f3Smrg                     enum brw_subgroup_size_type subgroup_size_type,
5687ec681f3Smrg                     bool robust_buffer_acccess,
56901e04c3fSmrg                     struct brw_cs_prog_key *key)
57001e04c3fSmrg{
57101e04c3fSmrg   memset(key, 0, sizeof(*key));
57201e04c3fSmrg
5737ec681f3Smrg   populate_base_prog_key(devinfo, subgroup_size_type,
5747ec681f3Smrg                          robust_buffer_acccess, &key->base);
5757ec681f3Smrg}
5767ec681f3Smrg
5777ec681f3Smrgstatic void
5787ec681f3Smrgpopulate_bs_prog_key(const struct intel_device_info *devinfo,
5797ec681f3Smrg                     VkPipelineShaderStageCreateFlags flags,
5807ec681f3Smrg                     bool robust_buffer_access,
5817ec681f3Smrg                     struct brw_bs_prog_key *key)
5827ec681f3Smrg{
5837ec681f3Smrg   memset(key, 0, sizeof(*key));
5847ec681f3Smrg
5857ec681f3Smrg   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
58601e04c3fSmrg}
58701e04c3fSmrg
/* Transient per-stage compilation state; lives only for the duration of
 * pipeline creation while the stage's shader is hashed, translated and
 * compiled.
 */
struct anv_pipeline_stage {
   gl_shader_stage stage;

   /* Source SPIR-V module and how to instantiate it. */
   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   /* SHA-1 of module/entrypoint/stage/spec_info (anv_pipeline_hash_shader). */
   unsigned char shader_sha1[20];

   /* Stage-specific compiler key (union over all brw_*_prog_key types). */
   union brw_any_prog_key key;

   /* Key used to look the compiled binary up in the pipeline cache. */
   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   /* Compile statistics / disassembly; up to 3 entries (num_stats says
    * how many are valid). */
   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   /* VK_EXT_pipeline_creation_feedback data for this stage. */
   VkPipelineCreationFeedbackEXT feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};
62201e04c3fSmrg
62301e04c3fSmrgstatic void
6247ec681f3Smrganv_pipeline_hash_shader(const struct vk_shader_module *module,
6259f464c52Smaya                         const char *entrypoint,
6269f464c52Smaya                         gl_shader_stage stage,
6279f464c52Smaya                         const VkSpecializationInfo *spec_info,
6289f464c52Smaya                         unsigned char *sha1_out)
62901e04c3fSmrg{
6309f464c52Smaya   struct mesa_sha1 ctx;
6319f464c52Smaya   _mesa_sha1_init(&ctx);
6329f464c52Smaya
6339f464c52Smaya   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
6349f464c52Smaya   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
6359f464c52Smaya   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
6369f464c52Smaya   if (spec_info) {
6379f464c52Smaya      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
6389f464c52Smaya                        spec_info->mapEntryCount *
6399f464c52Smaya                        sizeof(*spec_info->pMapEntries));
6409f464c52Smaya      _mesa_sha1_update(&ctx, spec_info->pData,
6419f464c52Smaya                        spec_info->dataSize);
64201e04c3fSmrg   }
6439f464c52Smaya
6449f464c52Smaya   _mesa_sha1_final(&ctx, sha1_out);
64501e04c3fSmrg}
64601e04c3fSmrg
64701e04c3fSmrgstatic void
6487ec681f3Smrganv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
64901e04c3fSmrg                           struct anv_pipeline_layout *layout,
65001e04c3fSmrg                           struct anv_pipeline_stage *stages,
65101e04c3fSmrg                           unsigned char *sha1_out)
65201e04c3fSmrg{
65301e04c3fSmrg   struct mesa_sha1 ctx;
65401e04c3fSmrg   _mesa_sha1_init(&ctx);
65501e04c3fSmrg
65601e04c3fSmrg   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
65701e04c3fSmrg                     sizeof(pipeline->subpass->view_mask));
65801e04c3fSmrg
65901e04c3fSmrg   if (layout)
66001e04c3fSmrg      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
66101e04c3fSmrg
6627ec681f3Smrg   const bool rba = pipeline->base.device->robust_buffer_access;
66301e04c3fSmrg   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
66401e04c3fSmrg
6657ec681f3Smrg   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
6669f464c52Smaya      if (stages[s].entrypoint) {
6679f464c52Smaya         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
6689f464c52Smaya                           sizeof(stages[s].shader_sha1));
6699f464c52Smaya         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
6709f464c52Smaya      }
67101e04c3fSmrg   }
67201e04c3fSmrg
67301e04c3fSmrg   _mesa_sha1_final(&ctx, sha1_out);
67401e04c3fSmrg}
67501e04c3fSmrg
67601e04c3fSmrgstatic void
6777ec681f3Smrganv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
67801e04c3fSmrg                          struct anv_pipeline_layout *layout,
67901e04c3fSmrg                          struct anv_pipeline_stage *stage,
68001e04c3fSmrg                          unsigned char *sha1_out)
68101e04c3fSmrg{
68201e04c3fSmrg   struct mesa_sha1 ctx;
68301e04c3fSmrg   _mesa_sha1_init(&ctx);
68401e04c3fSmrg
68501e04c3fSmrg   if (layout)
68601e04c3fSmrg      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
68701e04c3fSmrg
6887ec681f3Smrg   const bool rba = pipeline->base.device->robust_buffer_access;
68901e04c3fSmrg   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
69001e04c3fSmrg
6919f464c52Smaya   _mesa_sha1_update(&ctx, stage->shader_sha1,
6929f464c52Smaya                     sizeof(stage->shader_sha1));
6939f464c52Smaya   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
69401e04c3fSmrg
69501e04c3fSmrg   _mesa_sha1_final(&ctx, sha1_out);
69601e04c3fSmrg}
69701e04c3fSmrg
6987ec681f3Smrgstatic void
6997ec681f3Smrganv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
7007ec681f3Smrg                                     struct anv_pipeline_layout *layout,
7017ec681f3Smrg                                     struct anv_pipeline_stage *stage,
7027ec681f3Smrg                                     unsigned char *sha1_out)
7037ec681f3Smrg{
7047ec681f3Smrg   struct mesa_sha1 ctx;
7057ec681f3Smrg   _mesa_sha1_init(&ctx);
7067ec681f3Smrg
7077ec681f3Smrg   if (layout != NULL)
7087ec681f3Smrg      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
7097ec681f3Smrg
7107ec681f3Smrg   const bool rba = pipeline->base.device->robust_buffer_access;
7117ec681f3Smrg   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
7127ec681f3Smrg
7137ec681f3Smrg   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
7147ec681f3Smrg   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
7157ec681f3Smrg
7167ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
7177ec681f3Smrg}
7187ec681f3Smrg
7197ec681f3Smrgstatic void
7207ec681f3Smrganv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
7217ec681f3Smrg                                              struct anv_pipeline_layout *layout,
7227ec681f3Smrg                                              struct anv_pipeline_stage *intersection,
7237ec681f3Smrg                                              struct anv_pipeline_stage *any_hit,
7247ec681f3Smrg                                              unsigned char *sha1_out)
7257ec681f3Smrg{
7267ec681f3Smrg   struct mesa_sha1 ctx;
7277ec681f3Smrg   _mesa_sha1_init(&ctx);
7287ec681f3Smrg
7297ec681f3Smrg   if (layout != NULL)
7307ec681f3Smrg      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
7317ec681f3Smrg
7327ec681f3Smrg   const bool rba = pipeline->base.device->robust_buffer_access;
7337ec681f3Smrg   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
7347ec681f3Smrg
7357ec681f3Smrg   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
7367ec681f3Smrg   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
7377ec681f3Smrg   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
7387ec681f3Smrg   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
7397ec681f3Smrg
7407ec681f3Smrg   _mesa_sha1_final(&ctx, sha1_out);
7417ec681f3Smrg}
7427ec681f3Smrg
7439f464c52Smayastatic nir_shader *
7449f464c52Smayaanv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
7459f464c52Smaya                           struct anv_pipeline_cache *cache,
7469f464c52Smaya                           void *mem_ctx,
7479f464c52Smaya                           struct anv_pipeline_stage *stage)
7489f464c52Smaya{
7499f464c52Smaya   const struct brw_compiler *compiler =
7507ec681f3Smrg      pipeline->device->physical->compiler;
7519f464c52Smaya   const nir_shader_compiler_options *nir_options =
7529f464c52Smaya      compiler->glsl_compiler_options[stage->stage].NirOptions;
7539f464c52Smaya   nir_shader *nir;
7549f464c52Smaya
7559f464c52Smaya   nir = anv_device_search_for_nir(pipeline->device, cache,
7569f464c52Smaya                                   nir_options,
7579f464c52Smaya                                   stage->shader_sha1,
7589f464c52Smaya                                   mem_ctx);
7599f464c52Smaya   if (nir) {
7609f464c52Smaya      assert(nir->info.stage == stage->stage);
7619f464c52Smaya      return nir;
7629f464c52Smaya   }
7639f464c52Smaya
7649f464c52Smaya   nir = anv_shader_compile_to_nir(pipeline->device,
7659f464c52Smaya                                   mem_ctx,
7669f464c52Smaya                                   stage->module,
7679f464c52Smaya                                   stage->entrypoint,
7689f464c52Smaya                                   stage->stage,
7699f464c52Smaya                                   stage->spec_info);
7709f464c52Smaya   if (nir) {
7719f464c52Smaya      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
7729f464c52Smaya      return nir;
7739f464c52Smaya   }
7749f464c52Smaya
7759f464c52Smaya   return NULL;
7769f464c52Smaya}
7779f464c52Smaya
7787ec681f3Smrgstatic void
7797ec681f3Smrgshared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
7807ec681f3Smrg{
7817ec681f3Smrg   assert(glsl_type_is_vector_or_scalar(type));
7827ec681f3Smrg
7837ec681f3Smrg   uint32_t comp_size = glsl_type_is_boolean(type)
7847ec681f3Smrg      ? 4 : glsl_get_bit_size(type) / 8;
7857ec681f3Smrg   unsigned length = glsl_get_vector_elements(type);
7867ec681f3Smrg   *size = comp_size * length,
7877ec681f3Smrg   *align = comp_size * (length == 3 ? 4 : length);
7887ec681f3Smrg}
7897ec681f3Smrg
79001e04c3fSmrgstatic void
79101e04c3fSmrganv_pipeline_lower_nir(struct anv_pipeline *pipeline,
79201e04c3fSmrg                       void *mem_ctx,
79301e04c3fSmrg                       struct anv_pipeline_stage *stage,
79401e04c3fSmrg                       struct anv_pipeline_layout *layout)
79501e04c3fSmrg{
7967ec681f3Smrg   const struct anv_physical_device *pdevice = pipeline->device->physical;
7979f464c52Smaya   const struct brw_compiler *compiler = pdevice->compiler;
79801e04c3fSmrg
79901e04c3fSmrg   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
80001e04c3fSmrg   nir_shader *nir = stage->nir;
80101e04c3fSmrg
8029f464c52Smaya   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
8037ec681f3Smrg      /* Check if sample shading is enabled in the shader and toggle
8047ec681f3Smrg       * it on for the pipeline independent if sampleShadingEnable is set.
8057ec681f3Smrg       */
8067ec681f3Smrg      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
8077ec681f3Smrg      if (nir->info.fs.uses_sample_shading)
8087ec681f3Smrg         anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;
8097ec681f3Smrg
8107ec681f3Smrg      NIR_PASS_V(nir, nir_lower_wpos_center,
8117ec681f3Smrg                 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
8127ec681f3Smrg      NIR_PASS_V(nir, nir_lower_input_attachments,
8137ec681f3Smrg                 &(nir_input_attachment_options) {
8147ec681f3Smrg                     .use_fragcoord_sysval = true,
8157ec681f3Smrg                     .use_layer_id_sysval = true,
8167ec681f3Smrg                 });
8179f464c52Smaya   }
8189f464c52Smaya
81901e04c3fSmrg   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
82001e04c3fSmrg
8217ec681f3Smrg   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
8227ec681f3Smrg      NIR_PASS_V(nir, anv_nir_lower_multiview,
8237ec681f3Smrg                 anv_pipeline_to_graphics(pipeline));
8247ec681f3Smrg   }
82501e04c3fSmrg
8267ec681f3Smrg   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
82701e04c3fSmrg
8287ec681f3Smrg   NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);
82901e04c3fSmrg
8307ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
8317ec681f3Smrg              nir_address_format_64bit_global);
8327ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
8337ec681f3Smrg              nir_address_format_32bit_offset);
83401e04c3fSmrg
8357ec681f3Smrg   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
8367ec681f3Smrg   anv_nir_apply_pipeline_layout(pdevice,
8377ec681f3Smrg                                 pipeline->device->robust_buffer_access,
8387ec681f3Smrg                                 layout, nir, &stage->bind_map);
8397ec681f3Smrg
8407ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
8417ec681f3Smrg              anv_nir_ubo_addr_format(pdevice,
8427ec681f3Smrg                 pipeline->device->robust_buffer_access));
8437ec681f3Smrg   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
8447ec681f3Smrg              anv_nir_ssbo_addr_format(pdevice,
8457ec681f3Smrg                 pipeline->device->robust_buffer_access));
8467ec681f3Smrg
8477ec681f3Smrg   /* First run copy-prop to get rid of all of the vec() that address
8487ec681f3Smrg    * calculations often create and then constant-fold so that, when we
8497ec681f3Smrg    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
8507ec681f3Smrg    */
8517ec681f3Smrg   NIR_PASS_V(nir, nir_copy_prop);
8527ec681f3Smrg   NIR_PASS_V(nir, nir_opt_constant_folding);
85301e04c3fSmrg
8547ec681f3Smrg   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
8557ec681f3Smrg
8567ec681f3Smrg   /* We don't support non-uniform UBOs and non-uniform SSBO access is
8577ec681f3Smrg    * handled naturally by falling back to A64 messages.
8587ec681f3Smrg    */
8597ec681f3Smrg   NIR_PASS_V(nir, nir_lower_non_uniform_access,
8607ec681f3Smrg              &(nir_lower_non_uniform_access_options) {
8617ec681f3Smrg                  .types = nir_lower_non_uniform_texture_access |
8627ec681f3Smrg                           nir_lower_non_uniform_image_access,
8637ec681f3Smrg                  .callback = NULL,
8647ec681f3Smrg              });
8657ec681f3Smrg
8667ec681f3Smrg   anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
8677ec681f3Smrg                               nir, prog_data, &stage->bind_map, mem_ctx);
8687ec681f3Smrg
8697ec681f3Smrg   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
8707ec681f3Smrg      if (!nir->info.shared_memory_explicit_layout) {
8717ec681f3Smrg         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
8727ec681f3Smrg                    nir_var_mem_shared, shared_type_info);
87301e04c3fSmrg      }
87401e04c3fSmrg
8757ec681f3Smrg      NIR_PASS_V(nir, nir_lower_explicit_io,
8767ec681f3Smrg                 nir_var_mem_shared, nir_address_format_32bit_offset);
87701e04c3fSmrg
8787ec681f3Smrg      if (nir->info.zero_initialize_shared_memory &&
8797ec681f3Smrg          nir->info.shared_size > 0) {
8807ec681f3Smrg         /* The effective Shared Local Memory size is at least 1024 bytes and
8817ec681f3Smrg          * is always rounded to a power of two, so it is OK to align the size
8827ec681f3Smrg          * used by the shader to chunk_size -- which does simplify the logic.
8837ec681f3Smrg          */
8847ec681f3Smrg         const unsigned chunk_size = 16;
8857ec681f3Smrg         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
8867ec681f3Smrg         assert(shared_size <=
8877ec681f3Smrg                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
88801e04c3fSmrg
8897ec681f3Smrg         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
8907ec681f3Smrg                    shared_size, chunk_size);
8917ec681f3Smrg      }
89201e04c3fSmrg   }
89301e04c3fSmrg
89401e04c3fSmrg   stage->nir = nir;
89501e04c3fSmrg}
89601e04c3fSmrg
89701e04c3fSmrgstatic void
89801e04c3fSmrganv_pipeline_link_vs(const struct brw_compiler *compiler,
89901e04c3fSmrg                     struct anv_pipeline_stage *vs_stage,
90001e04c3fSmrg                     struct anv_pipeline_stage *next_stage)
90101e04c3fSmrg{
90201e04c3fSmrg   if (next_stage)
9037ec681f3Smrg      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
90401e04c3fSmrg}
90501e04c3fSmrg
9067ec681f3Smrgstatic void
90701e04c3fSmrganv_pipeline_compile_vs(const struct brw_compiler *compiler,
90801e04c3fSmrg                        void *mem_ctx,
9097ec681f3Smrg                        struct anv_graphics_pipeline *pipeline,
91001e04c3fSmrg                        struct anv_pipeline_stage *vs_stage)
91101e04c3fSmrg{
9127ec681f3Smrg   /* When using Primitive Replication for multiview, each view gets its own
9137ec681f3Smrg    * position slot.
9147ec681f3Smrg    */
9157ec681f3Smrg   uint32_t pos_slots = pipeline->use_primitive_replication ?
9167ec681f3Smrg      anv_subpass_view_count(pipeline->subpass) : 1;
9177ec681f3Smrg
91801e04c3fSmrg   brw_compute_vue_map(compiler->devinfo,
91901e04c3fSmrg                       &vs_stage->prog_data.vs.base.vue_map,
92001e04c3fSmrg                       vs_stage->nir->info.outputs_written,
9217ec681f3Smrg                       vs_stage->nir->info.separate_shader,
9227ec681f3Smrg                       pos_slots);
9237ec681f3Smrg
9247ec681f3Smrg   vs_stage->num_stats = 1;
9257ec681f3Smrg
9267ec681f3Smrg   struct brw_compile_vs_params params = {
9277ec681f3Smrg      .nir = vs_stage->nir,
9287ec681f3Smrg      .key = &vs_stage->key.vs,
9297ec681f3Smrg      .prog_data = &vs_stage->prog_data.vs,
9307ec681f3Smrg      .stats = vs_stage->stats,
9317ec681f3Smrg      .log_data = pipeline->base.device,
9327ec681f3Smrg   };
93301e04c3fSmrg
9347ec681f3Smrg   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
93501e04c3fSmrg}
93601e04c3fSmrg
93701e04c3fSmrgstatic void
93801e04c3fSmrgmerge_tess_info(struct shader_info *tes_info,
93901e04c3fSmrg                const struct shader_info *tcs_info)
94001e04c3fSmrg{
94101e04c3fSmrg   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
94201e04c3fSmrg    *
94301e04c3fSmrg    *    "PointMode. Controls generation of points rather than triangles
94401e04c3fSmrg    *     or lines. This functionality defaults to disabled, and is
94501e04c3fSmrg    *     enabled if either shader stage includes the execution mode.
94601e04c3fSmrg    *
94701e04c3fSmrg    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
94801e04c3fSmrg    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
94901e04c3fSmrg    * and OutputVertices, it says:
95001e04c3fSmrg    *
95101e04c3fSmrg    *    "One mode must be set in at least one of the tessellation
95201e04c3fSmrg    *     shader stages."
95301e04c3fSmrg    *
95401e04c3fSmrg    * So, the fields can be set in either the TCS or TES, but they must
95501e04c3fSmrg    * agree if set in both.  Our backend looks at TES, so bitwise-or in
95601e04c3fSmrg    * the values from the TCS.
95701e04c3fSmrg    */
95801e04c3fSmrg   assert(tcs_info->tess.tcs_vertices_out == 0 ||
95901e04c3fSmrg          tes_info->tess.tcs_vertices_out == 0 ||
96001e04c3fSmrg          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
96101e04c3fSmrg   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
96201e04c3fSmrg
96301e04c3fSmrg   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
96401e04c3fSmrg          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
96501e04c3fSmrg          tcs_info->tess.spacing == tes_info->tess.spacing);
96601e04c3fSmrg   tes_info->tess.spacing |= tcs_info->tess.spacing;
96701e04c3fSmrg
96801e04c3fSmrg   assert(tcs_info->tess.primitive_mode == 0 ||
96901e04c3fSmrg          tes_info->tess.primitive_mode == 0 ||
97001e04c3fSmrg          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
97101e04c3fSmrg   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
97201e04c3fSmrg   tes_info->tess.ccw |= tcs_info->tess.ccw;
97301e04c3fSmrg   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
97401e04c3fSmrg}
97501e04c3fSmrg
97601e04c3fSmrgstatic void
97701e04c3fSmrganv_pipeline_link_tcs(const struct brw_compiler *compiler,
97801e04c3fSmrg                      struct anv_pipeline_stage *tcs_stage,
97901e04c3fSmrg                      struct anv_pipeline_stage *tes_stage)
98001e04c3fSmrg{
98101e04c3fSmrg   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
98201e04c3fSmrg
9837ec681f3Smrg   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
98401e04c3fSmrg
98501e04c3fSmrg   nir_lower_patch_vertices(tes_stage->nir,
98601e04c3fSmrg                            tcs_stage->nir->info.tess.tcs_vertices_out,
98701e04c3fSmrg                            NULL);
98801e04c3fSmrg
98901e04c3fSmrg   /* Copy TCS info into the TES info */
99001e04c3fSmrg   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
99101e04c3fSmrg
99201e04c3fSmrg   /* Whacking the key after cache lookup is a bit sketchy, but all of
99301e04c3fSmrg    * this comes from the SPIR-V, which is part of the hash used for the
99401e04c3fSmrg    * pipeline cache.  So it should be safe.
99501e04c3fSmrg    */
99601e04c3fSmrg   tcs_stage->key.tcs.tes_primitive_mode =
99701e04c3fSmrg      tes_stage->nir->info.tess.primitive_mode;
99801e04c3fSmrg   tcs_stage->key.tcs.quads_workaround =
9997ec681f3Smrg      compiler->devinfo->ver < 9 &&
100001e04c3fSmrg      tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
100101e04c3fSmrg      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
100201e04c3fSmrg}
100301e04c3fSmrg
10047ec681f3Smrgstatic void
100501e04c3fSmrganv_pipeline_compile_tcs(const struct brw_compiler *compiler,
100601e04c3fSmrg                         void *mem_ctx,
10079f464c52Smaya                         struct anv_device *device,
100801e04c3fSmrg                         struct anv_pipeline_stage *tcs_stage,
100901e04c3fSmrg                         struct anv_pipeline_stage *prev_stage)
101001e04c3fSmrg{
101101e04c3fSmrg   tcs_stage->key.tcs.outputs_written =
101201e04c3fSmrg      tcs_stage->nir->info.outputs_written;
101301e04c3fSmrg   tcs_stage->key.tcs.patch_outputs_written =
101401e04c3fSmrg      tcs_stage->nir->info.patch_outputs_written;
101501e04c3fSmrg
10167ec681f3Smrg   tcs_stage->num_stats = 1;
10177ec681f3Smrg   tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
10187ec681f3Smrg                                     &tcs_stage->key.tcs,
10197ec681f3Smrg                                     &tcs_stage->prog_data.tcs,
10207ec681f3Smrg                                     tcs_stage->nir, -1,
10217ec681f3Smrg                                     tcs_stage->stats, NULL);
102201e04c3fSmrg}
102301e04c3fSmrg
102401e04c3fSmrgstatic void
102501e04c3fSmrganv_pipeline_link_tes(const struct brw_compiler *compiler,
102601e04c3fSmrg                      struct anv_pipeline_stage *tes_stage,
102701e04c3fSmrg                      struct anv_pipeline_stage *next_stage)
102801e04c3fSmrg{
102901e04c3fSmrg   if (next_stage)
10307ec681f3Smrg      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
103101e04c3fSmrg}
103201e04c3fSmrg
10337ec681f3Smrgstatic void
103401e04c3fSmrganv_pipeline_compile_tes(const struct brw_compiler *compiler,
103501e04c3fSmrg                         void *mem_ctx,
10369f464c52Smaya                         struct anv_device *device,
103701e04c3fSmrg                         struct anv_pipeline_stage *tes_stage,
103801e04c3fSmrg                         struct anv_pipeline_stage *tcs_stage)
103901e04c3fSmrg{
104001e04c3fSmrg   tes_stage->key.tes.inputs_read =
104101e04c3fSmrg      tcs_stage->nir->info.outputs_written;
104201e04c3fSmrg   tes_stage->key.tes.patch_inputs_read =
104301e04c3fSmrg      tcs_stage->nir->info.patch_outputs_written;
104401e04c3fSmrg
10457ec681f3Smrg   tes_stage->num_stats = 1;
10467ec681f3Smrg   tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
10477ec681f3Smrg                                     &tes_stage->key.tes,
10487ec681f3Smrg                                     &tcs_stage->prog_data.tcs.base.vue_map,
10497ec681f3Smrg                                     &tes_stage->prog_data.tes,
10507ec681f3Smrg                                     tes_stage->nir, -1,
10517ec681f3Smrg                                     tes_stage->stats, NULL);
105201e04c3fSmrg}
105301e04c3fSmrg
105401e04c3fSmrgstatic void
105501e04c3fSmrganv_pipeline_link_gs(const struct brw_compiler *compiler,
105601e04c3fSmrg                     struct anv_pipeline_stage *gs_stage,
105701e04c3fSmrg                     struct anv_pipeline_stage *next_stage)
105801e04c3fSmrg{
105901e04c3fSmrg   if (next_stage)
10607ec681f3Smrg      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
106101e04c3fSmrg}
106201e04c3fSmrg
10637ec681f3Smrgstatic void
106401e04c3fSmrganv_pipeline_compile_gs(const struct brw_compiler *compiler,
106501e04c3fSmrg                        void *mem_ctx,
10669f464c52Smaya                        struct anv_device *device,
106701e04c3fSmrg                        struct anv_pipeline_stage *gs_stage,
106801e04c3fSmrg                        struct anv_pipeline_stage *prev_stage)
106901e04c3fSmrg{
107001e04c3fSmrg   brw_compute_vue_map(compiler->devinfo,
107101e04c3fSmrg                       &gs_stage->prog_data.gs.base.vue_map,
107201e04c3fSmrg                       gs_stage->nir->info.outputs_written,
10737ec681f3Smrg                       gs_stage->nir->info.separate_shader, 1);
10747ec681f3Smrg
10757ec681f3Smrg   gs_stage->num_stats = 1;
10767ec681f3Smrg   gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
10777ec681f3Smrg                                   &gs_stage->key.gs,
10787ec681f3Smrg                                   &gs_stage->prog_data.gs,
10797ec681f3Smrg                                   gs_stage->nir, -1,
10807ec681f3Smrg                                   gs_stage->stats, NULL);
108101e04c3fSmrg}
108201e04c3fSmrg
108301e04c3fSmrgstatic void
108401e04c3fSmrganv_pipeline_link_fs(const struct brw_compiler *compiler,
108501e04c3fSmrg                     struct anv_pipeline_stage *stage)
108601e04c3fSmrg{
10877ec681f3Smrg   unsigned num_rt_bindings;
10887ec681f3Smrg   struct anv_pipeline_binding rt_bindings[MAX_RTS];
10897ec681f3Smrg   if (stage->key.wm.nr_color_regions > 0) {
10907ec681f3Smrg      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
10917ec681f3Smrg      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
10927ec681f3Smrg         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
10937ec681f3Smrg            rt_bindings[rt] = (struct anv_pipeline_binding) {
10947ec681f3Smrg               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
10957ec681f3Smrg               .index = rt,
10967ec681f3Smrg            };
10977ec681f3Smrg         } else {
10987ec681f3Smrg            /* Setup a null render target */
10997ec681f3Smrg            rt_bindings[rt] = (struct anv_pipeline_binding) {
11007ec681f3Smrg               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
11017ec681f3Smrg               .index = UINT32_MAX,
11027ec681f3Smrg            };
11037ec681f3Smrg         }
11049f464c52Smaya      }
11057ec681f3Smrg      num_rt_bindings = stage->key.wm.nr_color_regions;
11067ec681f3Smrg   } else {
11077ec681f3Smrg      /* Setup a null render target */
11087ec681f3Smrg      rt_bindings[0] = (struct anv_pipeline_binding) {
11097ec681f3Smrg         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
11107ec681f3Smrg         .index = UINT32_MAX,
11117ec681f3Smrg      };
11127ec681f3Smrg      num_rt_bindings = 1;
111301e04c3fSmrg   }
111401e04c3fSmrg
11157ec681f3Smrg   assert(num_rt_bindings <= MAX_RTS);
11167ec681f3Smrg   assert(stage->bind_map.surface_count == 0);
11177ec681f3Smrg   typed_memcpy(stage->bind_map.surface_to_descriptor,
11187ec681f3Smrg                rt_bindings, num_rt_bindings);
11197ec681f3Smrg   stage->bind_map.surface_count += num_rt_bindings;
112001e04c3fSmrg
11217ec681f3Smrg   /* Now that we've set up the color attachments, we can go through and
11227ec681f3Smrg    * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
11237ec681f3Smrg    * hopes that dead code can clean them up in this and any earlier shader
11247ec681f3Smrg    * stages.
11257ec681f3Smrg    */
11267ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
112701e04c3fSmrg   bool deleted_output = false;
11287ec681f3Smrg   nir_foreach_shader_out_variable_safe(var, stage->nir) {
11297ec681f3Smrg      /* TODO: We don't delete depth/stencil writes.  We probably could if the
11307ec681f3Smrg       * subpass doesn't have a depth/stencil attachment.
11317ec681f3Smrg       */
113201e04c3fSmrg      if (var->data.location < FRAG_RESULT_DATA0)
113301e04c3fSmrg         continue;
113401e04c3fSmrg
113501e04c3fSmrg      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
11369f464c52Smaya
11377ec681f3Smrg      /* If this is the RT at location 0 and we have alpha to coverage
11387ec681f3Smrg       * enabled we still need that write because it will affect the coverage
11397ec681f3Smrg       * mask even if it's never written to a color target.
11407ec681f3Smrg       */
11417ec681f3Smrg      if (rt == 0 && stage->key.wm.alpha_to_coverage)
11427ec681f3Smrg         continue;
11437ec681f3Smrg
11447ec681f3Smrg      const unsigned array_len =
11457ec681f3Smrg         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
11467ec681f3Smrg      assert(rt + array_len <= MAX_RTS);
11477ec681f3Smrg
11487ec681f3Smrg      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
11497ec681f3Smrg                             BITFIELD_RANGE(rt, array_len))) {
115001e04c3fSmrg         deleted_output = true;
11519f464c52Smaya         var->data.mode = nir_var_function_temp;
115201e04c3fSmrg         exec_node_remove(&var->node);
115301e04c3fSmrg         exec_list_push_tail(&impl->locals, &var->node);
115401e04c3fSmrg      }
115501e04c3fSmrg   }
115601e04c3fSmrg
115701e04c3fSmrg   if (deleted_output)
115801e04c3fSmrg      nir_fixup_deref_modes(stage->nir);
115901e04c3fSmrg
11607ec681f3Smrg   /* Initially the valid outputs value is based off the renderpass color
11617ec681f3Smrg    * attachments (see populate_wm_prog_key()), now that we've potentially
11627ec681f3Smrg    * deleted variables that map to unused attachments, we need to update the
11637ec681f3Smrg    * valid outputs for the backend compiler based on what output variables
11647ec681f3Smrg    * are actually used. */
11657ec681f3Smrg   stage->key.wm.color_outputs_valid = 0;
11667ec681f3Smrg   nir_foreach_shader_out_variable_safe(var, stage->nir) {
11677ec681f3Smrg      if (var->data.location < FRAG_RESULT_DATA0)
11687ec681f3Smrg         continue;
11697ec681f3Smrg
11707ec681f3Smrg      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
11717ec681f3Smrg      const unsigned array_len =
11727ec681f3Smrg         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
11737ec681f3Smrg      assert(rt + array_len <= MAX_RTS);
11747ec681f3Smrg
11757ec681f3Smrg      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
117601e04c3fSmrg   }
117701e04c3fSmrg
11787ec681f3Smrg   /* We stored the number of subpass color attachments in nr_color_regions
11797ec681f3Smrg    * when calculating the key for caching.  Now that we've computed the bind
11807ec681f3Smrg    * map, we can reduce this to the actual max before we go into the back-end
11817ec681f3Smrg    * compiler.
118201e04c3fSmrg    */
11837ec681f3Smrg   stage->key.wm.nr_color_regions =
11847ec681f3Smrg      util_last_bit(stage->key.wm.color_outputs_valid);
118501e04c3fSmrg}
118601e04c3fSmrg
11877ec681f3Smrgstatic void
118801e04c3fSmrganv_pipeline_compile_fs(const struct brw_compiler *compiler,
118901e04c3fSmrg                        void *mem_ctx,
11909f464c52Smaya                        struct anv_device *device,
119101e04c3fSmrg                        struct anv_pipeline_stage *fs_stage,
119201e04c3fSmrg                        struct anv_pipeline_stage *prev_stage)
119301e04c3fSmrg{
119401e04c3fSmrg   /* TODO: we could set this to 0 based on the information in nir_shader, but
119501e04c3fSmrg    * we need this before we call spirv_to_nir.
119601e04c3fSmrg    */
119701e04c3fSmrg   assert(prev_stage);
119801e04c3fSmrg   fs_stage->key.wm.input_slots_valid =
119901e04c3fSmrg      prev_stage->prog_data.vue.vue_map.slots_valid;
120001e04c3fSmrg
12017ec681f3Smrg   struct brw_compile_fs_params params = {
12027ec681f3Smrg      .nir = fs_stage->nir,
12037ec681f3Smrg      .key = &fs_stage->key.wm,
12047ec681f3Smrg      .prog_data = &fs_stage->prog_data.wm,
12057ec681f3Smrg
12067ec681f3Smrg      .allow_spilling = true,
12077ec681f3Smrg      .stats = fs_stage->stats,
12087ec681f3Smrg      .log_data = device,
12097ec681f3Smrg   };
12107ec681f3Smrg
12117ec681f3Smrg   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
12127ec681f3Smrg
12137ec681f3Smrg   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
12147ec681f3Smrg                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
12157ec681f3Smrg                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;
121601e04c3fSmrg
12177ec681f3Smrg   if (fs_stage->key.wm.color_outputs_valid == 0 &&
121801e04c3fSmrg       !fs_stage->prog_data.wm.has_side_effects &&
12197ec681f3Smrg       !fs_stage->prog_data.wm.uses_omask &&
12207ec681f3Smrg       !fs_stage->key.wm.alpha_to_coverage &&
122101e04c3fSmrg       !fs_stage->prog_data.wm.uses_kill &&
122201e04c3fSmrg       fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
122301e04c3fSmrg       !fs_stage->prog_data.wm.computed_stencil) {
122401e04c3fSmrg      /* This fragment shader has no outputs and no side effects.  Go ahead
122501e04c3fSmrg       * and return the code pointer so we don't accidentally think the
122601e04c3fSmrg       * compile failed but zero out prog_data which will set program_size to
122701e04c3fSmrg       * zero and disable the stage.
122801e04c3fSmrg       */
122901e04c3fSmrg      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
123001e04c3fSmrg   }
123101e04c3fSmrg}
123201e04c3fSmrg
12337ec681f3Smrgstatic void
12347ec681f3Smrganv_pipeline_add_executable(struct anv_pipeline *pipeline,
12357ec681f3Smrg                            struct anv_pipeline_stage *stage,
12367ec681f3Smrg                            struct brw_compile_stats *stats,
12377ec681f3Smrg                            uint32_t code_offset)
123801e04c3fSmrg{
12397ec681f3Smrg   char *nir = NULL;
12407ec681f3Smrg   if (stage->nir &&
12417ec681f3Smrg       (pipeline->flags &
12427ec681f3Smrg        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
12437ec681f3Smrg      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
12447ec681f3Smrg   }
12459f464c52Smaya
12467ec681f3Smrg   char *disasm = NULL;
12477ec681f3Smrg   if (stage->code &&
12487ec681f3Smrg       (pipeline->flags &
12497ec681f3Smrg        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
12507ec681f3Smrg      char *stream_data = NULL;
12517ec681f3Smrg      size_t stream_size = 0;
12527ec681f3Smrg      FILE *stream = open_memstream(&stream_data, &stream_size);
12537ec681f3Smrg
12547ec681f3Smrg      uint32_t push_size = 0;
12557ec681f3Smrg      for (unsigned i = 0; i < 4; i++)
12567ec681f3Smrg         push_size += stage->bind_map.push_ranges[i].length;
12577ec681f3Smrg      if (push_size > 0) {
12587ec681f3Smrg         fprintf(stream, "Push constant ranges:\n");
12597ec681f3Smrg         for (unsigned i = 0; i < 4; i++) {
12607ec681f3Smrg            if (stage->bind_map.push_ranges[i].length == 0)
12617ec681f3Smrg               continue;
12627ec681f3Smrg
12637ec681f3Smrg            fprintf(stream, "    RANGE%d (%dB): ", i,
12647ec681f3Smrg                    stage->bind_map.push_ranges[i].length * 32);
12657ec681f3Smrg
12667ec681f3Smrg            switch (stage->bind_map.push_ranges[i].set) {
12677ec681f3Smrg            case ANV_DESCRIPTOR_SET_NULL:
12687ec681f3Smrg               fprintf(stream, "NULL");
12697ec681f3Smrg               break;
12707ec681f3Smrg
12717ec681f3Smrg            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
12727ec681f3Smrg               fprintf(stream, "Vulkan push constants and API params");
12737ec681f3Smrg               break;
12747ec681f3Smrg
12757ec681f3Smrg            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
12767ec681f3Smrg               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
12777ec681f3Smrg                       stage->bind_map.push_ranges[i].index,
12787ec681f3Smrg                       stage->bind_map.push_ranges[i].start * 32);
12797ec681f3Smrg               break;
12807ec681f3Smrg
12817ec681f3Smrg            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
12827ec681f3Smrg               unreachable("gl_NumWorkgroups is never pushed");
12837ec681f3Smrg
12847ec681f3Smrg            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
12857ec681f3Smrg               fprintf(stream, "Inline shader constant data (start=%dB)",
12867ec681f3Smrg                       stage->bind_map.push_ranges[i].start * 32);
12877ec681f3Smrg               break;
12887ec681f3Smrg
12897ec681f3Smrg            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
12907ec681f3Smrg               unreachable("Color attachments can't be pushed");
12917ec681f3Smrg
12927ec681f3Smrg            default:
12937ec681f3Smrg               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
12947ec681f3Smrg                       stage->bind_map.push_ranges[i].set,
12957ec681f3Smrg                       stage->bind_map.push_ranges[i].index,
12967ec681f3Smrg                       stage->bind_map.push_ranges[i].start * 32);
12977ec681f3Smrg               break;
12987ec681f3Smrg            }
12997ec681f3Smrg            fprintf(stream, "\n");
13007ec681f3Smrg         }
13017ec681f3Smrg         fprintf(stream, "\n");
13027ec681f3Smrg      }
130301e04c3fSmrg
13047ec681f3Smrg      /* Creating this is far cheaper than it looks.  It's perfectly fine to
13057ec681f3Smrg       * do it for every binary.
13067ec681f3Smrg       */
13077ec681f3Smrg      intel_disassemble(&pipeline->device->info,
13087ec681f3Smrg                        stage->code, code_offset, stream);
130901e04c3fSmrg
13107ec681f3Smrg      fclose(stream);
131101e04c3fSmrg
13127ec681f3Smrg      /* Copy it to a ralloc'd thing */
13137ec681f3Smrg      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
13147ec681f3Smrg      memcpy(disasm, stream_data, stream_size);
13157ec681f3Smrg      disasm[stream_size] = 0;
131601e04c3fSmrg
13177ec681f3Smrg      free(stream_data);
13187ec681f3Smrg   }
13199f464c52Smaya
13207ec681f3Smrg   const struct anv_pipeline_executable exe = {
13217ec681f3Smrg      .stage = stage->stage,
13227ec681f3Smrg      .stats = *stats,
13237ec681f3Smrg      .nir = nir,
13247ec681f3Smrg      .disasm = disasm,
13257ec681f3Smrg   };
13267ec681f3Smrg   util_dynarray_append(&pipeline->executables,
13277ec681f3Smrg                        struct anv_pipeline_executable, exe);
13287ec681f3Smrg}
13297ec681f3Smrg
13307ec681f3Smrgstatic void
13317ec681f3Smrganv_pipeline_add_executables(struct anv_pipeline *pipeline,
13327ec681f3Smrg                             struct anv_pipeline_stage *stage,
13337ec681f3Smrg                             struct anv_shader_bin *bin)
13347ec681f3Smrg{
13357ec681f3Smrg   if (stage->stage == MESA_SHADER_FRAGMENT) {
13367ec681f3Smrg      /* We pull the prog data and stats out of the anv_shader_bin because
13377ec681f3Smrg       * the anv_pipeline_stage may not be fully populated if we successfully
13387ec681f3Smrg       * looked up the shader in a cache.
13397ec681f3Smrg       */
13407ec681f3Smrg      const struct brw_wm_prog_data *wm_prog_data =
13417ec681f3Smrg         (const struct brw_wm_prog_data *)bin->prog_data;
13427ec681f3Smrg      struct brw_compile_stats *stats = bin->stats;
13437ec681f3Smrg
13447ec681f3Smrg      if (wm_prog_data->dispatch_8) {
13457ec681f3Smrg         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
13467ec681f3Smrg      }
13477ec681f3Smrg
13487ec681f3Smrg      if (wm_prog_data->dispatch_16) {
13497ec681f3Smrg         anv_pipeline_add_executable(pipeline, stage, stats++,
13507ec681f3Smrg                                     wm_prog_data->prog_offset_16);
13517ec681f3Smrg      }
13527ec681f3Smrg
13537ec681f3Smrg      if (wm_prog_data->dispatch_32) {
13547ec681f3Smrg         anv_pipeline_add_executable(pipeline, stage, stats++,
13557ec681f3Smrg                                     wm_prog_data->prog_offset_32);
13567ec681f3Smrg      }
13577ec681f3Smrg   } else {
13587ec681f3Smrg      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
13597ec681f3Smrg   }
13607ec681f3Smrg}
13617ec681f3Smrg
13627ec681f3Smrgstatic enum brw_subgroup_size_type
13637ec681f3Smrganv_subgroup_size_type(gl_shader_stage stage,
13647ec681f3Smrg                       VkPipelineShaderStageCreateFlags flags,
13657ec681f3Smrg                       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
13667ec681f3Smrg{
13677ec681f3Smrg   enum brw_subgroup_size_type subgroup_size_type;
13687ec681f3Smrg
13697ec681f3Smrg   if (rss_info) {
13707ec681f3Smrg      assert(stage == MESA_SHADER_COMPUTE);
13717ec681f3Smrg      /* These enum values are expressly chosen to be equal to the subgroup
13727ec681f3Smrg       * size that they require.
13737ec681f3Smrg       */
13747ec681f3Smrg      assert(rss_info->requiredSubgroupSize == 8 ||
13757ec681f3Smrg             rss_info->requiredSubgroupSize == 16 ||
13767ec681f3Smrg             rss_info->requiredSubgroupSize == 32);
13777ec681f3Smrg      subgroup_size_type = rss_info->requiredSubgroupSize;
13787ec681f3Smrg   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
13797ec681f3Smrg      subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
13807ec681f3Smrg   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
13817ec681f3Smrg      assert(stage == MESA_SHADER_COMPUTE);
13827ec681f3Smrg      /* If the client expressly requests full subgroups and they don't
13837ec681f3Smrg       * specify a subgroup size neither allow varying subgroups, we need to
13847ec681f3Smrg       * pick one.  So we specify the API value of 32.  Performance will
13857ec681f3Smrg       * likely be terrible in this case but there's nothing we can do about
13867ec681f3Smrg       * that.  The client should have chosen a size.
13877ec681f3Smrg       */
13887ec681f3Smrg      subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
13897ec681f3Smrg   } else {
13907ec681f3Smrg      subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
13917ec681f3Smrg   }
13927ec681f3Smrg
13937ec681f3Smrg   return subgroup_size_type;
13947ec681f3Smrg}
13957ec681f3Smrg
13967ec681f3Smrgstatic void
13977ec681f3Smrganv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
13987ec681f3Smrg{
13997ec681f3Smrg   /* TODO: Cache this pipeline-wide information. */
14007ec681f3Smrg
14017ec681f3Smrg   if (anv_pipeline_is_primitive(pipeline)) {
14027ec681f3Smrg      /* Primitive replication depends on information from all the shaders.
14037ec681f3Smrg       * Recover this bit from the fact that we have more than one position slot
14047ec681f3Smrg       * in the vertex shader when using it.
14057ec681f3Smrg       */
14067ec681f3Smrg      assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
14077ec681f3Smrg      int pos_slots = 0;
14087ec681f3Smrg      const struct brw_vue_prog_data *vue_prog_data =
14097ec681f3Smrg         (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
14107ec681f3Smrg      const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
14117ec681f3Smrg      for (int i = 0; i < vue_map->num_slots; i++) {
14127ec681f3Smrg         if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
14137ec681f3Smrg            pos_slots++;
14147ec681f3Smrg      }
14157ec681f3Smrg      pipeline->use_primitive_replication = pos_slots > 1;
14167ec681f3Smrg   }
14177ec681f3Smrg}
14187ec681f3Smrg
/*
 * Build (or fetch from @cache) the compiled shaders for a graphics pipeline.
 *
 * On success, pipeline->shaders[] holds one referenced anv_shader_bin per
 * active stage and VK_SUCCESS is returned; creation feedback
 * (VK_EXT_pipeline_creation_feedback) is filled in if the client chained a
 * VkPipelineCreationFeedbackCreateInfoEXT.  On failure, every shader
 * reference taken so far is released before returning the error.
 */
static VkResult
anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
                              struct anv_pipeline_cache *cache,
                              const VkGraphicsPipelineCreateInfo *info)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
   struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};

   /* Information on which states are considered dynamic. */
   const VkPipelineDynamicStateCreateInfo *dyn_info =
      info->pDynamicState;
   uint32_t dynamic_states = 0;
   if (dyn_info) {
      for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
         dynamic_states |=
            anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
   }

   VkResult result;
   /* Pass 1: gather per-stage inputs, hash each shader module and build the
    * per-stage compilation keys.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

      int64_t stage_start = os_time_get_nano();

      stages[stage].stage = stage;
      stages[stage].module = vk_shader_module_from_handle(sinfo->module);
      stages[stage].entrypoint = sinfo->pName;
      stages[stage].spec_info = sinfo->pSpecializationInfo;
      anv_pipeline_hash_shader(stages[stage].module,
                               stages[stage].entrypoint,
                               stage,
                               stages[stage].spec_info,
                               stages[stage].shader_sha1);

      /* No VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT is looked
       * up here, so only the create-flag-based subgroup controls apply to
       * graphics stages.
       */
      enum brw_subgroup_size_type subgroup_size_type =
         anv_subgroup_size_type(stage, sinfo->flags, NULL);

      const struct intel_device_info *devinfo = &pipeline->base.device->info;
      switch (stage) {
      case MESA_SHADER_VERTEX:
         populate_vs_prog_key(devinfo, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.vs);
         break;
      case MESA_SHADER_TESS_CTRL:
         populate_tcs_prog_key(devinfo, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               info->pTessellationState->patchControlPoints,
                               &stages[stage].key.tcs);
         break;
      case MESA_SHADER_TESS_EVAL:
         populate_tes_prog_key(devinfo, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               &stages[stage].key.tes);
         break;
      case MESA_SHADER_GEOMETRY:
         populate_gs_prog_key(devinfo, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.gs);
         break;
      case MESA_SHADER_FRAGMENT: {
         /* Rasterizer discard can also be a dynamic state, in which case we
          * must assume rasterization may be enabled.
          */
         const bool raster_enabled =
            !info->pRasterizationState->rasterizerDiscardEnable ||
            dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
         populate_wm_prog_key(pipeline, subgroup_size_type,
                              pipeline->base.device->robust_buffer_access,
                              pipeline->subpass,
                              raster_enabled ? info->pMultisampleState : NULL,
                              vk_find_struct_const(info->pNext,
                                                   PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
                              &stages[stage].key.wm);
         break;
      }
      default:
         unreachable("Invalid graphics shader stage");
      }

      stages[stage].feedback.duration += os_time_get_nano() - stage_start;
      stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
   }

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* One pipeline-wide hash covers all stages plus the layout; each stage's
    * cache key is this hash plus the stage enum.
    */
   unsigned char sha1[20];
   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      stages[s].cache_key.stage = s;
      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
   }

   /* Capturing internal representations requires a fresh compile, so the
    * cache lookup is skipped entirely in that case.
    */
   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   if (!skip_cache_lookup) {
      unsigned found = 0;
      unsigned cache_hits = 0;
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         if (!stages[s].entrypoint)
            continue;

         int64_t stage_start = os_time_get_nano();

         /* cache_hit distinguishes a hit in the app-supplied cache (for
          * creation feedback) from a hit in the device-internal cache.
          */
         bool cache_hit;
         struct anv_shader_bin *bin =
            anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stages[s].cache_key,
                                         sizeof(stages[s].cache_key), &cache_hit);
         if (bin) {
            found++;
            pipeline->shaders[s] = bin;
         }

         if (cache_hit) {
            cache_hits++;
            stages[s].feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         stages[s].feedback.duration += os_time_get_nano() - stage_start;
      }

      if (found == __builtin_popcount(pipeline->active_stages)) {
         if (cache_hits == found) {
            pipeline_feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         /* We found all our shaders in the cache.  We're done. */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            if (!stages[s].entrypoint)
               continue;

            anv_pipeline_add_executables(&pipeline->base, &stages[s],
                                         pipeline->shaders[s]);
         }
         anv_pipeline_init_from_cached_graphics(pipeline);
         goto done;
      } else if (found > 0) {
         /* We found some but not all of our shaders.  This shouldn't happen
          * most of the time but it can if we have a partially populated
          * pipeline cache.
          */
         assert(found < __builtin_popcount(pipeline->active_stages));

         vk_perf(VK_LOG_OBJS(&cache->base),
                 "Found a partial pipeline in the cache.  This is "
                 "most likely caused by an incomplete pipeline cache "
                 "import or export");

         /* We're going to have to recompile anyway, so just throw away our
          * references to the shaders in the cache.  We'll get them out of the
          * cache again as part of the compilation process.
          */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            stages[s].feedback.flags = 0;
            if (pipeline->shaders[s]) {
               anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
               pipeline->shaders[s] = NULL;
            }
         }
      }
   }

   /* VK_EXT_pipeline_creation_cache_control: the app asked us to fail rather
    * than compile.  Any partial cache results were unreffed above, so it is
    * safe to return here.
    */
   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   /* Holds all the NIR shaders for cross-stage work (linking, primitive
    * replication); freed once every stage has been uploaded.
    */
   void *pipeline_ctx = ralloc_context(NULL);

   /* Pass 2: get NIR for every active stage (from the cache or by running
    * spirv_to_nir).
    */
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      assert(stages[s].stage == s);
      assert(pipeline->shaders[s] == NULL);

      stages[s].bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stages[s].surface_to_descriptor,
         .sampler_to_descriptor = stages[s].sampler_to_descriptor
      };

      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx,
                                                 &stages[s]);
      if (stages[s].nir == NULL) {
         result = vk_error(pipeline, VK_ERROR_UNKNOWN);
         goto fail;
      }

      /* This is rather ugly.
       *
       * Any variable annotated as interpolated by sample essentially disables
       * coarse pixel shading. Unfortunately the CTS tests exercising this set
       * the varying value in the previous stage using a constant. Our NIR
       * infrastructure is clever enough to lookup variables across stages and
       * constant fold, removing the variable. So in order to comply with CTS
       * we have check variables here.
       */
      if (s == MESA_SHADER_FRAGMENT) {
         nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
            if (var->data.sample) {
               stages[s].key.wm.coarse_pixel = false;
               break;
            }
         }
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Walk backwards to link */
   struct anv_pipeline_stage *next_stage = NULL;
   for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
      if (!stages[s].entrypoint)
         continue;

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s]);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      next_stage = &stages[s];
   }

   if (pipeline->base.device->info.ver >= 12 &&
       pipeline->subpass->view_mask != 0) {
      /* For some pipelines HW Primitive Replication can be used instead of
       * instancing to implement Multiview.  This depend on how viewIndex is
       * used in all the active shaders, so this check can't be done per
       * individual shaders.
       */
      nir_shader *shaders[MESA_SHADER_STAGES] = {};
      for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
         shaders[s] = stages[s].nir;

      pipeline->use_primitive_replication =
         anv_check_for_primitive_replication(shaders, pipeline);
   } else {
      pipeline->use_primitive_replication = false;
   }

   /* Pass 3: lower the NIR for each stage and, for compilers that request
    * it, unify the input/output interfaces between adjacent stages.
    */
   struct anv_pipeline_stage *prev_stage = NULL;
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);

      if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
         /* Make each stage's inputs/outputs the union of both sides of the
          * interface; tess levels are excluded since they are not ordinary
          * varyings.
          */
         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
                  ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
      }

      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   /* Pass 4: compile each stage to native code and upload it to the cache. */
   prev_stage = NULL;
   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      /* Transform feedback only applies to the last pre-rasterization
       * geometry stage, but gathering it per-stage here is harmless: it is
       * simply unused for stages that can't be last.
       */
      nir_xfb_info *xfb_info = NULL;
      if (s == MESA_SHADER_VERTEX ||
          s == MESA_SHADER_TESS_EVAL ||
          s == MESA_SHADER_GEOMETRY)
         xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
                                 &stages[s]);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }
      if (stages[s].code == NULL) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_nir_validate_push_layout(&stages[s].prog_data.base,
                                   &stages[s].bind_map);

      struct anv_shader_bin *bin =
         anv_device_upload_kernel(pipeline->base.device, cache, s,
                                  &stages[s].cache_key,
                                  sizeof(stages[s].cache_key),
                                  stages[s].code,
                                  stages[s].prog_data.base.program_size,
                                  &stages[s].prog_data.base,
                                  brw_prog_data_size(s),
                                  stages[s].stats, stages[s].num_stats,
                                  xfb_info, &stages[s].bind_map);
      if (!bin) {
         ralloc_free(stage_ctx);
         result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);

      pipeline->shaders[s] = bin;
      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   ralloc_free(pipeline_ctx);

done:

   if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
       pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
      /* This can happen if we decided to implicitly disable the fragment
       * shader.  See anv_pipeline_compile_fs().
       */
      anv_shader_bin_unref(pipeline->base.device,
                           pipeline->shaders[MESA_SHADER_FRAGMENT]);
      pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
      pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
   }

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   /* Report creation feedback if the client asked for it. */
   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;

fail:
   /* Release the NIR context and any shader references already taken. */
   ralloc_free(pipeline_ctx);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
   }

   return result;
}
182801e04c3fSmrg
/**
 * Compile (or fetch from the pipeline cache) the compute shader for a
 * compute pipeline.
 *
 * On success the resulting kernel is stored in pipeline->cs and
 * VK_SUCCESS is returned.  Creation-feedback data for
 * VK_EXT_pipeline_creation_feedback is filled in if the application
 * chained a VkPipelineCreationFeedbackCreateInfoEXT into info->pNext.
 */
VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info,
                        const struct vk_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   /* Start the whole-pipeline feedback timer; per-stage timing is tracked
    * separately in stage.feedback below.
    */
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;

   /* Compute pipelines have exactly one stage. */
   struct anv_pipeline_stage stage = {
      .stage = MESA_SHADER_COMPUTE,
      .module = module,
      .entrypoint = entrypoint,
      .spec_info = spec_info,
      .cache_key = {
         .stage = MESA_SHADER_COMPUTE,
      },
      .feedback = {
         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
      },
   };
   /* Hash module + entrypoint + specialization constants into
    * stage.shader_sha1; this feeds into the cache key below.
    */
   anv_pipeline_hash_shader(stage.module,
                            stage.entrypoint,
                            MESA_SHADER_COMPUTE,
                            stage.spec_info,
                            stage.shader_sha1);

   struct anv_shader_bin *bin = NULL;

   /* VK_EXT_subgroup_size_control: an app-required subgroup size may be
    * chained into the stage create info and affects the program key.
    */
   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
      vk_find_struct_const(info->stage.pNext,
                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);

   const enum brw_subgroup_size_type subgroup_size_type =
      anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);

   populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
                        pipeline->base.device->robust_buffer_access,
                        &stage.key.cs);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* When the app asked to capture internal representations
    * (VK_KHR_pipeline_executable_properties), we must actually compile so
    * the representations exist; skip the cache lookup in that case.
    */
   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);

   bool cache_hit = false;
   if (!skip_cache_lookup) {
      bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stage.cache_key,
                                         sizeof(stage.cache_key),
                                         &cache_hit);
   }

   /* VK_EXT_pipeline_creation_cache_control: the app asked us to fail
    * rather than compile if the shader isn't already cached.
    */
   if (bin == NULL &&
       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   /* Temporary allocation context for NIR and compiled code; freed on every
    * exit path below once the kernel has been uploaded (uploading copies the
    * data it needs).
    */
   void *mem_ctx = ralloc_context(NULL);
   if (bin == NULL) {
      int64_t stage_start = os_time_get_nano();

      stage.bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stage.surface_to_descriptor,
         .sampler_to_descriptor = stage.sampler_to_descriptor
      };

      /* Set up a binding for the gl_NumWorkGroups */
      stage.bind_map.surface_count = 1;
      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
      };

      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
      if (stage.nir == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_UNKNOWN);
      }

      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);

      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);

      NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);

      stage.num_stats = 1;

      struct brw_compile_cs_params params = {
         .nir = stage.nir,
         .key = &stage.key.cs,
         .prog_data = &stage.prog_data.cs,
         .stats = stage.stats,
         .log_data = pipeline->base.device,
      };

      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
      if (stage.code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);

      /* If the compiled program never reads gl_NumWorkGroups, drop the
       * binding we reserved for it above by pointing it at the null set.
       */
      if (!stage.prog_data.cs.uses_num_work_groups) {
         assert(stage.bind_map.surface_to_descriptor[0].set ==
                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
      }

      const unsigned code_size = stage.prog_data.base.program_size;
      bin = anv_device_upload_kernel(pipeline->base.device, cache,
                                     MESA_SHADER_COMPUTE,
                                     &stage.cache_key, sizeof(stage.cache_key),
                                     stage.code, code_size,
                                     &stage.prog_data.base,
                                     sizeof(stage.prog_data.cs),
                                     stage.stats, stage.num_stats,
                                     NULL, &stage.bind_map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      stage.feedback.duration = os_time_get_nano() - stage_start;
   }

   anv_pipeline_add_executables(&pipeline->base, &stage, bin);

   ralloc_free(mem_ctx);

   /* Report cache hits on both the per-stage and whole-pipeline feedback. */
   if (cache_hit) {
      stage.feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
   }
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      /* A compute pipeline always has exactly one stage. */
      assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
      create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
   }

   pipeline->cs = bin;

   return VK_SUCCESS;
}
198701e04c3fSmrg
198801e04c3fSmrg/**
198901e04c3fSmrg * Copy pipeline state not marked as dynamic.
199001e04c3fSmrg * Dynamic state is pipeline state which hasn't been provided at pipeline
199101e04c3fSmrg * creation time, but is dynamically provided afterwards using various
199201e04c3fSmrg * vkCmdSet* functions.
199301e04c3fSmrg *
199401e04c3fSmrg * The set of state considered "non_dynamic" is determined by the pieces of
199501e04c3fSmrg * state that have their corresponding VkDynamicState enums omitted from
199601e04c3fSmrg * VkPipelineDynamicStateCreateInfo::pDynamicStates.
199701e04c3fSmrg *
199801e04c3fSmrg * @param[out] pipeline    Destination non_dynamic state.
199901e04c3fSmrg * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
200001e04c3fSmrg */
static void
copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Start from "everything is baked", then clear the bits the app declared
    * dynamic; only the remaining (static) states are copied below.
    */
   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
   struct anv_subpass *subpass = pipeline->subpass;

   pipeline->dynamic_state = default_dynamic_state;

   states &= ~pipeline->dynamic_states;

   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;

   /* Rasterizer discard is only known at pipeline-creation time if it is not
    * itself a dynamic state (VK_EXT_extended_dynamic_state2).
    */
   bool raster_discard =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pViewportState is [...] NULL if the pipeline
    *    has rasterization disabled.
    */
   if (!raster_discard) {
      assert(pCreateInfo->pViewportState);

      /* Counts are always baked; the viewport/scissor arrays themselves are
       * only copied when not dynamic (pViewports/pScissors are ignored —
       * and may be garbage — when dynamic).
       */
      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->viewport.viewports,
                     pCreateInfo->pViewportState->pViewports,
                     pCreateInfo->pViewportState->viewportCount);
      }

      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->scissor.scissors,
                     pCreateInfo->pViewportState->pScissors,
                     pCreateInfo->pViewportState->scissorCount);
      }
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias.bias =
         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
      dynamic->depth_bias.clamp =
         pCreateInfo->pRasterizationState->depthBiasClamp;
      dynamic->depth_bias.slope =
         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
   }

   /* VK_EXT_extended_dynamic_state states follow. */
   if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->cull_mode =
         pCreateInfo->pRasterizationState->cullMode;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->front_face =
         pCreateInfo->pRasterizationState->frontFace;
   }

   /* Topology only applies when there is a vertex stage (i.e. not a
    * mesh-style pipeline without pInputAssemblyState).
    */
   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
         (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->raster_discard =
         pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias_enable =
         pCreateInfo->pRasterizationState->depthBiasEnable;
   }

   if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
         (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_restart_enable =
         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
   }

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   bool uses_color_att = false;
   for (unsigned i = 0; i < subpass->color_count; ++i) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
         uses_color_att = true;
         break;
      }
   }

   if (uses_color_att && !raster_discard) {
      assert(pCreateInfo->pColorBlendState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
         typed_memcpy(dynamic->blend_constants,
                     pCreateInfo->pColorBlendState->blendConstants, 4);
   }

   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * anv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is created
    *    against does not use a depth/stencil attachment.
    */
   if (!raster_discard && subpass->depth_stencil_attachment) {
      assert(pCreateInfo->pDepthStencilState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
         dynamic->depth_bounds.min =
            pCreateInfo->pDepthStencilState->minDepthBounds;
         dynamic->depth_bounds.max =
            pCreateInfo->pDepthStencilState->maxDepthBounds;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
         dynamic->stencil_compare_mask.front =
            pCreateInfo->pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pCreateInfo->pDepthStencilState->back.compareMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
         dynamic->stencil_write_mask.front =
            pCreateInfo->pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back =
            pCreateInfo->pDepthStencilState->back.writeMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
         dynamic->stencil_reference.front =
            pCreateInfo->pDepthStencilState->front.reference;
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
         dynamic->depth_test_enable =
            pCreateInfo->pDepthStencilState->depthTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
         dynamic->depth_write_enable =
            pCreateInfo->pDepthStencilState->depthWriteEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
         dynamic->depth_compare_op =
            pCreateInfo->pDepthStencilState->depthCompareOp;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
         dynamic->depth_bounds_test_enable =
            pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
         dynamic->stencil_test_enable =
            pCreateInfo->pDepthStencilState->stencilTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
         const VkPipelineDepthStencilStateCreateInfo *info =
            pCreateInfo->pDepthStencilState;
         memcpy(&dynamic->stencil_op.front, &info->front,
                sizeof(dynamic->stencil_op.front));
         memcpy(&dynamic->stencil_op.back, &info->back,
                sizeof(dynamic->stencil_op.back));
      }
   }

   /* VK_EXT_line_rasterization: stipple parameters are only meaningful when
    * stippling is enabled.
    */
   const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
   if (!raster_discard && line_state && line_state->stippledLineEnable) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
         dynamic->line_stipple.factor = line_state->lineStippleFactor;
         dynamic->line_stipple.pattern = line_state->lineStipplePattern;
      }
   }

   /* pMultisampleState must be ignored when rasterization is statically
    * disabled (note: intentionally keyed on the raw create-info flag here,
    * not on raster_discard).
    */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
      pCreateInfo->pMultisampleState;
   if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
      const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
         vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;

      if (sl_info) {
         dynamic->sample_locations.samples =
            sl_info->sampleLocationsInfo.sampleLocationsCount;
         const VkSampleLocationEXT *positions =
            sl_info->sampleLocationsInfo.pSampleLocations;
         for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
            dynamic->sample_locations.locations[i].x = positions[i].x;
            dynamic->sample_locations.locations[i].y = positions[i].y;
         }
      }
   }
   /* Ensure we always have valid values for sample_locations. */
   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
       dynamic->sample_locations.samples == 0) {
      /* Fall back to the hardware's standard sample positions. */
      dynamic->sample_locations.samples =
         ms_info ? ms_info->rasterizationSamples : 1;
      const struct intel_sample_position *positions =
         intel_get_sample_positions(dynamic->sample_locations.samples);
      for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
         dynamic->sample_locations.locations[i].x = positions[i].x;
         dynamic->sample_locations.locations[i].y = positions[i].y;
      }
   }

   /* VK_EXT_color_write_enable: default (all enabled) is only overridden
    * when the struct is chained in.
    */
   if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
      if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
          uses_color_att) {
         assert(pCreateInfo->pColorBlendState);
         const VkPipelineColorWriteCreateInfoEXT *color_write_info =
            vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
                                 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);

         if (color_write_info) {
            dynamic->color_writes = 0;
            for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
               dynamic->color_writes |=
                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
            }
         }
      }
   }

   /* VK_KHR_fragment_shading_rate. */
   const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
      vk_find_struct_const(pCreateInfo->pNext,
                           PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
   if (fsr_state) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
         dynamic->fragment_shading_rate = fsr_state->fragmentSize;
   }

   pipeline->dynamic_state_mask = states;

   /* Mark states that can either be dynamic or fully baked into the pipeline.
    */
   pipeline->static_state_mask = states &
      (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
       ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
       ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
       ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
}
227201e04c3fSmrg
227301e04c3fSmrgstatic void
227401e04c3fSmrganv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
227501e04c3fSmrg{
227601e04c3fSmrg#ifdef DEBUG
227701e04c3fSmrg   struct anv_render_pass *renderpass = NULL;
227801e04c3fSmrg   struct anv_subpass *subpass = NULL;
227901e04c3fSmrg
228001e04c3fSmrg   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
228101e04c3fSmrg    * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
228201e04c3fSmrg    */
228301e04c3fSmrg   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
228401e04c3fSmrg
228501e04c3fSmrg   renderpass = anv_render_pass_from_handle(info->renderPass);
228601e04c3fSmrg   assert(renderpass);
228701e04c3fSmrg
228801e04c3fSmrg   assert(info->subpass < renderpass->subpass_count);
228901e04c3fSmrg   subpass = &renderpass->subpasses[info->subpass];
229001e04c3fSmrg
229101e04c3fSmrg   assert(info->stageCount >= 1);
229201e04c3fSmrg   assert(info->pRasterizationState);
229301e04c3fSmrg   if (!info->pRasterizationState->rasterizerDiscardEnable) {
229401e04c3fSmrg      assert(info->pViewportState);
229501e04c3fSmrg      assert(info->pMultisampleState);
229601e04c3fSmrg
229701e04c3fSmrg      if (subpass && subpass->depth_stencil_attachment)
229801e04c3fSmrg         assert(info->pDepthStencilState);
229901e04c3fSmrg
230001e04c3fSmrg      if (subpass && subpass->color_count > 0) {
230101e04c3fSmrg         bool all_color_unused = true;
230201e04c3fSmrg         for (int i = 0; i < subpass->color_count; i++) {
230301e04c3fSmrg            if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
230401e04c3fSmrg               all_color_unused = false;
230501e04c3fSmrg         }
230601e04c3fSmrg         /* pColorBlendState is ignored if the pipeline has rasterization
230701e04c3fSmrg          * disabled or if the subpass of the render pass the pipeline is
230801e04c3fSmrg          * created against does not use any color attachments.
230901e04c3fSmrg          */
231001e04c3fSmrg         assert(info->pColorBlendState || all_color_unused);
231101e04c3fSmrg      }
231201e04c3fSmrg   }
231301e04c3fSmrg
231401e04c3fSmrg   for (uint32_t i = 0; i < info->stageCount; ++i) {
231501e04c3fSmrg      switch (info->pStages[i].stage) {
23167ec681f3Smrg      case VK_SHADER_STAGE_VERTEX_BIT:
23177ec681f3Smrg         assert(info->pVertexInputState);
23187ec681f3Smrg         assert(info->pInputAssemblyState);
23197ec681f3Smrg         break;
232001e04c3fSmrg      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
232101e04c3fSmrg      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
232201e04c3fSmrg         assert(info->pTessellationState);
232301e04c3fSmrg         break;
232401e04c3fSmrg      default:
232501e04c3fSmrg         break;
232601e04c3fSmrg      }
232701e04c3fSmrg   }
232801e04c3fSmrg#endif
232901e04c3fSmrg}
233001e04c3fSmrg
233101e04c3fSmrg/**
233201e04c3fSmrg * Calculate the desired L3 partitioning based on the current state of the
233301e04c3fSmrg * pipeline.  For now this simply returns the conservative defaults calculated
233401e04c3fSmrg * by get_default_l3_weights(), but we could probably do better by gathering
233501e04c3fSmrg * more statistics from the pipeline state (e.g. guess of expected URB usage
233601e04c3fSmrg * and bound surfaces), or by using feed-back from performance counters.
233701e04c3fSmrg */
233801e04c3fSmrgvoid
233901e04c3fSmrganv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
234001e04c3fSmrg{
23417ec681f3Smrg   const struct intel_device_info *devinfo = &pipeline->device->info;
23427ec681f3Smrg
23437ec681f3Smrg   const struct intel_l3_weights w =
23447ec681f3Smrg      intel_get_default_l3_weights(devinfo, true, needs_slm);
23457ec681f3Smrg
23467ec681f3Smrg   pipeline->l3_config = intel_get_l3_config(devinfo, w);
23477ec681f3Smrg}
23487ec681f3Smrg
23497ec681f3Smrgstatic VkLineRasterizationModeEXT
23507ec681f3Smrgvk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
23517ec681f3Smrg                           const VkPipelineMultisampleStateCreateInfo *ms_info)
23527ec681f3Smrg{
23537ec681f3Smrg   VkLineRasterizationModeEXT line_mode =
23547ec681f3Smrg      line_info ? line_info->lineRasterizationMode :
23557ec681f3Smrg                  VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
235601e04c3fSmrg
23577ec681f3Smrg   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
23587ec681f3Smrg      if (ms_info && ms_info->rasterizationSamples > 1) {
23597ec681f3Smrg         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
23607ec681f3Smrg      } else {
23617ec681f3Smrg         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
23627ec681f3Smrg      }
23637ec681f3Smrg   }
236401e04c3fSmrg
23657ec681f3Smrg   return line_mode;
236601e04c3fSmrg}
236701e04c3fSmrg
236801e04c3fSmrgVkResult
23697ec681f3Smrganv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
23707ec681f3Smrg                           struct anv_device *device,
23717ec681f3Smrg                           struct anv_pipeline_cache *cache,
23727ec681f3Smrg                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
23737ec681f3Smrg                           const VkAllocationCallbacks *alloc)
237401e04c3fSmrg{
237501e04c3fSmrg   VkResult result;
237601e04c3fSmrg
237701e04c3fSmrg   anv_pipeline_validate_create_info(pCreateInfo);
237801e04c3fSmrg
23797ec681f3Smrg   result = anv_pipeline_init(&pipeline->base, device,
23807ec681f3Smrg                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
23817ec681f3Smrg                              alloc);
23827ec681f3Smrg   if (result != VK_SUCCESS)
23837ec681f3Smrg      return result;
238401e04c3fSmrg
23857ec681f3Smrg   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
23867ec681f3Smrg                         pipeline->batch_data, sizeof(pipeline->batch_data));
238701e04c3fSmrg
238801e04c3fSmrg   ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
238901e04c3fSmrg   assert(pCreateInfo->subpass < render_pass->subpass_count);
239001e04c3fSmrg   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
239101e04c3fSmrg
23927ec681f3Smrg   assert(pCreateInfo->pRasterizationState);
239301e04c3fSmrg
23947ec681f3Smrg   if (pCreateInfo->pDynamicState) {
23957ec681f3Smrg      /* Remove all of the states that are marked as dynamic */
23967ec681f3Smrg      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
23977ec681f3Smrg      for (uint32_t s = 0; s < count; s++) {
23987ec681f3Smrg         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
23997ec681f3Smrg            pCreateInfo->pDynamicState->pDynamicStates[s]);
24007ec681f3Smrg      }
24017ec681f3Smrg   }
24027ec681f3Smrg
24037ec681f3Smrg   pipeline->active_stages = 0;
24047ec681f3Smrg   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
24057ec681f3Smrg      pipeline->active_stages |= pCreateInfo->pStages[i].stage;
24067ec681f3Smrg
24077ec681f3Smrg   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
24087ec681f3Smrg      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
240901e04c3fSmrg
241001e04c3fSmrg   copy_non_dynamic_state(pipeline, pCreateInfo);
24117ec681f3Smrg
24127ec681f3Smrg   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
241301e04c3fSmrg
24149f464c52Smaya   /* Previously we enabled depth clipping when !depthClampEnable.
24159f464c52Smaya    * DepthClipStateCreateInfo now makes depth clipping explicit so if the
24169f464c52Smaya    * clipping info is available, use its enable value to determine clipping,
24179f464c52Smaya    * otherwise fallback to the previous !depthClampEnable logic.
24189f464c52Smaya    */
24199f464c52Smaya   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
24209f464c52Smaya      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
24219f464c52Smaya                           PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
24229f464c52Smaya   pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
24239f464c52Smaya
24247ec681f3Smrg   pipeline->sample_shading_enable =
24257ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
24267ec681f3Smrg      pCreateInfo->pMultisampleState &&
24277ec681f3Smrg      pCreateInfo->pMultisampleState->sampleShadingEnable;
242801e04c3fSmrg
242901e04c3fSmrg   result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
243001e04c3fSmrg   if (result != VK_SUCCESS) {
24317ec681f3Smrg      anv_pipeline_finish(&pipeline->base, device, alloc);
243201e04c3fSmrg      return result;
243301e04c3fSmrg   }
243401e04c3fSmrg
24357ec681f3Smrg   anv_pipeline_setup_l3_config(&pipeline->base, false);
243601e04c3fSmrg
24377ec681f3Smrg   if (anv_pipeline_is_primitive(pipeline)) {
24387ec681f3Smrg      const VkPipelineVertexInputStateCreateInfo *vi_info =
24397ec681f3Smrg         pCreateInfo->pVertexInputState;
244001e04c3fSmrg
24417ec681f3Smrg      const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
244201e04c3fSmrg
24437ec681f3Smrg      for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
24447ec681f3Smrg         const VkVertexInputAttributeDescription *desc =
24457ec681f3Smrg            &vi_info->pVertexAttributeDescriptions[i];
244601e04c3fSmrg
24477ec681f3Smrg         if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
24487ec681f3Smrg            pipeline->vb_used |= 1 << desc->binding;
24497ec681f3Smrg      }
245001e04c3fSmrg
24517ec681f3Smrg      for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
24527ec681f3Smrg         const VkVertexInputBindingDescription *desc =
24537ec681f3Smrg            &vi_info->pVertexBindingDescriptions[i];
24547ec681f3Smrg
24557ec681f3Smrg         pipeline->vb[desc->binding].stride = desc->stride;
24567ec681f3Smrg
24577ec681f3Smrg         /* Step rate is programmed per vertex element (attribute), not
24587ec681f3Smrg          * binding. Set up a map of which bindings step per instance, for
24597ec681f3Smrg          * reference by vertex element setup. */
24607ec681f3Smrg         switch (desc->inputRate) {
24617ec681f3Smrg         default:
24627ec681f3Smrg         case VK_VERTEX_INPUT_RATE_VERTEX:
24637ec681f3Smrg            pipeline->vb[desc->binding].instanced = false;
24647ec681f3Smrg            break;
24657ec681f3Smrg         case VK_VERTEX_INPUT_RATE_INSTANCE:
24667ec681f3Smrg            pipeline->vb[desc->binding].instanced = true;
24677ec681f3Smrg            break;
24687ec681f3Smrg         }
246901e04c3fSmrg
24707ec681f3Smrg         pipeline->vb[desc->binding].instance_divisor = 1;
24717ec681f3Smrg      }
247201e04c3fSmrg
24737ec681f3Smrg      const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
24747ec681f3Smrg         vk_find_struct_const(vi_info->pNext,
24757ec681f3Smrg                              PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
24767ec681f3Smrg      if (vi_div_state) {
24777ec681f3Smrg         for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
24787ec681f3Smrg            const VkVertexInputBindingDivisorDescriptionEXT *desc =
24797ec681f3Smrg               &vi_div_state->pVertexBindingDivisors[i];
248001e04c3fSmrg
24817ec681f3Smrg            pipeline->vb[desc->binding].instance_divisor = desc->divisor;
24827ec681f3Smrg         }
248301e04c3fSmrg      }
248401e04c3fSmrg
24857ec681f3Smrg      /* Our implementation of VK_KHR_multiview uses instancing to draw the
24867ec681f3Smrg       * different views.  If the client asks for instancing, we need to multiply
24877ec681f3Smrg       * the instance divisor by the number of views ensure that we repeat the
24887ec681f3Smrg       * client's per-instance data once for each view.
24897ec681f3Smrg       */
24907ec681f3Smrg      if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
24917ec681f3Smrg         const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
24927ec681f3Smrg         for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
24937ec681f3Smrg            if (pipeline->vb[vb].instanced)
24947ec681f3Smrg               pipeline->vb[vb].instance_divisor *= view_count;
24957ec681f3Smrg         }
24967ec681f3Smrg      }
249701e04c3fSmrg
24987ec681f3Smrg      const VkPipelineInputAssemblyStateCreateInfo *ia_info =
24997ec681f3Smrg         pCreateInfo->pInputAssemblyState;
25007ec681f3Smrg      const VkPipelineTessellationStateCreateInfo *tess_info =
25017ec681f3Smrg         pCreateInfo->pTessellationState;
250201e04c3fSmrg
25037ec681f3Smrg      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
25047ec681f3Smrg         pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
25057ec681f3Smrg      else
25067ec681f3Smrg         pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
250701e04c3fSmrg   }
250801e04c3fSmrg
25097ec681f3Smrg   /* If rasterization is not enabled, ms_info must be ignored. */
25107ec681f3Smrg   const bool raster_enabled =
25117ec681f3Smrg      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
25127ec681f3Smrg      (pipeline->dynamic_states &
25137ec681f3Smrg       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
25147ec681f3Smrg
25157ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *ms_info =
25167ec681f3Smrg      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
25177ec681f3Smrg
25187ec681f3Smrg   const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
25197ec681f3Smrg      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
25207ec681f3Smrg                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
25217ec681f3Smrg
25227ec681f3Smrg   /* Store line mode, polygon mode and rasterization samples, these are used
25237ec681f3Smrg    * for dynamic primitive topology.
252401e04c3fSmrg    */
25257ec681f3Smrg   pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
25267ec681f3Smrg   pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
25277ec681f3Smrg   pipeline->rasterization_samples =
25287ec681f3Smrg      ms_info ? ms_info->rasterizationSamples : 1;
25297ec681f3Smrg
25307ec681f3Smrg   return VK_SUCCESS;
25317ec681f3Smrg}
25327ec681f3Smrg
/* Compile one ray-tracing shader with the scalar backend and upload the
 * resulting binary to the device / pipeline cache.
 *
 * The input NIR is lowered in place; all temporary allocations go into
 * mem_ctx, which the caller owns and frees.  On success the resulting
 * anv_shader_bin is appended to pipeline->shaders and returned through
 * shader_out.  Returns VK_ERROR_OUT_OF_HOST_MEMORY if either the compile
 * or the upload fails.
 */
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   /* Split the shader at shader-call instructions: each continuation after
    * a call becomes a separate "resume" shader which is compiled alongside
    * the main one below.  Shaders already at MESA_SHADER_COMPUTE skip this
    * (they contain no shader calls — NOTE(review): presumably internal
    * helper shaders; confirm against callers).
    */
   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_shader_calls,
                 nir_address_format_64bit_global,
                 BRW_BTD_STACK_ALIGN,
                 &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS_V(nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   /* The resume shaders need the same backend lowering as the main shader
    * (minus the call splitting, which already happened above).
    */
   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   stage->code =
      brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
                     &stage->key.bs, &stage->prog_data.bs, nir,
                     num_resume_shaders, resume_shaders, stage->stats, NULL);
   if (stage->code == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}
25937ec681f3Smrg
25947ec681f3Smrgstatic bool
25957ec681f3Smrgis_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
25967ec681f3Smrg{
25977ec681f3Smrg   if (info->pDynamicState == NULL)
25987ec681f3Smrg      return false;
25997ec681f3Smrg
26007ec681f3Smrg   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
26017ec681f3Smrg      if (info->pDynamicState->pDynamicStates[i] ==
26027ec681f3Smrg          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
26037ec681f3Smrg         return true;
26047ec681f3Smrg   }
26057ec681f3Smrg
26067ec681f3Smrg   return false;
26077ec681f3Smrg}
26087ec681f3Smrg
26097ec681f3Smrgstatic void
26107ec681f3Smrganv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
26117ec681f3Smrg                                        const VkRayTracingPipelineCreateInfoKHR *info,
26127ec681f3Smrg                                        uint32_t *stack_max)
26137ec681f3Smrg{
26147ec681f3Smrg   if (is_rt_stack_size_dynamic(info)) {
26157ec681f3Smrg      pipeline->stack_size = 0; /* 0 means dynamic */
26167ec681f3Smrg   } else {
26177ec681f3Smrg      /* From the Vulkan spec:
26187ec681f3Smrg       *
26197ec681f3Smrg       *    "If the stack size is not set explicitly, the stack size for a
26207ec681f3Smrg       *    pipeline is:
26217ec681f3Smrg       *
26227ec681f3Smrg       *       rayGenStackMax +
26237ec681f3Smrg       *       min(1, maxPipelineRayRecursionDepth) ×
26247ec681f3Smrg       *       max(closestHitStackMax, missStackMax,
26257ec681f3Smrg       *           intersectionStackMax + anyHitStackMax) +
26267ec681f3Smrg       *       max(0, maxPipelineRayRecursionDepth-1) ×
26277ec681f3Smrg       *       max(closestHitStackMax, missStackMax) +
26287ec681f3Smrg       *       2 × callableStackMax"
26297ec681f3Smrg       */
26307ec681f3Smrg      pipeline->stack_size =
26317ec681f3Smrg         stack_max[MESA_SHADER_RAYGEN] +
26327ec681f3Smrg         MIN2(1, info->maxPipelineRayRecursionDepth) *
26337ec681f3Smrg         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
26347ec681f3Smrg              stack_max[MESA_SHADER_MISS],
26357ec681f3Smrg              stack_max[MESA_SHADER_INTERSECTION],
26367ec681f3Smrg              stack_max[MESA_SHADER_ANY_HIT]) +
26377ec681f3Smrg         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
26387ec681f3Smrg         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
26397ec681f3Smrg              stack_max[MESA_SHADER_MISS]) +
26407ec681f3Smrg         2 * stack_max[MESA_SHADER_CALLABLE];
26417ec681f3Smrg
26427ec681f3Smrg      /* This is an extremely unlikely case but we need to set it to some
26437ec681f3Smrg       * non-zero value so that we don't accidentally think it's dynamic.
26447ec681f3Smrg       * Our minimum stack size is 2KB anyway so we could set to any small
26457ec681f3Smrg       * value we like.
26467ec681f3Smrg       */
26477ec681f3Smrg      if (pipeline->stack_size == 0)
26487ec681f3Smrg         pipeline->stack_size = 1;
26497ec681f3Smrg   }
26507ec681f3Smrg}
26517ec681f3Smrg
/* Build the transient anv_pipeline_stage array for a ray-tracing pipeline.
 *
 * One entry per element of info->pStages[], indexed by that same index.
 * Entries whose module is VK_NULL_HANDLE are left zeroed (entrypoint stays
 * NULL, which the later compile loops treat as "unused").
 *
 * For each real stage this records the module/entrypoint/specialization
 * info, fills the backend key and computes the SHA1s used as pipeline-cache
 * keys.  Intersection shaders are keyed per procedural hit group in the
 * second loop instead, since their compiled form depends on the group's
 * any-hit shader.
 *
 * The returned array is allocated from pipeline_ctx and freed with it.
 */
static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus potential
    * combinations in the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (sinfo->module == VK_NULL_HANDLE)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .module = vk_shader_module_from_handle(sinfo->module),
         .entrypoint = sinfo->pName,
         .spec_info = sinfo->pSpecializationInfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
         },
      };

      /* Backend key depends on device-wide state such as robust buffer
       * access, so equal shaders on different configs cache separately.
       */
      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      /* Hash of module + entrypoint + stage + specialization constants;
       * feeds into the cache key computed next.
       */
      anv_pipeline_hash_shader(stages[i].module,
                               stages[i].entrypoint,
                               stages[i].stage,
                               stages[i].spec_info,
                               stages[i].shader_sha1);

      /* Intersection shaders get their cache key in the group loop below. */
      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Key intersection shaders per procedural hit group: when the group has
    * an any-hit shader the two are hashed (and later compiled) together.
    */
   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}
27337ec681f3Smrg
27347ec681f3Smrgstatic bool
27357ec681f3Smrganv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
27367ec681f3Smrg                                 struct anv_pipeline_cache *cache,
27377ec681f3Smrg                                 const VkRayTracingPipelineCreateInfoKHR *info,
27387ec681f3Smrg                                 struct anv_pipeline_stage *stages,
27397ec681f3Smrg                                 uint32_t *stack_max)
27407ec681f3Smrg{
27417ec681f3Smrg   uint32_t shaders = 0, cache_hits = 0;
27427ec681f3Smrg   for (uint32_t i = 0; i < info->stageCount; i++) {
27437ec681f3Smrg      if (stages[i].entrypoint == NULL)
27447ec681f3Smrg         continue;
27457ec681f3Smrg
27467ec681f3Smrg      shaders++;
27477ec681f3Smrg
27487ec681f3Smrg      int64_t stage_start = os_time_get_nano();
27497ec681f3Smrg
27507ec681f3Smrg      bool cache_hit;
27517ec681f3Smrg      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
27527ec681f3Smrg                                                   &stages[i].cache_key,
27537ec681f3Smrg                                                   sizeof(stages[i].cache_key),
27547ec681f3Smrg                                                   &cache_hit);
27557ec681f3Smrg      if (cache_hit) {
27567ec681f3Smrg         cache_hits++;
27577ec681f3Smrg         stages[i].feedback.flags |=
27587ec681f3Smrg            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
27597ec681f3Smrg      }
27607ec681f3Smrg
27617ec681f3Smrg      if (stages[i].bin != NULL) {
27627ec681f3Smrg         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
27637ec681f3Smrg         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
27647ec681f3Smrg
27657ec681f3Smrg         uint32_t stack_size =
27667ec681f3Smrg            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
27677ec681f3Smrg         stack_max[stages[i].stage] =
27687ec681f3Smrg            MAX2(stack_max[stages[i].stage], stack_size);
27697ec681f3Smrg      }
27707ec681f3Smrg
27717ec681f3Smrg      stages[i].feedback.duration += os_time_get_nano() - stage_start;
27727ec681f3Smrg   }
27737ec681f3Smrg
27747ec681f3Smrg   return cache_hits == shaders;
27757ec681f3Smrg}
27767ec681f3Smrg
27777ec681f3Smrgstatic VkResult
27787ec681f3Smrganv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
27797ec681f3Smrg                                 struct anv_pipeline_cache *cache,
27807ec681f3Smrg                                 const VkRayTracingPipelineCreateInfoKHR *info)
27817ec681f3Smrg{
27827ec681f3Smrg   const struct intel_device_info *devinfo = &pipeline->base.device->info;
27837ec681f3Smrg   VkResult result;
27847ec681f3Smrg
27857ec681f3Smrg   VkPipelineCreationFeedbackEXT pipeline_feedback = {
27867ec681f3Smrg      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
27877ec681f3Smrg   };
27887ec681f3Smrg   int64_t pipeline_start = os_time_get_nano();
27897ec681f3Smrg
27907ec681f3Smrg   void *pipeline_ctx = ralloc_context(NULL);
27917ec681f3Smrg
27927ec681f3Smrg   struct anv_pipeline_stage *stages =
27937ec681f3Smrg      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
27947ec681f3Smrg
27957ec681f3Smrg   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
27967ec681f3Smrg
27977ec681f3Smrg   const bool skip_cache_lookup =
27987ec681f3Smrg      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
27997ec681f3Smrg
28007ec681f3Smrg   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
28017ec681f3Smrg
28027ec681f3Smrg   if (!skip_cache_lookup &&
28037ec681f3Smrg       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
28047ec681f3Smrg      pipeline_feedback.flags |=
28057ec681f3Smrg         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
28067ec681f3Smrg      goto done;
28077ec681f3Smrg   }
28087ec681f3Smrg
28097ec681f3Smrg   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
28107ec681f3Smrg      ralloc_free(pipeline_ctx);
28117ec681f3Smrg      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
28127ec681f3Smrg   }
28137ec681f3Smrg
28147ec681f3Smrg   for (uint32_t i = 0; i < info->stageCount; i++) {
28157ec681f3Smrg      if (stages[i].entrypoint == NULL)
28167ec681f3Smrg         continue;
28177ec681f3Smrg
28187ec681f3Smrg      int64_t stage_start = os_time_get_nano();
28197ec681f3Smrg
28207ec681f3Smrg      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
28217ec681f3Smrg                                                 pipeline_ctx, &stages[i]);
28227ec681f3Smrg      if (stages[i].nir == NULL) {
28237ec681f3Smrg         ralloc_free(pipeline_ctx);
28247ec681f3Smrg         return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
28257ec681f3Smrg      }
28267ec681f3Smrg
28277ec681f3Smrg      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
28287ec681f3Smrg
28297ec681f3Smrg      stages[i].feedback.duration += os_time_get_nano() - stage_start;
28307ec681f3Smrg   }
28317ec681f3Smrg
28327ec681f3Smrg   for (uint32_t i = 0; i < info->stageCount; i++) {
28337ec681f3Smrg      if (stages[i].entrypoint == NULL)
28347ec681f3Smrg         continue;
28357ec681f3Smrg
28367ec681f3Smrg      /* Shader found in cache already. */
28377ec681f3Smrg      if (stages[i].bin != NULL)
28387ec681f3Smrg         continue;
28397ec681f3Smrg
28407ec681f3Smrg      /* We handle intersection shaders as part of the group */
28417ec681f3Smrg      if (stages[i].stage == MESA_SHADER_INTERSECTION)
28427ec681f3Smrg         continue;
28437ec681f3Smrg
28447ec681f3Smrg      int64_t stage_start = os_time_get_nano();
28457ec681f3Smrg
28467ec681f3Smrg      void *stage_ctx = ralloc_context(pipeline_ctx);
28477ec681f3Smrg
28487ec681f3Smrg      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
28497ec681f3Smrg      switch (stages[i].stage) {
28507ec681f3Smrg      case MESA_SHADER_RAYGEN:
28517ec681f3Smrg         brw_nir_lower_raygen(nir);
28527ec681f3Smrg         break;
28537ec681f3Smrg
28547ec681f3Smrg      case MESA_SHADER_ANY_HIT:
28557ec681f3Smrg         brw_nir_lower_any_hit(nir, devinfo);
28567ec681f3Smrg         break;
28577ec681f3Smrg
28587ec681f3Smrg      case MESA_SHADER_CLOSEST_HIT:
28597ec681f3Smrg         brw_nir_lower_closest_hit(nir);
28607ec681f3Smrg         break;
28617ec681f3Smrg
28627ec681f3Smrg      case MESA_SHADER_MISS:
28637ec681f3Smrg         brw_nir_lower_miss(nir);
28647ec681f3Smrg         break;
28657ec681f3Smrg
28667ec681f3Smrg      case MESA_SHADER_INTERSECTION:
28677ec681f3Smrg         unreachable("These are handled later");
28687ec681f3Smrg
28697ec681f3Smrg      case MESA_SHADER_CALLABLE:
28707ec681f3Smrg         brw_nir_lower_callable(nir);
28717ec681f3Smrg         break;
28727ec681f3Smrg
28737ec681f3Smrg      default:
28747ec681f3Smrg         unreachable("Invalid ray-tracing shader stage");
28757ec681f3Smrg      }
28767ec681f3Smrg
28777ec681f3Smrg      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
28787ec681f3Smrg                                        &stages[i].bin, stage_ctx);
28797ec681f3Smrg      if (result != VK_SUCCESS) {
28807ec681f3Smrg         ralloc_free(pipeline_ctx);
28817ec681f3Smrg         return result;
28827ec681f3Smrg      }
28837ec681f3Smrg
28847ec681f3Smrg      uint32_t stack_size =
28857ec681f3Smrg         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
28867ec681f3Smrg      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
28877ec681f3Smrg
28887ec681f3Smrg      ralloc_free(stage_ctx);
28897ec681f3Smrg
28907ec681f3Smrg      stages[i].feedback.duration += os_time_get_nano() - stage_start;
28917ec681f3Smrg   }
28927ec681f3Smrg
28937ec681f3Smrg   for (uint32_t i = 0; i < info->groupCount; i++) {
28947ec681f3Smrg      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
28957ec681f3Smrg      struct anv_rt_shader_group *group = &pipeline->groups[i];
28967ec681f3Smrg      group->type = ginfo->type;
28977ec681f3Smrg      switch (ginfo->type) {
28987ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
28997ec681f3Smrg         assert(ginfo->generalShader < info->stageCount);
29007ec681f3Smrg         group->general = stages[ginfo->generalShader].bin;
29017ec681f3Smrg         break;
29027ec681f3Smrg
29037ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
29047ec681f3Smrg         if (ginfo->anyHitShader < info->stageCount)
29057ec681f3Smrg            group->any_hit = stages[ginfo->anyHitShader].bin;
29067ec681f3Smrg
29077ec681f3Smrg         if (ginfo->closestHitShader < info->stageCount)
29087ec681f3Smrg            group->closest_hit = stages[ginfo->closestHitShader].bin;
29097ec681f3Smrg         break;
29107ec681f3Smrg
29117ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
29127ec681f3Smrg         if (ginfo->closestHitShader < info->stageCount)
29137ec681f3Smrg            group->closest_hit = stages[ginfo->closestHitShader].bin;
29147ec681f3Smrg
29157ec681f3Smrg         uint32_t intersection_idx = info->pGroups[i].intersectionShader;
29167ec681f3Smrg         assert(intersection_idx < info->stageCount);
29177ec681f3Smrg
29187ec681f3Smrg         /* Only compile this stage if not already found in the cache. */
29197ec681f3Smrg         if (stages[intersection_idx].bin == NULL) {
29207ec681f3Smrg            /* The any-hit and intersection shader have to be combined */
29217ec681f3Smrg            uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
29227ec681f3Smrg            const nir_shader *any_hit = NULL;
29237ec681f3Smrg            if (any_hit_idx < info->stageCount)
29247ec681f3Smrg               any_hit = stages[any_hit_idx].nir;
29257ec681f3Smrg
29267ec681f3Smrg            void *group_ctx = ralloc_context(pipeline_ctx);
29277ec681f3Smrg            nir_shader *intersection =
29287ec681f3Smrg               nir_shader_clone(group_ctx, stages[intersection_idx].nir);
29297ec681f3Smrg
29307ec681f3Smrg            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
29317ec681f3Smrg                                                        devinfo);
29327ec681f3Smrg
29337ec681f3Smrg            result = compile_upload_rt_shader(pipeline, cache,
29347ec681f3Smrg                                              intersection,
29357ec681f3Smrg                                              &stages[intersection_idx],
29367ec681f3Smrg                                              &group->intersection,
29377ec681f3Smrg                                              group_ctx);
29387ec681f3Smrg            ralloc_free(group_ctx);
29397ec681f3Smrg            if (result != VK_SUCCESS)
29407ec681f3Smrg               return result;
29417ec681f3Smrg         } else {
29427ec681f3Smrg            group->intersection = stages[intersection_idx].bin;
29437ec681f3Smrg         }
29447ec681f3Smrg
29457ec681f3Smrg         uint32_t stack_size =
29467ec681f3Smrg            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
29477ec681f3Smrg         stack_max[MESA_SHADER_INTERSECTION] =
29487ec681f3Smrg            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
29497ec681f3Smrg
29507ec681f3Smrg         break;
29517ec681f3Smrg      }
29527ec681f3Smrg
29537ec681f3Smrg      default:
29547ec681f3Smrg         unreachable("Invalid ray tracing shader group type");
29557ec681f3Smrg      }
29567ec681f3Smrg   }
29577ec681f3Smrg
29587ec681f3Smrg done:
29597ec681f3Smrg   ralloc_free(pipeline_ctx);
29607ec681f3Smrg
29617ec681f3Smrg   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
29627ec681f3Smrg
29637ec681f3Smrg   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
29647ec681f3Smrg
29657ec681f3Smrg   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
29667ec681f3Smrg      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
29677ec681f3Smrg   if (create_feedback) {
29687ec681f3Smrg      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
29697ec681f3Smrg
29707ec681f3Smrg      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
29717ec681f3Smrg      for (uint32_t i = 0; i < info->stageCount; i++) {
29727ec681f3Smrg         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
29737ec681f3Smrg         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
29747ec681f3Smrg      }
29757ec681f3Smrg   }
29767ec681f3Smrg
29777ec681f3Smrg   return VK_SUCCESS;
29787ec681f3Smrg}
29797ec681f3Smrg
29807ec681f3SmrgVkResult
29817ec681f3Smrganv_device_init_rt_shaders(struct anv_device *device)
29827ec681f3Smrg{
29837ec681f3Smrg   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
29847ec681f3Smrg      return VK_SUCCESS;
29857ec681f3Smrg
29867ec681f3Smrg   bool cache_hit;
29877ec681f3Smrg
29887ec681f3Smrg   struct brw_rt_trampoline {
29897ec681f3Smrg      char name[16];
29907ec681f3Smrg      struct brw_cs_prog_key key;
29917ec681f3Smrg   } trampoline_key = {
29927ec681f3Smrg      .name = "rt-trampoline",
29937ec681f3Smrg      .key = {
29947ec681f3Smrg         /* TODO: Other subgroup sizes? */
29957ec681f3Smrg         .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
29967ec681f3Smrg      },
29977ec681f3Smrg   };
29987ec681f3Smrg   device->rt_trampoline =
29997ec681f3Smrg      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
30007ec681f3Smrg                                   &trampoline_key, sizeof(trampoline_key),
30017ec681f3Smrg                                   &cache_hit);
30027ec681f3Smrg   if (device->rt_trampoline == NULL) {
30037ec681f3Smrg
30047ec681f3Smrg      void *tmp_ctx = ralloc_context(NULL);
30057ec681f3Smrg      nir_shader *trampoline_nir =
30067ec681f3Smrg         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
30077ec681f3Smrg
30087ec681f3Smrg      struct anv_pipeline_bind_map bind_map = {
30097ec681f3Smrg         .surface_count = 0,
30107ec681f3Smrg         .sampler_count = 0,
30117ec681f3Smrg      };
30127ec681f3Smrg      uint32_t dummy_params[4] = { 0, };
30137ec681f3Smrg      struct brw_cs_prog_data trampoline_prog_data = {
30147ec681f3Smrg         .base.nr_params = 4,
30157ec681f3Smrg         .base.param = dummy_params,
30167ec681f3Smrg         .uses_inline_data = true,
30177ec681f3Smrg         .uses_btd_stack_ids = true,
30187ec681f3Smrg      };
30197ec681f3Smrg      struct brw_compile_cs_params params = {
30207ec681f3Smrg         .nir = trampoline_nir,
30217ec681f3Smrg         .key = &trampoline_key.key,
30227ec681f3Smrg         .prog_data = &trampoline_prog_data,
30237ec681f3Smrg         .log_data = device,
30247ec681f3Smrg      };
30257ec681f3Smrg      const unsigned *tramp_data =
30267ec681f3Smrg         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
30277ec681f3Smrg
30287ec681f3Smrg      device->rt_trampoline =
30297ec681f3Smrg         anv_device_upload_kernel(device, &device->default_pipeline_cache,
30307ec681f3Smrg                                  MESA_SHADER_COMPUTE,
30317ec681f3Smrg                                  &trampoline_key, sizeof(trampoline_key),
30327ec681f3Smrg                                  tramp_data,
30337ec681f3Smrg                                  trampoline_prog_data.base.program_size,
30347ec681f3Smrg                                  &trampoline_prog_data.base,
30357ec681f3Smrg                                  sizeof(trampoline_prog_data),
30367ec681f3Smrg                                  NULL, 0, NULL, &bind_map);
30377ec681f3Smrg
30387ec681f3Smrg      ralloc_free(tmp_ctx);
30397ec681f3Smrg
30407ec681f3Smrg      if (device->rt_trampoline == NULL)
30417ec681f3Smrg         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
30427ec681f3Smrg   }
30437ec681f3Smrg
30447ec681f3Smrg   struct brw_rt_trivial_return {
30457ec681f3Smrg      char name[16];
30467ec681f3Smrg      struct brw_bs_prog_key key;
30477ec681f3Smrg   } return_key = {
30487ec681f3Smrg      .name = "rt-trivial-ret",
30497ec681f3Smrg   };
30507ec681f3Smrg   device->rt_trivial_return =
30517ec681f3Smrg      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
30527ec681f3Smrg                                   &return_key, sizeof(return_key),
30537ec681f3Smrg                                   &cache_hit);
30547ec681f3Smrg   if (device->rt_trivial_return == NULL) {
30557ec681f3Smrg      void *tmp_ctx = ralloc_context(NULL);
30567ec681f3Smrg      nir_shader *trivial_return_nir =
30577ec681f3Smrg         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
30587ec681f3Smrg
30597ec681f3Smrg      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
30607ec681f3Smrg
30617ec681f3Smrg      struct anv_pipeline_bind_map bind_map = {
30627ec681f3Smrg         .surface_count = 0,
30637ec681f3Smrg         .sampler_count = 0,
30647ec681f3Smrg      };
30657ec681f3Smrg      struct brw_bs_prog_data return_prog_data = { 0, };
30667ec681f3Smrg      const unsigned *return_data =
30677ec681f3Smrg         brw_compile_bs(device->physical->compiler, device, tmp_ctx,
30687ec681f3Smrg                        &return_key.key, &return_prog_data, trivial_return_nir,
30697ec681f3Smrg                        0, 0, NULL, NULL);
30707ec681f3Smrg
30717ec681f3Smrg      device->rt_trivial_return =
30727ec681f3Smrg         anv_device_upload_kernel(device, &device->default_pipeline_cache,
30737ec681f3Smrg                                  MESA_SHADER_CALLABLE,
30747ec681f3Smrg                                  &return_key, sizeof(return_key),
30757ec681f3Smrg                                  return_data, return_prog_data.base.program_size,
30767ec681f3Smrg                                  &return_prog_data.base, sizeof(return_prog_data),
30777ec681f3Smrg                                  NULL, 0, NULL, &bind_map);
30787ec681f3Smrg
30797ec681f3Smrg      ralloc_free(tmp_ctx);
30807ec681f3Smrg
30817ec681f3Smrg      if (device->rt_trivial_return == NULL) {
30827ec681f3Smrg         anv_shader_bin_unref(device, device->rt_trampoline);
30837ec681f3Smrg         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
30847ec681f3Smrg      }
30857ec681f3Smrg   }
30867ec681f3Smrg
30877ec681f3Smrg   return VK_SUCCESS;
30887ec681f3Smrg}
30897ec681f3Smrg
30907ec681f3Smrgvoid
30917ec681f3Smrganv_device_finish_rt_shaders(struct anv_device *device)
30927ec681f3Smrg{
30937ec681f3Smrg   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
30947ec681f3Smrg      return;
30957ec681f3Smrg
30967ec681f3Smrg   anv_shader_bin_unref(device, device->rt_trampoline);
30977ec681f3Smrg}
30987ec681f3Smrg
30997ec681f3SmrgVkResult
31007ec681f3Smrganv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
31017ec681f3Smrg                              struct anv_device *device,
31027ec681f3Smrg                              struct anv_pipeline_cache *cache,
31037ec681f3Smrg                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
31047ec681f3Smrg                              const VkAllocationCallbacks *alloc)
31057ec681f3Smrg{
31067ec681f3Smrg   VkResult result;
31077ec681f3Smrg
31087ec681f3Smrg   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
31097ec681f3Smrg
31107ec681f3Smrg   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
31117ec681f3Smrg   if (result != VK_SUCCESS)
31127ec681f3Smrg      goto fail;
31137ec681f3Smrg
31147ec681f3Smrg   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
31157ec681f3Smrg
31167ec681f3Smrg   return VK_SUCCESS;
31177ec681f3Smrg
31187ec681f3Smrgfail:
31197ec681f3Smrg   util_dynarray_foreach(&pipeline->shaders,
31207ec681f3Smrg                         struct anv_shader_bin *, shader) {
31217ec681f3Smrg      anv_shader_bin_unref(device, *shader);
31227ec681f3Smrg   }
31237ec681f3Smrg   return result;
31247ec681f3Smrg}
31257ec681f3Smrg
/* Format into a fixed-size char-array field, zeroing it first.  Asserts in
 * debug builds that the formatted text is non-empty and was not truncated.
 * Note: `field` must be an actual array so sizeof(field) is its capacity.
 */
#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})
31317ec681f3Smrg
31327ec681f3SmrgVkResult anv_GetPipelineExecutablePropertiesKHR(
31337ec681f3Smrg    VkDevice                                    device,
31347ec681f3Smrg    const VkPipelineInfoKHR*                    pPipelineInfo,
31357ec681f3Smrg    uint32_t*                                   pExecutableCount,
31367ec681f3Smrg    VkPipelineExecutablePropertiesKHR*          pProperties)
31377ec681f3Smrg{
31387ec681f3Smrg   ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
31397ec681f3Smrg   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
31407ec681f3Smrg
31417ec681f3Smrg   util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
31427ec681f3Smrg      vk_outarray_append(&out, props) {
31437ec681f3Smrg         gl_shader_stage stage = exe->stage;
31447ec681f3Smrg         props->stages = mesa_to_vk_shader_stage(stage);
31457ec681f3Smrg
31467ec681f3Smrg         unsigned simd_width = exe->stats.dispatch_width;
31477ec681f3Smrg         if (stage == MESA_SHADER_FRAGMENT) {
31487ec681f3Smrg            WRITE_STR(props->name, "%s%d %s",
31497ec681f3Smrg                      simd_width ? "SIMD" : "vec",
31507ec681f3Smrg                      simd_width ? simd_width : 4,
31517ec681f3Smrg                      _mesa_shader_stage_to_string(stage));
31527ec681f3Smrg         } else {
31537ec681f3Smrg            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
31547ec681f3Smrg         }
31557ec681f3Smrg         WRITE_STR(props->description, "%s%d %s shader",
31567ec681f3Smrg                   simd_width ? "SIMD" : "vec",
31577ec681f3Smrg                   simd_width ? simd_width : 4,
31587ec681f3Smrg                   _mesa_shader_stage_to_string(stage));
31597ec681f3Smrg
31607ec681f3Smrg         /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
31617ec681f3Smrg          * wants a subgroup size of 1.
31627ec681f3Smrg          */
31637ec681f3Smrg         props->subgroupSize = MAX2(simd_width, 1);
31647ec681f3Smrg      }
31657ec681f3Smrg   }
31667ec681f3Smrg
31677ec681f3Smrg   return vk_outarray_status(&out);
31687ec681f3Smrg}
31697ec681f3Smrg
31707ec681f3Smrgstatic const struct anv_pipeline_executable *
31717ec681f3Smrganv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
31727ec681f3Smrg{
31737ec681f3Smrg   assert(index < util_dynarray_num_elements(&pipeline->executables,
31747ec681f3Smrg                                             struct anv_pipeline_executable));
31757ec681f3Smrg   return util_dynarray_element(
31767ec681f3Smrg      &pipeline->executables, struct anv_pipeline_executable, index);
31777ec681f3Smrg}
31787ec681f3Smrg
31797ec681f3SmrgVkResult anv_GetPipelineExecutableStatisticsKHR(
31807ec681f3Smrg    VkDevice                                    device,
31817ec681f3Smrg    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
31827ec681f3Smrg    uint32_t*                                   pStatisticCount,
31837ec681f3Smrg    VkPipelineExecutableStatisticKHR*           pStatistics)
31847ec681f3Smrg{
31857ec681f3Smrg   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
31867ec681f3Smrg   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
31877ec681f3Smrg
31887ec681f3Smrg   const struct anv_pipeline_executable *exe =
31897ec681f3Smrg      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
31907ec681f3Smrg
31917ec681f3Smrg   const struct brw_stage_prog_data *prog_data;
31927ec681f3Smrg   switch (pipeline->type) {
31937ec681f3Smrg   case ANV_PIPELINE_GRAPHICS: {
31947ec681f3Smrg      prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
31957ec681f3Smrg      break;
31967ec681f3Smrg   }
31977ec681f3Smrg   case ANV_PIPELINE_COMPUTE: {
31987ec681f3Smrg      prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
31997ec681f3Smrg      break;
32007ec681f3Smrg   }
32017ec681f3Smrg   default:
32027ec681f3Smrg      unreachable("invalid pipeline type");
32037ec681f3Smrg   }
32047ec681f3Smrg
32057ec681f3Smrg   vk_outarray_append(&out, stat) {
32067ec681f3Smrg      WRITE_STR(stat->name, "Instruction Count");
32077ec681f3Smrg      WRITE_STR(stat->description,
32087ec681f3Smrg                "Number of GEN instructions in the final generated "
32097ec681f3Smrg                "shader executable.");
32107ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32117ec681f3Smrg      stat->value.u64 = exe->stats.instructions;
32127ec681f3Smrg   }
32137ec681f3Smrg
32147ec681f3Smrg   vk_outarray_append(&out, stat) {
32157ec681f3Smrg      WRITE_STR(stat->name, "SEND Count");
32167ec681f3Smrg      WRITE_STR(stat->description,
32177ec681f3Smrg                "Number of instructions in the final generated shader "
32187ec681f3Smrg                "executable which access external units such as the "
32197ec681f3Smrg                "constant cache or the sampler.");
32207ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32217ec681f3Smrg      stat->value.u64 = exe->stats.sends;
32227ec681f3Smrg   }
32237ec681f3Smrg
32247ec681f3Smrg   vk_outarray_append(&out, stat) {
32257ec681f3Smrg      WRITE_STR(stat->name, "Loop Count");
32267ec681f3Smrg      WRITE_STR(stat->description,
32277ec681f3Smrg                "Number of loops (not unrolled) in the final generated "
32287ec681f3Smrg                "shader executable.");
32297ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32307ec681f3Smrg      stat->value.u64 = exe->stats.loops;
32317ec681f3Smrg   }
32327ec681f3Smrg
32337ec681f3Smrg   vk_outarray_append(&out, stat) {
32347ec681f3Smrg      WRITE_STR(stat->name, "Cycle Count");
32357ec681f3Smrg      WRITE_STR(stat->description,
32367ec681f3Smrg                "Estimate of the number of EU cycles required to execute "
32377ec681f3Smrg                "the final generated executable.  This is an estimate only "
32387ec681f3Smrg                "and may vary greatly from actual run-time performance.");
32397ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32407ec681f3Smrg      stat->value.u64 = exe->stats.cycles;
32417ec681f3Smrg   }
32427ec681f3Smrg
32437ec681f3Smrg   vk_outarray_append(&out, stat) {
32447ec681f3Smrg      WRITE_STR(stat->name, "Spill Count");
32457ec681f3Smrg      WRITE_STR(stat->description,
32467ec681f3Smrg                "Number of scratch spill operations.  This gives a rough "
32477ec681f3Smrg                "estimate of the cost incurred due to spilling temporary "
32487ec681f3Smrg                "values to memory.  If this is non-zero, you may want to "
32497ec681f3Smrg                "adjust your shader to reduce register pressure.");
32507ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32517ec681f3Smrg      stat->value.u64 = exe->stats.spills;
32527ec681f3Smrg   }
32537ec681f3Smrg
32547ec681f3Smrg   vk_outarray_append(&out, stat) {
32557ec681f3Smrg      WRITE_STR(stat->name, "Fill Count");
32567ec681f3Smrg      WRITE_STR(stat->description,
32577ec681f3Smrg                "Number of scratch fill operations.  This gives a rough "
32587ec681f3Smrg                "estimate of the cost incurred due to spilling temporary "
32597ec681f3Smrg                "values to memory.  If this is non-zero, you may want to "
32607ec681f3Smrg                "adjust your shader to reduce register pressure.");
32617ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32627ec681f3Smrg      stat->value.u64 = exe->stats.fills;
32637ec681f3Smrg   }
32647ec681f3Smrg
32657ec681f3Smrg   vk_outarray_append(&out, stat) {
32667ec681f3Smrg      WRITE_STR(stat->name, "Scratch Memory Size");
32677ec681f3Smrg      WRITE_STR(stat->description,
32687ec681f3Smrg                "Number of bytes of scratch memory required by the "
32697ec681f3Smrg                "generated shader executable.  If this is non-zero, you "
32707ec681f3Smrg                "may want to adjust your shader to reduce register "
32717ec681f3Smrg                "pressure.");
32727ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32737ec681f3Smrg      stat->value.u64 = prog_data->total_scratch;
32747ec681f3Smrg   }
32757ec681f3Smrg
32767ec681f3Smrg   if (gl_shader_stage_uses_workgroup(exe->stage)) {
32777ec681f3Smrg      vk_outarray_append(&out, stat) {
32787ec681f3Smrg         WRITE_STR(stat->name, "Workgroup Memory Size");
32797ec681f3Smrg         WRITE_STR(stat->description,
32807ec681f3Smrg                   "Number of bytes of workgroup shared memory used by this "
32817ec681f3Smrg                   "shader including any padding.");
32827ec681f3Smrg         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
32837ec681f3Smrg         stat->value.u64 = prog_data->total_shared;
32847ec681f3Smrg      }
32857ec681f3Smrg   }
32867ec681f3Smrg
32877ec681f3Smrg   return vk_outarray_status(&out);
32887ec681f3Smrg}
32897ec681f3Smrg
32907ec681f3Smrgstatic bool
32917ec681f3Smrgwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
32927ec681f3Smrg              const char *data)
32937ec681f3Smrg{
32947ec681f3Smrg   ir->isText = VK_TRUE;
32957ec681f3Smrg
32967ec681f3Smrg   size_t data_len = strlen(data) + 1;
32977ec681f3Smrg
32987ec681f3Smrg   if (ir->pData == NULL) {
32997ec681f3Smrg      ir->dataSize = data_len;
33007ec681f3Smrg      return true;
33017ec681f3Smrg   }
33027ec681f3Smrg
33037ec681f3Smrg   strncpy(ir->pData, data, ir->dataSize);
33047ec681f3Smrg   if (ir->dataSize < data_len)
33057ec681f3Smrg      return false;
33067ec681f3Smrg
33077ec681f3Smrg   ir->dataSize = data_len;
33087ec681f3Smrg   return true;
33097ec681f3Smrg}
33107ec681f3Smrg
/* Expose the internal representations recorded for one executable: the final
 * NIR (if kept) and the generated GEN assembly (if kept).  Returns
 * VK_INCOMPLETE when a caller-provided buffer was too small for any text.
 */
VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pInternalRepresentations,
                    pInternalRepresentationCount);
   /* Set when any write_ir_text() call truncated its output. */
   bool incomplete_text = false;

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   /* exe->nir is only non-NULL when the pipeline kept its NIR text around
    * (e.g. when capture of internal representations was requested).
    */
   if (exe->nir) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "GEN Assembly");
         WRITE_STR(ir->description,
                   "Final GEN assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   /* Truncation takes precedence over the outarray's own status. */
   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}
33497ec681f3Smrg
33507ec681f3SmrgVkResult
33517ec681f3Smrganv_GetRayTracingShaderGroupHandlesKHR(
33527ec681f3Smrg    VkDevice                                    _device,
33537ec681f3Smrg    VkPipeline                                  _pipeline,
33547ec681f3Smrg    uint32_t                                    firstGroup,
33557ec681f3Smrg    uint32_t                                    groupCount,
33567ec681f3Smrg    size_t                                      dataSize,
33577ec681f3Smrg    void*                                       pData)
33587ec681f3Smrg{
33597ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
33607ec681f3Smrg   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
33617ec681f3Smrg
33627ec681f3Smrg   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
33637ec681f3Smrg      return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
33647ec681f3Smrg
33657ec681f3Smrg   struct anv_ray_tracing_pipeline *rt_pipeline =
33667ec681f3Smrg      anv_pipeline_to_ray_tracing(pipeline);
33677ec681f3Smrg
33687ec681f3Smrg   for (uint32_t i = 0; i < groupCount; i++) {
33697ec681f3Smrg      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
33707ec681f3Smrg      memcpy(pData, group->handle, sizeof(group->handle));
33717ec681f3Smrg      pData += sizeof(group->handle);
33727ec681f3Smrg   }
33737ec681f3Smrg
33747ec681f3Smrg   return VK_SUCCESS;
33757ec681f3Smrg}
33767ec681f3Smrg
/* Capture/replay of shader group handles is not implemented.  Debug builds
 * abort via unreachable(); release builds fall through to the error return.
 */
VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
33907ec681f3Smrg
33917ec681f3SmrgVkDeviceSize
33927ec681f3Smrganv_GetRayTracingShaderGroupStackSizeKHR(
33937ec681f3Smrg    VkDevice                                    device,
33947ec681f3Smrg    VkPipeline                                  _pipeline,
33957ec681f3Smrg    uint32_t                                    group,
33967ec681f3Smrg    VkShaderGroupShaderKHR                      groupShader)
33977ec681f3Smrg{
33987ec681f3Smrg   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
33997ec681f3Smrg   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
34007ec681f3Smrg
34017ec681f3Smrg   struct anv_ray_tracing_pipeline *rt_pipeline =
34027ec681f3Smrg      anv_pipeline_to_ray_tracing(pipeline);
34037ec681f3Smrg
34047ec681f3Smrg   assert(group < rt_pipeline->group_count);
34057ec681f3Smrg
34067ec681f3Smrg   struct anv_shader_bin *bin;
34077ec681f3Smrg   switch (groupShader) {
34087ec681f3Smrg   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
34097ec681f3Smrg      bin = rt_pipeline->groups[group].general;
34107ec681f3Smrg      break;
34117ec681f3Smrg
34127ec681f3Smrg   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
34137ec681f3Smrg      bin = rt_pipeline->groups[group].closest_hit;
34147ec681f3Smrg      break;
34157ec681f3Smrg
34167ec681f3Smrg   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
34177ec681f3Smrg      bin = rt_pipeline->groups[group].any_hit;
34187ec681f3Smrg      break;
34197ec681f3Smrg
34207ec681f3Smrg   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
34217ec681f3Smrg      bin = rt_pipeline->groups[group].intersection;
34227ec681f3Smrg      break;
34237ec681f3Smrg
34247ec681f3Smrg   default:
34257ec681f3Smrg      unreachable("Invalid VkShaderGroupShader enum");
34267ec681f3Smrg   }
34277ec681f3Smrg
34287ec681f3Smrg   if (bin == NULL)
34297ec681f3Smrg      return 0;
34307ec681f3Smrg
34317ec681f3Smrg   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
34327ec681f3Smrg}
3433