17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2021 Google
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "radv_acceleration_structure.h"
257ec681f3Smrg#include "radv_debug.h"
267ec681f3Smrg#include "radv_private.h"
277ec681f3Smrg#include "radv_shader.h"
287ec681f3Smrg
297ec681f3Smrg#include "nir/nir.h"
307ec681f3Smrg#include "nir/nir_builder.h"
317ec681f3Smrg#include "nir/nir_builtin_builder.h"
327ec681f3Smrg
337ec681f3Smrgstatic VkRayTracingPipelineCreateInfoKHR
347ec681f3Smrgradv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
357ec681f3Smrg{
367ec681f3Smrg   VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo;
377ec681f3Smrg   uint32_t total_stages = pCreateInfo->stageCount;
387ec681f3Smrg   uint32_t total_groups = pCreateInfo->groupCount;
397ec681f3Smrg
407ec681f3Smrg   if (pCreateInfo->pLibraryInfo) {
417ec681f3Smrg      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
427ec681f3Smrg         RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]);
437ec681f3Smrg         total_stages += library->library.stage_count;
447ec681f3Smrg         total_groups += library->library.group_count;
457ec681f3Smrg      }
467ec681f3Smrg   }
477ec681f3Smrg   VkPipelineShaderStageCreateInfo *stages = NULL;
487ec681f3Smrg   VkRayTracingShaderGroupCreateInfoKHR *groups = NULL;
497ec681f3Smrg   local_create_info.stageCount = total_stages;
507ec681f3Smrg   local_create_info.groupCount = total_groups;
517ec681f3Smrg   local_create_info.pStages = stages =
527ec681f3Smrg      malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages);
537ec681f3Smrg   local_create_info.pGroups = groups =
547ec681f3Smrg      malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups);
557ec681f3Smrg   if (!local_create_info.pStages || !local_create_info.pGroups)
567ec681f3Smrg      return local_create_info;
577ec681f3Smrg
587ec681f3Smrg   total_stages = pCreateInfo->stageCount;
597ec681f3Smrg   total_groups = pCreateInfo->groupCount;
607ec681f3Smrg   for (unsigned j = 0; j < pCreateInfo->stageCount; ++j)
617ec681f3Smrg      stages[j] = pCreateInfo->pStages[j];
627ec681f3Smrg   for (unsigned j = 0; j < pCreateInfo->groupCount; ++j)
637ec681f3Smrg      groups[j] = pCreateInfo->pGroups[j];
647ec681f3Smrg
657ec681f3Smrg   if (pCreateInfo->pLibraryInfo) {
667ec681f3Smrg      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
677ec681f3Smrg         RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]);
687ec681f3Smrg         for (unsigned j = 0; j < library->library.stage_count; ++j)
697ec681f3Smrg            stages[total_stages + j] = library->library.stages[j];
707ec681f3Smrg         for (unsigned j = 0; j < library->library.group_count; ++j) {
717ec681f3Smrg            VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j];
727ec681f3Smrg            *dst = library->library.groups[j];
737ec681f3Smrg            if (dst->generalShader != VK_SHADER_UNUSED_KHR)
747ec681f3Smrg               dst->generalShader += total_stages;
757ec681f3Smrg            if (dst->closestHitShader != VK_SHADER_UNUSED_KHR)
767ec681f3Smrg               dst->closestHitShader += total_stages;
777ec681f3Smrg            if (dst->anyHitShader != VK_SHADER_UNUSED_KHR)
787ec681f3Smrg               dst->anyHitShader += total_stages;
797ec681f3Smrg            if (dst->intersectionShader != VK_SHADER_UNUSED_KHR)
807ec681f3Smrg               dst->intersectionShader += total_stages;
817ec681f3Smrg         }
827ec681f3Smrg         total_stages += library->library.stage_count;
837ec681f3Smrg         total_groups += library->library.group_count;
847ec681f3Smrg      }
857ec681f3Smrg   }
867ec681f3Smrg   return local_create_info;
877ec681f3Smrg}
887ec681f3Smrg
897ec681f3Smrgstatic VkResult
907ec681f3Smrgradv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache,
917ec681f3Smrg                                const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
927ec681f3Smrg                                const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
937ec681f3Smrg{
947ec681f3Smrg   RADV_FROM_HANDLE(radv_device, device, _device);
957ec681f3Smrg   struct radv_pipeline *pipeline;
967ec681f3Smrg
977ec681f3Smrg   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
987ec681f3Smrg                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
997ec681f3Smrg   if (pipeline == NULL)
1007ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1017ec681f3Smrg
1027ec681f3Smrg   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
1037ec681f3Smrg   pipeline->type = RADV_PIPELINE_LIBRARY;
1047ec681f3Smrg
1057ec681f3Smrg   VkRayTracingPipelineCreateInfoKHR local_create_info =
1067ec681f3Smrg      radv_create_merged_rt_create_info(pCreateInfo);
1077ec681f3Smrg   if (!local_create_info.pStages || !local_create_info.pGroups)
1087ec681f3Smrg      goto fail;
1097ec681f3Smrg
1107ec681f3Smrg   if (local_create_info.stageCount) {
1117ec681f3Smrg      size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount;
1127ec681f3Smrg      pipeline->library.stage_count = local_create_info.stageCount;
1137ec681f3Smrg      pipeline->library.stages = malloc(size);
1147ec681f3Smrg      if (!pipeline->library.stages)
1157ec681f3Smrg         goto fail;
1167ec681f3Smrg      memcpy(pipeline->library.stages, local_create_info.pStages, size);
1177ec681f3Smrg   }
1187ec681f3Smrg
1197ec681f3Smrg   if (local_create_info.groupCount) {
1207ec681f3Smrg      size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount;
1217ec681f3Smrg      pipeline->library.group_count = local_create_info.groupCount;
1227ec681f3Smrg      pipeline->library.groups = malloc(size);
1237ec681f3Smrg      if (!pipeline->library.groups)
1247ec681f3Smrg         goto fail;
1257ec681f3Smrg      memcpy(pipeline->library.groups, local_create_info.pGroups, size);
1267ec681f3Smrg   }
1277ec681f3Smrg
1287ec681f3Smrg   *pPipeline = radv_pipeline_to_handle(pipeline);
1297ec681f3Smrg
1307ec681f3Smrg   free((void *)local_create_info.pGroups);
1317ec681f3Smrg   free((void *)local_create_info.pStages);
1327ec681f3Smrg   return VK_SUCCESS;
1337ec681f3Smrgfail:
1347ec681f3Smrg   free(pipeline->library.groups);
1357ec681f3Smrg   free(pipeline->library.stages);
1367ec681f3Smrg   free((void *)local_create_info.pGroups);
1377ec681f3Smrg   free((void *)local_create_info.pStages);
1387ec681f3Smrg   return VK_ERROR_OUT_OF_HOST_MEMORY;
1397ec681f3Smrg}
1407ec681f3Smrg
/*
 * Global variables for an RT pipeline: the state shared by the big
 * shader-dispatch loop that all RT stages are inlined into.
 */
struct rt_variables {
   /* idx of the next shader to run in the next iteration of the main loop */
   nir_variable *idx;

   /* scratch offset of the argument area relative to stack_ptr */
   nir_variable *arg;

   /* current scratch stack pointer (byte offset into scratch) */
   nir_variable *stack_ptr;

   /* global address of the SBT entry used for the shader */
   nir_variable *shader_record_ptr;

   /* trace_ray arguments (mirrors the OpTraceRayKHR operand list) */
   nir_variable *accel_struct;
   nir_variable *flags;
   nir_variable *cull_mask;
   nir_variable *sbt_offset;
   nir_variable *sbt_stride;
   nir_variable *miss_index;
   nir_variable *origin;
   nir_variable *tmin;
   nir_variable *direction;
   nir_variable *tmax;

   /* from the BTAS instance currently being visited */
   nir_variable *custom_instance_and_mask;

   /* Properties of the primitive currently being visited. */
   nir_variable *primitive_id;
   nir_variable *geometry_id_and_flags;
   nir_variable *instance_id;
   nir_variable *instance_addr;
   nir_variable *hit_kind;
   nir_variable *opaque;

   /* Safeguard to ensure we don't end up in an infinite loop of non-existing case. Should not be
    * needed but is extra anti-hang safety during bring-up. */
   nir_variable *main_loop_case_visited;

   /* Output variable for intersection & anyhit shaders. */
   nir_variable *ahit_status;

   /* Array of stack size struct for recording the max stack size for each group.
    * group_idx selects the entry currently being accumulated into. */
   struct radv_pipeline_shader_stack_size *stack_sizes;
   unsigned group_idx;
};
1907ec681f3Smrg
1917ec681f3Smrgstatic struct rt_variables
1927ec681f3Smrgcreate_rt_variables(nir_shader *shader, struct radv_pipeline_shader_stack_size *stack_sizes)
1937ec681f3Smrg{
1947ec681f3Smrg   struct rt_variables vars = {
1957ec681f3Smrg      NULL,
1967ec681f3Smrg   };
1977ec681f3Smrg   vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx");
1987ec681f3Smrg   vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg");
1997ec681f3Smrg   vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
2007ec681f3Smrg   vars.shader_record_ptr =
2017ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
2027ec681f3Smrg
2037ec681f3Smrg   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
2047ec681f3Smrg   vars.accel_struct =
2057ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct");
2067ec681f3Smrg   vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags");
2077ec681f3Smrg   vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask");
2087ec681f3Smrg   vars.sbt_offset =
2097ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset");
2107ec681f3Smrg   vars.sbt_stride =
2117ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride");
2127ec681f3Smrg   vars.miss_index =
2137ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index");
2147ec681f3Smrg   vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin");
2157ec681f3Smrg   vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin");
2167ec681f3Smrg   vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction");
2177ec681f3Smrg   vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax");
2187ec681f3Smrg
2197ec681f3Smrg   vars.custom_instance_and_mask = nir_variable_create(
2207ec681f3Smrg      shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask");
2217ec681f3Smrg   vars.primitive_id =
2227ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
2237ec681f3Smrg   vars.geometry_id_and_flags =
2247ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags");
2257ec681f3Smrg   vars.instance_id =
2267ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id");
2277ec681f3Smrg   vars.instance_addr =
2287ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
2297ec681f3Smrg   vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind");
2307ec681f3Smrg   vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque");
2317ec681f3Smrg
2327ec681f3Smrg   vars.main_loop_case_visited =
2337ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "main_loop_case_visited");
2347ec681f3Smrg   vars.ahit_status =
2357ec681f3Smrg      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ahit_status");
2367ec681f3Smrg
2377ec681f3Smrg   vars.stack_sizes = stack_sizes;
2387ec681f3Smrg   return vars;
2397ec681f3Smrg}
2407ec681f3Smrg
2417ec681f3Smrg/*
2427ec681f3Smrg * Remap all the variables between the two rt_variables struct for inlining.
2437ec681f3Smrg */
2447ec681f3Smrgstatic void
2457ec681f3Smrgmap_rt_variables(struct hash_table *var_remap, struct rt_variables *src,
2467ec681f3Smrg                 const struct rt_variables *dst)
2477ec681f3Smrg{
2487ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->idx, dst->idx);
2497ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->arg, dst->arg);
2507ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr);
2517ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr);
2527ec681f3Smrg
2537ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct);
2547ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->flags, dst->flags);
2557ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask);
2567ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset);
2577ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride);
2587ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index);
2597ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->origin, dst->origin);
2607ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin);
2617ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->direction, dst->direction);
2627ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax);
2637ec681f3Smrg
2647ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask);
2657ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id);
2667ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags);
2677ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id);
2687ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr);
2697ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind);
2707ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque);
2717ec681f3Smrg   _mesa_hash_table_insert(var_remap, src->ahit_status, dst->ahit_status);
2727ec681f3Smrg
2737ec681f3Smrg   src->stack_sizes = dst->stack_sizes;
2747ec681f3Smrg   src->group_idx = dst->group_idx;
2757ec681f3Smrg}
2767ec681f3Smrg
2777ec681f3Smrg/*
2787ec681f3Smrg * Create a copy of the global rt variables where the primitive/instance related variables are
2797ec681f3Smrg * independent.This is needed as we need to keep the old values of the global variables around
2807ec681f3Smrg * in case e.g. an anyhit shader reject the collision. So there are inner variables that get copied
2817ec681f3Smrg * to the outer variables once we commit to a better hit.
2827ec681f3Smrg */
2837ec681f3Smrgstatic struct rt_variables
2847ec681f3Smrgcreate_inner_vars(nir_builder *b, const struct rt_variables *vars)
2857ec681f3Smrg{
2867ec681f3Smrg   struct rt_variables inner_vars = *vars;
2877ec681f3Smrg   inner_vars.idx =
2887ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
2897ec681f3Smrg   inner_vars.shader_record_ptr = nir_variable_create(
2907ec681f3Smrg      b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
2917ec681f3Smrg   inner_vars.primitive_id =
2927ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id");
2937ec681f3Smrg   inner_vars.geometry_id_and_flags = nir_variable_create(
2947ec681f3Smrg      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags");
2957ec681f3Smrg   inner_vars.tmax =
2967ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax");
2977ec681f3Smrg   inner_vars.instance_id =
2987ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id");
2997ec681f3Smrg   inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp,
3007ec681f3Smrg                                                  glsl_uint64_t_type(), "inner_instance_addr");
3017ec681f3Smrg   inner_vars.hit_kind =
3027ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind");
3037ec681f3Smrg   inner_vars.custom_instance_and_mask = nir_variable_create(
3047ec681f3Smrg      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask");
3057ec681f3Smrg
3067ec681f3Smrg   return inner_vars;
3077ec681f3Smrg}
3087ec681f3Smrg
/* The hit attributes are stored on the stack. This is the offset compared to the current stack
 * pointer of where the hit attrib is stored: the attribute area sits below the stack pointer,
 * past a 16-byte slot (the same 16-byte frame insert_rt_return pops). The value is negative;
 * storing it in a uint32_t deliberately relies on two's-complement wraparound so that adding it
 * with unsigned arithmetic effectively subtracts. */
const uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE);
3127ec681f3Smrg
3137ec681f3Smrgstatic void
3147ec681f3Smrginsert_rt_return(nir_builder *b, const struct rt_variables *vars)
3157ec681f3Smrg{
3167ec681f3Smrg   nir_store_var(b, vars->stack_ptr,
3177ec681f3Smrg                 nir_iadd(b, nir_load_var(b, vars->stack_ptr), nir_imm_int(b, -16)), 1);
3187ec681f3Smrg   nir_store_var(b, vars->idx,
3197ec681f3Smrg                 nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1);
3207ec681f3Smrg}
3217ec681f3Smrg
/* Which shader binding table region to index; passed to nir_load_sbt_amd as
 * the .binding selector. */
enum sbt_type {
   SBT_RAYGEN,
   SBT_MISS,
   SBT_HIT,
   SBT_CALLABLE,
};
3287ec681f3Smrg
3297ec681f3Smrgstatic nir_ssa_def *
3307ec681f3Smrgget_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding)
3317ec681f3Smrg{
3327ec681f3Smrg   nir_ssa_def *desc = nir_load_sbt_amd(b, 4, .binding = binding);
3337ec681f3Smrg   nir_ssa_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
3347ec681f3Smrg   nir_ssa_def *stride = nir_channel(b, desc, 2);
3357ec681f3Smrg
3367ec681f3Smrg   nir_ssa_def *ret = nir_imul(b, idx, stride);
3377ec681f3Smrg   ret = nir_iadd(b, base_addr, nir_u2u64(b, ret));
3387ec681f3Smrg
3397ec681f3Smrg   return ret;
3407ec681f3Smrg}
3417ec681f3Smrg
3427ec681f3Smrgstatic void
3437ec681f3Smrgload_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx,
3447ec681f3Smrg               enum sbt_type binding, unsigned offset)
3457ec681f3Smrg{
3467ec681f3Smrg   nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);
3477ec681f3Smrg
3487ec681f3Smrg   nir_ssa_def *load_addr = addr;
3497ec681f3Smrg   if (offset)
3507ec681f3Smrg      load_addr = nir_iadd(b, load_addr, nir_imm_int64(b, offset));
3517ec681f3Smrg   nir_ssa_def *v_idx =
3527ec681f3Smrg      nir_build_load_global(b, 1, 32, load_addr, .align_mul = 4, .align_offset = 0);
3537ec681f3Smrg
3547ec681f3Smrg   nir_store_var(b, vars->idx, v_idx, 1);
3557ec681f3Smrg
3567ec681f3Smrg   nir_ssa_def *record_addr = nir_iadd(b, addr, nir_imm_int64(b, RADV_RT_HANDLE_SIZE));
3577ec681f3Smrg   nir_store_var(b, vars->shader_record_ptr, record_addr, 1);
3587ec681f3Smrg}
3597ec681f3Smrg
/* Multiply a vec3 by a 3x4 row-major matrix (each matrix[i] is one vec4 row).
 * If `translation` is true, column 3 of each row is added in (full affine
 * transform); otherwise only the 3x3 rotation/scale part is applied. */
static nir_ssa_def *
nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation)
{
   /* Seed each result component with the row's translation column. */
   nir_ssa_def *result_components[3] = {
      nir_channel(b, matrix[0], 3),
      nir_channel(b, matrix[1], 3),
      nir_channel(b, matrix[2], 3),
   };
   for (unsigned i = 0; i < 3; ++i) {
      for (unsigned j = 0; j < 3; ++j) {
         nir_ssa_def *v =
            nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
         /* When !translation, the j == 0 iteration OVERWRITES the seeded
          * translation component instead of accumulating onto it; every later
          * term (and every term when translation is requested) accumulates. */
         result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
      }
   }
   return nir_vec(b, result_components, 3);
}
3777ec681f3Smrg
3787ec681f3Smrgstatic nir_ssa_def *
3797ec681f3Smrgnir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[])
3807ec681f3Smrg{
3817ec681f3Smrg   nir_ssa_def *result_components[3] = {
3827ec681f3Smrg      nir_channel(b, matrix[0], 3),
3837ec681f3Smrg      nir_channel(b, matrix[1], 3),
3847ec681f3Smrg      nir_channel(b, matrix[2], 3),
3857ec681f3Smrg   };
3867ec681f3Smrg   return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix,
3877ec681f3Smrg                                  false);
3887ec681f3Smrg}
3897ec681f3Smrg
3907ec681f3Smrgstatic void
3917ec681f3Smrgnir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
3927ec681f3Smrg{
3937ec681f3Smrg   unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
3947ec681f3Smrg   for (unsigned i = 0; i < 3; ++i) {
3957ec681f3Smrg      out[i] = nir_build_load_global(b, 4, 32,
3967ec681f3Smrg                                     nir_iadd(b, instance_addr, nir_imm_int64(b, offset + i * 16)),
3977ec681f3Smrg                                     .align_mul = 64, .align_offset = offset + i * 16);
3987ec681f3Smrg   }
3997ec681f3Smrg}
4007ec681f3Smrg
4017ec681f3Smrg/* This lowers all the RT instructions that we do not want to pass on to the combined shader and
4027ec681f3Smrg * that we can implement using the variables from the shader we are going to inline into. */
4037ec681f3Smrgstatic void
4047ec681f3Smrglower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base)
4057ec681f3Smrg{
4067ec681f3Smrg   nir_builder b_shader;
4077ec681f3Smrg   nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader));
4087ec681f3Smrg
4097ec681f3Smrg   nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
4107ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
4117ec681f3Smrg         switch (instr->type) {
4127ec681f3Smrg         case nir_instr_type_intrinsic: {
4137ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4147ec681f3Smrg            switch (intr->intrinsic) {
4157ec681f3Smrg            case nir_intrinsic_rt_execute_callable: {
4167ec681f3Smrg               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
4177ec681f3Smrg               uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
4187ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
4197ec681f3Smrg
4207ec681f3Smrg               nir_store_var(&b_shader, vars->stack_ptr,
4217ec681f3Smrg                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
4227ec681f3Smrg                                      nir_imm_int(&b_shader, size)),
4237ec681f3Smrg                             1);
4247ec681f3Smrg               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret),
4257ec681f3Smrg                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16,
4267ec681f3Smrg                                 .write_mask = 1);
4277ec681f3Smrg
4287ec681f3Smrg               nir_store_var(&b_shader, vars->stack_ptr,
4297ec681f3Smrg                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
4307ec681f3Smrg                                      nir_imm_int(&b_shader, 16)),
4317ec681f3Smrg                             1);
4327ec681f3Smrg               load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0);
4337ec681f3Smrg
4347ec681f3Smrg               nir_store_var(
4357ec681f3Smrg                  &b_shader, vars->arg,
4367ec681f3Smrg                  nir_isub(&b_shader, intr->src[1].ssa, nir_imm_int(&b_shader, size + 16)), 1);
4377ec681f3Smrg
4387ec681f3Smrg               vars->stack_sizes[vars->group_idx].recursive_size =
4397ec681f3Smrg                  MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16);
4407ec681f3Smrg               break;
4417ec681f3Smrg            }
4427ec681f3Smrg            case nir_intrinsic_rt_trace_ray: {
4437ec681f3Smrg               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
4447ec681f3Smrg               uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
4457ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
4467ec681f3Smrg
4477ec681f3Smrg               nir_store_var(&b_shader, vars->stack_ptr,
4487ec681f3Smrg                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
4497ec681f3Smrg                                      nir_imm_int(&b_shader, size)),
4507ec681f3Smrg                             1);
4517ec681f3Smrg               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret),
4527ec681f3Smrg                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16,
4537ec681f3Smrg                                 .write_mask = 1);
4547ec681f3Smrg
4557ec681f3Smrg               nir_store_var(&b_shader, vars->stack_ptr,
4567ec681f3Smrg                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
4577ec681f3Smrg                                      nir_imm_int(&b_shader, 16)),
4587ec681f3Smrg                             1);
4597ec681f3Smrg
4607ec681f3Smrg               nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1);
4617ec681f3Smrg               nir_store_var(
4627ec681f3Smrg                  &b_shader, vars->arg,
4637ec681f3Smrg                  nir_isub(&b_shader, intr->src[10].ssa, nir_imm_int(&b_shader, size + 16)), 1);
4647ec681f3Smrg
4657ec681f3Smrg               vars->stack_sizes[vars->group_idx].recursive_size =
4667ec681f3Smrg                  MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16);
4677ec681f3Smrg
4687ec681f3Smrg               /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
4697ec681f3Smrg               nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1);
4707ec681f3Smrg               nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1);
4717ec681f3Smrg               nir_store_var(&b_shader, vars->cull_mask,
4727ec681f3Smrg                             nir_iand(&b_shader, intr->src[2].ssa, nir_imm_int(&b_shader, 0xff)),
4737ec681f3Smrg                             0x1);
4747ec681f3Smrg               nir_store_var(&b_shader, vars->sbt_offset,
4757ec681f3Smrg                             nir_iand(&b_shader, intr->src[3].ssa, nir_imm_int(&b_shader, 0xf)),
4767ec681f3Smrg                             0x1);
4777ec681f3Smrg               nir_store_var(&b_shader, vars->sbt_stride,
4787ec681f3Smrg                             nir_iand(&b_shader, intr->src[4].ssa, nir_imm_int(&b_shader, 0xf)),
4797ec681f3Smrg                             0x1);
4807ec681f3Smrg               nir_store_var(&b_shader, vars->miss_index,
4817ec681f3Smrg                             nir_iand(&b_shader, intr->src[5].ssa, nir_imm_int(&b_shader, 0xffff)),
4827ec681f3Smrg                             0x1);
4837ec681f3Smrg               nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7);
4847ec681f3Smrg               nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1);
4857ec681f3Smrg               nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7);
4867ec681f3Smrg               nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1);
4877ec681f3Smrg               break;
4887ec681f3Smrg            }
4897ec681f3Smrg            case nir_intrinsic_rt_resume: {
4907ec681f3Smrg               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
4917ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
4927ec681f3Smrg
4937ec681f3Smrg               nir_store_var(&b_shader, vars->stack_ptr,
4947ec681f3Smrg                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
4957ec681f3Smrg                                      nir_imm_int(&b_shader, -size)),
4967ec681f3Smrg                             1);
4977ec681f3Smrg               break;
4987ec681f3Smrg            }
4997ec681f3Smrg            case nir_intrinsic_rt_return_amd: {
5007ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5017ec681f3Smrg
5027ec681f3Smrg               if (shader->info.stage == MESA_SHADER_RAYGEN) {
5037ec681f3Smrg                  nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1);
5047ec681f3Smrg                  break;
5057ec681f3Smrg               }
5067ec681f3Smrg               insert_rt_return(&b_shader, vars);
5077ec681f3Smrg               break;
5087ec681f3Smrg            }
5097ec681f3Smrg            case nir_intrinsic_load_scratch: {
5107ec681f3Smrg               b_shader.cursor = nir_before_instr(instr);
5117ec681f3Smrg               nir_instr_rewrite_src_ssa(
5127ec681f3Smrg                  instr, &intr->src[0],
5137ec681f3Smrg                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa));
5147ec681f3Smrg               break;
5157ec681f3Smrg            }
5167ec681f3Smrg            case nir_intrinsic_store_scratch: {
5177ec681f3Smrg               b_shader.cursor = nir_before_instr(instr);
5187ec681f3Smrg               nir_instr_rewrite_src_ssa(
5197ec681f3Smrg                  instr, &intr->src[1],
5207ec681f3Smrg                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa));
5217ec681f3Smrg               break;
5227ec681f3Smrg            }
5237ec681f3Smrg            case nir_intrinsic_load_rt_arg_scratch_offset_amd: {
5247ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5257ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->arg);
5267ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5277ec681f3Smrg               break;
5287ec681f3Smrg            }
5297ec681f3Smrg            case nir_intrinsic_load_shader_record_ptr: {
5307ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5317ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->shader_record_ptr);
5327ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5337ec681f3Smrg               break;
5347ec681f3Smrg            }
5357ec681f3Smrg            case nir_intrinsic_load_ray_launch_id: {
5367ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5377ec681f3Smrg               nir_ssa_def *ret = nir_load_global_invocation_id(&b_shader, 32);
5387ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5397ec681f3Smrg               break;
5407ec681f3Smrg            }
5417ec681f3Smrg            case nir_intrinsic_load_ray_t_min: {
5427ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5437ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmin);
5447ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5457ec681f3Smrg               break;
5467ec681f3Smrg            }
5477ec681f3Smrg            case nir_intrinsic_load_ray_t_max: {
5487ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5497ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmax);
5507ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5517ec681f3Smrg               break;
5527ec681f3Smrg            }
5537ec681f3Smrg            case nir_intrinsic_load_ray_world_origin: {
5547ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5557ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->origin);
5567ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5577ec681f3Smrg               break;
5587ec681f3Smrg            }
5597ec681f3Smrg            case nir_intrinsic_load_ray_world_direction: {
5607ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5617ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->direction);
5627ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5637ec681f3Smrg               break;
5647ec681f3Smrg            }
5657ec681f3Smrg            case nir_intrinsic_load_ray_instance_custom_index: {
5667ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5677ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->custom_instance_and_mask);
5687ec681f3Smrg               ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFF));
5697ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5707ec681f3Smrg               break;
5717ec681f3Smrg            }
5727ec681f3Smrg            case nir_intrinsic_load_primitive_id: {
5737ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5747ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->primitive_id);
5757ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5767ec681f3Smrg               break;
5777ec681f3Smrg            }
5787ec681f3Smrg            case nir_intrinsic_load_ray_geometry_index: {
5797ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5807ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->geometry_id_and_flags);
5817ec681f3Smrg               ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFFF));
5827ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5837ec681f3Smrg               break;
5847ec681f3Smrg            }
5857ec681f3Smrg            case nir_intrinsic_load_instance_id: {
5867ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5877ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->instance_id);
5887ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5897ec681f3Smrg               break;
5907ec681f3Smrg            }
5917ec681f3Smrg            case nir_intrinsic_load_ray_flags: {
5927ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5937ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->flags);
5947ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
5957ec681f3Smrg               break;
5967ec681f3Smrg            }
5977ec681f3Smrg            case nir_intrinsic_load_ray_hit_kind: {
5987ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
5997ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->hit_kind);
6007ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
6017ec681f3Smrg               break;
6027ec681f3Smrg            }
6037ec681f3Smrg            case nir_intrinsic_load_ray_world_to_object: {
6047ec681f3Smrg               unsigned c = nir_intrinsic_column(intr);
6057ec681f3Smrg               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
6067ec681f3Smrg               nir_ssa_def *wto_matrix[3];
6077ec681f3Smrg               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
6087ec681f3Smrg
6097ec681f3Smrg               nir_ssa_def *vals[3];
6107ec681f3Smrg               for (unsigned i = 0; i < 3; ++i)
6117ec681f3Smrg                  vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
6127ec681f3Smrg
6137ec681f3Smrg               nir_ssa_def *val = nir_vec(&b_shader, vals, 3);
6147ec681f3Smrg               if (c == 3)
6157ec681f3Smrg                  val = nir_fneg(&b_shader,
6167ec681f3Smrg                                 nir_build_vec3_mat_mult(&b_shader, val, wto_matrix, false));
6177ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6187ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
6197ec681f3Smrg               break;
6207ec681f3Smrg            }
6217ec681f3Smrg            case nir_intrinsic_load_ray_object_to_world: {
6227ec681f3Smrg               unsigned c = nir_intrinsic_column(intr);
6237ec681f3Smrg               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
6247ec681f3Smrg               nir_ssa_def *val;
6257ec681f3Smrg               if (c == 3) {
6267ec681f3Smrg                  nir_ssa_def *wto_matrix[3];
6277ec681f3Smrg                  nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
6287ec681f3Smrg
6297ec681f3Smrg                  nir_ssa_def *vals[3];
6307ec681f3Smrg                  for (unsigned i = 0; i < 3; ++i)
6317ec681f3Smrg                     vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
6327ec681f3Smrg
6337ec681f3Smrg                  val = nir_vec(&b_shader, vals, 3);
6347ec681f3Smrg               } else {
6357ec681f3Smrg                  val = nir_build_load_global(
6367ec681f3Smrg                     &b_shader, 3, 32,
6377ec681f3Smrg                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 92 + c * 12)),
6387ec681f3Smrg                     .align_mul = 4, .align_offset = 0);
6397ec681f3Smrg               }
6407ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6417ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
6427ec681f3Smrg               break;
6437ec681f3Smrg            }
6447ec681f3Smrg            case nir_intrinsic_load_ray_object_origin: {
6457ec681f3Smrg               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
6467ec681f3Smrg               nir_ssa_def *wto_matrix[] = {
6477ec681f3Smrg                  nir_build_load_global(
6487ec681f3Smrg                     &b_shader, 4, 32,
6497ec681f3Smrg                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 16)),
6507ec681f3Smrg                     .align_mul = 64, .align_offset = 16),
6517ec681f3Smrg                  nir_build_load_global(
6527ec681f3Smrg                     &b_shader, 4, 32,
6537ec681f3Smrg                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 32)),
6547ec681f3Smrg                     .align_mul = 64, .align_offset = 32),
6557ec681f3Smrg                  nir_build_load_global(
6567ec681f3Smrg                     &b_shader, 4, 32,
6577ec681f3Smrg                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 48)),
6587ec681f3Smrg                     .align_mul = 64, .align_offset = 48)};
6597ec681f3Smrg               nir_ssa_def *val = nir_build_vec3_mat_mult_pre(
6607ec681f3Smrg                  &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix);
6617ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6627ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
6637ec681f3Smrg               break;
6647ec681f3Smrg            }
6657ec681f3Smrg            case nir_intrinsic_load_ray_object_direction: {
6667ec681f3Smrg               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
6677ec681f3Smrg               nir_ssa_def *wto_matrix[3];
6687ec681f3Smrg               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
6697ec681f3Smrg               nir_ssa_def *val = nir_build_vec3_mat_mult(
6707ec681f3Smrg                  &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
6717ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6727ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
6737ec681f3Smrg               break;
6747ec681f3Smrg            }
6757ec681f3Smrg            case nir_intrinsic_load_intersection_opaque_amd: {
6767ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6777ec681f3Smrg               nir_ssa_def *ret = nir_load_var(&b_shader, vars->opaque);
6787ec681f3Smrg               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
6797ec681f3Smrg               break;
6807ec681f3Smrg            }
6817ec681f3Smrg            case nir_intrinsic_ignore_ray_intersection: {
6827ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6837ec681f3Smrg               nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 1), 1);
6847ec681f3Smrg
6857ec681f3Smrg               /* The if is a workaround to avoid having to fix up control flow manually */
6867ec681f3Smrg               nir_push_if(&b_shader, nir_imm_true(&b_shader));
6877ec681f3Smrg               nir_jump(&b_shader, nir_jump_return);
6887ec681f3Smrg               nir_pop_if(&b_shader, NULL);
6897ec681f3Smrg               break;
6907ec681f3Smrg            }
6917ec681f3Smrg            case nir_intrinsic_terminate_ray: {
6927ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
6937ec681f3Smrg               nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 2), 1);
6947ec681f3Smrg
6957ec681f3Smrg               /* The if is a workaround to avoid having to fix up control flow manually */
6967ec681f3Smrg               nir_push_if(&b_shader, nir_imm_true(&b_shader));
6977ec681f3Smrg               nir_jump(&b_shader, nir_jump_return);
6987ec681f3Smrg               nir_pop_if(&b_shader, NULL);
6997ec681f3Smrg               break;
7007ec681f3Smrg            }
7017ec681f3Smrg            case nir_intrinsic_report_ray_intersection: {
7027ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
7037ec681f3Smrg               nir_push_if(
7047ec681f3Smrg                  &b_shader,
7057ec681f3Smrg                  nir_iand(
7067ec681f3Smrg                     &b_shader,
7077ec681f3Smrg                     nir_flt(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmax)),
7087ec681f3Smrg                     nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin))));
7097ec681f3Smrg               {
7107ec681f3Smrg                  nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 0), 1);
7117ec681f3Smrg                  nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1);
7127ec681f3Smrg                  nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1);
7137ec681f3Smrg               }
7147ec681f3Smrg               nir_pop_if(&b_shader, NULL);
7157ec681f3Smrg               break;
7167ec681f3Smrg            }
7177ec681f3Smrg            default:
7187ec681f3Smrg               break;
7197ec681f3Smrg            }
7207ec681f3Smrg            break;
7217ec681f3Smrg         }
7227ec681f3Smrg         case nir_instr_type_jump: {
7237ec681f3Smrg            nir_jump_instr *jump = nir_instr_as_jump(instr);
7247ec681f3Smrg            if (jump->type == nir_jump_halt) {
7257ec681f3Smrg               b_shader.cursor = nir_instr_remove(instr);
7267ec681f3Smrg               nir_jump(&b_shader, nir_jump_return);
7277ec681f3Smrg            }
7287ec681f3Smrg            break;
7297ec681f3Smrg         }
7307ec681f3Smrg         default:
7317ec681f3Smrg            break;
7327ec681f3Smrg         }
7337ec681f3Smrg      }
7347ec681f3Smrg   }
7357ec681f3Smrg
7367ec681f3Smrg   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
7377ec681f3Smrg}
7387ec681f3Smrg
7397ec681f3Smrgstatic void
7407ec681f3Smrginsert_rt_case(nir_builder *b, nir_shader *shader, const struct rt_variables *vars,
7417ec681f3Smrg               nir_ssa_def *idx, uint32_t call_idx_base, uint32_t call_idx)
7427ec681f3Smrg{
7437ec681f3Smrg   struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL);
7447ec681f3Smrg
7457ec681f3Smrg   nir_opt_dead_cf(shader);
7467ec681f3Smrg
7477ec681f3Smrg   struct rt_variables src_vars = create_rt_variables(shader, vars->stack_sizes);
7487ec681f3Smrg   map_rt_variables(var_remap, &src_vars, vars);
7497ec681f3Smrg
7507ec681f3Smrg   NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base);
7517ec681f3Smrg
7527ec681f3Smrg   NIR_PASS_V(shader, nir_opt_remove_phis);
7537ec681f3Smrg   NIR_PASS_V(shader, nir_lower_returns);
7547ec681f3Smrg   NIR_PASS_V(shader, nir_opt_dce);
7557ec681f3Smrg
7567ec681f3Smrg   if (b->shader->info.stage == MESA_SHADER_ANY_HIT ||
7577ec681f3Smrg       b->shader->info.stage == MESA_SHADER_INTERSECTION) {
7587ec681f3Smrg      src_vars.stack_sizes[src_vars.group_idx].non_recursive_size =
7597ec681f3Smrg         MAX2(src_vars.stack_sizes[src_vars.group_idx].non_recursive_size, shader->scratch_size);
7607ec681f3Smrg   } else {
7617ec681f3Smrg      src_vars.stack_sizes[src_vars.group_idx].recursive_size =
7627ec681f3Smrg         MAX2(src_vars.stack_sizes[src_vars.group_idx].recursive_size, shader->scratch_size);
7637ec681f3Smrg   }
7647ec681f3Smrg
7657ec681f3Smrg   nir_push_if(b, nir_ieq(b, idx, nir_imm_int(b, call_idx)));
7667ec681f3Smrg   nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1);
7677ec681f3Smrg   nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap);
7687ec681f3Smrg   nir_pop_if(b, NULL);
7697ec681f3Smrg
7707ec681f3Smrg   /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */
7717ec681f3Smrg   ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader));
7727ec681f3Smrg
7737ec681f3Smrg   ralloc_free(var_remap);
7747ec681f3Smrg}
7757ec681f3Smrg
7767ec681f3Smrgstatic bool
7777ec681f3Smrglower_rt_derefs(nir_shader *shader)
7787ec681f3Smrg{
7797ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
7807ec681f3Smrg
7817ec681f3Smrg   bool progress = false;
7827ec681f3Smrg
7837ec681f3Smrg   nir_builder b;
7847ec681f3Smrg   nir_builder_init(&b, impl);
7857ec681f3Smrg
7867ec681f3Smrg   b.cursor = nir_before_cf_list(&impl->body);
7877ec681f3Smrg   nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);
7887ec681f3Smrg
7897ec681f3Smrg   nir_foreach_block (block, impl) {
7907ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
7917ec681f3Smrg         switch (instr->type) {
7927ec681f3Smrg         case nir_instr_type_deref: {
7937ec681f3Smrg            if (instr->type != nir_instr_type_deref)
7947ec681f3Smrg               continue;
7957ec681f3Smrg
7967ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
7977ec681f3Smrg            if (nir_deref_mode_is(deref, nir_var_shader_call_data)) {
7987ec681f3Smrg               deref->modes = nir_var_function_temp;
7997ec681f3Smrg               if (deref->deref_type == nir_deref_type_var) {
8007ec681f3Smrg                  b.cursor = nir_before_instr(&deref->instr);
8017ec681f3Smrg                  nir_deref_instr *cast = nir_build_deref_cast(
8027ec681f3Smrg                     &b, arg_offset, nir_var_function_temp, deref->var->type, 0);
8037ec681f3Smrg                  nir_ssa_def_rewrite_uses(&deref->dest.ssa, &cast->dest.ssa);
8047ec681f3Smrg                  nir_instr_remove(&deref->instr);
8057ec681f3Smrg               }
8067ec681f3Smrg               progress = true;
8077ec681f3Smrg            } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) {
8087ec681f3Smrg               deref->modes = nir_var_function_temp;
8097ec681f3Smrg               if (deref->deref_type == nir_deref_type_var) {
8107ec681f3Smrg                  b.cursor = nir_before_instr(&deref->instr);
8117ec681f3Smrg                  nir_deref_instr *cast =
8127ec681f3Smrg                     nir_build_deref_cast(&b, nir_imm_int(&b, RADV_HIT_ATTRIB_OFFSET),
8137ec681f3Smrg                                          nir_var_function_temp, deref->type, 0);
8147ec681f3Smrg                  nir_ssa_def_rewrite_uses(&deref->dest.ssa, &cast->dest.ssa);
8157ec681f3Smrg                  nir_instr_remove(&deref->instr);
8167ec681f3Smrg               }
8177ec681f3Smrg               progress = true;
8187ec681f3Smrg            }
8197ec681f3Smrg            break;
8207ec681f3Smrg         }
8217ec681f3Smrg         default:
8227ec681f3Smrg            break;
8237ec681f3Smrg         }
8247ec681f3Smrg      }
8257ec681f3Smrg   }
8267ec681f3Smrg
8277ec681f3Smrg   if (progress) {
8287ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
8297ec681f3Smrg   } else {
8307ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
8317ec681f3Smrg   }
8327ec681f3Smrg
8337ec681f3Smrg   return progress;
8347ec681f3Smrg}
8357ec681f3Smrg
8367ec681f3Smrgstatic gl_shader_stage
8377ec681f3Smrgconvert_rt_stage(VkShaderStageFlagBits vk_stage)
8387ec681f3Smrg{
8397ec681f3Smrg   switch (vk_stage) {
8407ec681f3Smrg   case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
8417ec681f3Smrg      return MESA_SHADER_RAYGEN;
8427ec681f3Smrg   case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
8437ec681f3Smrg      return MESA_SHADER_ANY_HIT;
8447ec681f3Smrg   case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
8457ec681f3Smrg      return MESA_SHADER_CLOSEST_HIT;
8467ec681f3Smrg   case VK_SHADER_STAGE_MISS_BIT_KHR:
8477ec681f3Smrg      return MESA_SHADER_MISS;
8487ec681f3Smrg   case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
8497ec681f3Smrg      return MESA_SHADER_INTERSECTION;
8507ec681f3Smrg   case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
8517ec681f3Smrg      return MESA_SHADER_CALLABLE;
8527ec681f3Smrg   default:
8537ec681f3Smrg      unreachable("Unhandled RT stage");
8547ec681f3Smrg   }
8557ec681f3Smrg}
8567ec681f3Smrg
8577ec681f3Smrgstatic nir_shader *
8587ec681f3Smrgparse_rt_stage(struct radv_device *device, struct radv_pipeline_layout *layout,
8597ec681f3Smrg               const VkPipelineShaderStageCreateInfo *stage)
8607ec681f3Smrg{
8617ec681f3Smrg   struct radv_pipeline_key key;
8627ec681f3Smrg   memset(&key, 0, sizeof(key));
8637ec681f3Smrg
8647ec681f3Smrg   nir_shader *shader = radv_shader_compile_to_nir(
8657ec681f3Smrg      device, vk_shader_module_from_handle(stage->module), stage->pName,
8667ec681f3Smrg      convert_rt_stage(stage->stage), stage->pSpecializationInfo, layout, &key);
8677ec681f3Smrg
8687ec681f3Smrg   if (shader->info.stage == MESA_SHADER_RAYGEN || shader->info.stage == MESA_SHADER_CLOSEST_HIT ||
8697ec681f3Smrg       shader->info.stage == MESA_SHADER_CALLABLE || shader->info.stage == MESA_SHADER_MISS) {
8707ec681f3Smrg      nir_block *last_block = nir_impl_last_block(nir_shader_get_entrypoint(shader));
8717ec681f3Smrg      nir_builder b_inner;
8727ec681f3Smrg      nir_builder_init(&b_inner, nir_shader_get_entrypoint(shader));
8737ec681f3Smrg      b_inner.cursor = nir_after_block(last_block);
8747ec681f3Smrg      nir_rt_return_amd(&b_inner);
8757ec681f3Smrg   }
8767ec681f3Smrg
8777ec681f3Smrg   NIR_PASS_V(shader, nir_lower_vars_to_explicit_types,
8787ec681f3Smrg              nir_var_function_temp | nir_var_shader_call_data | nir_var_ray_hit_attrib,
8797ec681f3Smrg              glsl_get_natural_size_align_bytes);
8807ec681f3Smrg
8817ec681f3Smrg   NIR_PASS_V(shader, lower_rt_derefs);
8827ec681f3Smrg
8837ec681f3Smrg   NIR_PASS_V(shader, nir_lower_explicit_io, nir_var_function_temp,
8847ec681f3Smrg              nir_address_format_32bit_offset);
8857ec681f3Smrg
8867ec681f3Smrg   return shader;
8877ec681f3Smrg}
8887ec681f3Smrg
8897ec681f3Smrgstatic nir_function_impl *
8907ec681f3Smrglower_any_hit_for_intersection(nir_shader *any_hit)
8917ec681f3Smrg{
8927ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(any_hit);
8937ec681f3Smrg
8947ec681f3Smrg   /* Any-hit shaders need three parameters */
8957ec681f3Smrg   assert(impl->function->num_params == 0);
8967ec681f3Smrg   nir_parameter params[] = {
8977ec681f3Smrg      {
8987ec681f3Smrg         /* A pointer to a boolean value for whether or not the hit was
8997ec681f3Smrg          * accepted.
9007ec681f3Smrg          */
9017ec681f3Smrg         .num_components = 1,
9027ec681f3Smrg         .bit_size = 32,
9037ec681f3Smrg      },
9047ec681f3Smrg      {
9057ec681f3Smrg         /* The hit T value */
9067ec681f3Smrg         .num_components = 1,
9077ec681f3Smrg         .bit_size = 32,
9087ec681f3Smrg      },
9097ec681f3Smrg      {
9107ec681f3Smrg         /* The hit kind */
9117ec681f3Smrg         .num_components = 1,
9127ec681f3Smrg         .bit_size = 32,
9137ec681f3Smrg      },
9147ec681f3Smrg   };
9157ec681f3Smrg   impl->function->num_params = ARRAY_SIZE(params);
9167ec681f3Smrg   impl->function->params = ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params));
9177ec681f3Smrg   memcpy(impl->function->params, params, sizeof(params));
9187ec681f3Smrg
9197ec681f3Smrg   nir_builder build;
9207ec681f3Smrg   nir_builder_init(&build, impl);
9217ec681f3Smrg   nir_builder *b = &build;
9227ec681f3Smrg
9237ec681f3Smrg   b->cursor = nir_before_cf_list(&impl->body);
9247ec681f3Smrg
9257ec681f3Smrg   nir_ssa_def *commit_ptr = nir_load_param(b, 0);
9267ec681f3Smrg   nir_ssa_def *hit_t = nir_load_param(b, 1);
9277ec681f3Smrg   nir_ssa_def *hit_kind = nir_load_param(b, 2);
9287ec681f3Smrg
9297ec681f3Smrg   nir_deref_instr *commit =
9307ec681f3Smrg      nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);
9317ec681f3Smrg
9327ec681f3Smrg   nir_foreach_block_safe (block, impl) {
9337ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
9347ec681f3Smrg         switch (instr->type) {
9357ec681f3Smrg         case nir_instr_type_intrinsic: {
9367ec681f3Smrg            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
9377ec681f3Smrg            switch (intrin->intrinsic) {
9387ec681f3Smrg            case nir_intrinsic_ignore_ray_intersection:
9397ec681f3Smrg               b->cursor = nir_instr_remove(&intrin->instr);
9407ec681f3Smrg               /* We put the newly emitted code inside a dummy if because it's
9417ec681f3Smrg                * going to contain a jump instruction and we don't want to
9427ec681f3Smrg                * deal with that mess here.  It'll get dealt with by our
9437ec681f3Smrg                * control-flow optimization passes.
9447ec681f3Smrg                */
9457ec681f3Smrg               nir_store_deref(b, commit, nir_imm_false(b), 0x1);
9467ec681f3Smrg               nir_push_if(b, nir_imm_true(b));
9477ec681f3Smrg               nir_jump(b, nir_jump_halt);
9487ec681f3Smrg               nir_pop_if(b, NULL);
9497ec681f3Smrg               break;
9507ec681f3Smrg
9517ec681f3Smrg            case nir_intrinsic_terminate_ray:
9527ec681f3Smrg               /* The "normal" handling of terminateRay works fine in
9537ec681f3Smrg                * intersection shaders.
9547ec681f3Smrg                */
9557ec681f3Smrg               break;
9567ec681f3Smrg
9577ec681f3Smrg            case nir_intrinsic_load_ray_t_max:
9587ec681f3Smrg               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t);
9597ec681f3Smrg               nir_instr_remove(&intrin->instr);
9607ec681f3Smrg               break;
9617ec681f3Smrg
9627ec681f3Smrg            case nir_intrinsic_load_ray_hit_kind:
9637ec681f3Smrg               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
9647ec681f3Smrg               nir_instr_remove(&intrin->instr);
9657ec681f3Smrg               break;
9667ec681f3Smrg
9677ec681f3Smrg            default:
9687ec681f3Smrg               break;
9697ec681f3Smrg            }
9707ec681f3Smrg            break;
9717ec681f3Smrg         }
9727ec681f3Smrg         case nir_instr_type_jump: {
9737ec681f3Smrg            nir_jump_instr *jump = nir_instr_as_jump(instr);
9747ec681f3Smrg            if (jump->type == nir_jump_halt) {
9757ec681f3Smrg               b->cursor = nir_instr_remove(instr);
9767ec681f3Smrg               nir_jump(b, nir_jump_return);
9777ec681f3Smrg            }
9787ec681f3Smrg            break;
9797ec681f3Smrg         }
9807ec681f3Smrg
9817ec681f3Smrg         default:
9827ec681f3Smrg            break;
9837ec681f3Smrg         }
9847ec681f3Smrg      }
9857ec681f3Smrg   }
9867ec681f3Smrg
9877ec681f3Smrg   nir_validate_shader(any_hit, "after initial any-hit lowering");
9887ec681f3Smrg
9897ec681f3Smrg   nir_lower_returns_impl(impl);
9907ec681f3Smrg
9917ec681f3Smrg   nir_validate_shader(any_hit, "after lowering returns");
9927ec681f3Smrg
9937ec681f3Smrg   return impl;
9947ec681f3Smrg}
9957ec681f3Smrg
9967ec681f3Smrg/* Inline the any_hit shader into the intersection shader so we don't have
9977ec681f3Smrg * to implement yet another shader call interface here. Neither do any recursion.
9987ec681f3Smrg */
9997ec681f3Smrgstatic void
10007ec681f3Smrgnir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
10017ec681f3Smrg{
10027ec681f3Smrg   void *dead_ctx = ralloc_context(intersection);
10037ec681f3Smrg
10047ec681f3Smrg   nir_function_impl *any_hit_impl = NULL;
10057ec681f3Smrg   struct hash_table *any_hit_var_remap = NULL;
10067ec681f3Smrg   if (any_hit) {
10077ec681f3Smrg      any_hit = nir_shader_clone(dead_ctx, any_hit);
10087ec681f3Smrg      NIR_PASS_V(any_hit, nir_opt_dce);
10097ec681f3Smrg      any_hit_impl = lower_any_hit_for_intersection(any_hit);
10107ec681f3Smrg      any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx);
10117ec681f3Smrg   }
10127ec681f3Smrg
10137ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(intersection);
10147ec681f3Smrg
10157ec681f3Smrg   nir_builder build;
10167ec681f3Smrg   nir_builder_init(&build, impl);
10177ec681f3Smrg   nir_builder *b = &build;
10187ec681f3Smrg
10197ec681f3Smrg   b->cursor = nir_before_cf_list(&impl->body);
10207ec681f3Smrg
10217ec681f3Smrg   nir_variable *commit = nir_local_variable_create(impl, glsl_bool_type(), "ray_commit");
10227ec681f3Smrg   nir_store_var(b, commit, nir_imm_false(b), 0x1);
10237ec681f3Smrg
10247ec681f3Smrg   nir_foreach_block_safe (block, impl) {
10257ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
10267ec681f3Smrg         if (instr->type != nir_instr_type_intrinsic)
10277ec681f3Smrg            continue;
10287ec681f3Smrg
10297ec681f3Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
10307ec681f3Smrg         if (intrin->intrinsic != nir_intrinsic_report_ray_intersection)
10317ec681f3Smrg            continue;
10327ec681f3Smrg
10337ec681f3Smrg         b->cursor = nir_instr_remove(&intrin->instr);
10347ec681f3Smrg         nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
10357ec681f3Smrg         nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
10367ec681f3Smrg         nir_ssa_def *min_t = nir_load_ray_t_min(b);
10377ec681f3Smrg         nir_ssa_def *max_t = nir_load_ray_t_max(b);
10387ec681f3Smrg
10397ec681f3Smrg         /* bool commit_tmp = false; */
10407ec681f3Smrg         nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp");
10417ec681f3Smrg         nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1);
10427ec681f3Smrg
10437ec681f3Smrg         nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t), nir_fge(b, max_t, hit_t)));
10447ec681f3Smrg         {
10457ec681f3Smrg            /* Any-hit defaults to commit */
10467ec681f3Smrg            nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1);
10477ec681f3Smrg
10487ec681f3Smrg            if (any_hit_impl != NULL) {
10497ec681f3Smrg               nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b)));
10507ec681f3Smrg               {
10517ec681f3Smrg                  nir_ssa_def *params[] = {
10527ec681f3Smrg                     &nir_build_deref_var(b, commit_tmp)->dest.ssa,
10537ec681f3Smrg                     hit_t,
10547ec681f3Smrg                     hit_kind,
10557ec681f3Smrg                  };
10567ec681f3Smrg                  nir_inline_function_impl(b, any_hit_impl, params, any_hit_var_remap);
10577ec681f3Smrg               }
10587ec681f3Smrg               nir_pop_if(b, NULL);
10597ec681f3Smrg            }
10607ec681f3Smrg
10617ec681f3Smrg            nir_push_if(b, nir_load_var(b, commit_tmp));
10627ec681f3Smrg            {
10637ec681f3Smrg               nir_report_ray_intersection(b, 1, hit_t, hit_kind);
10647ec681f3Smrg            }
10657ec681f3Smrg            nir_pop_if(b, NULL);
10667ec681f3Smrg         }
10677ec681f3Smrg         nir_pop_if(b, NULL);
10687ec681f3Smrg
10697ec681f3Smrg         nir_ssa_def *accepted = nir_load_var(b, commit_tmp);
10707ec681f3Smrg         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted);
10717ec681f3Smrg      }
10727ec681f3Smrg   }
10737ec681f3Smrg
10747ec681f3Smrg   /* We did some inlining; have to re-index SSA defs */
10757ec681f3Smrg   nir_index_ssa_defs(impl);
10767ec681f3Smrg
10777ec681f3Smrg   /* Eliminate the casts introduced for the commit return of the any-hit shader. */
10787ec681f3Smrg   NIR_PASS_V(intersection, nir_opt_deref);
10797ec681f3Smrg
10807ec681f3Smrg   ralloc_free(dead_ctx);
10817ec681f3Smrg}
10827ec681f3Smrg
10837ec681f3Smrg/* Variables only used internally to ray traversal. This is data that describes
10847ec681f3Smrg * the current state of the traversal vs. what we'd give to a shader.  e.g. what
10857ec681f3Smrg * is the instance we're currently visiting vs. what is the instance of the
10867ec681f3Smrg * closest hit. */
10877ec681f3Smrgstruct rt_traversal_vars {
10887ec681f3Smrg   nir_variable *origin;
10897ec681f3Smrg   nir_variable *dir;
10907ec681f3Smrg   nir_variable *inv_dir;
10917ec681f3Smrg   nir_variable *sbt_offset_and_flags;
10927ec681f3Smrg   nir_variable *instance_id;
10937ec681f3Smrg   nir_variable *custom_instance_and_mask;
10947ec681f3Smrg   nir_variable *instance_addr;
10957ec681f3Smrg   nir_variable *should_return;
10967ec681f3Smrg   nir_variable *bvh_base;
10977ec681f3Smrg   nir_variable *stack;
10987ec681f3Smrg   nir_variable *top_stack;
10997ec681f3Smrg};
11007ec681f3Smrg
11017ec681f3Smrgstatic struct rt_traversal_vars
11027ec681f3Smrginit_traversal_vars(nir_builder *b)
11037ec681f3Smrg{
11047ec681f3Smrg   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
11057ec681f3Smrg   struct rt_traversal_vars ret;
11067ec681f3Smrg
11077ec681f3Smrg   ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin");
11087ec681f3Smrg   ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir");
11097ec681f3Smrg   ret.inv_dir =
11107ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir");
11117ec681f3Smrg   ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
11127ec681f3Smrg                                                  "traversal_sbt_offset_and_flags");
11137ec681f3Smrg   ret.instance_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
11147ec681f3Smrg                                         "traversal_instance_id");
11157ec681f3Smrg   ret.custom_instance_and_mask = nir_variable_create(
11167ec681f3Smrg      b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_custom_instance_and_mask");
11177ec681f3Smrg   ret.instance_addr =
11187ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
11197ec681f3Smrg   ret.should_return = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(),
11207ec681f3Smrg                                           "traversal_should_return");
11217ec681f3Smrg   ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(),
11227ec681f3Smrg                                      "traversal_bvh_base");
11237ec681f3Smrg   ret.stack =
11247ec681f3Smrg      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr");
11257ec681f3Smrg   ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
11267ec681f3Smrg                                       "traversal_top_stack_ptr");
11277ec681f3Smrg   return ret;
11287ec681f3Smrg}
11297ec681f3Smrg
11307ec681f3Smrgstatic nir_ssa_def *
11317ec681f3Smrgbuild_addr_to_node(nir_builder *b, nir_ssa_def *addr)
11327ec681f3Smrg{
11337ec681f3Smrg   const uint64_t bvh_size = 1ull << 42;
11347ec681f3Smrg   nir_ssa_def *node = nir_ushr(b, addr, nir_imm_int(b, 3));
11357ec681f3Smrg   return nir_iand(b, node, nir_imm_int64(b, (bvh_size - 1) << 3));
11367ec681f3Smrg}
11377ec681f3Smrg
11387ec681f3Smrgstatic nir_ssa_def *
11397ec681f3Smrgbuild_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node)
11407ec681f3Smrg{
11417ec681f3Smrg   nir_ssa_def *addr = nir_iand(b, node, nir_imm_int64(b, ~7ull));
11427ec681f3Smrg   addr = nir_ishl(b, addr, nir_imm_int(b, 3));
11437ec681f3Smrg   /* Assumes everything is in the top half of address space, which is true in
11447ec681f3Smrg    * GFX9+ for now. */
11457ec681f3Smrg   return device->physical_device->rad_info.chip_class >= GFX9
11467ec681f3Smrg      ? nir_ior(b, addr, nir_imm_int64(b, 0xffffull << 48))
11477ec681f3Smrg      : addr;
11487ec681f3Smrg}
11497ec681f3Smrg
11507ec681f3Smrg/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
11517ec681f3Smrg * is assumed to be an actual hit. */
11527ec681f3Smrgstatic nir_ssa_def *
11537ec681f3Smrghit_is_opaque(nir_builder *b, const struct rt_variables *vars,
11547ec681f3Smrg              const struct rt_traversal_vars *trav_vars, nir_ssa_def *geometry_id_and_flags)
11557ec681f3Smrg{
11567ec681f3Smrg   nir_ssa_def *geom_force_opaque = nir_ine(
11577ec681f3Smrg      b, nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 1u << 28 /* VK_GEOMETRY_OPAQUE_BIT */)),
11587ec681f3Smrg      nir_imm_int(b, 0));
11597ec681f3Smrg   nir_ssa_def *instance_force_opaque =
11607ec681f3Smrg      nir_ine(b,
11617ec681f3Smrg              nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
11627ec681f3Smrg                       nir_imm_int(b, 4 << 24 /* VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT */)),
11637ec681f3Smrg              nir_imm_int(b, 0));
11647ec681f3Smrg   nir_ssa_def *instance_force_non_opaque =
11657ec681f3Smrg      nir_ine(b,
11667ec681f3Smrg              nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
11677ec681f3Smrg                       nir_imm_int(b, 8 << 24 /* VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT */)),
11687ec681f3Smrg              nir_imm_int(b, 0));
11697ec681f3Smrg
11707ec681f3Smrg   nir_ssa_def *opaque = geom_force_opaque;
11717ec681f3Smrg   opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque);
11727ec681f3Smrg   opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque);
11737ec681f3Smrg
11747ec681f3Smrg   nir_ssa_def *ray_force_opaque =
11757ec681f3Smrg      nir_ine(b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 1 /* RayFlagsOpaque */)),
11767ec681f3Smrg              nir_imm_int(b, 0));
11777ec681f3Smrg   nir_ssa_def *ray_force_non_opaque = nir_ine(
11787ec681f3Smrg      b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 2 /* RayFlagsNoOpaque */)),
11797ec681f3Smrg      nir_imm_int(b, 0));
11807ec681f3Smrg
11817ec681f3Smrg   opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque);
11827ec681f3Smrg   opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque);
11837ec681f3Smrg   return opaque;
11847ec681f3Smrg}
11857ec681f3Smrg
11867ec681f3Smrgstatic void
11877ec681f3Smrgvisit_any_hit_shaders(struct radv_device *device,
11887ec681f3Smrg                      const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
11897ec681f3Smrg                      struct rt_variables *vars)
11907ec681f3Smrg{
11917ec681f3Smrg   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
11927ec681f3Smrg   nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx);
11937ec681f3Smrg
11947ec681f3Smrg   nir_push_if(b, nir_ine(b, sbt_idx, nir_imm_int(b, 0)));
11957ec681f3Smrg   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
11967ec681f3Smrg      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
11977ec681f3Smrg      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
11987ec681f3Smrg
11997ec681f3Smrg      switch (group_info->type) {
12007ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
12017ec681f3Smrg         shader_id = group_info->anyHitShader;
12027ec681f3Smrg         break;
12037ec681f3Smrg      default:
12047ec681f3Smrg         break;
12057ec681f3Smrg      }
12067ec681f3Smrg      if (shader_id == VK_SHADER_UNUSED_KHR)
12077ec681f3Smrg         continue;
12087ec681f3Smrg
12097ec681f3Smrg      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
12107ec681f3Smrg      nir_shader *nir_stage = parse_rt_stage(device, layout, stage);
12117ec681f3Smrg
12127ec681f3Smrg      vars->group_idx = i;
12137ec681f3Smrg      insert_rt_case(b, nir_stage, vars, sbt_idx, 0, i + 2);
12147ec681f3Smrg   }
12157ec681f3Smrg   nir_pop_if(b, NULL);
12167ec681f3Smrg}
12177ec681f3Smrg
/* Emits the traversal-loop body for a triangle leaf node.
 *
 * `result` is the output of the ray/triangle intersection test: channel 0 is
 * the unnormalized hit distance and channel 1 the common divisor; channels
 * 2-3 are divided by the same divisor below to produce the barycentric ij,
 * so they are presumably the unnormalized barycentrics — confirm against the
 * intersection emitter.  `bvh_node` is the packed id of the triangle node.
 *
 * The hit is range-checked and culled, any-hit shaders run for non-opaque
 * hits, and an accepted hit is committed into `vars` as the new closest hit.
 */
static void
insert_traversal_triangle_case(struct radv_device *device,
                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
                               nir_ssa_def *result, const struct rt_variables *vars,
                               const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node)
{
   /* Normalize the distance by the divisor. */
   nir_ssa_def *dist = nir_vector_extract(b, result, nir_imm_int(b, 0));
   nir_ssa_def *div = nir_vector_extract(b, result, nir_imm_int(b, 1));
   dist = nir_fdiv(b, dist, div);
   /* div > 0 means front-facing; the instance CCW flag flips the winding. */
   nir_ssa_def *frontface = nir_flt(b, nir_imm_float(b, 0), div);
   nir_ssa_def *switch_ccw = nir_ine(
      b,
      nir_iand(
         b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
         nir_imm_int(b, 2 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT */)),
      nir_imm_int(b, 0));
   frontface = nir_ixor(b, frontface, switch_ccw);

   /* Ray-level triangle skip ... */
   nir_ssa_def *not_cull = nir_ieq(
      b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 256 /* RayFlagsSkipTriangles */)),
      nir_imm_int(b, 0));
   /* ... and facing-based culling, selected by which face we hit. */
   nir_ssa_def *not_facing_cull = nir_ieq(
      b,
      nir_iand(b, nir_load_var(b, vars->flags),
               nir_bcsel(b, frontface, nir_imm_int(b, 32 /* RayFlagsCullFrontFacingTriangles */),
                         nir_imm_int(b, 16 /* RayFlagsCullBackFacingTriangles */))),
      nir_imm_int(b, 0));

   /* Facing culls are ignored entirely when the instance disables them. */
   not_cull = nir_iand(
      b, not_cull,
      nir_ior(
         b, not_facing_cull,
         nir_ine(
            b,
            nir_iand(
               b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
               nir_imm_int(b, 1 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT */)),
            nir_imm_int(b, 0))));

   /* Only consider hits inside the current [tmin, tmax) interval that
    * survived culling. */
   nir_push_if(b, nir_iand(b,
                           nir_iand(b, nir_flt(b, dist, nir_load_var(b, vars->tmax)),
                                    nir_fge(b, dist, nir_load_var(b, vars->tmin))),
                           not_cull));
   {

      /* Load {triangle_id, geometry_id_and_flags} from the leaf node. */
      nir_ssa_def *triangle_info = nir_build_load_global(
         b, 2, 32,
         nir_iadd(b, build_node_to_addr(device, b, bvh_node),
                  nir_imm_int64(b, offsetof(struct radv_bvh_triangle_node, triangle_id))),
         .align_mul = 4, .align_offset = 0);
      nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0);
      nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1);
      nir_ssa_def *geometry_id = nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 0xfffffff));
      nir_ssa_def *is_opaque = hit_is_opaque(b, vars, trav_vars, geometry_id_and_flags);

      /* Ray flags 0x40/0x80 cull opaque/non-opaque hits respectively. */
      not_cull =
         nir_ieq(b,
                 nir_iand(b, nir_load_var(b, vars->flags),
                          nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))),
                 nir_imm_int(b, 0));
      nir_push_if(b, not_cull);
      {
         /* SBT record index = base offset + instance SBT offset
          *                    + stride * geometry id. */
         nir_ssa_def *sbt_idx =
            nir_iadd(b,
                     nir_iadd(b, nir_load_var(b, vars->sbt_offset),
                              nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
                                       nir_imm_int(b, 0xffffff))),
                     nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id));
         /* Barycentric ij = result.zw / div. */
         nir_ssa_def *divs[2] = {div, div};
         nir_ssa_def *ij = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
         /* 0xFE = front-facing triangle hit kind, 0xFF = back-facing. */
         nir_ssa_def *hit_kind =
            nir_bcsel(b, frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));

         /* Stash the hit attributes (barycentrics) where hit shaders read them. */
         nir_store_scratch(
            b, ij,
            nir_iadd(b, nir_load_var(b, vars->stack_ptr), nir_imm_int(b, RADV_HIT_ATTRIB_OFFSET)),
            .align_mul = 16, .write_mask = 3);

         /* ahit_status: 0 = accepted by default; the any-hit shaders set it
          * to 1 to ignore the hit or 2 to accept-and-terminate (see the
          * checks below). */
         nir_store_var(b, vars->ahit_status, nir_imm_int(b, 0), 1);

         /* Non-opaque hits run the any-hit shader first. */
         nir_push_if(b, nir_ine(b, is_opaque, nir_imm_bool(b, true)));
         {
            struct rt_variables inner_vars = create_inner_vars(b, vars);

            /* Expose the candidate hit's state to the any-hit shader. */
            nir_store_var(b, inner_vars.primitive_id, primitive_id, 1);
            nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1);
            nir_store_var(b, inner_vars.tmax, dist, 0x1);
            nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
            nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr),
                          0x1);
            nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1);
            nir_store_var(b, inner_vars.custom_instance_and_mask,
                          nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);

            load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4);

            visit_any_hit_shaders(device, pCreateInfo, b, &inner_vars);

            /* Status 1: the any-hit shader ignored this hit — keep walking. */
            nir_push_if(b, nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 1)));
            {
               nir_jump(b, nir_jump_continue);
            }
            nir_pop_if(b, NULL);
         }
         nir_pop_if(b, NULL);

         /* Commit the hit as the new closest hit. */
         nir_store_var(b, vars->primitive_id, primitive_id, 1);
         nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1);
         nir_store_var(b, vars->tmax, dist, 0x1);
         nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
         nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
         nir_store_var(b, vars->hit_kind, hit_kind, 0x1);
         nir_store_var(b, vars->custom_instance_and_mask,
                       nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);

         load_sbt_entry(b, vars, sbt_idx, SBT_HIT, 0);

         /* Return to the caller (instead of running a closest-hit shader)
          * when the ray skips closest-hit or the SBT entry has no shader. */
         nir_store_var(b, trav_vars->should_return,
                       nir_ior(b,
                               nir_ine(b,
                                       nir_iand(b, nir_load_var(b, vars->flags),
                                                nir_imm_int(b, 8 /* SkipClosestHitShader */)),
                                       nir_imm_int(b, 0)),
                               nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0))),
                       1);

         /* Stop traversal on first hit if requested, or if an any-hit shader
          * terminated the ray (status 2). */
         nir_ssa_def *terminate_on_first_hit =
            nir_ine(b,
                    nir_iand(b, nir_load_var(b, vars->flags),
                             nir_imm_int(b, 4 /* TerminateOnFirstHitKHR */)),
                    nir_imm_int(b, 0));
         nir_ssa_def *ray_terminated =
            nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 2));
         nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
         {
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}
13617ec681f3Smrg
/* Emits the traversal-loop body for an AABB (procedural geometry) leaf node.
 *
 * If the hit group has an intersection shader it is invoked (with the
 * any-hit shader lowered into it); otherwise a plain ray/box test against
 * the node's bounds decides the hit.  An accepted candidate is committed
 * into `vars` as the new closest hit. */
static void
insert_traversal_aabb_case(struct radv_device *device,
                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b,
                           const struct rt_variables *vars,
                           const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node)
{
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);

   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);
   /* NOTE(review): despite the name, this loads the {primitive_id,
    * geometry_id_and_flags} pair of an AABB node (at byte offset 24), not
    * triangle data — confirm against the AABB node struct layout. */
   nir_ssa_def *triangle_info = nir_build_load_global(
      b, 2, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 24)), .align_mul = 4, .align_offset = 0);
   nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0);
   nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1);
   nir_ssa_def *geometry_id = nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 0xfffffff));
   nir_ssa_def *is_opaque = hit_is_opaque(b, vars, trav_vars, geometry_id_and_flags);

   /* Ray flags 0x40/0x80 cull opaque/non-opaque hits respectively. */
   nir_ssa_def *not_cull =
      nir_ieq(b,
              nir_iand(b, nir_load_var(b, vars->flags),
                       nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))),
              nir_imm_int(b, 0));
   nir_push_if(b, not_cull);
   {
      /* SBT record index = base offset + instance SBT offset
       *                    + stride * geometry id. */
      nir_ssa_def *sbt_idx =
         nir_iadd(b,
                  nir_iadd(b, nir_load_var(b, vars->sbt_offset),
                           nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags),
                                    nir_imm_int(b, 0xffffff))),
                  nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id));

      struct rt_variables inner_vars = create_inner_vars(b, vars);

      /* For AABBs the intersection shader writes the hit kind, and only does it if it is the
       * next closest hit candidate. */
      inner_vars.hit_kind = vars->hit_kind;

      /* Expose the candidate hit's state to the intersection shader. */
      nir_store_var(b, inner_vars.primitive_id, primitive_id, 1);
      nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1);
      nir_store_var(b, inner_vars.tmax, nir_load_var(b, vars->tmax), 0x1);
      nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
      nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
      nir_store_var(b, inner_vars.custom_instance_and_mask,
                    nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);
      nir_store_var(b, inner_vars.opaque, is_opaque, 1);

      load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4);

      /* Start as "no accepted hit" (status 1); a reported intersection
       * clears it (see the status != 1 check below). */
      nir_store_var(b, vars->ahit_status, nir_imm_int(b, 1), 1);

      /* SBT entry present: dispatch to the matching intersection shader. */
      nir_push_if(b, nir_ine(b, nir_load_var(b, inner_vars.idx), nir_imm_int(b, 0)));
      for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
         const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
         uint32_t shader_id = VK_SHADER_UNUSED_KHR;
         uint32_t any_hit_shader_id = VK_SHADER_UNUSED_KHR;

         switch (group_info->type) {
         case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
            shader_id = group_info->intersectionShader;
            any_hit_shader_id = group_info->anyHitShader;
            break;
         default:
            break;
         }
         if (shader_id == VK_SHADER_UNUSED_KHR)
            continue;

         const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
         nir_shader *nir_stage = parse_rt_stage(device, layout, stage);

         /* The any-hit shader (if any) is inlined into the intersection
          * shader's report-intersection points. */
         nir_shader *any_hit_stage = NULL;
         if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
            stage = &pCreateInfo->pStages[any_hit_shader_id];
            any_hit_stage = parse_rt_stage(device, layout, stage);

            nir_lower_intersection_shader(nir_stage, any_hit_stage);
            ralloc_free(any_hit_stage);
         }

         inner_vars.group_idx = i;
         insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, i + 2);
      }
      nir_push_else(b, NULL);
      {
         /* No intersection shader: fall back to a plain slab test against
          * the node's AABB. */
         nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7);
         nir_ssa_def *vec3_inf =
            nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7);

         /* Box bounds: min at offset 0, max at offset 12. */
         nir_ssa_def *bvh_lo =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 0)),
                                  .align_mul = 4, .align_offset = 0);
         nir_ssa_def *bvh_hi =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 12)),
                                  .align_mul = 4, .align_offset = 0);

         /* Per-axis entry/exit distances. */
         bvh_lo = nir_fsub(b, bvh_lo, nir_load_var(b, trav_vars->origin));
         bvh_hi = nir_fsub(b, bvh_hi, nir_load_var(b, trav_vars->origin));
         nir_ssa_def *t_vec = nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)),
                                       nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir)));
         nir_ssa_def *t2_vec = nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)),
                                        nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir)));
         /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */
         t2_vec =
            nir_bcsel(b, nir_feq(b, nir_load_var(b, trav_vars->dir), vec3_zero), vec3_inf, t2_vec);

         nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1));
         t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2));

         nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1));
         t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2));

         /* Accept the box hit if the slab interval overlaps [tmin, tmax). */
         nir_push_if(b, nir_iand(b, nir_flt(b, t_min, nir_load_var(b, vars->tmax)),
                                 nir_fge(b, t_max, nir_load_var(b, vars->tmin))));
         {
            nir_store_var(b, vars->ahit_status, nir_imm_int(b, 0), 1);
            nir_store_var(b, vars->tmax, nir_fmax(b, t_min, nir_load_var(b, vars->tmin)), 1);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);

      /* Status != 1 means a hit was reported/accepted: commit it. */
      nir_push_if(b, nir_ine(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 1)));
      {
         nir_store_var(b, vars->primitive_id, primitive_id, 1);
         nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1);
         nir_store_var(b, vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1);
         nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1);
         nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1);
         nir_store_var(b, vars->custom_instance_and_mask,
                       nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1);

         load_sbt_entry(b, vars, sbt_idx, SBT_HIT, 0);

         /* Return to the caller (instead of running a closest-hit shader)
          * when the ray skips closest-hit or the SBT entry has no shader. */
         nir_store_var(b, trav_vars->should_return,
                       nir_ior(b,
                               nir_ine(b,
                                       nir_iand(b, nir_load_var(b, vars->flags),
                                                nir_imm_int(b, 8 /* SkipClosestHitShader */)),
                                       nir_imm_int(b, 0)),
                               nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0))),
                       1);

         /* Stop traversal on first hit if requested, or if the ray was
          * terminated from a shader (status 2). */
         nir_ssa_def *terminate_on_first_hit =
            nir_ine(b,
                    nir_iand(b, nir_load_var(b, vars->flags),
                             nir_imm_int(b, 4 /* TerminateOnFirstHitKHR */)),
                    nir_imm_int(b, 0));
         nir_ssa_def *ray_terminated =
            nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 2));
         nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
         {
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}
15207ec681f3Smrg
15217ec681f3Smrgstatic void
15227ec681f3Smrgnir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1, uint32_t chan_2)
15237ec681f3Smrg{
15247ec681f3Smrg   nir_ssa_def *ssa_distances = nir_load_var(b, var_distances);
15257ec681f3Smrg   nir_ssa_def *ssa_indices = nir_load_var(b, var_indices);
15267ec681f3Smrg   /* if (distances[chan_2] < distances[chan_1]) { */
15277ec681f3Smrg   nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
15287ec681f3Smrg   {
15297ec681f3Smrg      /* swap(distances[chan_2], distances[chan_1]); */
15307ec681f3Smrg      nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
15317ec681f3Smrg      nir_ssa_def *new_indices[4]   = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)};
15327ec681f3Smrg      new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1);
15337ec681f3Smrg      new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2);
15347ec681f3Smrg      new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1);
15357ec681f3Smrg      new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2);
15367ec681f3Smrg      nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), (1u << chan_1) | (1u << chan_2));
15377ec681f3Smrg      nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2));
15387ec681f3Smrg   }
15397ec681f3Smrg   /* } */
15407ec681f3Smrg   nir_pop_if(b, NULL);
15417ec681f3Smrg}
15427ec681f3Smrg
/* Software ray/box intersection for a 4-wide BVH box node.
 *
 * Tests the ray against each of the node's four child AABBs with a slab
 * test, collects the child ids of the boxes that were hit, and returns a
 * uvec4 of child indices sorted front-to-back by entry distance; missed
 * slots hold 0xffffffff.
 */
static nir_ssa_def *
intersect_ray_amd_software_box(struct radv_device *device,
                               nir_builder *b, nir_ssa_def *bvh_node,
                               nir_ssa_def *ray_tmax, nir_ssa_def *origin,
                               nir_ssa_def *dir, nir_ssa_def *inv_dir)
{
   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
   const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);

   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);

   /* vec4 distances = vec4(INF, INF, INF, INF); */
   nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
   nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf);

   /* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */
   nir_variable *child_indices = nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices");
   nir_store_var(b, child_indices, nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf);

   /* Need to remove infinities here because otherwise we get nasty NaN propogation
    * if the direction has 0s in it. */
   /* inv_dir = clamp(inv_dir, -FLT_MAX, FLT_MAX); */
   inv_dir = nir_fclamp(b, inv_dir, nir_imm_float(b, -FLT_MAX), nir_imm_float(b, FLT_MAX));

   /* Test each of the four children in turn. */
   for (int i = 0; i < 4; i++) {
      const uint32_t child_offset  = offsetof(struct radv_bvh_box32_node, children[i]);
      const uint32_t coord_offsets[2] = {
         offsetof(struct radv_bvh_box32_node, coords[i][0][0]),
         offsetof(struct radv_bvh_box32_node, coords[i][1][0]),
      };

      /* node->children[i] -> uint */
      nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd(b, node_addr, nir_imm_int64(b, child_offset)),  .align_mul = 64, .align_offset = child_offset  % 64 );
      /* node->coords[i][0], node->coords[i][1] -> vec3 */
      nir_ssa_def *node_coords[2] = {
         nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[0])), .align_mul = 64, .align_offset = coord_offsets[0] % 64 ),
         nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[1])), .align_mul = 64, .align_offset = coord_offsets[1] % 64 ),
      };

      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR */
      nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0);
      nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */

      /* Slab test: per-axis entry/exit parameters. */
      /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */
      nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
      /* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */
      nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);

      /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z, bound1.z)); */
      nir_ssa_def *tmin = nir_fmax(b, nir_fmax(b,
         nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
         nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
         nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z, bound1.z)); */
      nir_ssa_def *tmax = nir_fmin(b, nir_fmin(b,
         nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
         nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
         nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

      /* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */
      nir_push_if(b,
         nir_iand(b,
            min_x_is_not_nan,
            nir_iand(b,
               nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
               nir_flt(b, tmin, ray_tmax))));
      {
         /* child_indices[i] = node->children[i]; */
         nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
         nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i);

         /* distances[i] = tmin; */
         nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin};
         nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i);

      }
      /* } */
      nir_pop_if(b, NULL);
   }

   /* Sort our distances with a sorting network. */
   nir_sort_hit_pair(b, distances, child_indices, 0, 1);
   nir_sort_hit_pair(b, distances, child_indices, 2, 3);
   nir_sort_hit_pair(b, distances, child_indices, 0, 2);
   nir_sort_hit_pair(b, distances, child_indices, 1, 3);
   nir_sort_hit_pair(b, distances, child_indices, 1, 2);

   return nir_load_var(b, child_indices);
}
16357ec681f3Smrg
/* Software fallback for the hardware BVH triangle-node intersection
 * instruction (used when the GPU lacks ray-tracing instructions or emulation
 * is forced).
 *
 * Tests the ray (origin, dir) against the triangle stored in the given BVH
 * node and returns a vec4 laid out as:
 *   .x = hit distance t scaled by the determinant (INFINITY when no hit),
 *   .y = the determinant (initialized to 1.0f; its sign encodes facing),
 *   .z = unnormalized barycentric v,
 *   .w = unnormalized barycentric w.
 *
 * NOTE(review): inv_dir is accepted for signature parity with the box test
 * but is unused here — the triangle test derives its own shear constants.
 */
static nir_ssa_def *
intersect_ray_amd_software_tri(struct radv_device *device,
                               nir_builder *b, nir_ssa_def *bvh_node,
                               nir_ssa_def *ray_tmax, nir_ssa_def *origin,
                               nir_ssa_def *dir, nir_ssa_def *inv_dir)
{
   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);

   nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node);

   /* Byte offsets of the three triangle vertices within the node. */
   const uint32_t coord_offsets[3] = {
      offsetof(struct radv_bvh_triangle_node, coords[0]),
      offsetof(struct radv_bvh_triangle_node, coords[1]),
      offsetof(struct radv_bvh_triangle_node, coords[2]),
   };

   /* node->coords[0], node->coords[1], node->coords[2] -> vec3 */
   nir_ssa_def *node_coords[3] = {
      nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[0])), .align_mul = 64, .align_offset = coord_offsets[0] % 64 ),
      nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[1])), .align_mul = 64, .align_offset = coord_offsets[1] % 64 ),
      nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[2])), .align_mul = 64, .align_offset = coord_offsets[2] % 64 ),
   };

   /* Miss by default: t = INFINITY, det = 1, v = w = 0. */
   nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result");
   nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf);

   /* Based on watertight Ray/Triangle intersection from
    * http://jcgt.org/published/0002/01/05/paper.pdf */

   /* Calculate the dimension where the ray direction is largest */
   nir_ssa_def *abs_dir = nir_fabs(b, dir);

   nir_ssa_def *abs_dirs[3] = {
      nir_channel(b, abs_dir, 0),
      nir_channel(b, abs_dir, 1),
      nir_channel(b, abs_dir, 2),
   };
   /* Find index of greatest value of abs_dir and put that as kz. */
   nir_ssa_def *kz = nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
         nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]),
            nir_imm_int(b, 0), nir_imm_int(b, 2)),
         nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]),
            nir_imm_int(b, 1), nir_imm_int(b, 2)));
   /* kx/ky are the remaining two axes, in cyclic order after kz. */
   nir_ssa_def *kx = nir_imod(b, nir_iadd(b, kz, nir_imm_int(b, 1)), nir_imm_int(b, 3));
   nir_ssa_def *ky = nir_imod(b, nir_iadd(b, kx, nir_imm_int(b, 1)), nir_imm_int(b, 3));
   nir_ssa_def *k_indices[3] = { kx, ky, kz };
   nir_ssa_def *k = nir_vec(b, k_indices, 3);

   /* Swap kx and ky dimensions to preseve winding order */
   unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
   k = nir_bcsel(b,
      nir_flt(b, nir_vector_extract(b, dir, kz), nir_imm_float(b, 0.0f)),
      nir_swizzle(b, k, swap_xy_swizzle, 3),
      k);

   kx = nir_channel(b, k, 0);
   ky = nir_channel(b, k, 1);
   kz = nir_channel(b, k, 2);

   /* Calculate shear constants */
   nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
   nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
   nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);

   /* Calculate vertices relative to ray origin */
   nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin);
   nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin);
   nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin);

   /* Perform shear and scale */
   nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
   nir_ssa_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
   nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
   nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
   nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
   nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));

   /* 2D edge functions; u/v/w are the (unnormalized) barycentrics. */
   nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
   nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
   nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));

   nir_variable *u_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u");
   nir_variable *v_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v");
   nir_variable *w_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w");
   nir_store_var(b, u_var, u, 0x1);
   nir_store_var(b, v_var, v, 0x1);
   nir_store_var(b, w_var, w, 0x1);

   /* Fallback to testing edges with double precision...
    *
    * The Vulkan spec states it only needs single precision watertightness
    * but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with
    * failures = 1 without doing this. :( */
   nir_ssa_def *cond_retest = nir_ior(b, nir_ior(b,
      nir_feq(b, u, nir_imm_float(b, 0.0f)),
      nir_feq(b, v, nir_imm_float(b, 0.0f))),
      nir_feq(b, w, nir_imm_float(b, 0.0f)));

   nir_push_if(b, cond_retest);
   {
      /* Recompute the edge functions in fp64 and narrow the result back. */
      ax = nir_f2f64(b, ax); ay = nir_f2f64(b, ay);
      bx = nir_f2f64(b, bx); by = nir_f2f64(b, by);
      cx = nir_f2f64(b, cx); cy = nir_f2f64(b, cy);

      nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), 0x1);
      nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), 0x1);
      nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), 0x1);
   }
   nir_pop_if(b, NULL);

   u = nir_load_var(b, u_var);
   v = nir_load_var(b, v_var);
   w = nir_load_var(b, w_var);

   /* Perform edge tests. A hit requires u/v/w to share a sign (all <= 0 for
    * back-facing, all >= 0 for front-facing); mixed signs mean a miss. */
   nir_ssa_def *cond_back = nir_ior(b, nir_ior(b,
      nir_flt(b, u, nir_imm_float(b, 0.0f)),
      nir_flt(b, v, nir_imm_float(b, 0.0f))),
      nir_flt(b, w, nir_imm_float(b, 0.0f)));

   nir_ssa_def *cond_front = nir_ior(b, nir_ior(b,
      nir_flt(b, nir_imm_float(b, 0.0f), u),
      nir_flt(b, nir_imm_float(b, 0.0f), v)),
      nir_flt(b, nir_imm_float(b, 0.0f), w));

   nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));

   nir_push_if(b, cond);
   {
      nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w));

      /* Sheared z of each vertex; t is the barycentric-weighted sum,
       * still scaled by det (not divided out here). */
      nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
      nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
      nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));

      nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));

      /* Reject hits behind the ray origin (t/det < 0). */
      nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);

      nir_ssa_def *det_cond_front = nir_inot(b, nir_flt(b, t_signed, nir_imm_float(b, 0.0f)));

      nir_push_if(b, det_cond_front);
      {
         nir_ssa_def *indices[4] = {
            t, det,
            v, w
         };
         nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, result);
}
17917ec681f3Smrg
/* Emits the BVH traversal loop into the combined ray-tracing shader.
 *
 * Traversal uses a per-lane DFS stack of 32-bit node ids kept in LDS
 * (shared memory), interleaved so that entry i of lane l lives at
 * base + l * stack_entry_size + i * stack_entry_stride. On GFX10.3+ the
 * hardware bvh64_intersect_ray instruction is used for box/triangle tests;
 * otherwise (or with RADV_PERFTEST_FORCE_EMULATE_RT) the software
 * intersect_ray_amd_software_{box,tri} fallbacks are emitted.
 *
 * On a miss, vars->idx stays 0 and the miss shader is loaded from the SBT;
 * trav_vars.should_return short-circuits back to the caller when no closest
 * hit shader will run.
 */
static void
insert_traversal(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                 nir_builder *b, const struct rt_variables *vars)
{
   /* 4 bytes per stack entry: one 32-bit BVH node id. */
   unsigned stack_entry_size = 4;
   unsigned lanes = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] *
                    b->shader->info.workgroup_size[2];
   unsigned stack_entry_stride = stack_entry_size * lanes;
   nir_ssa_def *stack_entry_stride_def = nir_imm_int(b, stack_entry_stride);
   nir_ssa_def *stack_base =
      nir_iadd(b, nir_imm_int(b, b->shader->info.shared_size),
               nir_imul(b, nir_load_subgroup_invocation(b), nir_imm_int(b, stack_entry_size)));

   /*
    * A top-level AS can contain 2^24 children and a bottom-level AS can contain 2^24 triangles. At
    * a branching factor of 4, that means we may need up to 24 levels of box nodes + 1 triangle node
    * + 1 instance node. Furthermore, when processing a box node, worst case we actually push all 4
    * children and remove one, so the DFS stack depth is box nodes * 3 + 2.
    */
   b->shader->info.shared_size += stack_entry_stride * 76;
   /* Must fit in the 32 KiB LDS budget alongside any pre-existing shared data. */
   assert(b->shader->info.shared_size <= 32768);

   nir_ssa_def *accel_struct = nir_load_var(b, vars->accel_struct);

   struct rt_traversal_vars trav_vars = init_traversal_vars(b);

   /* Initialize the follow-up shader idx to 0, to be replaced by the miss shader
    * if we actually miss. */
   nir_store_var(b, vars->idx, nir_imm_int(b, 0), 1);

   nir_store_var(b, trav_vars.should_return, nir_imm_bool(b, false), 1);

   /* A NULL acceleration structure is a guaranteed miss; skip traversal. */
   nir_push_if(b, nir_ine(b, accel_struct, nir_imm_int64(b, 0)));
   {
      nir_store_var(b, trav_vars.bvh_base, build_addr_to_node(b, accel_struct), 1);

      /* Root node id is the first dword of the acceleration structure. */
      nir_ssa_def *bvh_root =
         nir_build_load_global(b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE,
                               .align_mul = 64, .align_offset = 0);

      /* We create a BVH descriptor that covers the entire memory range. That way we can always
       * use the same descriptor, which avoids divergence when different rays hit different
       * instances at the cost of having to use 64-bit node ids. */
      const uint64_t bvh_size = 1ull << 42;
      nir_ssa_def *desc = nir_imm_ivec4(
         b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu,
         ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31));

      nir_ssa_def *vec3ones = nir_channels(b, nir_imm_vec4(b, 1.0, 1.0, 1.0, 1.0), 0x7);
      /* Traversal starts in world space; these are rewritten to object space
       * when an instance node is entered. */
      nir_store_var(b, trav_vars.origin, nir_load_var(b, vars->origin), 7);
      nir_store_var(b, trav_vars.dir, nir_load_var(b, vars->direction), 7);
      nir_store_var(b, trav_vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
      nir_store_var(b, trav_vars.sbt_offset_and_flags, nir_imm_int(b, 0), 1);
      nir_store_var(b, trav_vars.instance_addr, nir_imm_int64(b, 0), 1);

      /* Seed the stack with the root node. */
      nir_store_var(b, trav_vars.stack, nir_iadd(b, stack_base, stack_entry_stride_def), 1);
      nir_store_shared(b, bvh_root, stack_base, .base = 0, .write_mask = 0x1,
                       .align_mul = stack_entry_size, .align_offset = 0);

      nir_store_var(b, trav_vars.top_stack, nir_imm_int(b, 0), 1);

      nir_push_loop(b);

      /* Empty stack -> traversal done. */
      nir_push_if(b, nir_ieq(b, nir_load_var(b, trav_vars.stack), stack_base));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      /* Popped back past the instance entry point: we left a bottom-level AS,
       * so restore top-level base and world-space ray. */
      nir_push_if(
         b, nir_uge(b, nir_load_var(b, trav_vars.top_stack), nir_load_var(b, trav_vars.stack)));
      nir_store_var(b, trav_vars.top_stack, nir_imm_int(b, 0), 1);
      nir_store_var(b, trav_vars.bvh_base,
                    build_addr_to_node(b, nir_load_var(b, vars->accel_struct)), 1);
      nir_store_var(b, trav_vars.origin, nir_load_var(b, vars->origin), 7);
      nir_store_var(b, trav_vars.dir, nir_load_var(b, vars->direction), 7);
      nir_store_var(b, trav_vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
      nir_store_var(b, trav_vars.instance_addr, nir_imm_int64(b, 0), 1);

      nir_pop_if(b, NULL);

      /* Pop the next node id off the stack. */
      nir_store_var(b, trav_vars.stack,
                    nir_isub(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def), 1);

      nir_ssa_def *bvh_node = nir_load_shared(b, 1, 32, nir_load_var(b, trav_vars.stack), .base = 0,
                                              .align_mul = stack_entry_size, .align_offset = 0);
      /* Low 3 bits of the node id encode the node type. */
      nir_ssa_def *bvh_node_type = nir_iand(b, bvh_node, nir_imm_int(b, 7));

      bvh_node = nir_iadd(b, nir_load_var(b, trav_vars.bvh_base), nir_u2u(b, bvh_node, 64));
      nir_ssa_def *intrinsic_result = NULL;
      if (device->physical_device->rad_info.chip_class >= GFX10_3
       && !(device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT)) {
         /* Hardware ray intersection; handles both box and triangle nodes. */
         intrinsic_result = nir_bvh64_intersect_ray_amd(
            b, 32, desc, nir_unpack_64_2x32(b, bvh_node), nir_load_var(b, vars->tmax),
            nir_load_var(b, trav_vars.origin), nir_load_var(b, trav_vars.dir),
            nir_load_var(b, trav_vars.inv_dir));
      }

      /* Type bit 2 set: non-triangle node (box, AABB or instance). */
      nir_push_if(b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 4)), nir_imm_int(b, 0)));
      {
         /* Type bit 1 set: AABB or instance node. */
         nir_push_if(b,
                     nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 2)), nir_imm_int(b, 0)));
         {
            /* custom */
            nir_push_if(
               b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 1)), nir_imm_int(b, 0)));
            {
               insert_traversal_aabb_case(device, pCreateInfo, b, vars, &trav_vars, bvh_node);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, bvh_node);
               nir_ssa_def *instance_data = nir_build_load_global(
                  b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
               /* World-to-object matrix: three vec4 rows at byte offsets
                * 16/32/48 of the instance node. */
               nir_ssa_def *wto_matrix[] = {
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 16)),
                                        .align_mul = 64, .align_offset = 16),
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 32)),
                                        .align_mul = 64, .align_offset = 32),
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 48)),
                                        .align_mul = 64, .align_offset = 48)};
               nir_ssa_def *instance_id = nir_build_load_global(
                  b, 1, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 88)), .align_mul = 4,
                  .align_offset = 0);
               nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
               /* Instance mask lives in the top 8 bits. */
               nir_ssa_def *instance_mask = nir_ushr(b, instance_and_mask, nir_imm_int(b, 24));

               /* Cull the instance if its mask doesn't intersect the ray's
                * cull mask. */
               nir_push_if(b,
                           nir_ieq(b, nir_iand(b, instance_mask, nir_load_var(b, vars->cull_mask)),
                                   nir_imm_int(b, 0)));
               nir_jump(b, nir_jump_continue);
               nir_pop_if(b, NULL);

               /* Descend into the bottom-level AS: remember where to restore
                * the top level, push the BLAS root and switch the ray to
                * object space. */
               nir_store_var(b, trav_vars.top_stack, nir_load_var(b, trav_vars.stack), 1);
               nir_store_var(b, trav_vars.bvh_base,
                             build_addr_to_node(
                                b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))),
                             1);
               nir_store_shared(b,
                                nir_iand(b, nir_channel(b, instance_data, 0), nir_imm_int(b, 63)),
                                nir_load_var(b, trav_vars.stack), .base = 0, .write_mask = 0x1,
                                .align_mul = stack_entry_size, .align_offset = 0);
               nir_store_var(b, trav_vars.stack,
                             nir_iadd(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def),
                             1);

               nir_store_var(
                  b, trav_vars.origin,
                  nir_build_vec3_mat_mult_pre(b, nir_load_var(b, vars->origin), wto_matrix), 7);
               nir_store_var(
                  b, trav_vars.dir,
                  nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false),
                  7);
               nir_store_var(b, trav_vars.inv_dir,
                             nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
               nir_store_var(b, trav_vars.custom_instance_and_mask, instance_and_mask, 1);
               nir_store_var(b, trav_vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                             1);
               nir_store_var(b, trav_vars.instance_id, instance_id, 1);
               nir_store_var(b, trav_vars.instance_addr, instance_node_addr, 1);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            /* box */
            nir_ssa_def *result = intrinsic_result;
            if (!result) {
               /* If we didn't run the intrinsic cause the hardware didn't support it,
                * emulate ray/box intersection here */
               result = intersect_ray_amd_software_box(device,
                  b, bvh_node, nir_load_var(b, vars->tmax), nir_load_var(b, trav_vars.origin),
                  nir_load_var(b, trav_vars.dir), nir_load_var(b, trav_vars.inv_dir));
            }

            /* Push hit children in reverse so the nearest child (index 0)
             * ends up on top of the stack; 0xffffffff marks "no child". */
            for (unsigned i = 4; i-- > 0; ) {
               nir_ssa_def *new_node = nir_vector_extract(b, result, nir_imm_int(b, i));
               nir_push_if(b, nir_ine(b, new_node, nir_imm_int(b, 0xffffffff)));
               {
                  nir_store_shared(b, new_node, nir_load_var(b, trav_vars.stack), .base = 0,
                                   .write_mask = 0x1, .align_mul = stack_entry_size,
                                   .align_offset = 0);
                  nir_store_var(
                     b, trav_vars.stack,
                     nir_iadd(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def), 1);
               }
               nir_pop_if(b, NULL);
            }
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         /* triangle */
         nir_ssa_def *result = intrinsic_result;
         if (!result) {
            /* If we didn't run the intrinsic cause the hardware didn't support it,
             * emulate ray/tri intersection here */
            result = intersect_ray_amd_software_tri(device,
               b, bvh_node, nir_load_var(b, vars->tmax), nir_load_var(b, trav_vars.origin),
               nir_load_var(b, trav_vars.dir), nir_load_var(b, trav_vars.inv_dir));
         }
         insert_traversal_triangle_case(device, pCreateInfo, b, result, vars, &trav_vars, bvh_node);
      }
      nir_pop_if(b, NULL);

      nir_pop_loop(b, NULL);
   }
   nir_pop_if(b, NULL);

   /* should_return is set if we had a hit but we won't be calling the closest hit shader and hence
    * need to return immediately to the calling shader. */
   nir_push_if(b, nir_load_var(b, trav_vars.should_return));
   {
      insert_rt_return(b, vars);
   }
   nir_push_else(b, NULL);
   {
      /* Only load the miss shader if we actually miss, which we determining by not having set
       * a closest hit shader. It is valid to not specify an SBT pointer for miss shaders if none
       * of the rays miss. */
      nir_push_if(b, nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0)));
      {
         load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, 0);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}
20227ec681f3Smrg
20237ec681f3Smrgstatic unsigned
20247ec681f3Smrgcompute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
20257ec681f3Smrg                      const struct radv_pipeline_shader_stack_size *stack_sizes)
20267ec681f3Smrg{
20277ec681f3Smrg   unsigned raygen_size = 0;
20287ec681f3Smrg   unsigned callable_size = 0;
20297ec681f3Smrg   unsigned chit_size = 0;
20307ec681f3Smrg   unsigned miss_size = 0;
20317ec681f3Smrg   unsigned non_recursive_size = 0;
20327ec681f3Smrg
20337ec681f3Smrg   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
20347ec681f3Smrg      non_recursive_size = MAX2(stack_sizes[i].non_recursive_size, non_recursive_size);
20357ec681f3Smrg
20367ec681f3Smrg      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
20377ec681f3Smrg      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
20387ec681f3Smrg      unsigned size = stack_sizes[i].recursive_size;
20397ec681f3Smrg
20407ec681f3Smrg      switch (group_info->type) {
20417ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
20427ec681f3Smrg         shader_id = group_info->generalShader;
20437ec681f3Smrg         break;
20447ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
20457ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
20467ec681f3Smrg         shader_id = group_info->closestHitShader;
20477ec681f3Smrg         break;
20487ec681f3Smrg      default:
20497ec681f3Smrg         break;
20507ec681f3Smrg      }
20517ec681f3Smrg      if (shader_id == VK_SHADER_UNUSED_KHR)
20527ec681f3Smrg         continue;
20537ec681f3Smrg
20547ec681f3Smrg      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
20557ec681f3Smrg      switch (stage->stage) {
20567ec681f3Smrg      case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
20577ec681f3Smrg         raygen_size = MAX2(raygen_size, size);
20587ec681f3Smrg         break;
20597ec681f3Smrg      case VK_SHADER_STAGE_MISS_BIT_KHR:
20607ec681f3Smrg         miss_size = MAX2(miss_size, size);
20617ec681f3Smrg         break;
20627ec681f3Smrg      case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
20637ec681f3Smrg         chit_size = MAX2(chit_size, size);
20647ec681f3Smrg         break;
20657ec681f3Smrg      case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
20667ec681f3Smrg         callable_size = MAX2(callable_size, size);
20677ec681f3Smrg         break;
20687ec681f3Smrg      default:
20697ec681f3Smrg         unreachable("Invalid stage type in RT shader");
20707ec681f3Smrg      }
20717ec681f3Smrg   }
20727ec681f3Smrg   return raygen_size +
20737ec681f3Smrg          MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) *
20747ec681f3Smrg             MAX2(MAX2(chit_size, miss_size), non_recursive_size) +
20757ec681f3Smrg          MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) *
20767ec681f3Smrg             MAX2(chit_size, miss_size) +
20777ec681f3Smrg          2 * callable_size;
20787ec681f3Smrg}
20797ec681f3Smrg
20807ec681f3Smrgbool
20817ec681f3Smrgradv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
20827ec681f3Smrg{
20837ec681f3Smrg   if (!pCreateInfo->pDynamicState)
20847ec681f3Smrg      return false;
20857ec681f3Smrg
20867ec681f3Smrg   for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
20877ec681f3Smrg      if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
20887ec681f3Smrg          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
20897ec681f3Smrg         return true;
20907ec681f3Smrg   }
20917ec681f3Smrg
20927ec681f3Smrg   return false;
20937ec681f3Smrg}
20947ec681f3Smrg
20957ec681f3Smrgstatic nir_shader *
20967ec681f3Smrgcreate_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
20977ec681f3Smrg                 struct radv_pipeline_shader_stack_size *stack_sizes)
20987ec681f3Smrg{
20997ec681f3Smrg   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
21007ec681f3Smrg   struct radv_pipeline_key key;
21017ec681f3Smrg   memset(&key, 0, sizeof(key));
21027ec681f3Smrg
21037ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "rt_combined");
21047ec681f3Smrg
21057ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
21067ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
21077ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
21087ec681f3Smrg
21097ec681f3Smrg   struct rt_variables vars = create_rt_variables(b.shader, stack_sizes);
21107ec681f3Smrg   load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0);
21117ec681f3Smrg   nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
21127ec681f3Smrg
21137ec681f3Smrg   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);
21147ec681f3Smrg
21157ec681f3Smrg   nir_loop *loop = nir_push_loop(&b);
21167ec681f3Smrg
21177ec681f3Smrg   nir_push_if(&b, nir_ior(&b, nir_ieq(&b, nir_load_var(&b, vars.idx), nir_imm_int(&b, 0)),
21187ec681f3Smrg                           nir_ine(&b, nir_load_var(&b, vars.main_loop_case_visited),
21197ec681f3Smrg                                   nir_imm_bool(&b, true))));
21207ec681f3Smrg   nir_jump(&b, nir_jump_break);
21217ec681f3Smrg   nir_pop_if(&b, NULL);
21227ec681f3Smrg
21237ec681f3Smrg   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, false), 1);
21247ec681f3Smrg
21257ec681f3Smrg   nir_push_if(&b, nir_ieq(&b, nir_load_var(&b, vars.idx), nir_imm_int(&b, 1)));
21267ec681f3Smrg   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);
21277ec681f3Smrg   insert_traversal(device, pCreateInfo, &b, &vars);
21287ec681f3Smrg   nir_pop_if(&b, NULL);
21297ec681f3Smrg
21307ec681f3Smrg   nir_ssa_def *idx = nir_load_var(&b, vars.idx);
21317ec681f3Smrg
21327ec681f3Smrg   /* We do a trick with the indexing of the resume shaders so that the first
21337ec681f3Smrg    * shader of group x always gets id x and the resume shader ids then come after
21347ec681f3Smrg    * groupCount. This makes the shadergroup handles independent of compilation. */
21357ec681f3Smrg   unsigned call_idx_base = pCreateInfo->groupCount + 1;
21367ec681f3Smrg   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
21377ec681f3Smrg      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
21387ec681f3Smrg      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
21397ec681f3Smrg
21407ec681f3Smrg      switch (group_info->type) {
21417ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
21427ec681f3Smrg         shader_id = group_info->generalShader;
21437ec681f3Smrg         break;
21447ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
21457ec681f3Smrg      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
21467ec681f3Smrg         shader_id = group_info->closestHitShader;
21477ec681f3Smrg         break;
21487ec681f3Smrg      default:
21497ec681f3Smrg         break;
21507ec681f3Smrg      }
21517ec681f3Smrg      if (shader_id == VK_SHADER_UNUSED_KHR)
21527ec681f3Smrg         continue;
21537ec681f3Smrg
21547ec681f3Smrg      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
21557ec681f3Smrg      nir_shader *nir_stage = parse_rt_stage(device, layout, stage);
21567ec681f3Smrg
21577ec681f3Smrg      b.shader->options = nir_stage->options;
21587ec681f3Smrg
21597ec681f3Smrg      uint32_t num_resume_shaders = 0;
21607ec681f3Smrg      nir_shader **resume_shaders = NULL;
21617ec681f3Smrg      nir_lower_shader_calls(nir_stage, nir_address_format_32bit_offset, 16, &resume_shaders,
21627ec681f3Smrg                             &num_resume_shaders, nir_stage);
21637ec681f3Smrg
21647ec681f3Smrg      vars.group_idx = i;
21657ec681f3Smrg      insert_rt_case(&b, nir_stage, &vars, idx, call_idx_base, i + 2);
21667ec681f3Smrg      for (unsigned j = 0; j < num_resume_shaders; ++j) {
21677ec681f3Smrg         insert_rt_case(&b, resume_shaders[j], &vars, idx, call_idx_base, call_idx_base + 1 + j);
21687ec681f3Smrg      }
21697ec681f3Smrg      call_idx_base += num_resume_shaders;
21707ec681f3Smrg   }
21717ec681f3Smrg
21727ec681f3Smrg   nir_pop_loop(&b, loop);
21737ec681f3Smrg
21747ec681f3Smrg   if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
21757ec681f3Smrg      /* Put something so scratch gets enabled in the shader. */
21767ec681f3Smrg      b.shader->scratch_size = 16;
21777ec681f3Smrg   } else
21787ec681f3Smrg      b.shader->scratch_size = compute_rt_stack_size(pCreateInfo, stack_sizes);
21797ec681f3Smrg
21807ec681f3Smrg   /* Deal with all the inline functions. */
21817ec681f3Smrg   nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader));
21827ec681f3Smrg   nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none);
21837ec681f3Smrg
21847ec681f3Smrg   return b.shader;
21857ec681f3Smrg}
21867ec681f3Smrg
/* Create one ray tracing pipeline by lowering it to a compute pipeline:
 * merge in any pipeline libraries, probe the pipeline cache, and only if that
 * misses generate the combined NIR shader and compile it. On success the
 * pipeline's rt_group_handles table is filled so shader group handles can be
 * queried without recompilation.
 */
static VkResult
radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;
   struct radv_pipeline *pipeline = NULL;
   struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
   uint8_t hash[20];
   nir_shader *shader = NULL;
   bool keep_statistic_info =
      (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
      (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;

   /* Libraries are only containers for stages/groups; no compilation here. */
   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);

   /* Flatten the create info plus all linked libraries into one list of
    * stages and groups; the merged arrays are heap-allocated and freed in the
    * fail path below. */
   VkRayTracingPipelineCreateInfoKHR local_create_info =
      radv_create_merged_rt_create_info(pCreateInfo);
   if (!local_create_info.pStages || !local_create_info.pGroups) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};

   VkComputePipelineCreateInfo compute_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .pNext = NULL,
      .flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT,
      .stage =
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
            .module = vk_shader_module_to_handle(&module),
            .pName = "main",
         },
      .layout = pCreateInfo->layout,
   };

   /* First check if we can get things from the cache before we take the expensive step of
    * generating the nir. */
   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
                                         stack_sizes, local_create_info.groupCount, pPipeline);
   if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) {
      /* Cache miss: build the combined shader and compile for real (the
       * FAIL_ON_COMPILE_REQUIRED bit is dropped for this second attempt). */
      stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
      if (!stack_sizes) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      shader = create_rt_shader(device, &local_create_info, stack_sizes);
      module.nir = shader;
      compute_info.flags = pCreateInfo->flags;
      result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
                                            stack_sizes, local_create_info.groupCount, pPipeline);
      /* NOTE(review): cleared unconditionally, so radv_compute_pipeline_create
       * appears to take ownership of stack_sizes even on failure — confirm it
       * frees them on every error path, otherwise this leaks. */
      stack_sizes = NULL;

      if (result != VK_SUCCESS)
         goto shader_fail;
   }
   pipeline = radv_pipeline_from_handle(*pPipeline);

   pipeline->compute.rt_group_handles =
      calloc(sizeof(*pipeline->compute.rt_group_handles), local_create_info.groupCount);
   if (!pipeline->compute.rt_group_handles) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto shader_fail;
   }

   pipeline->compute.dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);

   /* Fill the group handle table. Handle values are case ids in the combined
    * shader: group i maps to case i + 2 (ids 0 and 1 are reserved), matching
    * the insert_rt_case() indexing in create_rt_shader(). Slot 0 holds the
    * general/closest-hit id, slot 1 the intersection/any-hit id. */
   for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
      switch (group_info->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
         if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         FALLTHROUGH;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         break;
      /* NOTE(review): this constant belongs to VkShaderGroupShaderKHR, not to
       * VkRayTracingShaderGroupTypeKHR which group_info->type has; both
       * MAX_ENUM values are 0x7FFFFFFF so it compiles, but the intended label
       * is presumably VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR. */
      case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
         unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
      }
   }

shader_fail:
   /* On any failure after pipeline creation, tear the pipeline down. */
   if (result != VK_SUCCESS && pipeline)
      radv_pipeline_destroy(device, pipeline, pAllocator);
   ralloc_free(shader);
fail:
   /* The merged arrays from radv_create_merged_rt_create_info are ours. */
   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   free(stack_sizes);
   return result;
}
22937ec681f3Smrg
22947ec681f3SmrgVkResult
22957ec681f3Smrgradv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
22967ec681f3Smrg                                  VkPipelineCache pipelineCache, uint32_t count,
22977ec681f3Smrg                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
22987ec681f3Smrg                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
22997ec681f3Smrg{
23007ec681f3Smrg   VkResult result = VK_SUCCESS;
23017ec681f3Smrg
23027ec681f3Smrg   unsigned i = 0;
23037ec681f3Smrg   for (; i < count; i++) {
23047ec681f3Smrg      VkResult r;
23057ec681f3Smrg      r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
23067ec681f3Smrg                                  &pPipelines[i]);
23077ec681f3Smrg      if (r != VK_SUCCESS) {
23087ec681f3Smrg         result = r;
23097ec681f3Smrg         pPipelines[i] = VK_NULL_HANDLE;
23107ec681f3Smrg
23117ec681f3Smrg         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
23127ec681f3Smrg            break;
23137ec681f3Smrg      }
23147ec681f3Smrg   }
23157ec681f3Smrg
23167ec681f3Smrg   for (; i < count; ++i)
23177ec681f3Smrg      pPipelines[i] = VK_NULL_HANDLE;
23187ec681f3Smrg
23197ec681f3Smrg   return result;
23207ec681f3Smrg}
23217ec681f3Smrg
23227ec681f3SmrgVkResult
23237ec681f3Smrgradv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
23247ec681f3Smrg                                        uint32_t groupCount, size_t dataSize, void *pData)
23257ec681f3Smrg{
23267ec681f3Smrg   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
23277ec681f3Smrg   char *data = pData;
23287ec681f3Smrg
23297ec681f3Smrg   STATIC_ASSERT(sizeof(*pipeline->compute.rt_group_handles) <= RADV_RT_HANDLE_SIZE);
23307ec681f3Smrg
23317ec681f3Smrg   memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
23327ec681f3Smrg
23337ec681f3Smrg   for (uint32_t i = 0; i < groupCount; ++i) {
23347ec681f3Smrg      memcpy(data + i * RADV_RT_HANDLE_SIZE, &pipeline->compute.rt_group_handles[firstGroup + i],
23357ec681f3Smrg             sizeof(*pipeline->compute.rt_group_handles));
23367ec681f3Smrg   }
23377ec681f3Smrg
23387ec681f3Smrg   return VK_SUCCESS;
23397ec681f3Smrg}
23407ec681f3Smrg
23417ec681f3SmrgVkDeviceSize
23427ec681f3Smrgradv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
23437ec681f3Smrg                                          VkShaderGroupShaderKHR groupShader)
23447ec681f3Smrg{
23457ec681f3Smrg   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
23467ec681f3Smrg   const struct radv_pipeline_shader_stack_size *stack_size =
23477ec681f3Smrg      &pipeline->compute.rt_stack_sizes[group];
23487ec681f3Smrg
23497ec681f3Smrg   if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR ||
23507ec681f3Smrg       groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR)
23517ec681f3Smrg      return stack_size->non_recursive_size;
23527ec681f3Smrg   else
23537ec681f3Smrg      return stack_size->recursive_size;
23547ec681f3Smrg}
2355