17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Google 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 
227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "radv_acceleration_structure.h" 257ec681f3Smrg#include "radv_debug.h" 267ec681f3Smrg#include "radv_private.h" 277ec681f3Smrg#include "radv_shader.h" 287ec681f3Smrg 297ec681f3Smrg#include "nir/nir.h" 307ec681f3Smrg#include "nir/nir_builder.h" 317ec681f3Smrg#include "nir/nir_builtin_builder.h" 327ec681f3Smrg 337ec681f3Smrgstatic VkRayTracingPipelineCreateInfoKHR 347ec681f3Smrgradv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo) 357ec681f3Smrg{ 367ec681f3Smrg VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo; 377ec681f3Smrg uint32_t total_stages = pCreateInfo->stageCount; 387ec681f3Smrg uint32_t total_groups = pCreateInfo->groupCount; 397ec681f3Smrg 407ec681f3Smrg if (pCreateInfo->pLibraryInfo) { 417ec681f3Smrg for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { 427ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]); 437ec681f3Smrg total_stages += library->library.stage_count; 447ec681f3Smrg total_groups += library->library.group_count; 457ec681f3Smrg } 467ec681f3Smrg } 477ec681f3Smrg VkPipelineShaderStageCreateInfo *stages = NULL; 487ec681f3Smrg VkRayTracingShaderGroupCreateInfoKHR *groups = NULL; 497ec681f3Smrg local_create_info.stageCount = total_stages; 507ec681f3Smrg local_create_info.groupCount = total_groups; 517ec681f3Smrg local_create_info.pStages = stages = 527ec681f3Smrg malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages); 537ec681f3Smrg local_create_info.pGroups = groups = 547ec681f3Smrg malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups); 557ec681f3Smrg if (!local_create_info.pStages || !local_create_info.pGroups) 567ec681f3Smrg return local_create_info; 577ec681f3Smrg 587ec681f3Smrg total_stages = pCreateInfo->stageCount; 597ec681f3Smrg total_groups = pCreateInfo->groupCount; 607ec681f3Smrg for (unsigned j = 0; j < pCreateInfo->stageCount; ++j) 617ec681f3Smrg 
stages[j] = pCreateInfo->pStages[j]; 627ec681f3Smrg for (unsigned j = 0; j < pCreateInfo->groupCount; ++j) 637ec681f3Smrg groups[j] = pCreateInfo->pGroups[j]; 647ec681f3Smrg 657ec681f3Smrg if (pCreateInfo->pLibraryInfo) { 667ec681f3Smrg for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) { 677ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]); 687ec681f3Smrg for (unsigned j = 0; j < library->library.stage_count; ++j) 697ec681f3Smrg stages[total_stages + j] = library->library.stages[j]; 707ec681f3Smrg for (unsigned j = 0; j < library->library.group_count; ++j) { 717ec681f3Smrg VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j]; 727ec681f3Smrg *dst = library->library.groups[j]; 737ec681f3Smrg if (dst->generalShader != VK_SHADER_UNUSED_KHR) 747ec681f3Smrg dst->generalShader += total_stages; 757ec681f3Smrg if (dst->closestHitShader != VK_SHADER_UNUSED_KHR) 767ec681f3Smrg dst->closestHitShader += total_stages; 777ec681f3Smrg if (dst->anyHitShader != VK_SHADER_UNUSED_KHR) 787ec681f3Smrg dst->anyHitShader += total_stages; 797ec681f3Smrg if (dst->intersectionShader != VK_SHADER_UNUSED_KHR) 807ec681f3Smrg dst->intersectionShader += total_stages; 817ec681f3Smrg } 827ec681f3Smrg total_stages += library->library.stage_count; 837ec681f3Smrg total_groups += library->library.group_count; 847ec681f3Smrg } 857ec681f3Smrg } 867ec681f3Smrg return local_create_info; 877ec681f3Smrg} 887ec681f3Smrg 897ec681f3Smrgstatic VkResult 907ec681f3Smrgradv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache, 917ec681f3Smrg const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 927ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) 937ec681f3Smrg{ 947ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 957ec681f3Smrg struct radv_pipeline *pipeline; 967ec681f3Smrg 977ec681f3Smrg pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 987ec681f3Smrg 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 997ec681f3Smrg if (pipeline == NULL) 1007ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1017ec681f3Smrg 1027ec681f3Smrg vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); 1037ec681f3Smrg pipeline->type = RADV_PIPELINE_LIBRARY; 1047ec681f3Smrg 1057ec681f3Smrg VkRayTracingPipelineCreateInfoKHR local_create_info = 1067ec681f3Smrg radv_create_merged_rt_create_info(pCreateInfo); 1077ec681f3Smrg if (!local_create_info.pStages || !local_create_info.pGroups) 1087ec681f3Smrg goto fail; 1097ec681f3Smrg 1107ec681f3Smrg if (local_create_info.stageCount) { 1117ec681f3Smrg size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount; 1127ec681f3Smrg pipeline->library.stage_count = local_create_info.stageCount; 1137ec681f3Smrg pipeline->library.stages = malloc(size); 1147ec681f3Smrg if (!pipeline->library.stages) 1157ec681f3Smrg goto fail; 1167ec681f3Smrg memcpy(pipeline->library.stages, local_create_info.pStages, size); 1177ec681f3Smrg } 1187ec681f3Smrg 1197ec681f3Smrg if (local_create_info.groupCount) { 1207ec681f3Smrg size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount; 1217ec681f3Smrg pipeline->library.group_count = local_create_info.groupCount; 1227ec681f3Smrg pipeline->library.groups = malloc(size); 1237ec681f3Smrg if (!pipeline->library.groups) 1247ec681f3Smrg goto fail; 1257ec681f3Smrg memcpy(pipeline->library.groups, local_create_info.pGroups, size); 1267ec681f3Smrg } 1277ec681f3Smrg 1287ec681f3Smrg *pPipeline = radv_pipeline_to_handle(pipeline); 1297ec681f3Smrg 1307ec681f3Smrg free((void *)local_create_info.pGroups); 1317ec681f3Smrg free((void *)local_create_info.pStages); 1327ec681f3Smrg return VK_SUCCESS; 1337ec681f3Smrgfail: 1347ec681f3Smrg free(pipeline->library.groups); 1357ec681f3Smrg free(pipeline->library.stages); 1367ec681f3Smrg free((void *)local_create_info.pGroups); 1377ec681f3Smrg free((void 
*)local_create_info.pStages); 1387ec681f3Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 1397ec681f3Smrg} 1407ec681f3Smrg 1417ec681f3Smrg/* 1427ec681f3Smrg * Global variables for an RT pipeline 1437ec681f3Smrg */ 1447ec681f3Smrgstruct rt_variables { 1457ec681f3Smrg /* idx of the next shader to run in the next iteration of the main loop */ 1467ec681f3Smrg nir_variable *idx; 1477ec681f3Smrg 1487ec681f3Smrg /* scratch offset of the argument area relative to stack_ptr */ 1497ec681f3Smrg nir_variable *arg; 1507ec681f3Smrg 1517ec681f3Smrg nir_variable *stack_ptr; 1527ec681f3Smrg 1537ec681f3Smrg /* global address of the SBT entry used for the shader */ 1547ec681f3Smrg nir_variable *shader_record_ptr; 1557ec681f3Smrg 1567ec681f3Smrg /* trace_ray arguments */ 1577ec681f3Smrg nir_variable *accel_struct; 1587ec681f3Smrg nir_variable *flags; 1597ec681f3Smrg nir_variable *cull_mask; 1607ec681f3Smrg nir_variable *sbt_offset; 1617ec681f3Smrg nir_variable *sbt_stride; 1627ec681f3Smrg nir_variable *miss_index; 1637ec681f3Smrg nir_variable *origin; 1647ec681f3Smrg nir_variable *tmin; 1657ec681f3Smrg nir_variable *direction; 1667ec681f3Smrg nir_variable *tmax; 1677ec681f3Smrg 1687ec681f3Smrg /* from the BTAS instance currently being visited */ 1697ec681f3Smrg nir_variable *custom_instance_and_mask; 1707ec681f3Smrg 1717ec681f3Smrg /* Properties of the primitive currently being visited. */ 1727ec681f3Smrg nir_variable *primitive_id; 1737ec681f3Smrg nir_variable *geometry_id_and_flags; 1747ec681f3Smrg nir_variable *instance_id; 1757ec681f3Smrg nir_variable *instance_addr; 1767ec681f3Smrg nir_variable *hit_kind; 1777ec681f3Smrg nir_variable *opaque; 1787ec681f3Smrg 1797ec681f3Smrg /* Safeguard to ensure we don't end up in an infinite loop of non-existing case. Should not be 1807ec681f3Smrg * needed but is extra anti-hang safety during bring-up. */ 1817ec681f3Smrg nir_variable *main_loop_case_visited; 1827ec681f3Smrg 1837ec681f3Smrg /* Output variable for intersection & anyhit shaders. 
*/ 1847ec681f3Smrg nir_variable *ahit_status; 1857ec681f3Smrg 1867ec681f3Smrg /* Array of stack size struct for recording the max stack size for each group. */ 1877ec681f3Smrg struct radv_pipeline_shader_stack_size *stack_sizes; 1887ec681f3Smrg unsigned group_idx; 1897ec681f3Smrg}; 1907ec681f3Smrg 1917ec681f3Smrgstatic struct rt_variables 1927ec681f3Smrgcreate_rt_variables(nir_shader *shader, struct radv_pipeline_shader_stack_size *stack_sizes) 1937ec681f3Smrg{ 1947ec681f3Smrg struct rt_variables vars = { 1957ec681f3Smrg NULL, 1967ec681f3Smrg }; 1977ec681f3Smrg vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx"); 1987ec681f3Smrg vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg"); 1997ec681f3Smrg vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr"); 2007ec681f3Smrg vars.shader_record_ptr = 2017ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); 2027ec681f3Smrg 2037ec681f3Smrg const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 2047ec681f3Smrg vars.accel_struct = 2057ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct"); 2067ec681f3Smrg vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags"); 2077ec681f3Smrg vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask"); 2087ec681f3Smrg vars.sbt_offset = 2097ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset"); 2107ec681f3Smrg vars.sbt_stride = 2117ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride"); 2127ec681f3Smrg vars.miss_index = 2137ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index"); 2147ec681f3Smrg vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin"); 
2157ec681f3Smrg vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin"); 2167ec681f3Smrg vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction"); 2177ec681f3Smrg vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax"); 2187ec681f3Smrg 2197ec681f3Smrg vars.custom_instance_and_mask = nir_variable_create( 2207ec681f3Smrg shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask"); 2217ec681f3Smrg vars.primitive_id = 2227ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id"); 2237ec681f3Smrg vars.geometry_id_and_flags = 2247ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags"); 2257ec681f3Smrg vars.instance_id = 2267ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id"); 2277ec681f3Smrg vars.instance_addr = 2287ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); 2297ec681f3Smrg vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind"); 2307ec681f3Smrg vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque"); 2317ec681f3Smrg 2327ec681f3Smrg vars.main_loop_case_visited = 2337ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "main_loop_case_visited"); 2347ec681f3Smrg vars.ahit_status = 2357ec681f3Smrg nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ahit_status"); 2367ec681f3Smrg 2377ec681f3Smrg vars.stack_sizes = stack_sizes; 2387ec681f3Smrg return vars; 2397ec681f3Smrg} 2407ec681f3Smrg 2417ec681f3Smrg/* 2427ec681f3Smrg * Remap all the variables between the two rt_variables struct for inlining. 
2437ec681f3Smrg */ 2447ec681f3Smrgstatic void 2457ec681f3Smrgmap_rt_variables(struct hash_table *var_remap, struct rt_variables *src, 2467ec681f3Smrg const struct rt_variables *dst) 2477ec681f3Smrg{ 2487ec681f3Smrg _mesa_hash_table_insert(var_remap, src->idx, dst->idx); 2497ec681f3Smrg _mesa_hash_table_insert(var_remap, src->arg, dst->arg); 2507ec681f3Smrg _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr); 2517ec681f3Smrg _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr); 2527ec681f3Smrg 2537ec681f3Smrg _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct); 2547ec681f3Smrg _mesa_hash_table_insert(var_remap, src->flags, dst->flags); 2557ec681f3Smrg _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask); 2567ec681f3Smrg _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset); 2577ec681f3Smrg _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride); 2587ec681f3Smrg _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index); 2597ec681f3Smrg _mesa_hash_table_insert(var_remap, src->origin, dst->origin); 2607ec681f3Smrg _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin); 2617ec681f3Smrg _mesa_hash_table_insert(var_remap, src->direction, dst->direction); 2627ec681f3Smrg _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax); 2637ec681f3Smrg 2647ec681f3Smrg _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask); 2657ec681f3Smrg _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id); 2667ec681f3Smrg _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags); 2677ec681f3Smrg _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id); 2687ec681f3Smrg _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr); 2697ec681f3Smrg _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind); 2707ec681f3Smrg 
_mesa_hash_table_insert(var_remap, src->opaque, dst->opaque); 2717ec681f3Smrg _mesa_hash_table_insert(var_remap, src->ahit_status, dst->ahit_status); 2727ec681f3Smrg 2737ec681f3Smrg src->stack_sizes = dst->stack_sizes; 2747ec681f3Smrg src->group_idx = dst->group_idx; 2757ec681f3Smrg} 2767ec681f3Smrg 2777ec681f3Smrg/* 2787ec681f3Smrg * Create a copy of the global rt variables where the primitive/instance related variables are 2797ec681f3Smrg * independent.This is needed as we need to keep the old values of the global variables around 2807ec681f3Smrg * in case e.g. an anyhit shader reject the collision. So there are inner variables that get copied 2817ec681f3Smrg * to the outer variables once we commit to a better hit. 2827ec681f3Smrg */ 2837ec681f3Smrgstatic struct rt_variables 2847ec681f3Smrgcreate_inner_vars(nir_builder *b, const struct rt_variables *vars) 2857ec681f3Smrg{ 2867ec681f3Smrg struct rt_variables inner_vars = *vars; 2877ec681f3Smrg inner_vars.idx = 2887ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx"); 2897ec681f3Smrg inner_vars.shader_record_ptr = nir_variable_create( 2907ec681f3Smrg b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr"); 2917ec681f3Smrg inner_vars.primitive_id = 2927ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id"); 2937ec681f3Smrg inner_vars.geometry_id_and_flags = nir_variable_create( 2947ec681f3Smrg b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags"); 2957ec681f3Smrg inner_vars.tmax = 2967ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax"); 2977ec681f3Smrg inner_vars.instance_id = 2987ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id"); 2997ec681f3Smrg inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp, 3007ec681f3Smrg glsl_uint64_t_type(), 
"inner_instance_addr"); 3017ec681f3Smrg inner_vars.hit_kind = 3027ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind"); 3037ec681f3Smrg inner_vars.custom_instance_and_mask = nir_variable_create( 3047ec681f3Smrg b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask"); 3057ec681f3Smrg 3067ec681f3Smrg return inner_vars; 3077ec681f3Smrg} 3087ec681f3Smrg 3097ec681f3Smrg/* The hit attributes are stored on the stack. This is the offset compared to the current stack 3107ec681f3Smrg * pointer of where the hit attrib is stored. */ 3117ec681f3Smrgconst uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE); 3127ec681f3Smrg 3137ec681f3Smrgstatic void 3147ec681f3Smrginsert_rt_return(nir_builder *b, const struct rt_variables *vars) 3157ec681f3Smrg{ 3167ec681f3Smrg nir_store_var(b, vars->stack_ptr, 3177ec681f3Smrg nir_iadd(b, nir_load_var(b, vars->stack_ptr), nir_imm_int(b, -16)), 1); 3187ec681f3Smrg nir_store_var(b, vars->idx, 3197ec681f3Smrg nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1); 3207ec681f3Smrg} 3217ec681f3Smrg 3227ec681f3Smrgenum sbt_type { 3237ec681f3Smrg SBT_RAYGEN, 3247ec681f3Smrg SBT_MISS, 3257ec681f3Smrg SBT_HIT, 3267ec681f3Smrg SBT_CALLABLE, 3277ec681f3Smrg}; 3287ec681f3Smrg 3297ec681f3Smrgstatic nir_ssa_def * 3307ec681f3Smrgget_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding) 3317ec681f3Smrg{ 3327ec681f3Smrg nir_ssa_def *desc = nir_load_sbt_amd(b, 4, .binding = binding); 3337ec681f3Smrg nir_ssa_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3)); 3347ec681f3Smrg nir_ssa_def *stride = nir_channel(b, desc, 2); 3357ec681f3Smrg 3367ec681f3Smrg nir_ssa_def *ret = nir_imul(b, idx, stride); 3377ec681f3Smrg ret = nir_iadd(b, base_addr, nir_u2u64(b, ret)); 3387ec681f3Smrg 3397ec681f3Smrg return ret; 3407ec681f3Smrg} 3417ec681f3Smrg 3427ec681f3Smrgstatic void 3437ec681f3Smrgload_sbt_entry(nir_builder *b, const struct 
rt_variables *vars, nir_ssa_def *idx, 3447ec681f3Smrg enum sbt_type binding, unsigned offset) 3457ec681f3Smrg{ 3467ec681f3Smrg nir_ssa_def *addr = get_sbt_ptr(b, idx, binding); 3477ec681f3Smrg 3487ec681f3Smrg nir_ssa_def *load_addr = addr; 3497ec681f3Smrg if (offset) 3507ec681f3Smrg load_addr = nir_iadd(b, load_addr, nir_imm_int64(b, offset)); 3517ec681f3Smrg nir_ssa_def *v_idx = 3527ec681f3Smrg nir_build_load_global(b, 1, 32, load_addr, .align_mul = 4, .align_offset = 0); 3537ec681f3Smrg 3547ec681f3Smrg nir_store_var(b, vars->idx, v_idx, 1); 3557ec681f3Smrg 3567ec681f3Smrg nir_ssa_def *record_addr = nir_iadd(b, addr, nir_imm_int64(b, RADV_RT_HANDLE_SIZE)); 3577ec681f3Smrg nir_store_var(b, vars->shader_record_ptr, record_addr, 1); 3587ec681f3Smrg} 3597ec681f3Smrg 3607ec681f3Smrgstatic nir_ssa_def * 3617ec681f3Smrgnir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation) 3627ec681f3Smrg{ 3637ec681f3Smrg nir_ssa_def *result_components[3] = { 3647ec681f3Smrg nir_channel(b, matrix[0], 3), 3657ec681f3Smrg nir_channel(b, matrix[1], 3), 3667ec681f3Smrg nir_channel(b, matrix[2], 3), 3677ec681f3Smrg }; 3687ec681f3Smrg for (unsigned i = 0; i < 3; ++i) { 3697ec681f3Smrg for (unsigned j = 0; j < 3; ++j) { 3707ec681f3Smrg nir_ssa_def *v = 3717ec681f3Smrg nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j)); 3727ec681f3Smrg result_components[i] = (translation || j) ? 
nir_fadd(b, result_components[i], v) : v; 3737ec681f3Smrg } 3747ec681f3Smrg } 3757ec681f3Smrg return nir_vec(b, result_components, 3); 3767ec681f3Smrg} 3777ec681f3Smrg 3787ec681f3Smrgstatic nir_ssa_def * 3797ec681f3Smrgnir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[]) 3807ec681f3Smrg{ 3817ec681f3Smrg nir_ssa_def *result_components[3] = { 3827ec681f3Smrg nir_channel(b, matrix[0], 3), 3837ec681f3Smrg nir_channel(b, matrix[1], 3), 3847ec681f3Smrg nir_channel(b, matrix[2], 3), 3857ec681f3Smrg }; 3867ec681f3Smrg return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix, 3877ec681f3Smrg false); 3887ec681f3Smrg} 3897ec681f3Smrg 3907ec681f3Smrgstatic void 3917ec681f3Smrgnir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out) 3927ec681f3Smrg{ 3937ec681f3Smrg unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix); 3947ec681f3Smrg for (unsigned i = 0; i < 3; ++i) { 3957ec681f3Smrg out[i] = nir_build_load_global(b, 4, 32, 3967ec681f3Smrg nir_iadd(b, instance_addr, nir_imm_int64(b, offset + i * 16)), 3977ec681f3Smrg .align_mul = 64, .align_offset = offset + i * 16); 3987ec681f3Smrg } 3997ec681f3Smrg} 4007ec681f3Smrg 4017ec681f3Smrg/* This lowers all the RT instructions that we do not want to pass on to the combined shader and 4027ec681f3Smrg * that we can implement using the variables from the shader we are going to inline into. 
*/ 4037ec681f3Smrgstatic void 4047ec681f3Smrglower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base) 4057ec681f3Smrg{ 4067ec681f3Smrg nir_builder b_shader; 4077ec681f3Smrg nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader)); 4087ec681f3Smrg 4097ec681f3Smrg nir_foreach_block (block, nir_shader_get_entrypoint(shader)) { 4107ec681f3Smrg nir_foreach_instr_safe (instr, block) { 4117ec681f3Smrg switch (instr->type) { 4127ec681f3Smrg case nir_instr_type_intrinsic: { 4137ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 4147ec681f3Smrg switch (intr->intrinsic) { 4157ec681f3Smrg case nir_intrinsic_rt_execute_callable: { 4167ec681f3Smrg uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 4177ec681f3Smrg uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1; 4187ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 4197ec681f3Smrg 4207ec681f3Smrg nir_store_var(&b_shader, vars->stack_ptr, 4217ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 4227ec681f3Smrg nir_imm_int(&b_shader, size)), 4237ec681f3Smrg 1); 4247ec681f3Smrg nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret), 4257ec681f3Smrg nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16, 4267ec681f3Smrg .write_mask = 1); 4277ec681f3Smrg 4287ec681f3Smrg nir_store_var(&b_shader, vars->stack_ptr, 4297ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 4307ec681f3Smrg nir_imm_int(&b_shader, 16)), 4317ec681f3Smrg 1); 4327ec681f3Smrg load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0); 4337ec681f3Smrg 4347ec681f3Smrg nir_store_var( 4357ec681f3Smrg &b_shader, vars->arg, 4367ec681f3Smrg nir_isub(&b_shader, intr->src[1].ssa, nir_imm_int(&b_shader, size + 16)), 1); 4377ec681f3Smrg 4387ec681f3Smrg vars->stack_sizes[vars->group_idx].recursive_size = 4397ec681f3Smrg MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16); 4407ec681f3Smrg 
break; 4417ec681f3Smrg } 4427ec681f3Smrg case nir_intrinsic_rt_trace_ray: { 4437ec681f3Smrg uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 4447ec681f3Smrg uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1; 4457ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 4467ec681f3Smrg 4477ec681f3Smrg nir_store_var(&b_shader, vars->stack_ptr, 4487ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 4497ec681f3Smrg nir_imm_int(&b_shader, size)), 4507ec681f3Smrg 1); 4517ec681f3Smrg nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret), 4527ec681f3Smrg nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16, 4537ec681f3Smrg .write_mask = 1); 4547ec681f3Smrg 4557ec681f3Smrg nir_store_var(&b_shader, vars->stack_ptr, 4567ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 4577ec681f3Smrg nir_imm_int(&b_shader, 16)), 4587ec681f3Smrg 1); 4597ec681f3Smrg 4607ec681f3Smrg nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1); 4617ec681f3Smrg nir_store_var( 4627ec681f3Smrg &b_shader, vars->arg, 4637ec681f3Smrg nir_isub(&b_shader, intr->src[10].ssa, nir_imm_int(&b_shader, size + 16)), 1); 4647ec681f3Smrg 4657ec681f3Smrg vars->stack_sizes[vars->group_idx].recursive_size = 4667ec681f3Smrg MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16); 4677ec681f3Smrg 4687ec681f3Smrg /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. 
*/ 4697ec681f3Smrg nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1); 4707ec681f3Smrg nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1); 4717ec681f3Smrg nir_store_var(&b_shader, vars->cull_mask, 4727ec681f3Smrg nir_iand(&b_shader, intr->src[2].ssa, nir_imm_int(&b_shader, 0xff)), 4737ec681f3Smrg 0x1); 4747ec681f3Smrg nir_store_var(&b_shader, vars->sbt_offset, 4757ec681f3Smrg nir_iand(&b_shader, intr->src[3].ssa, nir_imm_int(&b_shader, 0xf)), 4767ec681f3Smrg 0x1); 4777ec681f3Smrg nir_store_var(&b_shader, vars->sbt_stride, 4787ec681f3Smrg nir_iand(&b_shader, intr->src[4].ssa, nir_imm_int(&b_shader, 0xf)), 4797ec681f3Smrg 0x1); 4807ec681f3Smrg nir_store_var(&b_shader, vars->miss_index, 4817ec681f3Smrg nir_iand(&b_shader, intr->src[5].ssa, nir_imm_int(&b_shader, 0xffff)), 4827ec681f3Smrg 0x1); 4837ec681f3Smrg nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7); 4847ec681f3Smrg nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1); 4857ec681f3Smrg nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7); 4867ec681f3Smrg nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1); 4877ec681f3Smrg break; 4887ec681f3Smrg } 4897ec681f3Smrg case nir_intrinsic_rt_resume: { 4907ec681f3Smrg uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE; 4917ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 4927ec681f3Smrg 4937ec681f3Smrg nir_store_var(&b_shader, vars->stack_ptr, 4947ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), 4957ec681f3Smrg nir_imm_int(&b_shader, -size)), 4967ec681f3Smrg 1); 4977ec681f3Smrg break; 4987ec681f3Smrg } 4997ec681f3Smrg case nir_intrinsic_rt_return_amd: { 5007ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5017ec681f3Smrg 5027ec681f3Smrg if (shader->info.stage == MESA_SHADER_RAYGEN) { 5037ec681f3Smrg nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1); 5047ec681f3Smrg break; 5057ec681f3Smrg } 5067ec681f3Smrg 
insert_rt_return(&b_shader, vars); 5077ec681f3Smrg break; 5087ec681f3Smrg } 5097ec681f3Smrg case nir_intrinsic_load_scratch: { 5107ec681f3Smrg b_shader.cursor = nir_before_instr(instr); 5117ec681f3Smrg nir_instr_rewrite_src_ssa( 5127ec681f3Smrg instr, &intr->src[0], 5137ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa)); 5147ec681f3Smrg break; 5157ec681f3Smrg } 5167ec681f3Smrg case nir_intrinsic_store_scratch: { 5177ec681f3Smrg b_shader.cursor = nir_before_instr(instr); 5187ec681f3Smrg nir_instr_rewrite_src_ssa( 5197ec681f3Smrg instr, &intr->src[1], 5207ec681f3Smrg nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa)); 5217ec681f3Smrg break; 5227ec681f3Smrg } 5237ec681f3Smrg case nir_intrinsic_load_rt_arg_scratch_offset_amd: { 5247ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5257ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->arg); 5267ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5277ec681f3Smrg break; 5287ec681f3Smrg } 5297ec681f3Smrg case nir_intrinsic_load_shader_record_ptr: { 5307ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5317ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->shader_record_ptr); 5327ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5337ec681f3Smrg break; 5347ec681f3Smrg } 5357ec681f3Smrg case nir_intrinsic_load_ray_launch_id: { 5367ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5377ec681f3Smrg nir_ssa_def *ret = nir_load_global_invocation_id(&b_shader, 32); 5387ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5397ec681f3Smrg break; 5407ec681f3Smrg } 5417ec681f3Smrg case nir_intrinsic_load_ray_t_min: { 5427ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5437ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmin); 5447ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5457ec681f3Smrg break; 5467ec681f3Smrg } 5477ec681f3Smrg case nir_intrinsic_load_ray_t_max: { 
5487ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5497ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmax); 5507ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5517ec681f3Smrg break; 5527ec681f3Smrg } 5537ec681f3Smrg case nir_intrinsic_load_ray_world_origin: { 5547ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5557ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->origin); 5567ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5577ec681f3Smrg break; 5587ec681f3Smrg } 5597ec681f3Smrg case nir_intrinsic_load_ray_world_direction: { 5607ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5617ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->direction); 5627ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5637ec681f3Smrg break; 5647ec681f3Smrg } 5657ec681f3Smrg case nir_intrinsic_load_ray_instance_custom_index: { 5667ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5677ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->custom_instance_and_mask); 5687ec681f3Smrg ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFF)); 5697ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5707ec681f3Smrg break; 5717ec681f3Smrg } 5727ec681f3Smrg case nir_intrinsic_load_primitive_id: { 5737ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5747ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->primitive_id); 5757ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5767ec681f3Smrg break; 5777ec681f3Smrg } 5787ec681f3Smrg case nir_intrinsic_load_ray_geometry_index: { 5797ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5807ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->geometry_id_and_flags); 5817ec681f3Smrg ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFFF)); 5827ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5837ec681f3Smrg break; 5847ec681f3Smrg } 5857ec681f3Smrg case nir_intrinsic_load_instance_id: { 
5867ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5877ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->instance_id); 5887ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5897ec681f3Smrg break; 5907ec681f3Smrg } 5917ec681f3Smrg case nir_intrinsic_load_ray_flags: { 5927ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5937ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->flags); 5947ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 5957ec681f3Smrg break; 5967ec681f3Smrg } 5977ec681f3Smrg case nir_intrinsic_load_ray_hit_kind: { 5987ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 5997ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->hit_kind); 6007ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 6017ec681f3Smrg break; 6027ec681f3Smrg } 6037ec681f3Smrg case nir_intrinsic_load_ray_world_to_object: { 6047ec681f3Smrg unsigned c = nir_intrinsic_column(intr); 6057ec681f3Smrg nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 6067ec681f3Smrg nir_ssa_def *wto_matrix[3]; 6077ec681f3Smrg nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 6087ec681f3Smrg 6097ec681f3Smrg nir_ssa_def *vals[3]; 6107ec681f3Smrg for (unsigned i = 0; i < 3; ++i) 6117ec681f3Smrg vals[i] = nir_channel(&b_shader, wto_matrix[i], c); 6127ec681f3Smrg 6137ec681f3Smrg nir_ssa_def *val = nir_vec(&b_shader, vals, 3); 6147ec681f3Smrg if (c == 3) 6157ec681f3Smrg val = nir_fneg(&b_shader, 6167ec681f3Smrg nir_build_vec3_mat_mult(&b_shader, val, wto_matrix, false)); 6177ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6187ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, val); 6197ec681f3Smrg break; 6207ec681f3Smrg } 6217ec681f3Smrg case nir_intrinsic_load_ray_object_to_world: { 6227ec681f3Smrg unsigned c = nir_intrinsic_column(intr); 6237ec681f3Smrg nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 6247ec681f3Smrg nir_ssa_def *val; 
6257ec681f3Smrg if (c == 3) { 6267ec681f3Smrg nir_ssa_def *wto_matrix[3]; 6277ec681f3Smrg nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 6287ec681f3Smrg 6297ec681f3Smrg nir_ssa_def *vals[3]; 6307ec681f3Smrg for (unsigned i = 0; i < 3; ++i) 6317ec681f3Smrg vals[i] = nir_channel(&b_shader, wto_matrix[i], c); 6327ec681f3Smrg 6337ec681f3Smrg val = nir_vec(&b_shader, vals, 3); 6347ec681f3Smrg } else { 6357ec681f3Smrg val = nir_build_load_global( 6367ec681f3Smrg &b_shader, 3, 32, 6377ec681f3Smrg nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 92 + c * 12)), 6387ec681f3Smrg .align_mul = 4, .align_offset = 0); 6397ec681f3Smrg } 6407ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6417ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, val); 6427ec681f3Smrg break; 6437ec681f3Smrg } 6447ec681f3Smrg case nir_intrinsic_load_ray_object_origin: { 6457ec681f3Smrg nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 6467ec681f3Smrg nir_ssa_def *wto_matrix[] = { 6477ec681f3Smrg nir_build_load_global( 6487ec681f3Smrg &b_shader, 4, 32, 6497ec681f3Smrg nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 16)), 6507ec681f3Smrg .align_mul = 64, .align_offset = 16), 6517ec681f3Smrg nir_build_load_global( 6527ec681f3Smrg &b_shader, 4, 32, 6537ec681f3Smrg nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 32)), 6547ec681f3Smrg .align_mul = 64, .align_offset = 32), 6557ec681f3Smrg nir_build_load_global( 6567ec681f3Smrg &b_shader, 4, 32, 6577ec681f3Smrg nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 48)), 6587ec681f3Smrg .align_mul = 64, .align_offset = 48)}; 6597ec681f3Smrg nir_ssa_def *val = nir_build_vec3_mat_mult_pre( 6607ec681f3Smrg &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix); 6617ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6627ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, val); 6637ec681f3Smrg break; 6647ec681f3Smrg } 
6657ec681f3Smrg case nir_intrinsic_load_ray_object_direction: { 6667ec681f3Smrg nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr); 6677ec681f3Smrg nir_ssa_def *wto_matrix[3]; 6687ec681f3Smrg nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix); 6697ec681f3Smrg nir_ssa_def *val = nir_build_vec3_mat_mult( 6707ec681f3Smrg &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false); 6717ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6727ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, val); 6737ec681f3Smrg break; 6747ec681f3Smrg } 6757ec681f3Smrg case nir_intrinsic_load_intersection_opaque_amd: { 6767ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6777ec681f3Smrg nir_ssa_def *ret = nir_load_var(&b_shader, vars->opaque); 6787ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); 6797ec681f3Smrg break; 6807ec681f3Smrg } 6817ec681f3Smrg case nir_intrinsic_ignore_ray_intersection: { 6827ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6837ec681f3Smrg nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 1), 1); 6847ec681f3Smrg 6857ec681f3Smrg /* The if is a workaround to avoid having to fix up control flow manually */ 6867ec681f3Smrg nir_push_if(&b_shader, nir_imm_true(&b_shader)); 6877ec681f3Smrg nir_jump(&b_shader, nir_jump_return); 6887ec681f3Smrg nir_pop_if(&b_shader, NULL); 6897ec681f3Smrg break; 6907ec681f3Smrg } 6917ec681f3Smrg case nir_intrinsic_terminate_ray: { 6927ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 6937ec681f3Smrg nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 2), 1); 6947ec681f3Smrg 6957ec681f3Smrg /* The if is a workaround to avoid having to fix up control flow manually */ 6967ec681f3Smrg nir_push_if(&b_shader, nir_imm_true(&b_shader)); 6977ec681f3Smrg nir_jump(&b_shader, nir_jump_return); 6987ec681f3Smrg nir_pop_if(&b_shader, NULL); 6997ec681f3Smrg break; 7007ec681f3Smrg } 7017ec681f3Smrg case 
nir_intrinsic_report_ray_intersection: { 7027ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 7037ec681f3Smrg nir_push_if( 7047ec681f3Smrg &b_shader, 7057ec681f3Smrg nir_iand( 7067ec681f3Smrg &b_shader, 7077ec681f3Smrg nir_flt(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmax)), 7087ec681f3Smrg nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin)))); 7097ec681f3Smrg { 7107ec681f3Smrg nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 0), 1); 7117ec681f3Smrg nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1); 7127ec681f3Smrg nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1); 7137ec681f3Smrg } 7147ec681f3Smrg nir_pop_if(&b_shader, NULL); 7157ec681f3Smrg break; 7167ec681f3Smrg } 7177ec681f3Smrg default: 7187ec681f3Smrg break; 7197ec681f3Smrg } 7207ec681f3Smrg break; 7217ec681f3Smrg } 7227ec681f3Smrg case nir_instr_type_jump: { 7237ec681f3Smrg nir_jump_instr *jump = nir_instr_as_jump(instr); 7247ec681f3Smrg if (jump->type == nir_jump_halt) { 7257ec681f3Smrg b_shader.cursor = nir_instr_remove(instr); 7267ec681f3Smrg nir_jump(&b_shader, nir_jump_return); 7277ec681f3Smrg } 7287ec681f3Smrg break; 7297ec681f3Smrg } 7307ec681f3Smrg default: 7317ec681f3Smrg break; 7327ec681f3Smrg } 7337ec681f3Smrg } 7347ec681f3Smrg } 7357ec681f3Smrg 7367ec681f3Smrg nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none); 7377ec681f3Smrg} 7387ec681f3Smrg 7397ec681f3Smrgstatic void 7407ec681f3Smrginsert_rt_case(nir_builder *b, nir_shader *shader, const struct rt_variables *vars, 7417ec681f3Smrg nir_ssa_def *idx, uint32_t call_idx_base, uint32_t call_idx) 7427ec681f3Smrg{ 7437ec681f3Smrg struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL); 7447ec681f3Smrg 7457ec681f3Smrg nir_opt_dead_cf(shader); 7467ec681f3Smrg 7477ec681f3Smrg struct rt_variables src_vars = create_rt_variables(shader, vars->stack_sizes); 7487ec681f3Smrg map_rt_variables(var_remap, &src_vars, vars); 7497ec681f3Smrg 
7507ec681f3Smrg NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base); 7517ec681f3Smrg 7527ec681f3Smrg NIR_PASS_V(shader, nir_opt_remove_phis); 7537ec681f3Smrg NIR_PASS_V(shader, nir_lower_returns); 7547ec681f3Smrg NIR_PASS_V(shader, nir_opt_dce); 7557ec681f3Smrg 7567ec681f3Smrg if (b->shader->info.stage == MESA_SHADER_ANY_HIT || 7577ec681f3Smrg b->shader->info.stage == MESA_SHADER_INTERSECTION) { 7587ec681f3Smrg src_vars.stack_sizes[src_vars.group_idx].non_recursive_size = 7597ec681f3Smrg MAX2(src_vars.stack_sizes[src_vars.group_idx].non_recursive_size, shader->scratch_size); 7607ec681f3Smrg } else { 7617ec681f3Smrg src_vars.stack_sizes[src_vars.group_idx].recursive_size = 7627ec681f3Smrg MAX2(src_vars.stack_sizes[src_vars.group_idx].recursive_size, shader->scratch_size); 7637ec681f3Smrg } 7647ec681f3Smrg 7657ec681f3Smrg nir_push_if(b, nir_ieq(b, idx, nir_imm_int(b, call_idx))); 7667ec681f3Smrg nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1); 7677ec681f3Smrg nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap); 7687ec681f3Smrg nir_pop_if(b, NULL); 7697ec681f3Smrg 7707ec681f3Smrg /* Adopt the instructions from the source shader, since they are merely moved, not cloned. 
*/ 7717ec681f3Smrg ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader)); 7727ec681f3Smrg 7737ec681f3Smrg ralloc_free(var_remap); 7747ec681f3Smrg} 7757ec681f3Smrg 7767ec681f3Smrgstatic bool 7777ec681f3Smrglower_rt_derefs(nir_shader *shader) 7787ec681f3Smrg{ 7797ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(shader); 7807ec681f3Smrg 7817ec681f3Smrg bool progress = false; 7827ec681f3Smrg 7837ec681f3Smrg nir_builder b; 7847ec681f3Smrg nir_builder_init(&b, impl); 7857ec681f3Smrg 7867ec681f3Smrg b.cursor = nir_before_cf_list(&impl->body); 7877ec681f3Smrg nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b); 7887ec681f3Smrg 7897ec681f3Smrg nir_foreach_block (block, impl) { 7907ec681f3Smrg nir_foreach_instr_safe (instr, block) { 7917ec681f3Smrg switch (instr->type) { 7927ec681f3Smrg case nir_instr_type_deref: { 7937ec681f3Smrg if (instr->type != nir_instr_type_deref) 7947ec681f3Smrg continue; 7957ec681f3Smrg 7967ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 7977ec681f3Smrg if (nir_deref_mode_is(deref, nir_var_shader_call_data)) { 7987ec681f3Smrg deref->modes = nir_var_function_temp; 7997ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 8007ec681f3Smrg b.cursor = nir_before_instr(&deref->instr); 8017ec681f3Smrg nir_deref_instr *cast = nir_build_deref_cast( 8027ec681f3Smrg &b, arg_offset, nir_var_function_temp, deref->var->type, 0); 8037ec681f3Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, &cast->dest.ssa); 8047ec681f3Smrg nir_instr_remove(&deref->instr); 8057ec681f3Smrg } 8067ec681f3Smrg progress = true; 8077ec681f3Smrg } else if (nir_deref_mode_is(deref, nir_var_ray_hit_attrib)) { 8087ec681f3Smrg deref->modes = nir_var_function_temp; 8097ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 8107ec681f3Smrg b.cursor = nir_before_instr(&deref->instr); 8117ec681f3Smrg nir_deref_instr *cast = 8127ec681f3Smrg nir_build_deref_cast(&b, nir_imm_int(&b, RADV_HIT_ATTRIB_OFFSET), 8137ec681f3Smrg 
nir_var_function_temp, deref->type, 0); 8147ec681f3Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, &cast->dest.ssa); 8157ec681f3Smrg nir_instr_remove(&deref->instr); 8167ec681f3Smrg } 8177ec681f3Smrg progress = true; 8187ec681f3Smrg } 8197ec681f3Smrg break; 8207ec681f3Smrg } 8217ec681f3Smrg default: 8227ec681f3Smrg break; 8237ec681f3Smrg } 8247ec681f3Smrg } 8257ec681f3Smrg } 8267ec681f3Smrg 8277ec681f3Smrg if (progress) { 8287ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 8297ec681f3Smrg } else { 8307ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 8317ec681f3Smrg } 8327ec681f3Smrg 8337ec681f3Smrg return progress; 8347ec681f3Smrg} 8357ec681f3Smrg 8367ec681f3Smrgstatic gl_shader_stage 8377ec681f3Smrgconvert_rt_stage(VkShaderStageFlagBits vk_stage) 8387ec681f3Smrg{ 8397ec681f3Smrg switch (vk_stage) { 8407ec681f3Smrg case VK_SHADER_STAGE_RAYGEN_BIT_KHR: 8417ec681f3Smrg return MESA_SHADER_RAYGEN; 8427ec681f3Smrg case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: 8437ec681f3Smrg return MESA_SHADER_ANY_HIT; 8447ec681f3Smrg case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: 8457ec681f3Smrg return MESA_SHADER_CLOSEST_HIT; 8467ec681f3Smrg case VK_SHADER_STAGE_MISS_BIT_KHR: 8477ec681f3Smrg return MESA_SHADER_MISS; 8487ec681f3Smrg case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: 8497ec681f3Smrg return MESA_SHADER_INTERSECTION; 8507ec681f3Smrg case VK_SHADER_STAGE_CALLABLE_BIT_KHR: 8517ec681f3Smrg return MESA_SHADER_CALLABLE; 8527ec681f3Smrg default: 8537ec681f3Smrg unreachable("Unhandled RT stage"); 8547ec681f3Smrg } 8557ec681f3Smrg} 8567ec681f3Smrg 8577ec681f3Smrgstatic nir_shader * 8587ec681f3Smrgparse_rt_stage(struct radv_device *device, struct radv_pipeline_layout *layout, 8597ec681f3Smrg const VkPipelineShaderStageCreateInfo *stage) 8607ec681f3Smrg{ 8617ec681f3Smrg struct radv_pipeline_key key; 8627ec681f3Smrg memset(&key, 0, sizeof(key)); 8637ec681f3Smrg 8647ec681f3Smrg nir_shader *shader = radv_shader_compile_to_nir( 8657ec681f3Smrg device, 
vk_shader_module_from_handle(stage->module), stage->pName, 8667ec681f3Smrg convert_rt_stage(stage->stage), stage->pSpecializationInfo, layout, &key); 8677ec681f3Smrg 8687ec681f3Smrg if (shader->info.stage == MESA_SHADER_RAYGEN || shader->info.stage == MESA_SHADER_CLOSEST_HIT || 8697ec681f3Smrg shader->info.stage == MESA_SHADER_CALLABLE || shader->info.stage == MESA_SHADER_MISS) { 8707ec681f3Smrg nir_block *last_block = nir_impl_last_block(nir_shader_get_entrypoint(shader)); 8717ec681f3Smrg nir_builder b_inner; 8727ec681f3Smrg nir_builder_init(&b_inner, nir_shader_get_entrypoint(shader)); 8737ec681f3Smrg b_inner.cursor = nir_after_block(last_block); 8747ec681f3Smrg nir_rt_return_amd(&b_inner); 8757ec681f3Smrg } 8767ec681f3Smrg 8777ec681f3Smrg NIR_PASS_V(shader, nir_lower_vars_to_explicit_types, 8787ec681f3Smrg nir_var_function_temp | nir_var_shader_call_data | nir_var_ray_hit_attrib, 8797ec681f3Smrg glsl_get_natural_size_align_bytes); 8807ec681f3Smrg 8817ec681f3Smrg NIR_PASS_V(shader, lower_rt_derefs); 8827ec681f3Smrg 8837ec681f3Smrg NIR_PASS_V(shader, nir_lower_explicit_io, nir_var_function_temp, 8847ec681f3Smrg nir_address_format_32bit_offset); 8857ec681f3Smrg 8867ec681f3Smrg return shader; 8877ec681f3Smrg} 8887ec681f3Smrg 8897ec681f3Smrgstatic nir_function_impl * 8907ec681f3Smrglower_any_hit_for_intersection(nir_shader *any_hit) 8917ec681f3Smrg{ 8927ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(any_hit); 8937ec681f3Smrg 8947ec681f3Smrg /* Any-hit shaders need three parameters */ 8957ec681f3Smrg assert(impl->function->num_params == 0); 8967ec681f3Smrg nir_parameter params[] = { 8977ec681f3Smrg { 8987ec681f3Smrg /* A pointer to a boolean value for whether or not the hit was 8997ec681f3Smrg * accepted. 
9007ec681f3Smrg */ 9017ec681f3Smrg .num_components = 1, 9027ec681f3Smrg .bit_size = 32, 9037ec681f3Smrg }, 9047ec681f3Smrg { 9057ec681f3Smrg /* The hit T value */ 9067ec681f3Smrg .num_components = 1, 9077ec681f3Smrg .bit_size = 32, 9087ec681f3Smrg }, 9097ec681f3Smrg { 9107ec681f3Smrg /* The hit kind */ 9117ec681f3Smrg .num_components = 1, 9127ec681f3Smrg .bit_size = 32, 9137ec681f3Smrg }, 9147ec681f3Smrg }; 9157ec681f3Smrg impl->function->num_params = ARRAY_SIZE(params); 9167ec681f3Smrg impl->function->params = ralloc_array(any_hit, nir_parameter, ARRAY_SIZE(params)); 9177ec681f3Smrg memcpy(impl->function->params, params, sizeof(params)); 9187ec681f3Smrg 9197ec681f3Smrg nir_builder build; 9207ec681f3Smrg nir_builder_init(&build, impl); 9217ec681f3Smrg nir_builder *b = &build; 9227ec681f3Smrg 9237ec681f3Smrg b->cursor = nir_before_cf_list(&impl->body); 9247ec681f3Smrg 9257ec681f3Smrg nir_ssa_def *commit_ptr = nir_load_param(b, 0); 9267ec681f3Smrg nir_ssa_def *hit_t = nir_load_param(b, 1); 9277ec681f3Smrg nir_ssa_def *hit_kind = nir_load_param(b, 2); 9287ec681f3Smrg 9297ec681f3Smrg nir_deref_instr *commit = 9307ec681f3Smrg nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0); 9317ec681f3Smrg 9327ec681f3Smrg nir_foreach_block_safe (block, impl) { 9337ec681f3Smrg nir_foreach_instr_safe (instr, block) { 9347ec681f3Smrg switch (instr->type) { 9357ec681f3Smrg case nir_instr_type_intrinsic: { 9367ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 9377ec681f3Smrg switch (intrin->intrinsic) { 9387ec681f3Smrg case nir_intrinsic_ignore_ray_intersection: 9397ec681f3Smrg b->cursor = nir_instr_remove(&intrin->instr); 9407ec681f3Smrg /* We put the newly emitted code inside a dummy if because it's 9417ec681f3Smrg * going to contain a jump instruction and we don't want to 9427ec681f3Smrg * deal with that mess here. It'll get dealt with by our 9437ec681f3Smrg * control-flow optimization passes. 
9447ec681f3Smrg */ 9457ec681f3Smrg nir_store_deref(b, commit, nir_imm_false(b), 0x1); 9467ec681f3Smrg nir_push_if(b, nir_imm_true(b)); 9477ec681f3Smrg nir_jump(b, nir_jump_halt); 9487ec681f3Smrg nir_pop_if(b, NULL); 9497ec681f3Smrg break; 9507ec681f3Smrg 9517ec681f3Smrg case nir_intrinsic_terminate_ray: 9527ec681f3Smrg /* The "normal" handling of terminateRay works fine in 9537ec681f3Smrg * intersection shaders. 9547ec681f3Smrg */ 9557ec681f3Smrg break; 9567ec681f3Smrg 9577ec681f3Smrg case nir_intrinsic_load_ray_t_max: 9587ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t); 9597ec681f3Smrg nir_instr_remove(&intrin->instr); 9607ec681f3Smrg break; 9617ec681f3Smrg 9627ec681f3Smrg case nir_intrinsic_load_ray_hit_kind: 9637ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind); 9647ec681f3Smrg nir_instr_remove(&intrin->instr); 9657ec681f3Smrg break; 9667ec681f3Smrg 9677ec681f3Smrg default: 9687ec681f3Smrg break; 9697ec681f3Smrg } 9707ec681f3Smrg break; 9717ec681f3Smrg } 9727ec681f3Smrg case nir_instr_type_jump: { 9737ec681f3Smrg nir_jump_instr *jump = nir_instr_as_jump(instr); 9747ec681f3Smrg if (jump->type == nir_jump_halt) { 9757ec681f3Smrg b->cursor = nir_instr_remove(instr); 9767ec681f3Smrg nir_jump(b, nir_jump_return); 9777ec681f3Smrg } 9787ec681f3Smrg break; 9797ec681f3Smrg } 9807ec681f3Smrg 9817ec681f3Smrg default: 9827ec681f3Smrg break; 9837ec681f3Smrg } 9847ec681f3Smrg } 9857ec681f3Smrg } 9867ec681f3Smrg 9877ec681f3Smrg nir_validate_shader(any_hit, "after initial any-hit lowering"); 9887ec681f3Smrg 9897ec681f3Smrg nir_lower_returns_impl(impl); 9907ec681f3Smrg 9917ec681f3Smrg nir_validate_shader(any_hit, "after lowering returns"); 9927ec681f3Smrg 9937ec681f3Smrg return impl; 9947ec681f3Smrg} 9957ec681f3Smrg 9967ec681f3Smrg/* Inline the any_hit shader into the intersection shader so we don't have 9977ec681f3Smrg * to implement yet another shader call interface here. Neither do any recursion. 
9987ec681f3Smrg */ 9997ec681f3Smrgstatic void 10007ec681f3Smrgnir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit) 10017ec681f3Smrg{ 10027ec681f3Smrg void *dead_ctx = ralloc_context(intersection); 10037ec681f3Smrg 10047ec681f3Smrg nir_function_impl *any_hit_impl = NULL; 10057ec681f3Smrg struct hash_table *any_hit_var_remap = NULL; 10067ec681f3Smrg if (any_hit) { 10077ec681f3Smrg any_hit = nir_shader_clone(dead_ctx, any_hit); 10087ec681f3Smrg NIR_PASS_V(any_hit, nir_opt_dce); 10097ec681f3Smrg any_hit_impl = lower_any_hit_for_intersection(any_hit); 10107ec681f3Smrg any_hit_var_remap = _mesa_pointer_hash_table_create(dead_ctx); 10117ec681f3Smrg } 10127ec681f3Smrg 10137ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(intersection); 10147ec681f3Smrg 10157ec681f3Smrg nir_builder build; 10167ec681f3Smrg nir_builder_init(&build, impl); 10177ec681f3Smrg nir_builder *b = &build; 10187ec681f3Smrg 10197ec681f3Smrg b->cursor = nir_before_cf_list(&impl->body); 10207ec681f3Smrg 10217ec681f3Smrg nir_variable *commit = nir_local_variable_create(impl, glsl_bool_type(), "ray_commit"); 10227ec681f3Smrg nir_store_var(b, commit, nir_imm_false(b), 0x1); 10237ec681f3Smrg 10247ec681f3Smrg nir_foreach_block_safe (block, impl) { 10257ec681f3Smrg nir_foreach_instr_safe (instr, block) { 10267ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 10277ec681f3Smrg continue; 10287ec681f3Smrg 10297ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 10307ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_report_ray_intersection) 10317ec681f3Smrg continue; 10327ec681f3Smrg 10337ec681f3Smrg b->cursor = nir_instr_remove(&intrin->instr); 10347ec681f3Smrg nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1); 10357ec681f3Smrg nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1); 10367ec681f3Smrg nir_ssa_def *min_t = nir_load_ray_t_min(b); 10377ec681f3Smrg nir_ssa_def *max_t = nir_load_ray_t_max(b); 10387ec681f3Smrg 
10397ec681f3Smrg /* bool commit_tmp = false; */ 10407ec681f3Smrg nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp"); 10417ec681f3Smrg nir_store_var(b, commit_tmp, nir_imm_false(b), 0x1); 10427ec681f3Smrg 10437ec681f3Smrg nir_push_if(b, nir_iand(b, nir_fge(b, hit_t, min_t), nir_fge(b, max_t, hit_t))); 10447ec681f3Smrg { 10457ec681f3Smrg /* Any-hit defaults to commit */ 10467ec681f3Smrg nir_store_var(b, commit_tmp, nir_imm_true(b), 0x1); 10477ec681f3Smrg 10487ec681f3Smrg if (any_hit_impl != NULL) { 10497ec681f3Smrg nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b))); 10507ec681f3Smrg { 10517ec681f3Smrg nir_ssa_def *params[] = { 10527ec681f3Smrg &nir_build_deref_var(b, commit_tmp)->dest.ssa, 10537ec681f3Smrg hit_t, 10547ec681f3Smrg hit_kind, 10557ec681f3Smrg }; 10567ec681f3Smrg nir_inline_function_impl(b, any_hit_impl, params, any_hit_var_remap); 10577ec681f3Smrg } 10587ec681f3Smrg nir_pop_if(b, NULL); 10597ec681f3Smrg } 10607ec681f3Smrg 10617ec681f3Smrg nir_push_if(b, nir_load_var(b, commit_tmp)); 10627ec681f3Smrg { 10637ec681f3Smrg nir_report_ray_intersection(b, 1, hit_t, hit_kind); 10647ec681f3Smrg } 10657ec681f3Smrg nir_pop_if(b, NULL); 10667ec681f3Smrg } 10677ec681f3Smrg nir_pop_if(b, NULL); 10687ec681f3Smrg 10697ec681f3Smrg nir_ssa_def *accepted = nir_load_var(b, commit_tmp); 10707ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted); 10717ec681f3Smrg } 10727ec681f3Smrg } 10737ec681f3Smrg 10747ec681f3Smrg /* We did some inlining; have to re-index SSA defs */ 10757ec681f3Smrg nir_index_ssa_defs(impl); 10767ec681f3Smrg 10777ec681f3Smrg /* Eliminate the casts introduced for the commit return of the any-hit shader. */ 10787ec681f3Smrg NIR_PASS_V(intersection, nir_opt_deref); 10797ec681f3Smrg 10807ec681f3Smrg ralloc_free(dead_ctx); 10817ec681f3Smrg} 10827ec681f3Smrg 10837ec681f3Smrg/* Variables only used internally to ray traversal. 
This is data that describes 10847ec681f3Smrg * the current state of the traversal vs. what we'd give to a shader. e.g. what 10857ec681f3Smrg * is the instance we're currently visiting vs. what is the instance of the 10867ec681f3Smrg * closest hit. */ 10877ec681f3Smrgstruct rt_traversal_vars { 10887ec681f3Smrg nir_variable *origin; 10897ec681f3Smrg nir_variable *dir; 10907ec681f3Smrg nir_variable *inv_dir; 10917ec681f3Smrg nir_variable *sbt_offset_and_flags; 10927ec681f3Smrg nir_variable *instance_id; 10937ec681f3Smrg nir_variable *custom_instance_and_mask; 10947ec681f3Smrg nir_variable *instance_addr; 10957ec681f3Smrg nir_variable *should_return; 10967ec681f3Smrg nir_variable *bvh_base; 10977ec681f3Smrg nir_variable *stack; 10987ec681f3Smrg nir_variable *top_stack; 10997ec681f3Smrg}; 11007ec681f3Smrg 11017ec681f3Smrgstatic struct rt_traversal_vars 11027ec681f3Smrginit_traversal_vars(nir_builder *b) 11037ec681f3Smrg{ 11047ec681f3Smrg const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 11057ec681f3Smrg struct rt_traversal_vars ret; 11067ec681f3Smrg 11077ec681f3Smrg ret.origin = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_origin"); 11087ec681f3Smrg ret.dir = nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_dir"); 11097ec681f3Smrg ret.inv_dir = 11107ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "traversal_inv_dir"); 11117ec681f3Smrg ret.sbt_offset_and_flags = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 11127ec681f3Smrg "traversal_sbt_offset_and_flags"); 11137ec681f3Smrg ret.instance_id = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 11147ec681f3Smrg "traversal_instance_id"); 11157ec681f3Smrg ret.custom_instance_and_mask = nir_variable_create( 11167ec681f3Smrg b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_custom_instance_and_mask"); 11177ec681f3Smrg ret.instance_addr = 11187ec681f3Smrg 
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr"); 11197ec681f3Smrg ret.should_return = nir_variable_create(b->shader, nir_var_shader_temp, glsl_bool_type(), 11207ec681f3Smrg "traversal_should_return"); 11217ec681f3Smrg ret.bvh_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), 11227ec681f3Smrg "traversal_bvh_base"); 11237ec681f3Smrg ret.stack = 11247ec681f3Smrg nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr"); 11257ec681f3Smrg ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), 11267ec681f3Smrg "traversal_top_stack_ptr"); 11277ec681f3Smrg return ret; 11287ec681f3Smrg} 11297ec681f3Smrg 11307ec681f3Smrgstatic nir_ssa_def * 11317ec681f3Smrgbuild_addr_to_node(nir_builder *b, nir_ssa_def *addr) 11327ec681f3Smrg{ 11337ec681f3Smrg const uint64_t bvh_size = 1ull << 42; 11347ec681f3Smrg nir_ssa_def *node = nir_ushr(b, addr, nir_imm_int(b, 3)); 11357ec681f3Smrg return nir_iand(b, node, nir_imm_int64(b, (bvh_size - 1) << 3)); 11367ec681f3Smrg} 11377ec681f3Smrg 11387ec681f3Smrgstatic nir_ssa_def * 11397ec681f3Smrgbuild_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node) 11407ec681f3Smrg{ 11417ec681f3Smrg nir_ssa_def *addr = nir_iand(b, node, nir_imm_int64(b, ~7ull)); 11427ec681f3Smrg addr = nir_ishl(b, addr, nir_imm_int(b, 3)); 11437ec681f3Smrg /* Assumes everything is in the top half of address space, which is true in 11447ec681f3Smrg * GFX9+ for now. */ 11457ec681f3Smrg return device->physical_device->rad_info.chip_class >= GFX9 11467ec681f3Smrg ? nir_ior(b, addr, nir_imm_int64(b, 0xffffull << 48)) 11477ec681f3Smrg : addr; 11487ec681f3Smrg} 11497ec681f3Smrg 11507ec681f3Smrg/* When a hit is opaque the any_hit shader is skipped for this hit and the hit 11517ec681f3Smrg * is assumed to be an actual hit. 
*/ 11527ec681f3Smrgstatic nir_ssa_def * 11537ec681f3Smrghit_is_opaque(nir_builder *b, const struct rt_variables *vars, 11547ec681f3Smrg const struct rt_traversal_vars *trav_vars, nir_ssa_def *geometry_id_and_flags) 11557ec681f3Smrg{ 11567ec681f3Smrg nir_ssa_def *geom_force_opaque = nir_ine( 11577ec681f3Smrg b, nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 1u << 28 /* VK_GEOMETRY_OPAQUE_BIT */)), 11587ec681f3Smrg nir_imm_int(b, 0)); 11597ec681f3Smrg nir_ssa_def *instance_force_opaque = 11607ec681f3Smrg nir_ine(b, 11617ec681f3Smrg nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 11627ec681f3Smrg nir_imm_int(b, 4 << 24 /* VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT */)), 11637ec681f3Smrg nir_imm_int(b, 0)); 11647ec681f3Smrg nir_ssa_def *instance_force_non_opaque = 11657ec681f3Smrg nir_ine(b, 11667ec681f3Smrg nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 11677ec681f3Smrg nir_imm_int(b, 8 << 24 /* VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT */)), 11687ec681f3Smrg nir_imm_int(b, 0)); 11697ec681f3Smrg 11707ec681f3Smrg nir_ssa_def *opaque = geom_force_opaque; 11717ec681f3Smrg opaque = nir_bcsel(b, instance_force_opaque, nir_imm_bool(b, true), opaque); 11727ec681f3Smrg opaque = nir_bcsel(b, instance_force_non_opaque, nir_imm_bool(b, false), opaque); 11737ec681f3Smrg 11747ec681f3Smrg nir_ssa_def *ray_force_opaque = 11757ec681f3Smrg nir_ine(b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 1 /* RayFlagsOpaque */)), 11767ec681f3Smrg nir_imm_int(b, 0)); 11777ec681f3Smrg nir_ssa_def *ray_force_non_opaque = nir_ine( 11787ec681f3Smrg b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 2 /* RayFlagsNoOpaque */)), 11797ec681f3Smrg nir_imm_int(b, 0)); 11807ec681f3Smrg 11817ec681f3Smrg opaque = nir_bcsel(b, ray_force_opaque, nir_imm_bool(b, true), opaque); 11827ec681f3Smrg opaque = nir_bcsel(b, ray_force_non_opaque, nir_imm_bool(b, false), opaque); 11837ec681f3Smrg return opaque; 11847ec681f3Smrg} 11857ec681f3Smrg 11867ec681f3Smrgstatic void 
11877ec681f3Smrgvisit_any_hit_shaders(struct radv_device *device, 11887ec681f3Smrg const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 11897ec681f3Smrg struct rt_variables *vars) 11907ec681f3Smrg{ 11917ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout); 11927ec681f3Smrg nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx); 11937ec681f3Smrg 11947ec681f3Smrg nir_push_if(b, nir_ine(b, sbt_idx, nir_imm_int(b, 0))); 11957ec681f3Smrg for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { 11967ec681f3Smrg const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; 11977ec681f3Smrg uint32_t shader_id = VK_SHADER_UNUSED_KHR; 11987ec681f3Smrg 11997ec681f3Smrg switch (group_info->type) { 12007ec681f3Smrg case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 12017ec681f3Smrg shader_id = group_info->anyHitShader; 12027ec681f3Smrg break; 12037ec681f3Smrg default: 12047ec681f3Smrg break; 12057ec681f3Smrg } 12067ec681f3Smrg if (shader_id == VK_SHADER_UNUSED_KHR) 12077ec681f3Smrg continue; 12087ec681f3Smrg 12097ec681f3Smrg const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; 12107ec681f3Smrg nir_shader *nir_stage = parse_rt_stage(device, layout, stage); 12117ec681f3Smrg 12127ec681f3Smrg vars->group_idx = i; 12137ec681f3Smrg insert_rt_case(b, nir_stage, vars, sbt_idx, 0, i + 2); 12147ec681f3Smrg } 12157ec681f3Smrg nir_pop_if(b, NULL); 12167ec681f3Smrg} 12177ec681f3Smrg 12187ec681f3Smrgstatic void 12197ec681f3Smrginsert_traversal_triangle_case(struct radv_device *device, 12207ec681f3Smrg const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 12217ec681f3Smrg nir_ssa_def *result, const struct rt_variables *vars, 12227ec681f3Smrg const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node) 12237ec681f3Smrg{ 12247ec681f3Smrg nir_ssa_def *dist = nir_vector_extract(b, result, nir_imm_int(b, 0)); 12257ec681f3Smrg nir_ssa_def *div = nir_vector_extract(b, result, 
nir_imm_int(b, 1)); 12267ec681f3Smrg dist = nir_fdiv(b, dist, div); 12277ec681f3Smrg nir_ssa_def *frontface = nir_flt(b, nir_imm_float(b, 0), div); 12287ec681f3Smrg nir_ssa_def *switch_ccw = nir_ine( 12297ec681f3Smrg b, 12307ec681f3Smrg nir_iand( 12317ec681f3Smrg b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 12327ec681f3Smrg nir_imm_int(b, 2 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT */)), 12337ec681f3Smrg nir_imm_int(b, 0)); 12347ec681f3Smrg frontface = nir_ixor(b, frontface, switch_ccw); 12357ec681f3Smrg 12367ec681f3Smrg nir_ssa_def *not_cull = nir_ieq( 12377ec681f3Smrg b, nir_iand(b, nir_load_var(b, vars->flags), nir_imm_int(b, 256 /* RayFlagsSkipTriangles */)), 12387ec681f3Smrg nir_imm_int(b, 0)); 12397ec681f3Smrg nir_ssa_def *not_facing_cull = nir_ieq( 12407ec681f3Smrg b, 12417ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 12427ec681f3Smrg nir_bcsel(b, frontface, nir_imm_int(b, 32 /* RayFlagsCullFrontFacingTriangles */), 12437ec681f3Smrg nir_imm_int(b, 16 /* RayFlagsCullBackFacingTriangles */))), 12447ec681f3Smrg nir_imm_int(b, 0)); 12457ec681f3Smrg 12467ec681f3Smrg not_cull = nir_iand( 12477ec681f3Smrg b, not_cull, 12487ec681f3Smrg nir_ior( 12497ec681f3Smrg b, not_facing_cull, 12507ec681f3Smrg nir_ine( 12517ec681f3Smrg b, 12527ec681f3Smrg nir_iand( 12537ec681f3Smrg b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 12547ec681f3Smrg nir_imm_int(b, 1 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT */)), 12557ec681f3Smrg nir_imm_int(b, 0)))); 12567ec681f3Smrg 12577ec681f3Smrg nir_push_if(b, nir_iand(b, 12587ec681f3Smrg nir_iand(b, nir_flt(b, dist, nir_load_var(b, vars->tmax)), 12597ec681f3Smrg nir_fge(b, dist, nir_load_var(b, vars->tmin))), 12607ec681f3Smrg not_cull)); 12617ec681f3Smrg { 12627ec681f3Smrg 12637ec681f3Smrg nir_ssa_def *triangle_info = nir_build_load_global( 12647ec681f3Smrg b, 2, 32, 12657ec681f3Smrg nir_iadd(b, build_node_to_addr(device, b, bvh_node), 12667ec681f3Smrg nir_imm_int64(b, 
offsetof(struct radv_bvh_triangle_node, triangle_id))), 12677ec681f3Smrg .align_mul = 4, .align_offset = 0); 12687ec681f3Smrg nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); 12697ec681f3Smrg nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); 12707ec681f3Smrg nir_ssa_def *geometry_id = nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 0xfffffff)); 12717ec681f3Smrg nir_ssa_def *is_opaque = hit_is_opaque(b, vars, trav_vars, geometry_id_and_flags); 12727ec681f3Smrg 12737ec681f3Smrg not_cull = 12747ec681f3Smrg nir_ieq(b, 12757ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 12767ec681f3Smrg nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))), 12777ec681f3Smrg nir_imm_int(b, 0)); 12787ec681f3Smrg nir_push_if(b, not_cull); 12797ec681f3Smrg { 12807ec681f3Smrg nir_ssa_def *sbt_idx = 12817ec681f3Smrg nir_iadd(b, 12827ec681f3Smrg nir_iadd(b, nir_load_var(b, vars->sbt_offset), 12837ec681f3Smrg nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 12847ec681f3Smrg nir_imm_int(b, 0xffffff))), 12857ec681f3Smrg nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id)); 12867ec681f3Smrg nir_ssa_def *divs[2] = {div, div}; 12877ec681f3Smrg nir_ssa_def *ij = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); 12887ec681f3Smrg nir_ssa_def *hit_kind = 12897ec681f3Smrg nir_bcsel(b, frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF)); 12907ec681f3Smrg 12917ec681f3Smrg nir_store_scratch( 12927ec681f3Smrg b, ij, 12937ec681f3Smrg nir_iadd(b, nir_load_var(b, vars->stack_ptr), nir_imm_int(b, RADV_HIT_ATTRIB_OFFSET)), 12947ec681f3Smrg .align_mul = 16, .write_mask = 3); 12957ec681f3Smrg 12967ec681f3Smrg nir_store_var(b, vars->ahit_status, nir_imm_int(b, 0), 1); 12977ec681f3Smrg 12987ec681f3Smrg nir_push_if(b, nir_ine(b, is_opaque, nir_imm_bool(b, true))); 12997ec681f3Smrg { 13007ec681f3Smrg struct rt_variables inner_vars = create_inner_vars(b, vars); 13017ec681f3Smrg 13027ec681f3Smrg nir_store_var(b, 
inner_vars.primitive_id, primitive_id, 1); 13037ec681f3Smrg nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1); 13047ec681f3Smrg nir_store_var(b, inner_vars.tmax, dist, 0x1); 13057ec681f3Smrg nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 13067ec681f3Smrg nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 13077ec681f3Smrg 0x1); 13087ec681f3Smrg nir_store_var(b, inner_vars.hit_kind, hit_kind, 0x1); 13097ec681f3Smrg nir_store_var(b, inner_vars.custom_instance_and_mask, 13107ec681f3Smrg nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 13117ec681f3Smrg 13127ec681f3Smrg load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); 13137ec681f3Smrg 13147ec681f3Smrg visit_any_hit_shaders(device, pCreateInfo, b, &inner_vars); 13157ec681f3Smrg 13167ec681f3Smrg nir_push_if(b, nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 1))); 13177ec681f3Smrg { 13187ec681f3Smrg nir_jump(b, nir_jump_continue); 13197ec681f3Smrg } 13207ec681f3Smrg nir_pop_if(b, NULL); 13217ec681f3Smrg } 13227ec681f3Smrg nir_pop_if(b, NULL); 13237ec681f3Smrg 13247ec681f3Smrg nir_store_var(b, vars->primitive_id, primitive_id, 1); 13257ec681f3Smrg nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1); 13267ec681f3Smrg nir_store_var(b, vars->tmax, dist, 0x1); 13277ec681f3Smrg nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 13287ec681f3Smrg nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 13297ec681f3Smrg nir_store_var(b, vars->hit_kind, hit_kind, 0x1); 13307ec681f3Smrg nir_store_var(b, vars->custom_instance_and_mask, 13317ec681f3Smrg nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 13327ec681f3Smrg 13337ec681f3Smrg load_sbt_entry(b, vars, sbt_idx, SBT_HIT, 0); 13347ec681f3Smrg 13357ec681f3Smrg nir_store_var(b, trav_vars->should_return, 13367ec681f3Smrg nir_ior(b, 13377ec681f3Smrg nir_ine(b, 
13387ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 13397ec681f3Smrg nir_imm_int(b, 8 /* SkipClosestHitShader */)), 13407ec681f3Smrg nir_imm_int(b, 0)), 13417ec681f3Smrg nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0))), 13427ec681f3Smrg 1); 13437ec681f3Smrg 13447ec681f3Smrg nir_ssa_def *terminate_on_first_hit = 13457ec681f3Smrg nir_ine(b, 13467ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 13477ec681f3Smrg nir_imm_int(b, 4 /* TerminateOnFirstHitKHR */)), 13487ec681f3Smrg nir_imm_int(b, 0)); 13497ec681f3Smrg nir_ssa_def *ray_terminated = 13507ec681f3Smrg nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 2)); 13517ec681f3Smrg nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); 13527ec681f3Smrg { 13537ec681f3Smrg nir_jump(b, nir_jump_break); 13547ec681f3Smrg } 13557ec681f3Smrg nir_pop_if(b, NULL); 13567ec681f3Smrg } 13577ec681f3Smrg nir_pop_if(b, NULL); 13587ec681f3Smrg } 13597ec681f3Smrg nir_pop_if(b, NULL); 13607ec681f3Smrg} 13617ec681f3Smrg 13627ec681f3Smrgstatic void 13637ec681f3Smrginsert_traversal_aabb_case(struct radv_device *device, 13647ec681f3Smrg const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, nir_builder *b, 13657ec681f3Smrg const struct rt_variables *vars, 13667ec681f3Smrg const struct rt_traversal_vars *trav_vars, nir_ssa_def *bvh_node) 13677ec681f3Smrg{ 13687ec681f3Smrg RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout); 13697ec681f3Smrg 13707ec681f3Smrg nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 13717ec681f3Smrg nir_ssa_def *triangle_info = nir_build_load_global( 13727ec681f3Smrg b, 2, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 24)), .align_mul = 4, .align_offset = 0); 13737ec681f3Smrg nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); 13747ec681f3Smrg nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); 13757ec681f3Smrg nir_ssa_def *geometry_id = nir_iand(b, geometry_id_and_flags, nir_imm_int(b, 0xfffffff)); 13767ec681f3Smrg 
nir_ssa_def *is_opaque = hit_is_opaque(b, vars, trav_vars, geometry_id_and_flags); 13777ec681f3Smrg 13787ec681f3Smrg nir_ssa_def *not_cull = 13797ec681f3Smrg nir_ieq(b, 13807ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 13817ec681f3Smrg nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))), 13827ec681f3Smrg nir_imm_int(b, 0)); 13837ec681f3Smrg nir_push_if(b, not_cull); 13847ec681f3Smrg { 13857ec681f3Smrg nir_ssa_def *sbt_idx = 13867ec681f3Smrg nir_iadd(b, 13877ec681f3Smrg nir_iadd(b, nir_load_var(b, vars->sbt_offset), 13887ec681f3Smrg nir_iand(b, nir_load_var(b, trav_vars->sbt_offset_and_flags), 13897ec681f3Smrg nir_imm_int(b, 0xffffff))), 13907ec681f3Smrg nir_imul(b, nir_load_var(b, vars->sbt_stride), geometry_id)); 13917ec681f3Smrg 13927ec681f3Smrg struct rt_variables inner_vars = create_inner_vars(b, vars); 13937ec681f3Smrg 13947ec681f3Smrg /* For AABBs the intersection shader writes the hit kind, and only does it if it is the 13957ec681f3Smrg * next closest hit candidate. 
*/ 13967ec681f3Smrg inner_vars.hit_kind = vars->hit_kind; 13977ec681f3Smrg 13987ec681f3Smrg nir_store_var(b, inner_vars.primitive_id, primitive_id, 1); 13997ec681f3Smrg nir_store_var(b, inner_vars.geometry_id_and_flags, geometry_id_and_flags, 1); 14007ec681f3Smrg nir_store_var(b, inner_vars.tmax, nir_load_var(b, vars->tmax), 0x1); 14017ec681f3Smrg nir_store_var(b, inner_vars.instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 14027ec681f3Smrg nir_store_var(b, inner_vars.instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 14037ec681f3Smrg nir_store_var(b, inner_vars.custom_instance_and_mask, 14047ec681f3Smrg nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 14057ec681f3Smrg nir_store_var(b, inner_vars.opaque, is_opaque, 1); 14067ec681f3Smrg 14077ec681f3Smrg load_sbt_entry(b, &inner_vars, sbt_idx, SBT_HIT, 4); 14087ec681f3Smrg 14097ec681f3Smrg nir_store_var(b, vars->ahit_status, nir_imm_int(b, 1), 1); 14107ec681f3Smrg 14117ec681f3Smrg nir_push_if(b, nir_ine(b, nir_load_var(b, inner_vars.idx), nir_imm_int(b, 0))); 14127ec681f3Smrg for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) { 14137ec681f3Smrg const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i]; 14147ec681f3Smrg uint32_t shader_id = VK_SHADER_UNUSED_KHR; 14157ec681f3Smrg uint32_t any_hit_shader_id = VK_SHADER_UNUSED_KHR; 14167ec681f3Smrg 14177ec681f3Smrg switch (group_info->type) { 14187ec681f3Smrg case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 14197ec681f3Smrg shader_id = group_info->intersectionShader; 14207ec681f3Smrg any_hit_shader_id = group_info->anyHitShader; 14217ec681f3Smrg break; 14227ec681f3Smrg default: 14237ec681f3Smrg break; 14247ec681f3Smrg } 14257ec681f3Smrg if (shader_id == VK_SHADER_UNUSED_KHR) 14267ec681f3Smrg continue; 14277ec681f3Smrg 14287ec681f3Smrg const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id]; 14297ec681f3Smrg nir_shader *nir_stage = parse_rt_stage(device, layout, stage); 
14307ec681f3Smrg 14317ec681f3Smrg nir_shader *any_hit_stage = NULL; 14327ec681f3Smrg if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { 14337ec681f3Smrg stage = &pCreateInfo->pStages[any_hit_shader_id]; 14347ec681f3Smrg any_hit_stage = parse_rt_stage(device, layout, stage); 14357ec681f3Smrg 14367ec681f3Smrg nir_lower_intersection_shader(nir_stage, any_hit_stage); 14377ec681f3Smrg ralloc_free(any_hit_stage); 14387ec681f3Smrg } 14397ec681f3Smrg 14407ec681f3Smrg inner_vars.group_idx = i; 14417ec681f3Smrg insert_rt_case(b, nir_stage, &inner_vars, nir_load_var(b, inner_vars.idx), 0, i + 2); 14427ec681f3Smrg } 14437ec681f3Smrg nir_push_else(b, NULL); 14447ec681f3Smrg { 14457ec681f3Smrg nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7); 14467ec681f3Smrg nir_ssa_def *vec3_inf = 14477ec681f3Smrg nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7); 14487ec681f3Smrg 14497ec681f3Smrg nir_ssa_def *bvh_lo = 14507ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 0)), 14517ec681f3Smrg .align_mul = 4, .align_offset = 0); 14527ec681f3Smrg nir_ssa_def *bvh_hi = 14537ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 12)), 14547ec681f3Smrg .align_mul = 4, .align_offset = 0); 14557ec681f3Smrg 14567ec681f3Smrg bvh_lo = nir_fsub(b, bvh_lo, nir_load_var(b, trav_vars->origin)); 14577ec681f3Smrg bvh_hi = nir_fsub(b, bvh_hi, nir_load_var(b, trav_vars->origin)); 14587ec681f3Smrg nir_ssa_def *t_vec = nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)), 14597ec681f3Smrg nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir))); 14607ec681f3Smrg nir_ssa_def *t2_vec = nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_var(b, trav_vars->inv_dir)), 14617ec681f3Smrg nir_fmul(b, bvh_hi, nir_load_var(b, trav_vars->inv_dir))); 14627ec681f3Smrg /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */ 14637ec681f3Smrg t2_vec = 14647ec681f3Smrg nir_bcsel(b, 
nir_feq(b, nir_load_var(b, trav_vars->dir), vec3_zero), vec3_inf, t2_vec); 14657ec681f3Smrg 14667ec681f3Smrg nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1)); 14677ec681f3Smrg t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2)); 14687ec681f3Smrg 14697ec681f3Smrg nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1)); 14707ec681f3Smrg t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2)); 14717ec681f3Smrg 14727ec681f3Smrg nir_push_if(b, nir_iand(b, nir_flt(b, t_min, nir_load_var(b, vars->tmax)), 14737ec681f3Smrg nir_fge(b, t_max, nir_load_var(b, vars->tmin)))); 14747ec681f3Smrg { 14757ec681f3Smrg nir_store_var(b, vars->ahit_status, nir_imm_int(b, 0), 1); 14767ec681f3Smrg nir_store_var(b, vars->tmax, nir_fmax(b, t_min, nir_load_var(b, vars->tmin)), 1); 14777ec681f3Smrg } 14787ec681f3Smrg nir_pop_if(b, NULL); 14797ec681f3Smrg } 14807ec681f3Smrg nir_pop_if(b, NULL); 14817ec681f3Smrg 14827ec681f3Smrg nir_push_if(b, nir_ine(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 1))); 14837ec681f3Smrg { 14847ec681f3Smrg nir_store_var(b, vars->primitive_id, primitive_id, 1); 14857ec681f3Smrg nir_store_var(b, vars->geometry_id_and_flags, geometry_id_and_flags, 1); 14867ec681f3Smrg nir_store_var(b, vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1); 14877ec681f3Smrg nir_store_var(b, vars->instance_id, nir_load_var(b, trav_vars->instance_id), 0x1); 14887ec681f3Smrg nir_store_var(b, vars->instance_addr, nir_load_var(b, trav_vars->instance_addr), 0x1); 14897ec681f3Smrg nir_store_var(b, vars->custom_instance_and_mask, 14907ec681f3Smrg nir_load_var(b, trav_vars->custom_instance_and_mask), 0x1); 14917ec681f3Smrg 14927ec681f3Smrg load_sbt_entry(b, vars, sbt_idx, SBT_HIT, 0); 14937ec681f3Smrg 14947ec681f3Smrg nir_store_var(b, trav_vars->should_return, 14957ec681f3Smrg nir_ior(b, 14967ec681f3Smrg nir_ine(b, 14977ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 14987ec681f3Smrg nir_imm_int(b, 8 /* 
SkipClosestHitShader */)), 14997ec681f3Smrg nir_imm_int(b, 0)), 15007ec681f3Smrg nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0))), 15017ec681f3Smrg 1); 15027ec681f3Smrg 15037ec681f3Smrg nir_ssa_def *terminate_on_first_hit = 15047ec681f3Smrg nir_ine(b, 15057ec681f3Smrg nir_iand(b, nir_load_var(b, vars->flags), 15067ec681f3Smrg nir_imm_int(b, 4 /* TerminateOnFirstHitKHR */)), 15077ec681f3Smrg nir_imm_int(b, 0)); 15087ec681f3Smrg nir_ssa_def *ray_terminated = 15097ec681f3Smrg nir_ieq(b, nir_load_var(b, vars->ahit_status), nir_imm_int(b, 2)); 15107ec681f3Smrg nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated)); 15117ec681f3Smrg { 15127ec681f3Smrg nir_jump(b, nir_jump_break); 15137ec681f3Smrg } 15147ec681f3Smrg nir_pop_if(b, NULL); 15157ec681f3Smrg } 15167ec681f3Smrg nir_pop_if(b, NULL); 15177ec681f3Smrg } 15187ec681f3Smrg nir_pop_if(b, NULL); 15197ec681f3Smrg} 15207ec681f3Smrg 15217ec681f3Smrgstatic void 15227ec681f3Smrgnir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1, uint32_t chan_2) 15237ec681f3Smrg{ 15247ec681f3Smrg nir_ssa_def *ssa_distances = nir_load_var(b, var_distances); 15257ec681f3Smrg nir_ssa_def *ssa_indices = nir_load_var(b, var_indices); 15267ec681f3Smrg /* if (distances[chan_2] < distances[chan_1]) { */ 15277ec681f3Smrg nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1))); 15287ec681f3Smrg { 15297ec681f3Smrg /* swap(distances[chan_2], distances[chan_1]); */ 15307ec681f3Smrg nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; 15317ec681f3Smrg nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32)}; 15327ec681f3Smrg new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1); 15337ec681f3Smrg new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2); 
15347ec681f3Smrg new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1); 15357ec681f3Smrg new_indices[chan_1] = nir_channel(b, ssa_indices, chan_2); 15367ec681f3Smrg nir_store_var(b, var_distances, nir_vec(b, new_distances, 4), (1u << chan_1) | (1u << chan_2)); 15377ec681f3Smrg nir_store_var(b, var_indices, nir_vec(b, new_indices, 4), (1u << chan_1) | (1u << chan_2)); 15387ec681f3Smrg } 15397ec681f3Smrg /* } */ 15407ec681f3Smrg nir_pop_if(b, NULL); 15417ec681f3Smrg} 15427ec681f3Smrg 15437ec681f3Smrgstatic nir_ssa_def * 15447ec681f3Smrgintersect_ray_amd_software_box(struct radv_device *device, 15457ec681f3Smrg nir_builder *b, nir_ssa_def *bvh_node, 15467ec681f3Smrg nir_ssa_def *ray_tmax, nir_ssa_def *origin, 15477ec681f3Smrg nir_ssa_def *dir, nir_ssa_def *inv_dir) 15487ec681f3Smrg{ 15497ec681f3Smrg const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); 15507ec681f3Smrg const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); 15517ec681f3Smrg 15527ec681f3Smrg nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 15537ec681f3Smrg 15547ec681f3Smrg /* vec4 distances = vec4(INF, INF, INF, INF); */ 15557ec681f3Smrg nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances"); 15567ec681f3Smrg nir_store_var(b, distances, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, INFINITY), 0xf); 15577ec681f3Smrg 15587ec681f3Smrg /* uvec4 child_indices = uvec4(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); */ 15597ec681f3Smrg nir_variable *child_indices = nir_variable_create(b->shader, nir_var_shader_temp, uvec4_type, "child_indices"); 15607ec681f3Smrg nir_store_var(b, child_indices, nir_imm_ivec4(b, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu), 0xf); 15617ec681f3Smrg 15627ec681f3Smrg /* Need to remove infinities here because otherwise we get nasty NaN propogation 15637ec681f3Smrg * if the direction has 0s in it. 
*/ 15647ec681f3Smrg /* inv_dir = clamp(inv_dir, -FLT_MAX, FLT_MAX); */ 15657ec681f3Smrg inv_dir = nir_fclamp(b, inv_dir, nir_imm_float(b, -FLT_MAX), nir_imm_float(b, FLT_MAX)); 15667ec681f3Smrg 15677ec681f3Smrg for (int i = 0; i < 4; i++) { 15687ec681f3Smrg const uint32_t child_offset = offsetof(struct radv_bvh_box32_node, children[i]); 15697ec681f3Smrg const uint32_t coord_offsets[2] = { 15707ec681f3Smrg offsetof(struct radv_bvh_box32_node, coords[i][0][0]), 15717ec681f3Smrg offsetof(struct radv_bvh_box32_node, coords[i][1][0]), 15727ec681f3Smrg }; 15737ec681f3Smrg 15747ec681f3Smrg /* node->children[i] -> uint */ 15757ec681f3Smrg nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd(b, node_addr, nir_imm_int64(b, child_offset)), .align_mul = 64, .align_offset = child_offset % 64 ); 15767ec681f3Smrg /* node->coords[i][0], node->coords[i][1] -> vec3 */ 15777ec681f3Smrg nir_ssa_def *node_coords[2] = { 15787ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[0])), .align_mul = 64, .align_offset = coord_offsets[0] % 64 ), 15797ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[1])), .align_mul = 64, .align_offset = coord_offsets[1] % 64 ), 15807ec681f3Smrg }; 15817ec681f3Smrg 15827ec681f3Smrg /* If x of the aabb min is NaN, then this is an inactive aabb. 15837ec681f3Smrg * We don't need to care about any other components being NaN as that is UB. 
15847ec681f3Smrg * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR */ 15857ec681f3Smrg nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0); 15867ec681f3Smrg nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */ 15877ec681f3Smrg 15887ec681f3Smrg /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */ 15897ec681f3Smrg nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir); 15907ec681f3Smrg /* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */ 15917ec681f3Smrg nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir); 15927ec681f3Smrg 15937ec681f3Smrg /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z, bound1.z)); */ 15947ec681f3Smrg nir_ssa_def *tmin = nir_fmax(b, nir_fmax(b, 15957ec681f3Smrg nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), 15967ec681f3Smrg nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), 15977ec681f3Smrg nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); 15987ec681f3Smrg 15997ec681f3Smrg /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z, bound1.z)); */ 16007ec681f3Smrg nir_ssa_def *tmax = nir_fmin(b, nir_fmin(b, 16017ec681f3Smrg nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)), 16027ec681f3Smrg nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))), 16037ec681f3Smrg nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2))); 16047ec681f3Smrg 16057ec681f3Smrg /* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */ 16067ec681f3Smrg nir_push_if(b, 16077ec681f3Smrg nir_iand(b, 16087ec681f3Smrg min_x_is_not_nan, 16097ec681f3Smrg nir_iand(b, 16107ec681f3Smrg nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)), 16117ec681f3Smrg nir_flt(b, tmin, ray_tmax)))); 16127ec681f3Smrg { 16137ec681f3Smrg /* 
child_indices[i] = node->children[i]; */ 16147ec681f3Smrg nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index}; 16157ec681f3Smrg nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i); 16167ec681f3Smrg 16177ec681f3Smrg /* distances[i] = tmin; */ 16187ec681f3Smrg nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin}; 16197ec681f3Smrg nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i); 16207ec681f3Smrg 16217ec681f3Smrg } 16227ec681f3Smrg /* } */ 16237ec681f3Smrg nir_pop_if(b, NULL); 16247ec681f3Smrg } 16257ec681f3Smrg 16267ec681f3Smrg /* Sort our distances with a sorting network. */ 16277ec681f3Smrg nir_sort_hit_pair(b, distances, child_indices, 0, 1); 16287ec681f3Smrg nir_sort_hit_pair(b, distances, child_indices, 2, 3); 16297ec681f3Smrg nir_sort_hit_pair(b, distances, child_indices, 0, 2); 16307ec681f3Smrg nir_sort_hit_pair(b, distances, child_indices, 1, 3); 16317ec681f3Smrg nir_sort_hit_pair(b, distances, child_indices, 1, 2); 16327ec681f3Smrg 16337ec681f3Smrg return nir_load_var(b, child_indices); 16347ec681f3Smrg} 16357ec681f3Smrg 16367ec681f3Smrgstatic nir_ssa_def * 16377ec681f3Smrgintersect_ray_amd_software_tri(struct radv_device *device, 16387ec681f3Smrg nir_builder *b, nir_ssa_def *bvh_node, 16397ec681f3Smrg nir_ssa_def *ray_tmax, nir_ssa_def *origin, 16407ec681f3Smrg nir_ssa_def *dir, nir_ssa_def *inv_dir) 16417ec681f3Smrg{ 16427ec681f3Smrg const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); 16437ec681f3Smrg 16447ec681f3Smrg nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); 16457ec681f3Smrg 16467ec681f3Smrg const uint32_t coord_offsets[3] = { 16477ec681f3Smrg offsetof(struct radv_bvh_triangle_node, coords[0]), 16487ec681f3Smrg offsetof(struct radv_bvh_triangle_node, coords[1]), 16497ec681f3Smrg offsetof(struct radv_bvh_triangle_node, coords[2]), 16507ec681f3Smrg }; 16517ec681f3Smrg 16527ec681f3Smrg /* node->coords[0], node->coords[1], 
node->coords[2] -> vec3 */ 16537ec681f3Smrg nir_ssa_def *node_coords[3] = { 16547ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[0])), .align_mul = 64, .align_offset = coord_offsets[0] % 64 ), 16557ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[1])), .align_mul = 64, .align_offset = coord_offsets[1] % 64 ), 16567ec681f3Smrg nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, coord_offsets[2])), .align_mul = 64, .align_offset = coord_offsets[2] % 64 ), 16577ec681f3Smrg }; 16587ec681f3Smrg 16597ec681f3Smrg nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result"); 16607ec681f3Smrg nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf); 16617ec681f3Smrg 16627ec681f3Smrg /* Based on watertight Ray/Triangle intersection from 16637ec681f3Smrg * http://jcgt.org/published/0002/01/05/paper.pdf */ 16647ec681f3Smrg 16657ec681f3Smrg /* Calculate the dimension where the ray direction is largest */ 16667ec681f3Smrg nir_ssa_def *abs_dir = nir_fabs(b, dir); 16677ec681f3Smrg 16687ec681f3Smrg nir_ssa_def *abs_dirs[3] = { 16697ec681f3Smrg nir_channel(b, abs_dir, 0), 16707ec681f3Smrg nir_channel(b, abs_dir, 1), 16717ec681f3Smrg nir_channel(b, abs_dir, 2), 16727ec681f3Smrg }; 16737ec681f3Smrg /* Find index of greatest value of abs_dir and put that as kz. 
*/ 16747ec681f3Smrg nir_ssa_def *kz = nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]), 16757ec681f3Smrg nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), 16767ec681f3Smrg nir_imm_int(b, 0), nir_imm_int(b, 2)), 16777ec681f3Smrg nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), 16787ec681f3Smrg nir_imm_int(b, 1), nir_imm_int(b, 2))); 16797ec681f3Smrg nir_ssa_def *kx = nir_imod(b, nir_iadd(b, kz, nir_imm_int(b, 1)), nir_imm_int(b, 3)); 16807ec681f3Smrg nir_ssa_def *ky = nir_imod(b, nir_iadd(b, kx, nir_imm_int(b, 1)), nir_imm_int(b, 3)); 16817ec681f3Smrg nir_ssa_def *k_indices[3] = { kx, ky, kz }; 16827ec681f3Smrg nir_ssa_def *k = nir_vec(b, k_indices, 3); 16837ec681f3Smrg 16847ec681f3Smrg /* Swap kx and ky dimensions to preseve winding order */ 16857ec681f3Smrg unsigned swap_xy_swizzle[4] = {1, 0, 2, 3}; 16867ec681f3Smrg k = nir_bcsel(b, 16877ec681f3Smrg nir_flt(b, nir_vector_extract(b, dir, kz), nir_imm_float(b, 0.0f)), 16887ec681f3Smrg nir_swizzle(b, k, swap_xy_swizzle, 3), 16897ec681f3Smrg k); 16907ec681f3Smrg 16917ec681f3Smrg kx = nir_channel(b, k, 0); 16927ec681f3Smrg ky = nir_channel(b, k, 1); 16937ec681f3Smrg kz = nir_channel(b, k, 2); 16947ec681f3Smrg 16957ec681f3Smrg /* Calculate shear constants */ 16967ec681f3Smrg nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz)); 16977ec681f3Smrg nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz); 16987ec681f3Smrg nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz); 16997ec681f3Smrg 17007ec681f3Smrg /* Calculate vertices relative to ray origin */ 17017ec681f3Smrg nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin); 17027ec681f3Smrg nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin); 17037ec681f3Smrg nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin); 17047ec681f3Smrg 17057ec681f3Smrg /* Perform shear and scale */ 17067ec681f3Smrg nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz))); 17077ec681f3Smrg nir_ssa_def 
*ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz))); 17087ec681f3Smrg nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz))); 17097ec681f3Smrg nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz))); 17107ec681f3Smrg nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz))); 17117ec681f3Smrg nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz))); 17127ec681f3Smrg 17137ec681f3Smrg nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx)); 17147ec681f3Smrg nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx)); 17157ec681f3Smrg nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax)); 17167ec681f3Smrg 17177ec681f3Smrg nir_variable *u_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "u"); 17187ec681f3Smrg nir_variable *v_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "v"); 17197ec681f3Smrg nir_variable *w_var = nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "w"); 17207ec681f3Smrg nir_store_var(b, u_var, u, 0x1); 17217ec681f3Smrg nir_store_var(b, v_var, v, 0x1); 17227ec681f3Smrg nir_store_var(b, w_var, w, 0x1); 17237ec681f3Smrg 17247ec681f3Smrg /* Fallback to testing edges with double precision... 17257ec681f3Smrg * 17267ec681f3Smrg * The Vulkan spec states it only needs single precision watertightness 17277ec681f3Smrg * but we fail dEQP-VK.ray_tracing_pipeline.watertightness.closedFan2.1024 with 17287ec681f3Smrg * failures = 1 without doing this. 
:( */ 17297ec681f3Smrg nir_ssa_def *cond_retest = nir_ior(b, nir_ior(b, 17307ec681f3Smrg nir_feq(b, u, nir_imm_float(b, 0.0f)), 17317ec681f3Smrg nir_feq(b, v, nir_imm_float(b, 0.0f))), 17327ec681f3Smrg nir_feq(b, w, nir_imm_float(b, 0.0f))); 17337ec681f3Smrg 17347ec681f3Smrg nir_push_if(b, cond_retest); 17357ec681f3Smrg { 17367ec681f3Smrg ax = nir_f2f64(b, ax); ay = nir_f2f64(b, ay); 17377ec681f3Smrg bx = nir_f2f64(b, bx); by = nir_f2f64(b, by); 17387ec681f3Smrg cx = nir_f2f64(b, cx); cy = nir_f2f64(b, cy); 17397ec681f3Smrg 17407ec681f3Smrg nir_store_var(b, u_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx))), 0x1); 17417ec681f3Smrg nir_store_var(b, v_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx))), 0x1); 17427ec681f3Smrg nir_store_var(b, w_var, nir_f2f32(b, nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax))), 0x1); 17437ec681f3Smrg } 17447ec681f3Smrg nir_pop_if(b, NULL); 17457ec681f3Smrg 17467ec681f3Smrg u = nir_load_var(b, u_var); 17477ec681f3Smrg v = nir_load_var(b, v_var); 17487ec681f3Smrg w = nir_load_var(b, w_var); 17497ec681f3Smrg 17507ec681f3Smrg /* Perform edge tests. 
*/ 17517ec681f3Smrg nir_ssa_def *cond_back = nir_ior(b, nir_ior(b, 17527ec681f3Smrg nir_flt(b, u, nir_imm_float(b, 0.0f)), 17537ec681f3Smrg nir_flt(b, v, nir_imm_float(b, 0.0f))), 17547ec681f3Smrg nir_flt(b, w, nir_imm_float(b, 0.0f))); 17557ec681f3Smrg 17567ec681f3Smrg nir_ssa_def *cond_front = nir_ior(b, nir_ior(b, 17577ec681f3Smrg nir_flt(b, nir_imm_float(b, 0.0f), u), 17587ec681f3Smrg nir_flt(b, nir_imm_float(b, 0.0f), v)), 17597ec681f3Smrg nir_flt(b, nir_imm_float(b, 0.0f), w)); 17607ec681f3Smrg 17617ec681f3Smrg nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front)); 17627ec681f3Smrg 17637ec681f3Smrg nir_push_if(b, cond); 17647ec681f3Smrg { 17657ec681f3Smrg nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w)); 17667ec681f3Smrg 17677ec681f3Smrg nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz)); 17687ec681f3Smrg nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz)); 17697ec681f3Smrg nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz)); 17707ec681f3Smrg 17717ec681f3Smrg nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz)); 17727ec681f3Smrg 17737ec681f3Smrg nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t); 17747ec681f3Smrg 17757ec681f3Smrg nir_ssa_def *det_cond_front = nir_inot(b, nir_flt(b, t_signed, nir_imm_float(b, 0.0f))); 17767ec681f3Smrg 17777ec681f3Smrg nir_push_if(b, det_cond_front); 17787ec681f3Smrg { 17797ec681f3Smrg nir_ssa_def *indices[4] = { 17807ec681f3Smrg t, det, 17817ec681f3Smrg v, w 17827ec681f3Smrg }; 17837ec681f3Smrg nir_store_var(b, result, nir_vec(b, indices, 4), 0xf); 17847ec681f3Smrg } 17857ec681f3Smrg nir_pop_if(b, NULL); 17867ec681f3Smrg } 17877ec681f3Smrg nir_pop_if(b, NULL); 17887ec681f3Smrg 17897ec681f3Smrg return nir_load_var(b, result); 17907ec681f3Smrg} 17917ec681f3Smrg 17927ec681f3Smrgstatic void 17937ec681f3Smrginsert_traversal(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, 
                 nir_builder *b, const struct rt_variables *vars)
{
   /* Emits the BVH traversal loop into the combined RT shader.  Uses a
    * per-lane DFS stack in LDS (shared memory); each stack entry is one
    * 32-bit node id, and entries of all lanes are interleaved so that a
    * lane's slots are stack_entry_stride apart. */
   unsigned stack_entry_size = 4;
   unsigned lanes = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] *
                    b->shader->info.workgroup_size[2];
   unsigned stack_entry_stride = stack_entry_size * lanes;
   nir_ssa_def *stack_entry_stride_def = nir_imm_int(b, stack_entry_stride);
   /* Each lane's stack starts right after whatever shared memory is already
    * in use, offset by the lane's subgroup invocation index. */
   nir_ssa_def *stack_base =
      nir_iadd(b, nir_imm_int(b, b->shader->info.shared_size),
               nir_imul(b, nir_load_subgroup_invocation(b), nir_imm_int(b, stack_entry_size)));

   /*
    * A top-level AS can contain 2^24 children and a bottom-level AS can contain 2^24 triangles. At
    * a branching factor of 4, that means we may need up to 24 levels of box nodes + 1 triangle node
    * + 1 instance node. Furthermore, when processing a box node, worst case we actually push all 4
    * children and remove one, so the DFS stack depth is box nodes * 3 + 2.
    */
   b->shader->info.shared_size += stack_entry_stride * 76;
   /* NOTE(review): 32768 looks like the LDS budget assumed for this
    * workgroup size — confirm against the hardware shared-memory limit. */
   assert(b->shader->info.shared_size <= 32768);

   nir_ssa_def *accel_struct = nir_load_var(b, vars->accel_struct);

   struct rt_traversal_vars trav_vars = init_traversal_vars(b);

   /* Initialize the follow-up shader idx to 0, to be replaced by the miss shader
    * if we actually miss. */
   nir_store_var(b, vars->idx, nir_imm_int(b, 0), 1);

   nir_store_var(b, trav_vars.should_return, nir_imm_bool(b, false), 1);

   /* Skip traversal entirely for a NULL acceleration structure. */
   nir_push_if(b, nir_ine(b, accel_struct, nir_imm_int64(b, 0)));
   {
      nir_store_var(b, trav_vars.bvh_base, build_addr_to_node(b, accel_struct), 1);

      /* Root node id is stored in the first dword of the AS header. */
      nir_ssa_def *bvh_root =
         nir_build_load_global(b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE,
                               .align_mul = 64, .align_offset = 0);

      /* We create a BVH descriptor that covers the entire memory range. That way we can always
       * use the same descriptor, which avoids divergence when different rays hit different
       * instances at the cost of having to use 64-bit node ids. */
      const uint64_t bvh_size = 1ull << 42;
      nir_ssa_def *desc = nir_imm_ivec4(
         b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu,
         ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31));

      nir_ssa_def *vec3ones = nir_channels(b, nir_imm_vec4(b, 1.0, 1.0, 1.0, 1.0), 0x7);
      /* Traversal starts in world space (top-level AS coordinates). */
      nir_store_var(b, trav_vars.origin, nir_load_var(b, vars->origin), 7);
      nir_store_var(b, trav_vars.dir, nir_load_var(b, vars->direction), 7);
      nir_store_var(b, trav_vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
      nir_store_var(b, trav_vars.sbt_offset_and_flags, nir_imm_int(b, 0), 1);
      nir_store_var(b, trav_vars.instance_addr, nir_imm_int64(b, 0), 1);

      /* Seed the DFS stack with the root node. */
      nir_store_var(b, trav_vars.stack, nir_iadd(b, stack_base, stack_entry_stride_def), 1);
      nir_store_shared(b, bvh_root, stack_base, .base = 0, .write_mask = 0x1,
                       .align_mul = stack_entry_size, .align_offset = 0);

      /* top_stack marks the stack level at which we entered a bottom-level AS;
       * 0 means we are currently in the top-level AS. */
      nir_store_var(b, trav_vars.top_stack, nir_imm_int(b, 0), 1);

      nir_push_loop(b);

      /* Empty stack -> traversal finished. */
      nir_push_if(b, nir_ieq(b, nir_load_var(b, trav_vars.stack), stack_base));
      nir_jump(b, nir_jump_break);
      nir_pop_if(b, NULL);

      /* Popped back up past the instance entry point: return to the top-level
       * AS and restore the world-space ray. */
      nir_push_if(
         b, nir_uge(b, nir_load_var(b, trav_vars.top_stack), nir_load_var(b, trav_vars.stack)));
      nir_store_var(b, trav_vars.top_stack, nir_imm_int(b, 0), 1);
      nir_store_var(b, trav_vars.bvh_base,
                    build_addr_to_node(b, nir_load_var(b, vars->accel_struct)), 1);
      nir_store_var(b, trav_vars.origin, nir_load_var(b, vars->origin), 7);
      nir_store_var(b, trav_vars.dir, nir_load_var(b, vars->direction), 7);
      nir_store_var(b, trav_vars.inv_dir, nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
      nir_store_var(b, trav_vars.instance_addr, nir_imm_int64(b, 0), 1);

      nir_pop_if(b, NULL);

      /* Pop the next node id off the stack. */
      nir_store_var(b, trav_vars.stack,
                    nir_isub(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def), 1);

      nir_ssa_def *bvh_node = nir_load_shared(b, 1, 32, nir_load_var(b, trav_vars.stack), .base = 0,
                                              .align_mul = stack_entry_size, .align_offset = 0);
      /* Low 3 bits of the node id encode the node type. */
      nir_ssa_def *bvh_node_type = nir_iand(b, bvh_node, nir_imm_int(b, 7));

      bvh_node = nir_iadd(b, nir_load_var(b, trav_vars.bvh_base), nir_u2u(b, bvh_node, 64));
      nir_ssa_def *intrinsic_result = NULL;
      /* Use the hardware ray intersection instruction when available
       * (GFX10.3+) and not forced into the software-emulation path. */
      if (device->physical_device->rad_info.chip_class >= GFX10_3
          && !(device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT)) {
         intrinsic_result = nir_bvh64_intersect_ray_amd(
            b, 32, desc, nir_unpack_64_2x32(b, bvh_node), nir_load_var(b, vars->tmax),
            nir_load_var(b, trav_vars.origin), nir_load_var(b, trav_vars.dir),
            nir_load_var(b, trav_vars.inv_dir));
      }

      /* Type bit 2 set -> non-triangle node (box / AABB / instance). */
      nir_push_if(b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 4)), nir_imm_int(b, 0)));
      {
         /* Type bit 1 set -> leaf (AABB or instance), clear -> box node. */
         nir_push_if(b,
                     nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 2)), nir_imm_int(b, 0)));
         {
            /* custom */
            nir_push_if(
               b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 1)), nir_imm_int(b, 0)));
            {
               insert_traversal_aabb_case(device, pCreateInfo, b, vars, &trav_vars, bvh_node);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, bvh_node);
               nir_ssa_def *instance_data = nir_build_load_global(
                  b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
               /* World-to-object matrix: three vec4 rows at byte offsets
                * 16/32/48 of the instance node. */
               nir_ssa_def *wto_matrix[] = {
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 16)),
                                        .align_mul = 64, .align_offset = 16),
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 32)),
                                        .align_mul = 64, .align_offset = 32),
                  nir_build_load_global(b, 4, 32,
                                        nir_iadd(b, instance_node_addr, nir_imm_int64(b, 48)),
                                        .align_mul = 64, .align_offset = 48)};
               nir_ssa_def *instance_id = nir_build_load_global(
                  b, 1, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 88)), .align_mul = 4,
                  .align_offset = 0);
               /* Instance mask lives in the top 8 bits of this dword. */
               nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
               nir_ssa_def *instance_mask = nir_ushr(b, instance_and_mask, nir_imm_int(b, 24));

               /* Cull the whole instance if its mask doesn't intersect the
                * ray's cull mask. */
               nir_push_if(b,
                           nir_ieq(b, nir_iand(b, instance_mask, nir_load_var(b, vars->cull_mask)),
                                   nir_imm_int(b, 0)));
               nir_jump(b, nir_jump_continue);
               nir_pop_if(b, NULL);

               /* Descend into the bottom-level AS: remember where we entered
                * (top_stack), switch bvh_base, push the BLAS root, and
                * transform the ray into object space. */
               nir_store_var(b, trav_vars.top_stack, nir_load_var(b, trav_vars.stack), 1);
               nir_store_var(b, trav_vars.bvh_base,
                             build_addr_to_node(
                                b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))),
                             1);
               nir_store_shared(b,
                                nir_iand(b, nir_channel(b, instance_data, 0), nir_imm_int(b, 63)),
                                nir_load_var(b, trav_vars.stack), .base = 0, .write_mask = 0x1,
                                .align_mul = stack_entry_size, .align_offset = 0);
               nir_store_var(b, trav_vars.stack,
                             nir_iadd(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def),
                             1);

               nir_store_var(
                  b, trav_vars.origin,
                  nir_build_vec3_mat_mult_pre(b, nir_load_var(b, vars->origin), wto_matrix), 7);
               nir_store_var(
                  b, trav_vars.dir,
                  nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false),
                  7);
               nir_store_var(b, trav_vars.inv_dir,
                             nir_fdiv(b, vec3ones, nir_load_var(b, trav_vars.dir)), 7);
               nir_store_var(b, trav_vars.custom_instance_and_mask, instance_and_mask, 1);
               nir_store_var(b, trav_vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                             1);
               nir_store_var(b, trav_vars.instance_id, instance_id, 1);
               nir_store_var(b, trav_vars.instance_addr, instance_node_addr, 1);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            /* box */
            nir_ssa_def *result = intrinsic_result;
            if (!result) {
               /* If we didn't run the intrinsic cause the hardware didn't support it,
                * emulate ray/box intersection here */
               result = intersect_ray_amd_software_box(device,
                  b, bvh_node, nir_load_var(b, vars->tmax), nir_load_var(b, trav_vars.origin),
                  nir_load_var(b, trav_vars.dir), nir_load_var(b, trav_vars.inv_dir));
            }

            /* Push surviving children in reverse order so the nearest-sorted
             * child ends up on top of the stack; 0xffffffff marks a miss. */
            for (unsigned i = 4; i-- > 0; ) {
               nir_ssa_def *new_node = nir_vector_extract(b, result, nir_imm_int(b, i));
               nir_push_if(b, nir_ine(b, new_node, nir_imm_int(b, 0xffffffff)));
               {
                  nir_store_shared(b, new_node, nir_load_var(b, trav_vars.stack), .base = 0,
                                   .write_mask = 0x1, .align_mul = stack_entry_size,
                                   .align_offset = 0);
                  nir_store_var(
                     b, trav_vars.stack,
                     nir_iadd(b, nir_load_var(b, trav_vars.stack), stack_entry_stride_def), 1);
               }
               nir_pop_if(b, NULL);
            }
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         /* Triangle node. */
         nir_ssa_def *result = intrinsic_result;
         if (!result) {
            /* If we didn't run the intrinsic cause the hardware didn't support it,
             * emulate ray/tri intersection here */
            result = intersect_ray_amd_software_tri(device,
               b, bvh_node, nir_load_var(b, vars->tmax), nir_load_var(b, trav_vars.origin),
               nir_load_var(b, trav_vars.dir), nir_load_var(b, trav_vars.inv_dir));
         }
         insert_traversal_triangle_case(device, pCreateInfo, b, result, vars, &trav_vars, bvh_node);
      }
      nir_pop_if(b, NULL);

      nir_pop_loop(b, NULL);
   }
   nir_pop_if(b, NULL);

   /* should_return is set if we had a hit but we won't be calling the closest hit shader and hence
    * need to return immediately to the calling shader. */
   nir_push_if(b, nir_load_var(b, trav_vars.should_return));
   {
      insert_rt_return(b, vars);
   }
   nir_push_else(b, NULL);
   {
      /* Only load the miss shader if we actually miss, which we determining by not having set
       * a closest hit shader. It is valid to not specify an SBT pointer for miss shaders if none
       * of the rays miss. */
      nir_push_if(b, nir_ieq(b, nir_load_var(b, vars->idx), nir_imm_int(b, 0)));
      {
         load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, 0);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}

/* Computes the scratch (stack) size in bytes needed by the combined RT shader:
 * the max raygen frame, plus one recursion level that also accounts for
 * non-recursive (any-hit/intersection) frames, plus the remaining recursion
 * levels of chit/miss frames, plus room for two callable frames.
 * NOTE(review): the "2 * callable_size" term presumably bounds callable
 * nesting at depth 2 — confirm against the stack-size rules in the
 * VK_KHR_ray_tracing_pipeline spec. */
static unsigned
compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                      const struct radv_pipeline_shader_stack_size *stack_sizes)
{
   unsigned raygen_size = 0;
   unsigned callable_size = 0;
   unsigned chit_size = 0;
   unsigned miss_size = 0;
   unsigned non_recursive_size = 0;

   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
      non_recursive_size = MAX2(stack_sizes[i].non_recursive_size, non_recursive_size);

      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
      uint32_t shader_id = VK_SHADER_UNUSED_KHR;
      unsigned size = stack_sizes[i].recursive_size;

      /* Pick the group's "recursive" shader: the general shader for general
       * groups, the closest-hit shader for hit groups. */
      switch (group_info->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         shader_id = group_info->generalShader;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         shader_id = group_info->closestHitShader;
         break;
      default:
         break;
      }
      if (shader_id == VK_SHADER_UNUSED_KHR)
         continue;

      /* Bucket the frame size by the stage kind of that shader. */
      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
      switch (stage->stage) {
      case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
         raygen_size = MAX2(raygen_size, size);
         break;
      case VK_SHADER_STAGE_MISS_BIT_KHR:
         miss_size = MAX2(miss_size, size);
         break;
      case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
         chit_size = MAX2(chit_size, size);
         break;
      case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
         callable_size = MAX2(callable_size, size);
         break;
      default:
         unreachable("Invalid stage type in RT shader");
      }
   }
   return raygen_size +
          MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) *
             MAX2(MAX2(chit_size, miss_size), non_recursive_size) +
          MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) *
             MAX2(chit_size, miss_size) +
          2 * callable_size;
}

/* Returns true if the pipeline declares
 * VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR, i.e. the stack size
 * is set at bind time rather than computed at pipeline creation. */
bool
radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
{
   if (!pCreateInfo->pDynamicState)
      return false;

   for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
      if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

/* Builds one big compute shader that contains all RT stages of the pipeline,
 * dispatched through a main loop on vars.idx (the "follow-up shader" index).
 * Fills stack_sizes[group] as a side effect of lowering each stage. */
static nir_shader *
create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                 struct radv_pipeline_shader_stack_size *stack_sizes)
{
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
   struct radv_pipeline_key key;
   memset(&key, 0, sizeof(key));

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "rt_combined");

   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   struct rt_variables vars = create_rt_variables(b.shader, stack_sizes);
   /* Start by launching the raygen shader (SBT entry 0). */
   load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0);
   nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);

   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);

   nir_loop *loop = nir_push_loop(&b);

   /* Exit when idx == 0 (shader returned) or when the previous iteration
    * matched no case (guards against an infinite loop on a bad idx). */
   nir_push_if(&b, nir_ior(&b, nir_ieq(&b, nir_load_var(&b, vars.idx), nir_imm_int(&b, 0)),
                           nir_ine(&b, nir_load_var(&b, vars.main_loop_case_visited),
                                   nir_imm_bool(&b, true))));
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, NULL);

   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, false), 1);

   /* idx == 1 is reserved for BVH traversal. */
   nir_push_if(&b, nir_ieq(&b, nir_load_var(&b, vars.idx), nir_imm_int(&b, 1)));
   nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1);
   insert_traversal(device, pCreateInfo, &b, &vars);
   nir_pop_if(&b, NULL);

   nir_ssa_def *idx = nir_load_var(&b, vars.idx);

   /* We do a trick with the indexing of the resume shaders so that the first
    * shader of group x always gets id x and the resume shader ids then come after
    * groupCount. This makes the shadergroup handles independent of compilation. */
   unsigned call_idx_base = pCreateInfo->groupCount + 1;
   for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
      uint32_t shader_id = VK_SHADER_UNUSED_KHR;

      switch (group_info->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         shader_id = group_info->generalShader;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         shader_id = group_info->closestHitShader;
         break;
      default:
         break;
      }
      if (shader_id == VK_SHADER_UNUSED_KHR)
         continue;

      const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[shader_id];
      nir_shader *nir_stage = parse_rt_stage(device, layout, stage);

      b.shader->options = nir_stage->options;

      /* Split the stage at shader calls; each continuation becomes its own
       * resume shader with an id after call_idx_base. */
      uint32_t num_resume_shaders = 0;
      nir_shader **resume_shaders = NULL;
      nir_lower_shader_calls(nir_stage, nir_address_format_32bit_offset, 16, &resume_shaders,
                             &num_resume_shaders, nir_stage);

      vars.group_idx = i;
      /* +2 because idx 0 = "return" and idx 1 = traversal. */
      insert_rt_case(&b, nir_stage, &vars, idx, call_idx_base, i + 2);
      for (unsigned j = 0; j < num_resume_shaders; ++j) {
         insert_rt_case(&b, resume_shaders[j], &vars, idx, call_idx_base, call_idx_base + 1 + j);
      }
      call_idx_base += num_resume_shaders;
   }

   nir_pop_loop(&b, loop);

   if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
      /* Put something so scratch gets enabled in the shader. */
      b.shader->scratch_size = 16;
   } else
      b.shader->scratch_size = compute_rt_stack_size(pCreateInfo, stack_sizes);

   /* Deal with all the inline functions. */
   nir_index_ssa_defs(nir_shader_get_entrypoint(b.shader));
   nir_metadata_preserve(nir_shader_get_entrypoint(b.shader), nir_metadata_none);

   return b.shader;
}

/* Creates one RT pipeline by lowering it to a compute pipeline: first probes
 * the pipeline cache with FAIL_ON_PIPELINE_COMPILE_REQUIRED, and only
 * generates the (expensive) combined NIR shader on a cache miss. */
static VkResult
radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;
   struct radv_pipeline *pipeline = NULL;
   struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
   uint8_t hash[20];
   nir_shader *shader = NULL;
   bool keep_statistic_info =
      (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
      (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;

   /* Pipeline libraries only record their stages/groups; no compilation. */
   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);

   /* Flatten the create info plus all linked libraries into one stage/group
    * array; the merged arrays are malloc'ed and freed at "fail:". */
   VkRayTracingPipelineCreateInfoKHR local_create_info =
      radv_create_merged_rt_create_info(pCreateInfo);
   if (!local_create_info.pStages || !local_create_info.pGroups) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};

   VkComputePipelineCreateInfo compute_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .pNext = NULL,
      .flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT,
      .stage =
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
            .module = vk_shader_module_to_handle(&module),
            .pName = "main",
         },
      .layout = pCreateInfo->layout,
   };

   /* First check if we can get things from the cache before we take the expensive step of
    * generating the nir. */
   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
                                         stack_sizes, local_create_info.groupCount, pPipeline);
   /* NOTE(review): if this first call fails with anything other than
    * VK_PIPELINE_COMPILE_REQUIRED_EXT (e.g. OOM), execution falls through to
    * dereference *pPipeline below — confirm that cannot happen or handle it. */
   if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) {
      stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
      if (!stack_sizes) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      shader = create_rt_shader(device, &local_create_info, stack_sizes);
      module.nir = shader;
      /* Drop FAIL_ON_COMPILE_REQUIRED for the real compile. */
      compute_info.flags = pCreateInfo->flags;
      result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
                                            stack_sizes, local_create_info.groupCount, pPipeline);
      /* Ownership of stack_sizes is assumed to pass to the pipeline here —
       * NULL it so the cleanup path below doesn't double-free.
       * TODO(review): confirm ownership transfer also holds on failure. */
      stack_sizes = NULL;

      if (result != VK_SUCCESS)
         goto shader_fail;
   }
   pipeline = radv_pipeline_from_handle(*pPipeline);

   pipeline->compute.rt_group_handles =
      calloc(sizeof(*pipeline->compute.rt_group_handles), local_create_info.groupCount);
   if (!pipeline->compute.rt_group_handles) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto shader_fail;
   }

   pipeline->compute.dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);

   /* Fill out group handles; ids are offset by 2 to match insert_rt_case
    * (idx 0 = return, idx 1 = traversal). handles[0] is the recursive
    * (general/closest-hit) slot, handles[1] the intersection/any-hit slot. */
   for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
      switch (group_info->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
         if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         FALLTHROUGH;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         break;
      case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
         unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
      }
   }

shader_fail:
   if (result != VK_SUCCESS && pipeline)
      radv_pipeline_destroy(device, pipeline, pAllocator);
   ralloc_free(shader);
fail:
   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   free(stack_sizes);
   return result;
}

/* vkCreateRayTracingPipelinesKHR: creates each pipeline in order; on failure
 * records the error, NULLs the handle, and (unless EARLY_RETURN is requested)
 * keeps going, then NULLs all remaining handles.
 * NOTE(review): deferredOperation is accepted but not used — deferred host
 * operations are presumably not supported here; verify feature exposure. */
VkResult
radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   unsigned i = 0;
   for (; i < count; i++) {
      VkResult r;
      r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
                                  &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   /* On early return, all pipelines past the failing one must be NULL. */
   for (; i < count; ++i)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

/* vkGetRayTracingShaderGroupHandlesKHR: copies the per-group handle structs
 * into the caller's buffer, one RADV_RT_HANDLE_SIZE-d slot per group,
 * zero-padding each slot. */
VkResult
radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
                                        uint32_t groupCount, size_t dataSize, void *pData)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
   char *data = pData;

   STATIC_ASSERT(sizeof(*pipeline->compute.rt_group_handles) <= RADV_RT_HANDLE_SIZE);

   memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);

   for (uint32_t i = 0; i < groupCount; ++i) {
      memcpy(data + i * RADV_RT_HANDLE_SIZE, &pipeline->compute.rt_group_handles[firstGroup + i],
             sizeof(*pipeline->compute.rt_group_handles));
   }

   return VK_SUCCESS;
}

/* vkGetRayTracingShaderGroupStackSizeKHR: any-hit/intersection query the
 * non-recursive frame size; all other group shaders the recursive size. */
VkDeviceSize
radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
                                          VkShaderGroupShaderKHR groupShader)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
   const struct radv_pipeline_shader_stack_size *stack_size =
      &pipeline->compute.rt_stack_sizes[group];

   if (groupShader == VK_SHADER_GROUP_SHADER_ANY_HIT_KHR ||
       groupShader == VK_SHADER_GROUP_SHADER_INTERSECTION_KHR)
      return stack_size->non_recursive_size;
   else
      return stack_size->recursive_size;
}