/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"
#include "nir_phi_builder.h"

/** Insert the appropriate return instruction at the end of the shader
 *
 * Emits the stage-specific "return" action (BTD retire, accept-intersection,
 * BTD return, or nothing for intersection shaders) at the single point where
 * the entrypoint's control flow reaches the end block.
 */
void
brw_nir_lower_shader_returns(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* Reserve scratch space at the start of the shader's per-thread scratch
    * space for the return BINDLESS_SHADER_RECORD address and data payload.
    * When a shader is called, the calling shader will write the return BSR
    * address in this region of the callee's scratch space.
    *
    * We could also put it at the end of the caller's scratch space. However,
    * doing this way means that a shader never accesses its caller's scratch
    * space unless given an explicit pointer (such as for ray payloads). It
    * also makes computing the address easier given that we want to apply an
    * alignment to the scratch offset to ensure we can make alignment
    * assumptions in the called shader.
    *
    * This isn't needed for ray-gen shaders because they end the thread and
    * never return to the calling trampoline shader.
    */
   assert(shader->scratch_size == 0);
   if (shader->info.stage != MESA_SHADER_RAYGEN)
      shader->scratch_size = BRW_BTD_STACK_CALLEE_DATA_SIZE;

   nir_builder b;
   nir_builder_init(&b, impl);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      /* Insert just before any trailing jump so the emitted return action is
       * the last real work the block does.
       */
      b.cursor = nir_after_block_before_jump(block);

      switch (shader->info.stage) {
      case MESA_SHADER_RAYGEN:
         /* A raygen shader is always the root of the shader call tree. When
          * it ends, we retire the bindless stack ID and no further shaders
          * will be executed.
          */
         brw_nir_btd_retire(&b);
         break;

      case MESA_SHADER_ANY_HIT:
         /* The default action of an any-hit shader is to accept the ray
          * intersection.
          */
         nir_accept_ray_intersection(&b);
         break;

      case MESA_SHADER_CALLABLE:
      case MESA_SHADER_MISS:
      case MESA_SHADER_CLOSEST_HIT:
         /* Callable, miss, and closest-hit shaders don't take any special
          * action at the end. They simply return back to the previous shader
          * in the call stack.
          */
         brw_nir_btd_return(&b);
         break;

      case MESA_SHADER_INTERSECTION:
         /* This will be handled by brw_nir_lower_intersection_shader */
         break;

      default:
         unreachable("Invalid callable shader stage");
      }

      /* The end block is expected to have exactly one predecessor here, so
       * we handle that single block and stop iterating.
       */
      assert(impl->end_block->predecessors->entries == 1);
      break;
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

/* Write the resume shader record address and the payload pointer to the
 * start of the callee's scratch region (see the layout comment in
 * brw_nir_lower_shader_returns) and push the BTD stack by the call's
 * stack size.
 */
static void
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
{
   uint32_t call_idx = nir_intrinsic_call_idx(call);
   uint32_t offset = nir_intrinsic_stack_size(call);

   /* First thing on the called shader's stack is the resume address
    * followed by a pointer to the payload.
    */
   nir_ssa_def *resume_record_addr =
      nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
                   call_idx * BRW_BTD_RESUME_SBT_STRIDE);
   /* By the time we get here, any remaining shader/function memory
    * pointers have been lowered to SSA values.
    */
   assert(nir_get_shader_call_payload_src(call)->is_ssa);
   nir_ssa_def *payload_addr =
      nir_get_shader_call_payload_src(call)->ssa;
   brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
                            nir_vec2(b, resume_record_addr, payload_addr),
                            0xf /* write_mask */);

   nir_btd_stack_push_intel(b, offset);
}

/* Per-instruction callback for brw_nir_lower_shader_calls.
 *
 * Lowers nir_intrinsic_rt_trace_ray to a hardware memory-ray store plus a
 * trace message, and nir_intrinsic_rt_execute_callable to a BTD spawn.
 * Returns true if the instruction was replaced.
 */
static bool
lower_shader_calls_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);

   switch (call->intrinsic) {
   case nir_intrinsic_rt_trace_ray: {
      b->cursor = nir_instr_remove(instr);

      store_resume_addr(b, call);

      nir_ssa_def *as_addr = call->src[0].ssa;
      nir_ssa_def *ray_flags = call->src[1].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by this
       *    instruction - other bits are ignored.
       *
       *    Only the 4 least-significant bits of SBT Offset and SBT Stride are
       *    used by this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by this
       *    instruction - other bits are ignored."
       */
      nir_ssa_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
      nir_ssa_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
      nir_ssa_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
      nir_ssa_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
      nir_ssa_def *ray_orig = call->src[6].ssa;
      nir_ssa_def *ray_t_min = call->src[7].ssa;
      nir_ssa_def *ray_dir = call->src[8].ssa;
      nir_ssa_def *ray_t_max = call->src[9].ssa;

      /* The hardware packet takes the address to the root node in the
       * acceleration structure, not the acceleration structure itself. To
       * find that, we have to read the root node offset from the acceleration
       * structure which is the first QWord.
       */
      nir_ssa_def *root_node_ptr =
         nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64));

      /* The hardware packet requires an address to the first element of the
       * hit SBT.
       *
       * In order to calculate this, we must multiply the "SBT Offset"
       * provided to OpTraceRay by the SBT stride provided for the hit SBT in
       * the call to vkCmdTraceRay() and add that to the base address of the
       * hit SBT. This stride is not to be confused with the "SBT Stride"
       * provided to OpTraceRay which is in units of this stride. It's a
       * rather terrible overload of the word "stride". The hardware docs
       * calls the SPIR-V stride value the "shader index multiplier" which is
       * a much more sane name.
       */
      nir_ssa_def *hit_sbt_stride_B =
         nir_load_ray_hit_sbt_stride_intel(b);
      nir_ssa_def *hit_sbt_offset_B =
         nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
      nir_ssa_def *hit_sbt_addr =
         nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
                     nir_u2u64(b, hit_sbt_offset_B));

      /* The hardware packet takes an address to the miss BSR. */
      nir_ssa_def *miss_sbt_stride_B =
         nir_load_ray_miss_sbt_stride_intel(b);
      nir_ssa_def *miss_sbt_offset_B =
         nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
      nir_ssa_def *miss_sbt_addr =
         nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
                     nir_u2u64(b, miss_sbt_offset_B));

      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .hit_group_sr_base_ptr = hit_sbt_addr,
         .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
         .miss_sr_ptr = miss_sbt_addr,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
         .shader_index_multiplier = sbt_stride,
      };
      brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);
      nir_trace_ray_initial_intel(b);
      return true;
   }

   case nir_intrinsic_rt_execute_callable: {
      b->cursor = nir_instr_remove(instr);

      store_resume_addr(b, call);

      /* Byte offset into the callable SBT: callable index (src[0]) times the
       * per-record stride supplied at dispatch time.
       */
      nir_ssa_def *sbt_offset32 =
         nir_imul(b, call->src[0].ssa,
                     nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
      nir_ssa_def *sbt_addr =
         nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
                     nir_u2u64(b, sbt_offset32));
      brw_nir_btd_spawn(b, sbt_addr);
      return true;
   }

   default:
      return false;
   }
}

/** Lower rt_trace_ray and rt_execute_callable intrinsics
 *
 * Runs lower_shader_calls_instr over every instruction in the shader.
 * Returns true if any instruction was lowered.
 */
bool
brw_nir_lower_shader_calls(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader,
                                       lower_shader_calls_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}

/** Creates a trivial return shader
 *
 * This is a callable shader that doesn't really do anything. It just loads
 * the resume address from the stack and does a return.
 */
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[MESA_SHADER_CALLABLE].NirOptions;

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
                                                  nir_options,
                                                  "RT Trivial Return");
   /* Re-parent the shader so its lifetime follows the caller's context. */
   ralloc_steal(mem_ctx, b.shader);
   nir_shader *nir = b.shader;

   /* An empty callable body: lowering returns emits the BTD return. */
   NIR_PASS_V(nir, brw_nir_lower_shader_returns);

   return nir;
}