17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2020 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#ifndef BRW_RT_H
257ec681f3Smrg#define BRW_RT_H
267ec681f3Smrg
277ec681f3Smrg#ifdef __cplusplus
287ec681f3Smrgextern "C" {
297ec681f3Smrg#endif
307ec681f3Smrg
/** Size of a shader group handle in the SBT
 *
 * Vulkan defines shaderGroupHandleSize = 32.
 */
#define BRW_RT_SBT_HANDLE_SIZE 32

/** Where the "push" constants live, expressed as an offset past the start of
 * the RT dispatch globals
 */
#define BRW_RT_PUSH_CONST_OFFSET 128

/** Stride between consecutive entries of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8
397ec681f3Smrg
/* Under Vulkan there are always exactly two BVH levels.  We call them
 * "world" and "object"; the API calls them top and bottom, respectively.
 */
enum brw_rt_bvh_level {
   BRW_RT_BVH_LEVEL_WORLD  = 0,  /* API: top level */
   BRW_RT_BVH_LEVEL_OBJECT = 1,  /* API: bottom level */
};

/* Number of BVH levels enumerated by brw_rt_bvh_level */
#define BRW_RT_MAX_BVH_LEVELS 2
487ec681f3Smrg
/** Types of BVH node
 *
 * The values are not contiguous: 2 is not assigned by this enum.
 *
 * NOTE(review): presumably these match the hardware's node type encoding --
 * confirm against the BSpec before changing any value.
 */
enum brw_rt_bvh_node_type {
   BRW_RT_BVH_NODE_TYPE_INTERNAL   = 0x0,
   BRW_RT_BVH_NODE_TYPE_INSTANCE   = 0x1,
   BRW_RT_BVH_NODE_TYPE_PROCEDURAL = 0x3,
   BRW_RT_BVH_NODE_TYPE_QUAD       = 0x4,
};
557ec681f3Smrg
/** Values reported as the HitKind when triangle geometry is hit
 *
 * These values have to stay in sync with the corresponding SPIR-V enum.
 */
enum brw_rt_hit_kind {
   BRW_RT_HIT_KIND_FRONT_FACE = 0xfe,
   BRW_RT_HIT_KIND_BACK_FACE  = 0xff,
};
647ec681f3Smrg
/** Ray flags
 *
 * A bit mask with one bit per flag.  The bit assignments have to stay in
 * sync with the SPIR-V RayFlags enum.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE                = 1 << 0,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE            = 1 << 1,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT      = 1 << 2,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER     = 1 << 3,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES  = 1 << 4,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1 << 5,
   BRW_RT_RAY_FLAG_CULL_OPAQUE                 = 1 << 6,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE             = 1 << 7,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES              = 1 << 8,
   BRW_RT_RAY_FLAG_SKIP_AABBS                  = 1 << 9,
};
817ec681f3Smrg
/** Layout of the RT scratch memory area
 *
 * All offsets, strides and sizes are expressed in bytes.
 */
struct brw_rt_scratch_layout {
   /** How many stack IDs each DSS has */
   uint32_t stack_ids_per_dss;

   /** Byte offset at which the hardware MemRay stacks begin */
   uint32_t ray_stack_start;

   /** Byte stride from one hardware MemRay stack to the next */
   uint32_t ray_stack_stride;

   /** Byte offset at which the SW stacks begin */
   uint64_t sw_stack_start;

   /** Byte size of the SW stack belonging to one shader invocation */
   uint32_t sw_stack_size;

   /** Byte size of the whole RT scratch memory area */
   uint64_t total_size;
};
1017ec681f3Smrg
/** Parameters handed to the raygen trampoline shader
 *
 * This struct is carefully constructed to be 32B and must be passed to the
 * raygen trampoline shader as inline constant data.
 */
struct brw_rt_raygen_trampoline_params {
   /** GPU address of the RT_DISPATCH_GLOBALS */
   uint64_t rt_disp_globals_addr;

   /** GPU address of the raygen shader's BINDLESS_SHADER_RECORD */
   uint64_t raygen_bsr_addr;

   /** 1 for an indirect dispatch, 0 otherwise */
   uint8_t is_indirect;

   /** Integer log2 of the local group size, one entry per dimension
    *
    * Ray-tracing shaders have no notion of local vs. global workgroup size;
    * all they have is a single 3D launch size.  The raygen trampoline
    * shader is always dispatched with a local workgroup size equal to the
    * SIMD width, but the shape of that workgroup is chosen at dispatch time
    * from the shape of the launch and handed to the trampoline through this
    * field.  (Giving the local workgroup a Z dimension makes no sense when
    * the launch is 2D.)
    *
    * The integer log2 is stored rather than the size itself because
    * non-power-of-two sizes are pointless and shifts are cheaper than
    * division.
    */
   uint8_t local_group_size_log2[3];

   /** Padding that brings the struct up to 32B */
   uint32_t pad[3];
};
1347ec681f3Smrg
/** Size of the "hot zone" in bytes
 *
 * The hot zone is a SW-defined data structure: a single uvec4 holding two
 * pieces of information:
 *
 *  - hotzone.x: Stack offset (in bytes)
 *
 *    The offset (in bytes) into the per-thread scratch space at which the
 *    current shader's stack starts.  The calling shader increments it before
 *    any shader call type instruction, and the resume shader decrements it
 *    again as part of completing the return operation.
 *
 *  - hotzone.yzw: The launch ID associated with the current thread
 *
 *    Inside a bindless shader, all we know is the DSS ID from the hardware
 *    EU and a per-DSS stack ID.  In particular, the three-dimensional launch
 *    ID is lost as soon as we leave the raygen trampoline.
 */
#define BRW_RT_SIZEOF_HOTZONE 16
1557ec681f3Smrg
/* Sizes of the HW-defined memory structures.  From the BSpec "Address
 * Computation for Memory Based Data Structures: Ray and TraversalStack
 * (Async Ray Tracing)":
 *
 *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
 */
#define BRW_RT_SIZEOF_RAY 64
#define BRW_RT_SIZEOF_HIT_INFO 32
#define BRW_RT_SIZEOF_TRAV_STACK 32

/* Size of the stack used by a ray query.  From the BSpec:
 *
 *    syncStackSize = (maxBVHLevels % 2 == 1) ?
 *       (sizeof(HitInfo) * 2 +
 *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
 *       (sizeof(HitInfo) * 2 +
 *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
 *
 * The select only exists to round the result up to 64B.
 */
#define BRW_RT_SIZEOF_RAY_QUERY \
   (2 * BRW_RT_SIZEOF_HIT_INFO + \
    BRW_RT_MAX_BVH_LEVELS * (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) + \
    ((BRW_RT_MAX_BVH_LEVELS % 2) ? 32 : 0))

/* Size of the HW stack: two HitInfo followed by one Ray and one TravStack
 * for every BVH level.
 */
#define BRW_RT_SIZEOF_HW_STACK \
   (2 * BRW_RT_SIZEOF_HIT_INFO + \
    BRW_RT_MAX_BVH_LEVELS * (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK))

/* Mesa-defined region for hit attribute data; it starts right where the HW
 * stack ends.
 */
#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK

/* Stride of one async stack: the HW stack plus the hit attribute data,
 * rounded up to 64B.
 */
#define BRW_RT_ASYNC_STACK_STRIDE \
   ALIGN(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + \
         BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
1927ec681f3Smrg
1937ec681f3Smrgstatic inline void
1947ec681f3Smrgbrw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
1957ec681f3Smrg                              const struct intel_device_info *devinfo,
1967ec681f3Smrg                              uint32_t stack_ids_per_dss,
1977ec681f3Smrg                              uint32_t sw_stack_size)
1987ec681f3Smrg{
1997ec681f3Smrg   layout->stack_ids_per_dss = stack_ids_per_dss;
2007ec681f3Smrg
2017ec681f3Smrg   const uint32_t dss_count = intel_device_info_num_dual_subslices(devinfo);
2027ec681f3Smrg   const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
2037ec681f3Smrg
2047ec681f3Smrg   uint64_t size = 0;
2057ec681f3Smrg
2067ec681f3Smrg   /* The first thing in our scratch area is an array of "hot zones" which
2077ec681f3Smrg    * store the stack offset as well as the launch IDs for each active
2087ec681f3Smrg    * invocation.
2097ec681f3Smrg    */
2107ec681f3Smrg   size += BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
2117ec681f3Smrg
2127ec681f3Smrg   /* Next, we place the HW ray stacks */
2137ec681f3Smrg   assert(size % 64 == 0); /* Cache-line aligned */
2147ec681f3Smrg   assert(size < UINT32_MAX);
2157ec681f3Smrg   layout->ray_stack_start = size;
2167ec681f3Smrg   layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
2177ec681f3Smrg   size += num_stack_ids * layout->ray_stack_stride;
2187ec681f3Smrg
2197ec681f3Smrg   /* Finally, we place the SW stacks for the individual ray-tracing shader
2207ec681f3Smrg    * invocations.  We align these to 64B to ensure that we don't have any
2217ec681f3Smrg    * shared cache lines which could hurt performance.
2227ec681f3Smrg    */
2237ec681f3Smrg   assert(size % 64 == 0);
2247ec681f3Smrg   layout->sw_stack_start = size;
2257ec681f3Smrg   layout->sw_stack_size = ALIGN(sw_stack_size, 64);
2267ec681f3Smrg   size += num_stack_ids * layout->sw_stack_size;
2277ec681f3Smrg
2287ec681f3Smrg   layout->total_size = size;
2297ec681f3Smrg}
2307ec681f3Smrg
2317ec681f3Smrg#ifdef __cplusplus
2327ec681f3Smrg}
2337ec681f3Smrg#endif
2347ec681f3Smrg
2357ec681f3Smrg#endif /* BRW_RT_H */
236