/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
27361fc4cbSmaya
287ec681f3Smrg#include "common/freedreno_guardband.h"
29361fc4cbSmaya#include "tu_private.h"
30361fc4cbSmaya
317ec681f3Smrg#include "ir3/ir3_nir.h"
32361fc4cbSmaya#include "main/menums.h"
33361fc4cbSmaya#include "nir/nir.h"
34361fc4cbSmaya#include "nir/nir_builder.h"
35361fc4cbSmaya#include "spirv/nir_spirv.h"
36361fc4cbSmaya#include "util/debug.h"
37361fc4cbSmaya#include "util/mesa-sha1.h"
38361fc4cbSmaya#include "util/u_atomic.h"
39361fc4cbSmaya#include "vk_format.h"
40361fc4cbSmaya#include "vk_util.h"
41361fc4cbSmaya
42361fc4cbSmaya#include "tu_cs.h"
43361fc4cbSmaya
/* Emit IB that preloads the descriptors that the shader uses */
457ec681f3Smrg
467ec681f3Smrgstatic void
477ec681f3Smrgemit_load_state(struct tu_cs *cs, unsigned opcode, enum a6xx_state_type st,
487ec681f3Smrg                enum a6xx_state_block sb, unsigned base, unsigned offset,
497ec681f3Smrg                unsigned count)
507ec681f3Smrg{
517ec681f3Smrg   /* Note: just emit one packet, even if count overflows NUM_UNIT. It's not
527ec681f3Smrg    * clear if emitting more packets will even help anything. Presumably the
537ec681f3Smrg    * descriptor cache is relatively small, and these packets stop doing
547ec681f3Smrg    * anything when there are too many descriptors.
557ec681f3Smrg    */
567ec681f3Smrg   tu_cs_emit_pkt7(cs, opcode, 3);
577ec681f3Smrg   tu_cs_emit(cs,
587ec681f3Smrg              CP_LOAD_STATE6_0_STATE_TYPE(st) |
597ec681f3Smrg              CP_LOAD_STATE6_0_STATE_SRC(SS6_BINDLESS) |
607ec681f3Smrg              CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
617ec681f3Smrg              CP_LOAD_STATE6_0_NUM_UNIT(MIN2(count, 1024-1)));
627ec681f3Smrg   tu_cs_emit_qw(cs, offset | (base << 28));
637ec681f3Smrg}
647ec681f3Smrg
657ec681f3Smrgstatic unsigned
667ec681f3Smrgtu6_load_state_size(struct tu_pipeline *pipeline, bool compute)
677ec681f3Smrg{
687ec681f3Smrg   const unsigned load_state_size = 4;
697ec681f3Smrg   unsigned size = 0;
707ec681f3Smrg   for (unsigned i = 0; i < pipeline->layout->num_sets; i++) {
717ec681f3Smrg      if (!(pipeline->active_desc_sets & (1u << i)))
727ec681f3Smrg         continue;
737ec681f3Smrg
747ec681f3Smrg      struct tu_descriptor_set_layout *set_layout = pipeline->layout->set[i].layout;
757ec681f3Smrg      for (unsigned j = 0; j < set_layout->binding_count; j++) {
767ec681f3Smrg         struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j];
777ec681f3Smrg         unsigned count = 0;
787ec681f3Smrg         /* Note: some users, like amber for example, pass in
797ec681f3Smrg          * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so
807ec681f3Smrg          * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly.
817ec681f3Smrg          */
827ec681f3Smrg         VkShaderStageFlags stages = compute ?
837ec681f3Smrg            binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT :
847ec681f3Smrg            binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
857ec681f3Smrg         unsigned stage_count = util_bitcount(stages);
867ec681f3Smrg
877ec681f3Smrg         if (!binding->array_size)
887ec681f3Smrg            continue;
897ec681f3Smrg
907ec681f3Smrg         switch (binding->type) {
917ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
927ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
937ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
947ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
957ec681f3Smrg            /* IBO-backed resources only need one packet for all graphics stages */
967ec681f3Smrg            if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
977ec681f3Smrg               count += 1;
987ec681f3Smrg            if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
997ec681f3Smrg               count += 1;
1007ec681f3Smrg            break;
1017ec681f3Smrg         case VK_DESCRIPTOR_TYPE_SAMPLER:
1027ec681f3Smrg         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1037ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1047ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1057ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1067ec681f3Smrg            /* Textures and UBO's needs a packet for each stage */
1077ec681f3Smrg            count = stage_count;
1087ec681f3Smrg            break;
1097ec681f3Smrg         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1107ec681f3Smrg            /* Because of how we pack combined images and samplers, we
1117ec681f3Smrg             * currently can't use one packet for the whole array.
1127ec681f3Smrg             */
1137ec681f3Smrg            count = stage_count * binding->array_size * 2;
1147ec681f3Smrg            break;
1157ec681f3Smrg         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
1167ec681f3Smrg         case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
1177ec681f3Smrg            break;
1187ec681f3Smrg         default:
1197ec681f3Smrg            unreachable("bad descriptor type");
1207ec681f3Smrg         }
1217ec681f3Smrg         size += count * load_state_size;
1227ec681f3Smrg      }
1237ec681f3Smrg   }
1247ec681f3Smrg   return size;
1257ec681f3Smrg}
1267ec681f3Smrg
1277ec681f3Smrgstatic void
1287ec681f3Smrgtu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
1297ec681f3Smrg{
1307ec681f3Smrg   unsigned size = tu6_load_state_size(pipeline, compute);
1317ec681f3Smrg   if (size == 0)
1327ec681f3Smrg      return;
1337ec681f3Smrg
1347ec681f3Smrg   struct tu_cs cs;
1357ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs, size, &cs);
1367ec681f3Smrg
1377ec681f3Smrg   struct tu_pipeline_layout *layout = pipeline->layout;
1387ec681f3Smrg   for (unsigned i = 0; i < layout->num_sets; i++) {
1397ec681f3Smrg      /* From 13.2.7. Descriptor Set Binding:
1407ec681f3Smrg       *
1417ec681f3Smrg       *    A compatible descriptor set must be bound for all set numbers that
1427ec681f3Smrg       *    any shaders in a pipeline access, at the time that a draw or
1437ec681f3Smrg       *    dispatch command is recorded to execute using that pipeline.
1447ec681f3Smrg       *    However, if none of the shaders in a pipeline statically use any
1457ec681f3Smrg       *    bindings with a particular set number, then no descriptor set need
1467ec681f3Smrg       *    be bound for that set number, even if the pipeline layout includes
1477ec681f3Smrg       *    a non-trivial descriptor set layout for that set number.
1487ec681f3Smrg       *
1497ec681f3Smrg       * This means that descriptor sets unused by the pipeline may have a
1507ec681f3Smrg       * garbage or 0 BINDLESS_BASE register, which will cause context faults
1517ec681f3Smrg       * when prefetching descriptors from these sets. Skip prefetching for
1527ec681f3Smrg       * descriptors from them to avoid this. This is also an optimization,
1537ec681f3Smrg       * since these prefetches would be useless.
1547ec681f3Smrg       */
1557ec681f3Smrg      if (!(pipeline->active_desc_sets & (1u << i)))
1567ec681f3Smrg         continue;
1577ec681f3Smrg
1587ec681f3Smrg      struct tu_descriptor_set_layout *set_layout = layout->set[i].layout;
1597ec681f3Smrg      for (unsigned j = 0; j < set_layout->binding_count; j++) {
1607ec681f3Smrg         struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j];
1617ec681f3Smrg         unsigned base = i;
1627ec681f3Smrg         unsigned offset = binding->offset / 4;
1637ec681f3Smrg         /* Note: some users, like amber for example, pass in
1647ec681f3Smrg          * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so
1657ec681f3Smrg          * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly.
1667ec681f3Smrg          */
1677ec681f3Smrg         VkShaderStageFlags stages = compute ?
1687ec681f3Smrg            binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT :
1697ec681f3Smrg            binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
1707ec681f3Smrg         unsigned count = binding->array_size;
1717ec681f3Smrg         if (count == 0 || stages == 0)
1727ec681f3Smrg            continue;
1737ec681f3Smrg         switch (binding->type) {
1747ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1757ec681f3Smrg            base = MAX_SETS;
1767ec681f3Smrg            offset = (layout->set[i].dynamic_offset_start +
1777ec681f3Smrg                      binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
1787ec681f3Smrg            FALLTHROUGH;
1797ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1807ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1817ec681f3Smrg         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1827ec681f3Smrg            /* IBO-backed resources only need one packet for all graphics stages */
1837ec681f3Smrg            if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
1847ec681f3Smrg               emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO,
1857ec681f3Smrg                               base, offset, count);
1867ec681f3Smrg            }
1877ec681f3Smrg            if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
1887ec681f3Smrg               emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, SB6_CS_SHADER,
1897ec681f3Smrg                               base, offset, count);
1907ec681f3Smrg            }
1917ec681f3Smrg            break;
1927ec681f3Smrg         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
1937ec681f3Smrg         case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
1947ec681f3Smrg            /* nothing - input attachment doesn't use bindless */
1957ec681f3Smrg            break;
1967ec681f3Smrg         case VK_DESCRIPTOR_TYPE_SAMPLER:
1977ec681f3Smrg         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1987ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
1997ec681f3Smrg            tu_foreach_stage(stage, stages) {
2007ec681f3Smrg               emit_load_state(&cs, tu6_stage2opcode(stage),
2017ec681f3Smrg                               binding->type == VK_DESCRIPTOR_TYPE_SAMPLER ?
2027ec681f3Smrg                               ST6_SHADER : ST6_CONSTANTS,
2037ec681f3Smrg                               tu6_stage2texsb(stage), base, offset, count);
2047ec681f3Smrg            }
2057ec681f3Smrg            break;
2067ec681f3Smrg         }
2077ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2087ec681f3Smrg            base = MAX_SETS;
2097ec681f3Smrg            offset = (layout->set[i].dynamic_offset_start +
2107ec681f3Smrg                      binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
2117ec681f3Smrg            FALLTHROUGH;
2127ec681f3Smrg         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
2137ec681f3Smrg            tu_foreach_stage(stage, stages) {
2147ec681f3Smrg               emit_load_state(&cs, tu6_stage2opcode(stage), ST6_UBO,
2157ec681f3Smrg                               tu6_stage2shadersb(stage), base, offset, count);
2167ec681f3Smrg            }
2177ec681f3Smrg            break;
2187ec681f3Smrg         }
2197ec681f3Smrg         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2207ec681f3Smrg            tu_foreach_stage(stage, stages) {
2217ec681f3Smrg               /* TODO: We could emit less CP_LOAD_STATE6 if we used
2227ec681f3Smrg                * struct-of-arrays instead of array-of-structs.
2237ec681f3Smrg                */
2247ec681f3Smrg               for (unsigned i = 0; i < count; i++) {
2257ec681f3Smrg                  unsigned tex_offset = offset + 2 * i * A6XX_TEX_CONST_DWORDS;
2267ec681f3Smrg                  unsigned sam_offset = offset + (2 * i + 1) * A6XX_TEX_CONST_DWORDS;
2277ec681f3Smrg                  emit_load_state(&cs, tu6_stage2opcode(stage),
2287ec681f3Smrg                                  ST6_CONSTANTS, tu6_stage2texsb(stage),
2297ec681f3Smrg                                  base, tex_offset, 1);
2307ec681f3Smrg                  emit_load_state(&cs, tu6_stage2opcode(stage),
2317ec681f3Smrg                                  ST6_SHADER, tu6_stage2texsb(stage),
2327ec681f3Smrg                                  base, sam_offset, 1);
2337ec681f3Smrg               }
2347ec681f3Smrg            }
2357ec681f3Smrg            break;
2367ec681f3Smrg         }
2377ec681f3Smrg         default:
2387ec681f3Smrg            unreachable("bad descriptor type");
2397ec681f3Smrg         }
2407ec681f3Smrg      }
2417ec681f3Smrg   }
2427ec681f3Smrg
2437ec681f3Smrg   pipeline->load_state = tu_cs_end_draw_state(&pipeline->cs, &cs);
2447ec681f3Smrg}
2457ec681f3Smrg
/* Working state used while creating a graphics pipeline from a
 * VkGraphicsPipelineCreateInfo.
 */
struct tu_pipeline_builder
{
   struct tu_device *device;
   struct tu_pipeline_cache *cache;
   struct tu_pipeline_layout *layout;
   const VkAllocationCallbacks *alloc;
   const VkGraphicsPipelineCreateInfo *create_info;

   /* Per-stage arrays, one slot for every stage up to and including
    * MESA_SHADER_FRAGMENT.
    */
   struct tu_shader *shaders[MESA_SHADER_FRAGMENT + 1];
   struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1];
   struct ir3_shader_variant *binning_variant;
   uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1];
   uint64_t binning_vs_iova;

   uint32_t additional_cs_reserve_size;

   struct tu_pvtmem_config pvtmem;

   bool rasterizer_discard;
   /* these states are affected by rasterizer_discard */
   bool emit_msaa_state;
   VkSampleCountFlagBits samples;
   bool use_color_attachments;
   bool use_dual_src_blend;
   bool alpha_to_coverage;
   uint32_t color_attachment_count;
   VkFormat color_attachment_formats[MAX_RTS];
   VkFormat depth_attachment_format;
   uint32_t render_components;
   uint32_t multiview_mask;
};
277361fc4cbSmaya
278361fc4cbSmayastatic bool
279361fc4cbSmayatu_logic_op_reads_dst(VkLogicOp op)
280361fc4cbSmaya{
281361fc4cbSmaya   switch (op) {
282361fc4cbSmaya   case VK_LOGIC_OP_CLEAR:
283361fc4cbSmaya   case VK_LOGIC_OP_COPY:
284361fc4cbSmaya   case VK_LOGIC_OP_COPY_INVERTED:
285361fc4cbSmaya   case VK_LOGIC_OP_SET:
286361fc4cbSmaya      return false;
287361fc4cbSmaya   default:
288361fc4cbSmaya      return true;
289361fc4cbSmaya   }
290361fc4cbSmaya}
291361fc4cbSmaya
292361fc4cbSmayastatic VkBlendFactor
293361fc4cbSmayatu_blend_factor_no_dst_alpha(VkBlendFactor factor)
294361fc4cbSmaya{
295361fc4cbSmaya   /* treat dst alpha as 1.0 and avoid reading it */
296361fc4cbSmaya   switch (factor) {
297361fc4cbSmaya   case VK_BLEND_FACTOR_DST_ALPHA:
298361fc4cbSmaya      return VK_BLEND_FACTOR_ONE;
299361fc4cbSmaya   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
300361fc4cbSmaya      return VK_BLEND_FACTOR_ZERO;
301361fc4cbSmaya   default:
302361fc4cbSmaya      return factor;
303361fc4cbSmaya   }
304361fc4cbSmaya}
305361fc4cbSmaya
3067ec681f3Smrgstatic bool tu_blend_factor_is_dual_src(VkBlendFactor factor)
3077ec681f3Smrg{
3087ec681f3Smrg   switch (factor) {
3097ec681f3Smrg   case VK_BLEND_FACTOR_SRC1_COLOR:
3107ec681f3Smrg   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
3117ec681f3Smrg   case VK_BLEND_FACTOR_SRC1_ALPHA:
3127ec681f3Smrg   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
3137ec681f3Smrg      return true;
314361fc4cbSmaya   default:
3157ec681f3Smrg      return false;
316361fc4cbSmaya   }
317361fc4cbSmaya}
318361fc4cbSmaya
3197ec681f3Smrgstatic bool
3207ec681f3Smrgtu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info)
321361fc4cbSmaya{
3227ec681f3Smrg   if (!info)
3237ec681f3Smrg      return false;
3247ec681f3Smrg
3257ec681f3Smrg   for (unsigned i = 0; i < info->attachmentCount; i++) {
3267ec681f3Smrg      const VkPipelineColorBlendAttachmentState *blend = &info->pAttachments[i];
3277ec681f3Smrg      if (tu_blend_factor_is_dual_src(blend->srcColorBlendFactor) ||
3287ec681f3Smrg          tu_blend_factor_is_dual_src(blend->dstColorBlendFactor) ||
3297ec681f3Smrg          tu_blend_factor_is_dual_src(blend->srcAlphaBlendFactor) ||
3307ec681f3Smrg          tu_blend_factor_is_dual_src(blend->dstAlphaBlendFactor))
3317ec681f3Smrg         return true;
332361fc4cbSmaya   }
333361fc4cbSmaya
3347ec681f3Smrg   return false;
3357ec681f3Smrg}
3367ec681f3Smrg
3377ec681f3Smrgstatic const struct xs_config {
3387ec681f3Smrg   uint16_t reg_sp_xs_ctrl;
3397ec681f3Smrg   uint16_t reg_sp_xs_config;
3407ec681f3Smrg   uint16_t reg_sp_xs_instrlen;
3417ec681f3Smrg   uint16_t reg_hlsq_xs_ctrl;
3427ec681f3Smrg   uint16_t reg_sp_xs_first_exec_offset;
3437ec681f3Smrg   uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
3447ec681f3Smrg} xs_config[] = {
3457ec681f3Smrg   [MESA_SHADER_VERTEX] = {
3467ec681f3Smrg      REG_A6XX_SP_VS_CTRL_REG0,
3477ec681f3Smrg      REG_A6XX_SP_VS_CONFIG,
3487ec681f3Smrg      REG_A6XX_SP_VS_INSTRLEN,
3497ec681f3Smrg      REG_A6XX_HLSQ_VS_CNTL,
3507ec681f3Smrg      REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
3517ec681f3Smrg      REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
3527ec681f3Smrg   },
3537ec681f3Smrg   [MESA_SHADER_TESS_CTRL] = {
3547ec681f3Smrg      REG_A6XX_SP_HS_CTRL_REG0,
3557ec681f3Smrg      REG_A6XX_SP_HS_CONFIG,
3567ec681f3Smrg      REG_A6XX_SP_HS_INSTRLEN,
3577ec681f3Smrg      REG_A6XX_HLSQ_HS_CNTL,
3587ec681f3Smrg      REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
3597ec681f3Smrg      REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
3607ec681f3Smrg   },
3617ec681f3Smrg   [MESA_SHADER_TESS_EVAL] = {
3627ec681f3Smrg      REG_A6XX_SP_DS_CTRL_REG0,
3637ec681f3Smrg      REG_A6XX_SP_DS_CONFIG,
3647ec681f3Smrg      REG_A6XX_SP_DS_INSTRLEN,
3657ec681f3Smrg      REG_A6XX_HLSQ_DS_CNTL,
3667ec681f3Smrg      REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
3677ec681f3Smrg      REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
3687ec681f3Smrg   },
3697ec681f3Smrg   [MESA_SHADER_GEOMETRY] = {
3707ec681f3Smrg      REG_A6XX_SP_GS_CTRL_REG0,
3717ec681f3Smrg      REG_A6XX_SP_GS_CONFIG,
3727ec681f3Smrg      REG_A6XX_SP_GS_INSTRLEN,
3737ec681f3Smrg      REG_A6XX_HLSQ_GS_CNTL,
3747ec681f3Smrg      REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
3757ec681f3Smrg      REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
3767ec681f3Smrg   },
3777ec681f3Smrg   [MESA_SHADER_FRAGMENT] = {
3787ec681f3Smrg      REG_A6XX_SP_FS_CTRL_REG0,
3797ec681f3Smrg      REG_A6XX_SP_FS_CONFIG,
3807ec681f3Smrg      REG_A6XX_SP_FS_INSTRLEN,
3817ec681f3Smrg      REG_A6XX_HLSQ_FS_CNTL,
3827ec681f3Smrg      REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
3837ec681f3Smrg      REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
3847ec681f3Smrg   },
3857ec681f3Smrg   [MESA_SHADER_COMPUTE] = {
3867ec681f3Smrg      REG_A6XX_SP_CS_CTRL_REG0,
3877ec681f3Smrg      REG_A6XX_SP_CS_CONFIG,
3887ec681f3Smrg      REG_A6XX_SP_CS_INSTRLEN,
3897ec681f3Smrg      REG_A6XX_HLSQ_CS_CNTL,
3907ec681f3Smrg      REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
3917ec681f3Smrg      REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
3927ec681f3Smrg   },
3937ec681f3Smrg};
3947ec681f3Smrg
3957ec681f3Smrgstatic uint32_t
3967ec681f3Smrgtu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs)
397361fc4cbSmaya{
3987ec681f3Smrg   const struct ir3_const_state *const_state = ir3_const_state(xs);
3997ec681f3Smrg   uint32_t base = const_state->offsets.immediate;
4007ec681f3Smrg   int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4);
4017ec681f3Smrg
4027ec681f3Smrg   /* truncate size to avoid writing constants that shader
4037ec681f3Smrg    * does not use:
4047ec681f3Smrg    */
4057ec681f3Smrg   size = MIN2(size + base, xs->constlen) - base;
4067ec681f3Smrg
4077ec681f3Smrg   return MAX2(size, 0) * 4;
408361fc4cbSmaya}
409361fc4cbSmaya
/* Shader state is written into fixed-length substreams, but some parts of
 * that state have no fixed upper bound on their length. The extra space
 * such parts need for variant "xs" is computed here; currently that is
 * only the immediates packet payload.
 */
static uint32_t
tu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant *xs)
{
   return tu_xs_get_immediates_packet_size_dwords(xs);
}
420361fc4cbSmaya
4217ec681f3Smrgvoid
4227ec681f3Smrgtu6_emit_xs_config(struct tu_cs *cs,
4237ec681f3Smrg                   gl_shader_stage stage, /* xs->type, but xs may be NULL */
4247ec681f3Smrg                   const struct ir3_shader_variant *xs)
425361fc4cbSmaya{
4267ec681f3Smrg   const struct xs_config *cfg = &xs_config[stage];
4277ec681f3Smrg
4287ec681f3Smrg   if (!xs) {
4297ec681f3Smrg      /* shader stage disabled */
4307ec681f3Smrg      tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
4317ec681f3Smrg      tu_cs_emit(cs, 0);
4327ec681f3Smrg
4337ec681f3Smrg      tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
4347ec681f3Smrg      tu_cs_emit(cs, 0);
4357ec681f3Smrg      return;
436361fc4cbSmaya   }
4377ec681f3Smrg
4387ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
4397ec681f3Smrg   tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
4407ec681f3Smrg                  COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
4417ec681f3Smrg                  COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
4427ec681f3Smrg                  COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
4437ec681f3Smrg                  COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
4447ec681f3Smrg                  A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
4457ec681f3Smrg                  A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));
4467ec681f3Smrg
4477ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
4487ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
4497ec681f3Smrg                  A6XX_HLSQ_VS_CNTL_ENABLED);
450361fc4cbSmaya}
451361fc4cbSmaya
/* Emit the per-stage shader state for variant "xs" of stage "stage".
 *
 * In order, this writes the stage's CTRL_REG0, its instruction length, the
 * binary address together with the private memory layout from "pvtmem",
 * a load-state packet for the shader binary at "binary_iova", the
 * immediates, and - when the variant has a constant-data UBO - that UBO's
 * descriptor plus any of its ranges that are uploaded to the const file.
 *
 * Nothing is emitted when "xs" is NULL (stage disabled).
 */
void
tu6_emit_xs(struct tu_cs *cs,
            gl_shader_stage stage, /* xs->type, but xs may be NULL */
            const struct ir3_shader_variant *xs,
            const struct tu_pvtmem_config *pvtmem,
            uint64_t binary_iova)
{
   const struct xs_config *cfg = &xs_config[stage];

   if (!xs) {
      /* shader stage disabled */
      return;
   }

   /* Only the fragment and compute cases below program a thread size. */
   enum a6xx_threadsize thrsz =
      xs->info.double_threadsize ? THREAD128 : THREAD64;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
      ));
      break;
   case MESA_SHADER_TESS_CTRL:
      tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
      ));
      break;
   case MESA_SHADER_TESS_EVAL:
      tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
      ));
      break;
   case MESA_SHADER_GEOMETRY:
      tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
      ));
      break;
   case MESA_SHADER_FRAGMENT:
      tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
               .threadsize = thrsz,
               .pixlodenable = xs->need_pixlod,
               .diff_fine = xs->need_fine_derivatives,
               .varying = xs->total_in != 0,
               /* unknown bit, seems unnecessary */
               .unk24 = true,
      ));
      break;
   case MESA_SHADER_COMPUTE:
      tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
               .threadsize = thrsz,
      ));
      break;
   default:
      unreachable("bad shader stage");
   }

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1);
   tu_cs_emit(cs, xs->instrlen);

   /* emit program binary & private memory layout
    * binary_iova should be aligned to 1 instrlen unit (128 bytes)
    */

   assert((binary_iova & 0x7f) == 0);
   assert((pvtmem->iova & 0x1f) == 0);

   /* One 7-dword register write starting at the stage's FIRST_EXEC_OFFSET
    * register: a zero dword, the binary address, the per-fiber private
    * memory size, the private memory address, and the per-SP size with the
    * per-wave layout flag.
    */
   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_first_exec_offset, 7);
   tu_cs_emit(cs, 0);
   tu_cs_emit_qw(cs, binary_iova);
   tu_cs_emit(cs,
              A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(pvtmem->per_fiber_size));
   tu_cs_emit_qw(cs, pvtmem->iova);
   tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(pvtmem->per_sp_size) |
                  COND(pvtmem->per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1);
   tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size));

   /* Load the shader binary indirectly from binary_iova, instrlen units. */
   tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                  CP_LOAD_STATE6_0_NUM_UNIT(xs->instrlen));
   tu_cs_emit_qw(cs, binary_iova);

   /* emit immediates */

   const struct ir3_const_state *const_state = ir3_const_state(xs);
   uint32_t base = const_state->offsets.immediate;
   unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);

   if (immediate_size > 0) {
      /* Direct load: the immediate dwords follow inline in the packet, so
       * the packet length is 3 + immediate_size and NUM_UNIT counts groups
       * of four dwords.
       */
      tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size);
      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                 CP_LOAD_STATE6_0_NUM_UNIT(immediate_size / 4));
      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

      tu_cs_emit_array(cs, const_state->immediates, immediate_size);
   }

   /* -1 means the variant has no constant-data UBO. */
   if (const_state->constant_data_ubo != -1) {
      /* The constant data is addressed relative to the shader binary. */
      uint64_t iova = binary_iova + xs->info.constant_data_offset;

      /* Upload UBO state for the constant data. */
      tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
      tu_cs_emit(cs,
                 CP_LOAD_STATE6_0_DST_OFF(const_state->constant_data_ubo) |
                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                 CP_LOAD_STATE6_0_NUM_UNIT(1));
      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
      /* The inline descriptor is the address with the size, in 16-byte
       * units, placed in the upper dword.
       */
      int size_vec4s = DIV_ROUND_UP(xs->constant_data_size, 16);
      tu_cs_emit_qw(cs,
                    iova |
                    (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32);

      /* Upload the constant data to the const file if needed. */
      const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;

      for (int i = 0; i < ubo_state->num_enabled; i++) {
         /* Only non-bindless ranges of the constant-data UBO are handled
          * here.
          */
         if (ubo_state->range[i].ubo.block != const_state->constant_data_ubo ||
             ubo_state->range[i].ubo.bindless) {
            continue;
         }

         /* Clamp the range so the upload does not go past constlen. */
         uint32_t start = ubo_state->range[i].start;
         uint32_t end = ubo_state->range[i].end;
         uint32_t size = MIN2(end - start,
                              (16 * xs->constlen) - ubo_state->range[i].offset);

         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
         tu_cs_emit(cs,
                    CP_LOAD_STATE6_0_DST_OFF(ubo_state->range[i].offset / 16) |
                    CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                    CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                    CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                    CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
         tu_cs_emit_qw(cs, iova + start);
      }
   }
}
618361fc4cbSmaya
619361fc4cbSmayastatic void
6207ec681f3Smrgtu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
6217ec681f3Smrg                   const struct ir3_shader_variant *v,
6227ec681f3Smrg                   const struct tu_pvtmem_config *pvtmem,
6237ec681f3Smrg                   uint64_t binary_iova)
6247ec681f3Smrg{
6257ec681f3Smrg   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
6267ec681f3Smrg         .cs_state = true,
6277ec681f3Smrg         .cs_ibo = true));
6287ec681f3Smrg
6297ec681f3Smrg   tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
6307ec681f3Smrg   tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
6317ec681f3Smrg
6327ec681f3Smrg   uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
6337ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
6347ec681f3Smrg   tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
6357ec681f3Smrg                  A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
6367ec681f3Smrg
6377ec681f3Smrg   if (cs->device->physical_device->info->a6xx.has_lpac) {
6387ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
6397ec681f3Smrg      tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
6407ec681f3Smrg                     A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
6417ec681f3Smrg   }
642361fc4cbSmaya
6437ec681f3Smrg   uint32_t local_invocation_id =
6447ec681f3Smrg      ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
6457ec681f3Smrg   uint32_t work_group_id =
6467ec681f3Smrg      ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID);
647361fc4cbSmaya
6487ec681f3Smrg   enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
6497ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CNTL_0, 2);
6507ec681f3Smrg   tu_cs_emit(cs,
6517ec681f3Smrg              A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
6527ec681f3Smrg              A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
6537ec681f3Smrg              A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
6547ec681f3Smrg              A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
6557ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
6567ec681f3Smrg                  A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
6577ec681f3Smrg
6587ec681f3Smrg   if (cs->device->physical_device->info->a6xx.has_lpac) {
6597ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_0, 2);
6607ec681f3Smrg      tu_cs_emit(cs,
6617ec681f3Smrg                 A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
6627ec681f3Smrg                 A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
6637ec681f3Smrg                 A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
6647ec681f3Smrg                 A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
6657ec681f3Smrg      tu_cs_emit(cs, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
6667ec681f3Smrg                     A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
6677ec681f3Smrg   }
668361fc4cbSmaya}
669361fc4cbSmaya
670361fc4cbSmayastatic void
6717ec681f3Smrgtu6_emit_vs_system_values(struct tu_cs *cs,
6727ec681f3Smrg                          const struct ir3_shader_variant *vs,
6737ec681f3Smrg                          const struct ir3_shader_variant *hs,
6747ec681f3Smrg                          const struct ir3_shader_variant *ds,
6757ec681f3Smrg                          const struct ir3_shader_variant *gs,
6767ec681f3Smrg                          bool primid_passthru)
677361fc4cbSmaya{
6787ec681f3Smrg   const uint32_t vertexid_regid =
6797ec681f3Smrg         ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
6807ec681f3Smrg   const uint32_t instanceid_regid =
6817ec681f3Smrg         ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
6827ec681f3Smrg   const uint32_t tess_coord_x_regid = hs ?
6837ec681f3Smrg         ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD) :
6847ec681f3Smrg         regid(63, 0);
6857ec681f3Smrg   const uint32_t tess_coord_y_regid = VALIDREG(tess_coord_x_regid) ?
6867ec681f3Smrg         tess_coord_x_regid + 1 :
6877ec681f3Smrg         regid(63, 0);
6887ec681f3Smrg   const uint32_t hs_rel_patch_regid = hs ?
6897ec681f3Smrg         ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
6907ec681f3Smrg         regid(63, 0);
6917ec681f3Smrg   const uint32_t ds_rel_patch_regid = hs ?
6927ec681f3Smrg         ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
6937ec681f3Smrg         regid(63, 0);
6947ec681f3Smrg   const uint32_t hs_invocation_regid = hs ?
6957ec681f3Smrg         ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3) :
6967ec681f3Smrg         regid(63, 0);
6977ec681f3Smrg   const uint32_t gs_primitiveid_regid = gs ?
6987ec681f3Smrg         ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) :
6997ec681f3Smrg         regid(63, 0);
7007ec681f3Smrg   const uint32_t vs_primitiveid_regid = hs ?
7017ec681f3Smrg         ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
7027ec681f3Smrg         gs_primitiveid_regid;
7037ec681f3Smrg   const uint32_t ds_primitiveid_regid = ds ?
7047ec681f3Smrg         ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
7057ec681f3Smrg         regid(63, 0);
7067ec681f3Smrg   const uint32_t gsheader_regid = gs ?
7077ec681f3Smrg         ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3) :
7087ec681f3Smrg         regid(63, 0);
7097ec681f3Smrg
7107ec681f3Smrg   /* Note: we currently don't support multiview with tess or GS. If we did,
7117ec681f3Smrg    * and the HW actually works, then we'd have to somehow share this across
7127ec681f3Smrg    * stages. Note that the blob doesn't support this either.
7137ec681f3Smrg    */
7147ec681f3Smrg   const uint32_t viewid_regid =
7157ec681f3Smrg      ir3_find_sysval_regid(vs, SYSTEM_VALUE_VIEW_INDEX);
716361fc4cbSmaya
7177ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6);
7187ec681f3Smrg   tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) |
7197ec681f3Smrg                  A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) |
7207ec681f3Smrg                  A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) |
7217ec681f3Smrg                  A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid));
7227ec681f3Smrg   tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
7237ec681f3Smrg                  A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
7247ec681f3Smrg   tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
7257ec681f3Smrg                  A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
7267ec681f3Smrg                  A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
7277ec681f3Smrg                  A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid));
7287ec681f3Smrg   tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
7297ec681f3Smrg   tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
7307ec681f3Smrg                  0xfc00); /* VFD_CONTROL_5 */
7317ec681f3Smrg   tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */
732361fc4cbSmaya}
733361fc4cbSmaya
734361fc4cbSmayastatic void
7357ec681f3Smrgtu6_setup_streamout(struct tu_cs *cs,
7367ec681f3Smrg                    const struct ir3_shader_variant *v,
7377ec681f3Smrg                    struct ir3_shader_linkage *l)
738361fc4cbSmaya{
7397ec681f3Smrg   const struct ir3_stream_output_info *info = &v->shader->stream_output;
7407ec681f3Smrg   /* Note: 64 here comes from the HW layout of the program RAM. The program
7417ec681f3Smrg    * for stream N is at DWORD 64 * N.
7427ec681f3Smrg    */
7437ec681f3Smrg#define A6XX_SO_PROG_DWORDS 64
7447ec681f3Smrg   uint32_t prog[A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS] = {};
7457ec681f3Smrg   BITSET_DECLARE(valid_dwords, A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) = {0};
7467ec681f3Smrg   uint32_t ncomp[IR3_MAX_SO_BUFFERS] = {};
747361fc4cbSmaya
7487ec681f3Smrg   /* TODO: streamout state should be in a non-GMEM draw state */
7497ec681f3Smrg
7507ec681f3Smrg   /* no streamout: */
7517ec681f3Smrg   if (info->num_outputs == 0) {
7527ec681f3Smrg      tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4);
7537ec681f3Smrg      tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
7547ec681f3Smrg      tu_cs_emit(cs, 0);
7557ec681f3Smrg      tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL);
7567ec681f3Smrg      tu_cs_emit(cs, 0);
7577ec681f3Smrg      return;
7587ec681f3Smrg   }
7597ec681f3Smrg
7607ec681f3Smrg   /* is there something to do with info->stride[i]? */
761361fc4cbSmaya
7627ec681f3Smrg   for (unsigned i = 0; i < info->num_outputs; i++) {
7637ec681f3Smrg      const struct ir3_stream_output *out = &info->output[i];
7647ec681f3Smrg      unsigned k = out->register_index;
7657ec681f3Smrg      unsigned idx;
766361fc4cbSmaya
7677ec681f3Smrg      /* Skip it, if it's an output that was never assigned a register. */
7687ec681f3Smrg      if (k >= v->outputs_count || v->outputs[k].regid == INVALID_REG)
7697ec681f3Smrg         continue;
7707ec681f3Smrg
7717ec681f3Smrg      ncomp[out->output_buffer] += out->num_components;
7727ec681f3Smrg
7737ec681f3Smrg      /* linkage map sorted by order frag shader wants things, so
7747ec681f3Smrg       * a bit less ideal here..
7757ec681f3Smrg       */
7767ec681f3Smrg      for (idx = 0; idx < l->cnt; idx++)
7777ec681f3Smrg         if (l->var[idx].regid == v->outputs[k].regid)
7787ec681f3Smrg            break;
7797ec681f3Smrg
7807ec681f3Smrg      debug_assert(idx < l->cnt);
7817ec681f3Smrg
7827ec681f3Smrg      for (unsigned j = 0; j < out->num_components; j++) {
7837ec681f3Smrg         unsigned c   = j + out->start_component;
7847ec681f3Smrg         unsigned loc = l->var[idx].loc + c;
7857ec681f3Smrg         unsigned off = j + out->dst_offset;  /* in dwords */
7867ec681f3Smrg
7877ec681f3Smrg         assert(loc < A6XX_SO_PROG_DWORDS * 2);
7887ec681f3Smrg         unsigned dword = out->stream * A6XX_SO_PROG_DWORDS + loc/2;
7897ec681f3Smrg         if (loc & 1) {
7907ec681f3Smrg            prog[dword] |= A6XX_VPC_SO_PROG_B_EN |
7917ec681f3Smrg                           A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
7927ec681f3Smrg                           A6XX_VPC_SO_PROG_B_OFF(off * 4);
7937ec681f3Smrg         } else {
7947ec681f3Smrg            prog[dword] |= A6XX_VPC_SO_PROG_A_EN |
7957ec681f3Smrg                           A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
7967ec681f3Smrg                           A6XX_VPC_SO_PROG_A_OFF(off * 4);
7977ec681f3Smrg         }
7987ec681f3Smrg         BITSET_SET(valid_dwords, dword);
7997ec681f3Smrg      }
8007ec681f3Smrg   }
8017ec681f3Smrg
8027ec681f3Smrg   unsigned prog_count = 0;
8037ec681f3Smrg   unsigned start, end;
8047ec681f3Smrg   BITSET_FOREACH_RANGE(start, end, valid_dwords,
8057ec681f3Smrg                        A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
8067ec681f3Smrg      prog_count += end - start + 1;
8077ec681f3Smrg   }
8087ec681f3Smrg
8097ec681f3Smrg   tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 10 + 2 * prog_count);
8107ec681f3Smrg   tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL);
8117ec681f3Smrg   tu_cs_emit(cs, A6XX_VPC_SO_STREAM_CNTL_STREAM_ENABLE(info->streams_written) |
8127ec681f3Smrg                  COND(ncomp[0] > 0,
8137ec681f3Smrg                       A6XX_VPC_SO_STREAM_CNTL_BUF0_STREAM(1 + info->buffer_to_stream[0])) |
8147ec681f3Smrg                  COND(ncomp[1] > 0,
8157ec681f3Smrg                       A6XX_VPC_SO_STREAM_CNTL_BUF1_STREAM(1 + info->buffer_to_stream[1])) |
8167ec681f3Smrg                  COND(ncomp[2] > 0,
8177ec681f3Smrg                       A6XX_VPC_SO_STREAM_CNTL_BUF2_STREAM(1 + info->buffer_to_stream[2])) |
8187ec681f3Smrg                  COND(ncomp[3] > 0,
8197ec681f3Smrg                       A6XX_VPC_SO_STREAM_CNTL_BUF3_STREAM(1 + info->buffer_to_stream[3])));
8207ec681f3Smrg   for (uint32_t i = 0; i < 4; i++) {
8217ec681f3Smrg      tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(i));
8227ec681f3Smrg      tu_cs_emit(cs, ncomp[i]);
8237ec681f3Smrg   }
8247ec681f3Smrg   bool first = true;
8257ec681f3Smrg   BITSET_FOREACH_RANGE(start, end, valid_dwords,
8267ec681f3Smrg                        A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
8277ec681f3Smrg      tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
8287ec681f3Smrg      tu_cs_emit(cs, COND(first, A6XX_VPC_SO_CNTL_RESET) |
8297ec681f3Smrg                     A6XX_VPC_SO_CNTL_ADDR(start));
8307ec681f3Smrg      for (unsigned i = start; i < end; i++) {
8317ec681f3Smrg         tu_cs_emit(cs, REG_A6XX_VPC_SO_PROG);
8327ec681f3Smrg         tu_cs_emit(cs, prog[i]);
8337ec681f3Smrg      }
8347ec681f3Smrg      first = false;
8357ec681f3Smrg   }
836361fc4cbSmaya}
837361fc4cbSmaya
838361fc4cbSmayastatic void
8397ec681f3Smrgtu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
8407ec681f3Smrg               enum a6xx_state_block block, uint32_t offset,
8417ec681f3Smrg               uint32_t size, const uint32_t *dwords) {
8427ec681f3Smrg   assert(size % 4 == 0);
843361fc4cbSmaya
8447ec681f3Smrg   tu_cs_emit_pkt7(cs, opcode, 3 + size);
8457ec681f3Smrg   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
8467ec681f3Smrg         CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
8477ec681f3Smrg         CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
8487ec681f3Smrg         CP_LOAD_STATE6_0_STATE_BLOCK(block) |
8497ec681f3Smrg         CP_LOAD_STATE6_0_NUM_UNIT(size / 4));
850361fc4cbSmaya
8517ec681f3Smrg   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
8527ec681f3Smrg   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
8537ec681f3Smrg   dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
854361fc4cbSmaya
8557ec681f3Smrg   tu_cs_emit_array(cs, dwords, size);
856361fc4cbSmaya}
857361fc4cbSmaya
858361fc4cbSmayastatic void
8597ec681f3Smrgtu6_emit_link_map(struct tu_cs *cs,
8607ec681f3Smrg                  const struct ir3_shader_variant *producer,
8617ec681f3Smrg                  const struct ir3_shader_variant *consumer,
8627ec681f3Smrg                  enum a6xx_state_block sb)
863361fc4cbSmaya{
8647ec681f3Smrg   const struct ir3_const_state *const_state = ir3_const_state(consumer);
8657ec681f3Smrg   uint32_t base = const_state->offsets.primitive_map;
8667ec681f3Smrg   int size = DIV_ROUND_UP(consumer->input_size, 4);
867361fc4cbSmaya
8687ec681f3Smrg   size = (MIN2(size + base, consumer->constlen) - base) * 4;
8697ec681f3Smrg   if (size <= 0)
8707ec681f3Smrg      return;
8717ec681f3Smrg
8727ec681f3Smrg   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
8737ec681f3Smrg                         producer->output_loc);
874361fc4cbSmaya}
875361fc4cbSmaya
8767ec681f3Smrgstatic uint16_t
8777ec681f3Smrggl_primitive_to_tess(uint16_t primitive) {
8787ec681f3Smrg   switch (primitive) {
8797ec681f3Smrg   case GL_POINTS:
8807ec681f3Smrg      return TESS_POINTS;
8817ec681f3Smrg   case GL_LINE_STRIP:
8827ec681f3Smrg      return TESS_LINES;
8837ec681f3Smrg   case GL_TRIANGLE_STRIP:
8847ec681f3Smrg      return TESS_CW_TRIS;
8857ec681f3Smrg   default:
8867ec681f3Smrg      unreachable("");
8877ec681f3Smrg   }
8887ec681f3Smrg}
8897ec681f3Smrg
8907ec681f3Smrgvoid
891361fc4cbSmayatu6_emit_vpc(struct tu_cs *cs,
892361fc4cbSmaya             const struct ir3_shader_variant *vs,
8937ec681f3Smrg             const struct ir3_shader_variant *hs,
8947ec681f3Smrg             const struct ir3_shader_variant *ds,
8957ec681f3Smrg             const struct ir3_shader_variant *gs,
896361fc4cbSmaya             const struct ir3_shader_variant *fs,
8977ec681f3Smrg             uint32_t patch_control_points)
8987ec681f3Smrg{
8997ec681f3Smrg   /* note: doesn't compile as static because of the array regs.. */
9007ec681f3Smrg   const struct reg_config {
9017ec681f3Smrg      uint16_t reg_sp_xs_out_reg;
9027ec681f3Smrg      uint16_t reg_sp_xs_vpc_dst_reg;
9037ec681f3Smrg      uint16_t reg_vpc_xs_pack;
9047ec681f3Smrg      uint16_t reg_vpc_xs_clip_cntl;
9057ec681f3Smrg      uint16_t reg_gras_xs_cl_cntl;
9067ec681f3Smrg      uint16_t reg_pc_xs_out_cntl;
9077ec681f3Smrg      uint16_t reg_sp_xs_primitive_cntl;
9087ec681f3Smrg      uint16_t reg_vpc_xs_layer_cntl;
9097ec681f3Smrg      uint16_t reg_gras_xs_layer_cntl;
9107ec681f3Smrg   } reg_config[] = {
9117ec681f3Smrg      [MESA_SHADER_VERTEX] = {
9127ec681f3Smrg         REG_A6XX_SP_VS_OUT_REG(0),
9137ec681f3Smrg         REG_A6XX_SP_VS_VPC_DST_REG(0),
9147ec681f3Smrg         REG_A6XX_VPC_VS_PACK,
9157ec681f3Smrg         REG_A6XX_VPC_VS_CLIP_CNTL,
9167ec681f3Smrg         REG_A6XX_GRAS_VS_CL_CNTL,
9177ec681f3Smrg         REG_A6XX_PC_VS_OUT_CNTL,
9187ec681f3Smrg         REG_A6XX_SP_VS_PRIMITIVE_CNTL,
9197ec681f3Smrg         REG_A6XX_VPC_VS_LAYER_CNTL,
9207ec681f3Smrg         REG_A6XX_GRAS_VS_LAYER_CNTL
9217ec681f3Smrg      },
9227ec681f3Smrg      [MESA_SHADER_TESS_CTRL] = {
9237ec681f3Smrg         0,
9247ec681f3Smrg         0,
9257ec681f3Smrg         0,
9267ec681f3Smrg         0,
9277ec681f3Smrg         0,
9287ec681f3Smrg         REG_A6XX_PC_HS_OUT_CNTL,
9297ec681f3Smrg         0,
9307ec681f3Smrg         0,
9317ec681f3Smrg         0
9327ec681f3Smrg      },
9337ec681f3Smrg      [MESA_SHADER_TESS_EVAL] = {
9347ec681f3Smrg         REG_A6XX_SP_DS_OUT_REG(0),
9357ec681f3Smrg         REG_A6XX_SP_DS_VPC_DST_REG(0),
9367ec681f3Smrg         REG_A6XX_VPC_DS_PACK,
9377ec681f3Smrg         REG_A6XX_VPC_DS_CLIP_CNTL,
9387ec681f3Smrg         REG_A6XX_GRAS_DS_CL_CNTL,
9397ec681f3Smrg         REG_A6XX_PC_DS_OUT_CNTL,
9407ec681f3Smrg         REG_A6XX_SP_DS_PRIMITIVE_CNTL,
9417ec681f3Smrg         REG_A6XX_VPC_DS_LAYER_CNTL,
9427ec681f3Smrg         REG_A6XX_GRAS_DS_LAYER_CNTL
9437ec681f3Smrg      },
9447ec681f3Smrg      [MESA_SHADER_GEOMETRY] = {
9457ec681f3Smrg         REG_A6XX_SP_GS_OUT_REG(0),
9467ec681f3Smrg         REG_A6XX_SP_GS_VPC_DST_REG(0),
9477ec681f3Smrg         REG_A6XX_VPC_GS_PACK,
9487ec681f3Smrg         REG_A6XX_VPC_GS_CLIP_CNTL,
9497ec681f3Smrg         REG_A6XX_GRAS_GS_CL_CNTL,
9507ec681f3Smrg         REG_A6XX_PC_GS_OUT_CNTL,
9517ec681f3Smrg         REG_A6XX_SP_GS_PRIMITIVE_CNTL,
9527ec681f3Smrg         REG_A6XX_VPC_GS_LAYER_CNTL,
9537ec681f3Smrg         REG_A6XX_GRAS_GS_LAYER_CNTL
9547ec681f3Smrg      },
9557ec681f3Smrg   };
956361fc4cbSmaya
9577ec681f3Smrg   const struct ir3_shader_variant *last_shader;
9587ec681f3Smrg   if (gs) {
9597ec681f3Smrg      last_shader = gs;
9607ec681f3Smrg   } else if (hs) {
9617ec681f3Smrg      last_shader = ds;
9627ec681f3Smrg   } else {
9637ec681f3Smrg      last_shader = vs;
964361fc4cbSmaya   }
965361fc4cbSmaya
9667ec681f3Smrg   const struct reg_config *cfg = &reg_config[last_shader->type];
9677ec681f3Smrg
9687ec681f3Smrg   struct ir3_shader_linkage linkage = {
9697ec681f3Smrg      .primid_loc = 0xff,
9707ec681f3Smrg      .clip0_loc = 0xff,
9717ec681f3Smrg      .clip1_loc = 0xff,
9727ec681f3Smrg   };
9737ec681f3Smrg   if (fs)
9747ec681f3Smrg      ir3_link_shaders(&linkage, last_shader, fs, true);
9757ec681f3Smrg
9767ec681f3Smrg   if (last_shader->shader->stream_output.num_outputs)
9777ec681f3Smrg      ir3_link_stream_out(&linkage, last_shader);
9787ec681f3Smrg
9797ec681f3Smrg   /* We do this after linking shaders in order to know whether PrimID
9807ec681f3Smrg    * passthrough needs to be enabled.
9817ec681f3Smrg    */
9827ec681f3Smrg   bool primid_passthru = linkage.primid_loc != 0xff;
9837ec681f3Smrg   tu6_emit_vs_system_values(cs, vs, hs, ds, gs, primid_passthru);
9847ec681f3Smrg
985361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
9867ec681f3Smrg   tu_cs_emit(cs, ~linkage.varmask[0]);
9877ec681f3Smrg   tu_cs_emit(cs, ~linkage.varmask[1]);
9887ec681f3Smrg   tu_cs_emit(cs, ~linkage.varmask[2]);
9897ec681f3Smrg   tu_cs_emit(cs, ~linkage.varmask[3]);
990361fc4cbSmaya
991361fc4cbSmaya   /* a6xx finds position/pointsize at the end */
992361fc4cbSmaya   const uint32_t pointsize_regid =
9937ec681f3Smrg      ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ);
9947ec681f3Smrg   const uint32_t layer_regid =
9957ec681f3Smrg      ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER);
9967ec681f3Smrg   const uint32_t view_regid =
9977ec681f3Smrg      ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT);
9987ec681f3Smrg   const uint32_t clip0_regid =
9997ec681f3Smrg      ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
10007ec681f3Smrg   const uint32_t clip1_regid =
10017ec681f3Smrg      ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
10027ec681f3Smrg   uint32_t flags_regid = gs ?
10037ec681f3Smrg      ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
10047ec681f3Smrg
10057ec681f3Smrg   uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff;
10067ec681f3Smrg
10077ec681f3Smrg   if (layer_regid != regid(63, 0)) {
10087ec681f3Smrg      layer_loc = linkage.max_loc;
10097ec681f3Smrg      ir3_link_add(&linkage, layer_regid, 0x1, linkage.max_loc);
10107ec681f3Smrg   }
10117ec681f3Smrg
10127ec681f3Smrg   if (view_regid != regid(63, 0)) {
10137ec681f3Smrg      view_loc = linkage.max_loc;
10147ec681f3Smrg      ir3_link_add(&linkage, view_regid, 0x1, linkage.max_loc);
10157ec681f3Smrg   }
10167ec681f3Smrg
10177ec681f3Smrg   unsigned extra_pos = 0;
10187ec681f3Smrg
10197ec681f3Smrg   for (unsigned i = 0; i < last_shader->outputs_count; i++) {
10207ec681f3Smrg      if (last_shader->outputs[i].slot != VARYING_SLOT_POS)
10217ec681f3Smrg         continue;
10227ec681f3Smrg
10237ec681f3Smrg      if (position_loc == 0xff)
10247ec681f3Smrg         position_loc = linkage.max_loc;
10257ec681f3Smrg
10267ec681f3Smrg      ir3_link_add(&linkage, last_shader->outputs[i].regid,
10277ec681f3Smrg                   0xf, position_loc + 4 * last_shader->outputs[i].view);
10287ec681f3Smrg      extra_pos = MAX2(extra_pos, last_shader->outputs[i].view);
10297ec681f3Smrg   }
10307ec681f3Smrg
1031361fc4cbSmaya   if (pointsize_regid != regid(63, 0)) {
1032361fc4cbSmaya      pointsize_loc = linkage.max_loc;
1033361fc4cbSmaya      ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc);
1034361fc4cbSmaya   }
1035361fc4cbSmaya
10367ec681f3Smrg   uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
10377ec681f3Smrg
10387ec681f3Smrg   /* Handle the case where clip/cull distances aren't read by the FS */
10397ec681f3Smrg   uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
10407ec681f3Smrg   if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
10417ec681f3Smrg      clip0_loc = linkage.max_loc;
10427ec681f3Smrg      ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc);
10437ec681f3Smrg   }
10447ec681f3Smrg   if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
10457ec681f3Smrg      clip1_loc = linkage.max_loc;
10467ec681f3Smrg      ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc);
10477ec681f3Smrg   }
10487ec681f3Smrg
10497ec681f3Smrg   tu6_setup_streamout(cs, last_shader, &linkage);
10507ec681f3Smrg
10517ec681f3Smrg   /* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
10527ec681f3Smrg    * at least when a DS is the last stage, so add a dummy output to keep it
10537ec681f3Smrg    * happy if there aren't any. We do this late in order to avoid emitting
10547ec681f3Smrg    * any unused code and make sure that optimizations don't remove it.
10557ec681f3Smrg    */
10567ec681f3Smrg   if (linkage.cnt == 0)
10577ec681f3Smrg      ir3_link_add(&linkage, 0, 0x1, linkage.max_loc);
10587ec681f3Smrg
10597ec681f3Smrg   /* map outputs of the last shader to VPC */
1060361fc4cbSmaya   assert(linkage.cnt <= 32);
10617ec681f3Smrg   const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
10627ec681f3Smrg   const uint32_t sp_vpc_dst_count = DIV_ROUND_UP(linkage.cnt, 4);
10637ec681f3Smrg   uint32_t sp_out[16] = {0};
10647ec681f3Smrg   uint32_t sp_vpc_dst[8] = {0};
1065361fc4cbSmaya   for (uint32_t i = 0; i < linkage.cnt; i++) {
10667ec681f3Smrg      ((uint16_t *) sp_out)[i] =
1067361fc4cbSmaya         A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) |
1068361fc4cbSmaya         A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask);
10697ec681f3Smrg      ((uint8_t *) sp_vpc_dst)[i] =
1070361fc4cbSmaya         A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc);
1071361fc4cbSmaya   }
1072361fc4cbSmaya
10737ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count);
10747ec681f3Smrg   tu_cs_emit_array(cs, sp_out, sp_out_count);
10757ec681f3Smrg
10767ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count);
10777ec681f3Smrg   tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
10787ec681f3Smrg
10797ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1);
10807ec681f3Smrg   tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
10817ec681f3Smrg                  A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
10827ec681f3Smrg                  A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc) |
10837ec681f3Smrg                  A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos));
10847ec681f3Smrg
10857ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
10867ec681f3Smrg   tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
10877ec681f3Smrg                  A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
10887ec681f3Smrg                  A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
10897ec681f3Smrg
10907ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
10917ec681f3Smrg   tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
10927ec681f3Smrg                  A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
10937ec681f3Smrg
10947ec681f3Smrg   const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
10957ec681f3Smrg
10967ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) {
10977ec681f3Smrg      const struct ir3_shader_variant *shader = geom_shaders[i];
10987ec681f3Smrg      if (!shader)
10997ec681f3Smrg         continue;
11007ec681f3Smrg
11017ec681f3Smrg      bool primid = shader->type != MESA_SHADER_VERTEX &&
11027ec681f3Smrg         VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
11037ec681f3Smrg
11047ec681f3Smrg      tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
11057ec681f3Smrg      if (shader == last_shader) {
11067ec681f3Smrg         tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
11077ec681f3Smrg                        CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
11087ec681f3Smrg                        CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
11097ec681f3Smrg                        CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
11107ec681f3Smrg                        COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
11117ec681f3Smrg                        A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
11127ec681f3Smrg      } else {
11137ec681f3Smrg         tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
11147ec681f3Smrg      }
11157ec681f3Smrg   }
11167ec681f3Smrg
11177ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
11187ec681f3Smrg   tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
11197ec681f3Smrg                  A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
11207ec681f3Smrg
11217ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
11227ec681f3Smrg   tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
11237ec681f3Smrg                  A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc));
1124361fc4cbSmaya
11257ec681f3Smrg   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
11267ec681f3Smrg   tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) |
11277ec681f3Smrg                  CONDREG(view_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_VIEW));
11287ec681f3Smrg
11297ec681f3Smrg   tu_cs_emit_regs(cs, A6XX_PC_PRIMID_PASSTHRU(primid_passthru));
1130361fc4cbSmaya
1131361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
11327ec681f3Smrg   tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs ? fs->total_in : 0) |
11337ec681f3Smrg                  COND(fs && fs->total_in, A6XX_VPC_CNTL_0_VARYING) |
11347ec681f3Smrg                  A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
11357ec681f3Smrg                  A6XX_VPC_CNTL_0_VIEWIDLOC(linkage.viewid_loc));
11367ec681f3Smrg
11377ec681f3Smrg   if (hs) {
11387ec681f3Smrg      shader_info *hs_info = &hs->shader->nir->info;
11397ec681f3Smrg
11407ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
11417ec681f3Smrg      tu_cs_emit(cs, hs_info->tess.tcs_vertices_out);
11427ec681f3Smrg
11437ec681f3Smrg      /* Total attribute slots in HS incoming patch. */
11447ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
11457ec681f3Smrg      tu_cs_emit(cs, patch_control_points * vs->output_size / 4);
11467ec681f3Smrg
11477ec681f3Smrg      const uint32_t wavesize = 64;
11487ec681f3Smrg      const uint32_t max_wave_input_size = 64;
11497ec681f3Smrg
11507ec681f3Smrg      /* note: if HS is really just the VS extended, then this
11517ec681f3Smrg       * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
11527ec681f3Smrg       * however that doesn't match the blob, and fails some dEQP tests.
11537ec681f3Smrg       */
11547ec681f3Smrg      uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
11557ec681f3Smrg      uint32_t max_prims_per_wave =
11567ec681f3Smrg         max_wave_input_size * wavesize / (vs->output_size * patch_control_points);
11577ec681f3Smrg      prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);
11587ec681f3Smrg
11597ec681f3Smrg      uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
11607ec681f3Smrg      uint32_t wave_input_size = DIV_ROUND_UP(total_size, wavesize);
11617ec681f3Smrg
11627ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
11637ec681f3Smrg      tu_cs_emit(cs, wave_input_size);
11647ec681f3Smrg
      /* In SPIR-V generated from GLSL, the tessellation primitive params are
       * specified in the tess eval shader, but in SPIR-V generated from
       * HLSL, they are specified in the tess control shader. */
11687ec681f3Smrg      shader_info *tess_info =
11697ec681f3Smrg            ds->shader->nir->info.tess.spacing == TESS_SPACING_UNSPECIFIED ?
11707ec681f3Smrg            &hs->shader->nir->info : &ds->shader->nir->info;
11717ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_CNTL, 1);
11727ec681f3Smrg      uint32_t output;
11737ec681f3Smrg      if (tess_info->tess.point_mode)
11747ec681f3Smrg         output = TESS_POINTS;
11757ec681f3Smrg      else if (tess_info->tess.primitive_mode == GL_ISOLINES)
11767ec681f3Smrg         output = TESS_LINES;
11777ec681f3Smrg      else if (tess_info->tess.ccw)
11787ec681f3Smrg         output = TESS_CCW_TRIS;
11797ec681f3Smrg      else
11807ec681f3Smrg         output = TESS_CW_TRIS;
11817ec681f3Smrg
11827ec681f3Smrg      enum a6xx_tess_spacing spacing;
11837ec681f3Smrg      switch (tess_info->tess.spacing) {
11847ec681f3Smrg      case TESS_SPACING_EQUAL:
11857ec681f3Smrg         spacing = TESS_EQUAL;
11867ec681f3Smrg         break;
11877ec681f3Smrg      case TESS_SPACING_FRACTIONAL_ODD:
11887ec681f3Smrg         spacing = TESS_FRACTIONAL_ODD;
11897ec681f3Smrg         break;
11907ec681f3Smrg      case TESS_SPACING_FRACTIONAL_EVEN:
11917ec681f3Smrg         spacing = TESS_FRACTIONAL_EVEN;
11927ec681f3Smrg         break;
11937ec681f3Smrg      case TESS_SPACING_UNSPECIFIED:
11947ec681f3Smrg      default:
11957ec681f3Smrg         unreachable("invalid tess spacing");
11967ec681f3Smrg      }
11977ec681f3Smrg      tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) |
11987ec681f3Smrg            A6XX_PC_TESS_CNTL_OUTPUT(output));
11997ec681f3Smrg
12007ec681f3Smrg      tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
12017ec681f3Smrg      tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
12027ec681f3Smrg   }
1203361fc4cbSmaya
1204361fc4cbSmaya
12057ec681f3Smrg   if (gs) {
12067ec681f3Smrg      uint32_t vertices_out, invocations, output, vec4_size;
12077ec681f3Smrg      uint32_t prev_stage_output_size = ds ? ds->output_size : vs->output_size;
1208361fc4cbSmaya
12097ec681f3Smrg      /* this detects the tu_clear_blit path, which doesn't set ->nir */
12107ec681f3Smrg      if (gs->shader->nir) {
12117ec681f3Smrg         if (hs) {
12127ec681f3Smrg            tu6_emit_link_map(cs, ds, gs, SB6_GS_SHADER);
12137ec681f3Smrg         } else {
12147ec681f3Smrg            tu6_emit_link_map(cs, vs, gs, SB6_GS_SHADER);
12157ec681f3Smrg         }
12167ec681f3Smrg         vertices_out = gs->shader->nir->info.gs.vertices_out - 1;
12177ec681f3Smrg         output = gl_primitive_to_tess(gs->shader->nir->info.gs.output_primitive);
12187ec681f3Smrg         invocations = gs->shader->nir->info.gs.invocations - 1;
12197ec681f3Smrg         /* Size of per-primitive allocation in ldlw memory in vec4s. */
12207ec681f3Smrg         vec4_size = gs->shader->nir->info.gs.vertices_in *
12217ec681f3Smrg                     DIV_ROUND_UP(prev_stage_output_size, 4);
12227ec681f3Smrg      } else {
12237ec681f3Smrg         vertices_out = 3;
12247ec681f3Smrg         output = TESS_CW_TRIS;
12257ec681f3Smrg         invocations = 0;
12267ec681f3Smrg         vec4_size = 0;
12277ec681f3Smrg      }
1228361fc4cbSmaya
12297ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
12307ec681f3Smrg      tu_cs_emit(cs,
12317ec681f3Smrg            A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) |
12327ec681f3Smrg            A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
12337ec681f3Smrg            A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations));
12347ec681f3Smrg
12357ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PARAM, 1);
12367ec681f3Smrg      tu_cs_emit(cs, 0xff);
12377ec681f3Smrg
12387ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
12397ec681f3Smrg      tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
12407ec681f3Smrg
12417ec681f3Smrg      uint32_t prim_size = prev_stage_output_size;
12427ec681f3Smrg      if (prim_size > 64)
12437ec681f3Smrg         prim_size = 64;
12447ec681f3Smrg      else if (prim_size == 64)
12457ec681f3Smrg         prim_size = 63;
12467ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
12477ec681f3Smrg      tu_cs_emit(cs, prim_size);
12487ec681f3Smrg   }
1249361fc4cbSmaya}
1250361fc4cbSmaya
1251361fc4cbSmayastatic int
1252361fc4cbSmayatu6_vpc_varying_mode(const struct ir3_shader_variant *fs,
1253361fc4cbSmaya                     uint32_t index,
1254361fc4cbSmaya                     uint8_t *interp_mode,
1255361fc4cbSmaya                     uint8_t *ps_repl_mode)
1256361fc4cbSmaya{
1257361fc4cbSmaya   enum
1258361fc4cbSmaya   {
1259361fc4cbSmaya      INTERP_SMOOTH = 0,
1260361fc4cbSmaya      INTERP_FLAT = 1,
1261361fc4cbSmaya      INTERP_ZERO = 2,
1262361fc4cbSmaya      INTERP_ONE = 3,
1263361fc4cbSmaya   };
1264361fc4cbSmaya   enum
1265361fc4cbSmaya   {
1266361fc4cbSmaya      PS_REPL_NONE = 0,
1267361fc4cbSmaya      PS_REPL_S = 1,
1268361fc4cbSmaya      PS_REPL_T = 2,
1269361fc4cbSmaya      PS_REPL_ONE_MINUS_T = 3,
1270361fc4cbSmaya   };
1271361fc4cbSmaya
1272361fc4cbSmaya   const uint32_t compmask = fs->inputs[index].compmask;
1273361fc4cbSmaya
1274361fc4cbSmaya   /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and
1275361fc4cbSmaya    * fourth component occupy three consecutive varying slots
1276361fc4cbSmaya    */
1277361fc4cbSmaya   int shift = 0;
1278361fc4cbSmaya   *interp_mode = 0;
1279361fc4cbSmaya   *ps_repl_mode = 0;
1280361fc4cbSmaya   if (fs->inputs[index].slot == VARYING_SLOT_PNTC) {
1281361fc4cbSmaya      if (compmask & 0x1) {
1282361fc4cbSmaya         *ps_repl_mode |= PS_REPL_S << shift;
1283361fc4cbSmaya         shift += 2;
1284361fc4cbSmaya      }
1285361fc4cbSmaya      if (compmask & 0x2) {
1286361fc4cbSmaya         *ps_repl_mode |= PS_REPL_T << shift;
1287361fc4cbSmaya         shift += 2;
1288361fc4cbSmaya      }
1289361fc4cbSmaya      if (compmask & 0x4) {
1290361fc4cbSmaya         *interp_mode |= INTERP_ZERO << shift;
1291361fc4cbSmaya         shift += 2;
1292361fc4cbSmaya      }
1293361fc4cbSmaya      if (compmask & 0x8) {
1294361fc4cbSmaya         *interp_mode |= INTERP_ONE << 6;
1295361fc4cbSmaya         shift += 2;
1296361fc4cbSmaya      }
12977ec681f3Smrg   } else if (fs->inputs[index].flat) {
1298361fc4cbSmaya      for (int i = 0; i < 4; i++) {
1299361fc4cbSmaya         if (compmask & (1 << i)) {
1300361fc4cbSmaya            *interp_mode |= INTERP_FLAT << shift;
1301361fc4cbSmaya            shift += 2;
1302361fc4cbSmaya         }
1303361fc4cbSmaya      }
1304361fc4cbSmaya   }
1305361fc4cbSmaya
1306361fc4cbSmaya   return shift;
1307361fc4cbSmaya}
1308361fc4cbSmaya
1309361fc4cbSmayastatic void
1310361fc4cbSmayatu6_emit_vpc_varying_modes(struct tu_cs *cs,
13117ec681f3Smrg                           const struct ir3_shader_variant *fs)
1312361fc4cbSmaya{
1313361fc4cbSmaya   uint32_t interp_modes[8] = { 0 };
1314361fc4cbSmaya   uint32_t ps_repl_modes[8] = { 0 };
1315361fc4cbSmaya
13167ec681f3Smrg   if (fs) {
1317361fc4cbSmaya      for (int i = -1;
1318361fc4cbSmaya           (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) {
1319361fc4cbSmaya
1320361fc4cbSmaya         /* get the mode for input i */
1321361fc4cbSmaya         uint8_t interp_mode;
1322361fc4cbSmaya         uint8_t ps_repl_mode;
1323361fc4cbSmaya         const int bits =
1324361fc4cbSmaya            tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode);
1325361fc4cbSmaya
1326361fc4cbSmaya         /* OR the mode into the array */
1327361fc4cbSmaya         const uint32_t inloc = fs->inputs[i].inloc * 2;
1328361fc4cbSmaya         uint32_t n = inloc / 32;
1329361fc4cbSmaya         uint32_t shift = inloc % 32;
1330361fc4cbSmaya         interp_modes[n] |= interp_mode << shift;
1331361fc4cbSmaya         ps_repl_modes[n] |= ps_repl_mode << shift;
1332361fc4cbSmaya         if (shift + bits > 32) {
1333361fc4cbSmaya            n++;
1334361fc4cbSmaya            shift = 32 - shift;
1335361fc4cbSmaya
1336361fc4cbSmaya            interp_modes[n] |= interp_mode >> shift;
1337361fc4cbSmaya            ps_repl_modes[n] |= ps_repl_mode >> shift;
1338361fc4cbSmaya         }
1339361fc4cbSmaya      }
1340361fc4cbSmaya   }
1341361fc4cbSmaya
1342361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
1343361fc4cbSmaya   tu_cs_emit_array(cs, interp_modes, 8);
1344361fc4cbSmaya
1345361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
1346361fc4cbSmaya   tu_cs_emit_array(cs, ps_repl_modes, 8);
1347361fc4cbSmaya}
1348361fc4cbSmaya
13497ec681f3Smrgvoid
1350361fc4cbSmayatu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
1351361fc4cbSmaya{
13527ec681f3Smrg   uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
13537ec681f3Smrg   uint32_t ij_regid[IJ_COUNT];
13547ec681f3Smrg   uint32_t smask_in_regid;
13557ec681f3Smrg
13567ec681f3Smrg   bool sample_shading = fs->per_samp | fs->key.sample_shading;
13577ec681f3Smrg   bool enable_varyings = fs->total_in > 0;
13587ec681f3Smrg
13597ec681f3Smrg   samp_id_regid   = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID);
13607ec681f3Smrg   smask_in_regid  = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN);
13617ec681f3Smrg   face_regid      = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
13627ec681f3Smrg   coord_regid     = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
13637ec681f3Smrg   zwcoord_regid   = VALIDREG(coord_regid) ? coord_regid + 2 : regid(63, 0);
13647ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
13657ec681f3Smrg      ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
13667ec681f3Smrg
13677ec681f3Smrg   if (fs->num_sampler_prefetch > 0) {
13687ec681f3Smrg      assert(VALIDREG(ij_regid[IJ_PERSP_PIXEL]));
13697ec681f3Smrg      /* also, it seems like ij_pix is *required* to be r0.x */
13707ec681f3Smrg      assert(ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
13717ec681f3Smrg   }
1372361fc4cbSmaya
13737ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
13747ec681f3Smrg   tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
13757ec681f3Smrg         A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
13767ec681f3Smrg         0x7000);    // XXX);
13777ec681f3Smrg   for (int i = 0; i < fs->num_sampler_prefetch; i++) {
13787ec681f3Smrg      const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
13797ec681f3Smrg      tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
13807ec681f3Smrg                     A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) |
13817ec681f3Smrg                     A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) |
13827ec681f3Smrg                     A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
13837ec681f3Smrg                     A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
13847ec681f3Smrg                     COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
13857ec681f3Smrg                     A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
13867ec681f3Smrg   }
1387361fc4cbSmaya
13887ec681f3Smrg   if (fs->num_sampler_prefetch > 0) {
13897ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_BINDLESS_PREFETCH_CMD(0), fs->num_sampler_prefetch);
13907ec681f3Smrg      for (int i = 0; i < fs->num_sampler_prefetch; i++) {
13917ec681f3Smrg         const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
13927ec681f3Smrg         tu_cs_emit(cs,
13937ec681f3Smrg                    A6XX_SP_FS_BINDLESS_PREFETCH_CMD_SAMP_ID(prefetch->samp_bindless_id) |
13947ec681f3Smrg                    A6XX_SP_FS_BINDLESS_PREFETCH_CMD_TEX_ID(prefetch->tex_bindless_id));
13957ec681f3Smrg      }
13967ec681f3Smrg   }
1397361fc4cbSmaya
13987ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
13997ec681f3Smrg   tu_cs_emit(cs, 0x7);
14007ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
14017ec681f3Smrg                  A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
14027ec681f3Smrg                  A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
14037ec681f3Smrg                  A6XX_HLSQ_CONTROL_2_REG_SIZE(ij_regid[IJ_PERSP_SIZE]));
14047ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
14057ec681f3Smrg                  A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
14067ec681f3Smrg                  A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) |
14077ec681f3Smrg                  A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(ij_regid[IJ_LINEAR_CENTROID]));
14087ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
14097ec681f3Smrg                  A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
14107ec681f3Smrg                  A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
14117ec681f3Smrg                  A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
14127ec681f3Smrg   tu_cs_emit(cs, 0xfcfc);
14137ec681f3Smrg
14147ec681f3Smrg   enum a6xx_threadsize thrsz = fs->info.double_threadsize ? THREAD128 : THREAD64;
14157ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL_0, 1);
14167ec681f3Smrg   tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz) |
14177ec681f3Smrg                  COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS));
14187ec681f3Smrg
14197ec681f3Smrg   bool need_size = fs->frag_face || fs->fragcoord_compmask != 0;
14207ec681f3Smrg   bool need_size_persamp = false;
14217ec681f3Smrg   if (VALIDREG(ij_regid[IJ_PERSP_SIZE])) {
14227ec681f3Smrg      if (sample_shading)
14237ec681f3Smrg         need_size_persamp = true;
14247ec681f3Smrg      else
14257ec681f3Smrg         need_size = true;
1426361fc4cbSmaya   }
1427361fc4cbSmaya
1428361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1);
14297ec681f3Smrg   tu_cs_emit(cs,
14307ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
14317ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
14327ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) |
14337ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
14347ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CNTL_IJ_LINEAR_CENTROID) |
14357ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
14367ec681f3Smrg         COND(need_size, A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
14377ec681f3Smrg         COND(need_size_persamp, A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
14387ec681f3Smrg         COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)));
1439361fc4cbSmaya
1440361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2);
14417ec681f3Smrg   tu_cs_emit(cs,
14427ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
14437ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
14447ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) |
14457ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
14467ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_CENTROID) |
14477ec681f3Smrg         CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
14487ec681f3Smrg         COND(need_size, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
14497ec681f3Smrg         COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
14507ec681f3Smrg         COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
14517ec681f3Smrg         COND(fs->fragcoord_compmask != 0,
14527ec681f3Smrg                           A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)));
14537ec681f3Smrg   tu_cs_emit(cs,
14547ec681f3Smrg         A6XX_RB_RENDER_CONTROL1_FRAGCOORDSAMPLEMODE(
14557ec681f3Smrg            sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) |
14567ec681f3Smrg         CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
14577ec681f3Smrg         CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
14587ec681f3Smrg         CONDREG(ij_regid[IJ_PERSP_SIZE], A6XX_RB_RENDER_CONTROL1_SIZE) |
14597ec681f3Smrg         COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
14607ec681f3Smrg
14617ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1);
14627ec681f3Smrg   tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
14637ec681f3Smrg
14647ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1);
14657ec681f3Smrg   tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) |
14667ec681f3Smrg              A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE(
14677ec681f3Smrg                 sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER));
14687ec681f3Smrg
14697ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
14707ec681f3Smrg   tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
14717ec681f3Smrg}
1472361fc4cbSmaya
1473361fc4cbSmayastatic void
1474361fc4cbSmayatu6_emit_fs_outputs(struct tu_cs *cs,
1475361fc4cbSmaya                    const struct ir3_shader_variant *fs,
14767ec681f3Smrg                    uint32_t mrt_count, bool dual_src_blend,
14777ec681f3Smrg                    uint32_t render_components,
14787ec681f3Smrg                    bool no_earlyz,
14797ec681f3Smrg                    struct tu_pipeline *pipeline)
1480361fc4cbSmaya{
14817ec681f3Smrg   uint32_t smask_regid, posz_regid, stencilref_regid;
14827ec681f3Smrg
14837ec681f3Smrg   posz_regid      = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
14847ec681f3Smrg   smask_regid     = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
14857ec681f3Smrg   stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);
14867ec681f3Smrg
1487361fc4cbSmaya   uint32_t fragdata_regid[8];
1488361fc4cbSmaya   if (fs->color0_mrt) {
1489361fc4cbSmaya      fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR);
1490361fc4cbSmaya      for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++)
1491361fc4cbSmaya         fragdata_regid[i] = fragdata_regid[0];
1492361fc4cbSmaya   } else {
1493361fc4cbSmaya      for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++)
1494361fc4cbSmaya         fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i);
1495361fc4cbSmaya   }
1496361fc4cbSmaya
1497361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
14987ec681f3Smrg   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
14997ec681f3Smrg                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
15007ec681f3Smrg                  A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
15017ec681f3Smrg                  COND(dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
1502361fc4cbSmaya   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));
1503361fc4cbSmaya
15047ec681f3Smrg   uint32_t fs_render_components = 0;
15057ec681f3Smrg
1506361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
1507361fc4cbSmaya   for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) {
1508361fc4cbSmaya      tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) |
15097ec681f3Smrg                     (COND(fragdata_regid[i] & HALF_REG_ID,
15107ec681f3Smrg                           A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)));
15117ec681f3Smrg
15127ec681f3Smrg      if (VALIDREG(fragdata_regid[i])) {
15137ec681f3Smrg         fs_render_components |= 0xf << (i * 4);
15147ec681f3Smrg      }
15157ec681f3Smrg   }
15167ec681f3Smrg
15177ec681f3Smrg   /* dual source blending has an extra fs output in the 2nd slot */
15187ec681f3Smrg   if (dual_src_blend) {
15197ec681f3Smrg      fs_render_components |= 0xf << 4;
1520361fc4cbSmaya   }
1521361fc4cbSmaya
15227ec681f3Smrg   /* There is no point in having component enabled which is not written
15237ec681f3Smrg    * by the shader. Per VK spec it is an UB, however a few apps depend on
15247ec681f3Smrg    * attachment not being changed if FS doesn't have corresponding output.
15257ec681f3Smrg    */
15267ec681f3Smrg   fs_render_components &= render_components;
15277ec681f3Smrg
15287ec681f3Smrg   tu_cs_emit_regs(cs,
15297ec681f3Smrg                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = fs_render_components));
15307ec681f3Smrg
1531361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
15327ec681f3Smrg   tu_cs_emit(cs, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
15337ec681f3Smrg                  COND(fs->writes_smask, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
15347ec681f3Smrg                  COND(fs->writes_stencilref, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
15357ec681f3Smrg                  COND(dual_src_blend, A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
1536361fc4cbSmaya   tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count));
1537361fc4cbSmaya
15387ec681f3Smrg   tu_cs_emit_regs(cs,
15397ec681f3Smrg                   A6XX_RB_RENDER_COMPONENTS(.dword = fs_render_components));
15407ec681f3Smrg
15417ec681f3Smrg   if (pipeline) {
15427ec681f3Smrg      pipeline->lrz.fs_has_kill = fs->has_kill;
15437ec681f3Smrg      pipeline->lrz.early_fragment_tests = fs->shader->nir->info.fs.early_fragment_tests;
15447ec681f3Smrg
15457ec681f3Smrg      if ((fs->shader && !fs->shader->nir->info.fs.early_fragment_tests) &&
15467ec681f3Smrg          (fs->no_earlyz || fs->has_kill || fs->writes_pos || fs->writes_stencilref || no_earlyz || fs->writes_smask)) {
15477ec681f3Smrg         pipeline->lrz.force_late_z = true;
15487ec681f3Smrg      }
1549361fc4cbSmaya   }
15507ec681f3Smrg}
1551361fc4cbSmaya
15527ec681f3Smrgstatic void
15537ec681f3Smrgtu6_emit_geom_tess_consts(struct tu_cs *cs,
15547ec681f3Smrg                          const struct ir3_shader_variant *vs,
15557ec681f3Smrg                          const struct ir3_shader_variant *hs,
15567ec681f3Smrg                          const struct ir3_shader_variant *ds,
15577ec681f3Smrg                          const struct ir3_shader_variant *gs,
15587ec681f3Smrg                          uint32_t cps_per_patch)
15597ec681f3Smrg{
15607ec681f3Smrg   uint32_t num_vertices =
15617ec681f3Smrg         hs ? cps_per_patch : gs->shader->nir->info.gs.vertices_in;
15627ec681f3Smrg
15637ec681f3Smrg   uint32_t vs_params[4] = {
15647ec681f3Smrg      vs->output_size * num_vertices * 4,  /* vs primitive stride */
15657ec681f3Smrg      vs->output_size * 4,                 /* vs vertex stride */
15667ec681f3Smrg      0,
15677ec681f3Smrg      0,
15687ec681f3Smrg   };
15697ec681f3Smrg   uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param;
15707ec681f3Smrg   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0,
15717ec681f3Smrg                  ARRAY_SIZE(vs_params), vs_params);
15727ec681f3Smrg
15737ec681f3Smrg   if (hs) {
15747ec681f3Smrg      assert(ds->type != MESA_SHADER_NONE);
15757ec681f3Smrg      uint32_t hs_params[4] = {
15767ec681f3Smrg         vs->output_size * num_vertices * 4,  /* hs primitive stride */
15777ec681f3Smrg         vs->output_size * 4,                 /* hs vertex stride */
15787ec681f3Smrg         hs->output_size,
15797ec681f3Smrg         cps_per_patch,
15807ec681f3Smrg      };
15817ec681f3Smrg
15827ec681f3Smrg      uint32_t hs_base = hs->const_state->offsets.primitive_param;
15837ec681f3Smrg      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
15847ec681f3Smrg                     ARRAY_SIZE(hs_params), hs_params);
15857ec681f3Smrg      if (gs)
15867ec681f3Smrg         num_vertices = gs->shader->nir->info.gs.vertices_in;
15877ec681f3Smrg
15887ec681f3Smrg      uint32_t ds_params[4] = {
15897ec681f3Smrg         ds->output_size * num_vertices * 4,  /* ds primitive stride */
15907ec681f3Smrg         ds->output_size * 4,                 /* ds vertex stride */
15917ec681f3Smrg         hs->output_size,                     /* hs vertex stride (dwords) */
15927ec681f3Smrg         hs->shader->nir->info.tess.tcs_vertices_out
15937ec681f3Smrg      };
15947ec681f3Smrg
15957ec681f3Smrg      uint32_t ds_base = ds->const_state->offsets.primitive_param;
15967ec681f3Smrg      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0,
15977ec681f3Smrg                     ARRAY_SIZE(ds_params), ds_params);
15987ec681f3Smrg   }
1599361fc4cbSmaya
16007ec681f3Smrg   if (gs) {
16017ec681f3Smrg      const struct ir3_shader_variant *prev = ds ? ds : vs;
16027ec681f3Smrg      uint32_t gs_params[4] = {
16037ec681f3Smrg         prev->output_size * num_vertices * 4,  /* gs primitive stride */
16047ec681f3Smrg         prev->output_size * 4,                 /* gs vertex stride */
16057ec681f3Smrg         0,
16067ec681f3Smrg         0,
16077ec681f3Smrg      };
16087ec681f3Smrg      uint32_t gs_base = gs->const_state->offsets.primitive_param;
16097ec681f3Smrg      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
16107ec681f3Smrg                     ARRAY_SIZE(gs_params), gs_params);
16117ec681f3Smrg   }
1612361fc4cbSmaya}
1613361fc4cbSmaya
1614361fc4cbSmayastatic void
16157ec681f3Smrgtu6_emit_program_config(struct tu_cs *cs,
16167ec681f3Smrg                        struct tu_pipeline_builder *builder)
16177ec681f3Smrg{
16187ec681f3Smrg   gl_shader_stage stage = MESA_SHADER_VERTEX;
16197ec681f3Smrg
16207ec681f3Smrg   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
16217ec681f3Smrg
16227ec681f3Smrg   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
16237ec681f3Smrg         .vs_state = true,
16247ec681f3Smrg         .hs_state = true,
16257ec681f3Smrg         .ds_state = true,
16267ec681f3Smrg         .gs_state = true,
16277ec681f3Smrg         .fs_state = true,
16287ec681f3Smrg         .gfx_ibo = true));
16297ec681f3Smrg   for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
16307ec681f3Smrg      tu6_emit_xs_config(cs, stage, builder->variants[stage]);
1631361fc4cbSmaya   }
16327ec681f3Smrg}
1633361fc4cbSmaya
16347ec681f3Smrgstatic void
16357ec681f3Smrgtu6_emit_program(struct tu_cs *cs,
16367ec681f3Smrg                 struct tu_pipeline_builder *builder,
16377ec681f3Smrg                 bool binning_pass,
16387ec681f3Smrg                 struct tu_pipeline *pipeline)
16397ec681f3Smrg{
16407ec681f3Smrg   const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
16417ec681f3Smrg   const struct ir3_shader_variant *bs = builder->binning_variant;
16427ec681f3Smrg   const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
16437ec681f3Smrg   const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
16447ec681f3Smrg   const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
16457ec681f3Smrg   const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
16467ec681f3Smrg   gl_shader_stage stage = MESA_SHADER_VERTEX;
16477ec681f3Smrg   uint32_t cps_per_patch = builder->create_info->pTessellationState ?
16487ec681f3Smrg      builder->create_info->pTessellationState->patchControlPoints : 0;
16497ec681f3Smrg   bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output;
16507ec681f3Smrg
16517ec681f3Smrg  /* Don't use the binning pass variant when GS is present because we don't
16527ec681f3Smrg   * support compiling correct binning pass variants with GS.
16537ec681f3Smrg   */
16547ec681f3Smrg   if (binning_pass && !gs) {
16557ec681f3Smrg      vs = bs;
16567ec681f3Smrg      tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
16577ec681f3Smrg      stage++;
1658361fc4cbSmaya   }
1659361fc4cbSmaya
16607ec681f3Smrg   for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
16617ec681f3Smrg      const struct ir3_shader_variant *xs = builder->variants[stage];
1662361fc4cbSmaya
16637ec681f3Smrg      if (stage == MESA_SHADER_FRAGMENT && binning_pass)
16647ec681f3Smrg         fs = xs = NULL;
1665361fc4cbSmaya
16667ec681f3Smrg      tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
16677ec681f3Smrg   }
16687ec681f3Smrg
16697ec681f3Smrg   uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1;
16707ec681f3Smrg   uint32_t multiview_cntl = builder->multiview_mask ?
16717ec681f3Smrg      A6XX_PC_MULTIVIEW_CNTL_ENABLE |
16727ec681f3Smrg      A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) |
16737ec681f3Smrg      COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS)
16747ec681f3Smrg      : 0;
1675361fc4cbSmaya
16767ec681f3Smrg   /* Copy what the blob does here. This will emit an extra 0x3f
16777ec681f3Smrg    * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
16787ec681f3Smrg    * this is working around yet.
16797ec681f3Smrg    */
16807ec681f3Smrg   if (builder->device->physical_device->info->a6xx.has_cp_reg_write) {
16817ec681f3Smrg      tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
16827ec681f3Smrg      tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
16837ec681f3Smrg      tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
1684361fc4cbSmaya   } else {
16857ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
16867ec681f3Smrg   }
16877ec681f3Smrg   tu_cs_emit(cs, multiview_cntl);
1688361fc4cbSmaya
16897ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MULTIVIEW_CNTL, 1);
16907ec681f3Smrg   tu_cs_emit(cs, multiview_cntl);
16917ec681f3Smrg
16927ec681f3Smrg   if (multiview_cntl &&
16937ec681f3Smrg       builder->device->physical_device->info->a6xx.supports_multiview_mask) {
16947ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1);
16957ec681f3Smrg      tu_cs_emit(cs, builder->multiview_mask);
1696361fc4cbSmaya   }
1697361fc4cbSmaya
16987ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
16997ec681f3Smrg   tu_cs_emit(cs, 0);
17007ec681f3Smrg
17017ec681f3Smrg   tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch);
17027ec681f3Smrg   tu6_emit_vpc_varying_modes(cs, fs);
17037ec681f3Smrg
17047ec681f3Smrg   bool no_earlyz = builder->depth_attachment_format == VK_FORMAT_S8_UINT;
17057ec681f3Smrg   uint32_t mrt_count = builder->color_attachment_count;
17067ec681f3Smrg   uint32_t render_components = builder->render_components;
17077ec681f3Smrg
17087ec681f3Smrg   if (builder->alpha_to_coverage) {
17097ec681f3Smrg      /* alpha to coverage can behave like a discard */
17107ec681f3Smrg      no_earlyz = true;
17117ec681f3Smrg      /* alpha value comes from first mrt */
17127ec681f3Smrg      render_components |= 0xf;
17137ec681f3Smrg      if (!mrt_count) {
17147ec681f3Smrg         mrt_count = 1;
17157ec681f3Smrg         /* Disable memory write for dummy mrt because it doesn't get set otherwise */
17167ec681f3Smrg         tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = 0));
17177ec681f3Smrg      }
17187ec681f3Smrg   }
17197ec681f3Smrg
17207ec681f3Smrg   if (fs) {
17217ec681f3Smrg      tu6_emit_fs_inputs(cs, fs);
17227ec681f3Smrg      tu6_emit_fs_outputs(cs, fs, mrt_count,
17237ec681f3Smrg                          builder->use_dual_src_blend,
17247ec681f3Smrg                          render_components,
17257ec681f3Smrg                          no_earlyz,
17267ec681f3Smrg                          pipeline);
17277ec681f3Smrg   } else {
17287ec681f3Smrg      /* TODO: check if these can be skipped if fs is disabled */
17297ec681f3Smrg      struct ir3_shader_variant dummy_variant = {};
17307ec681f3Smrg      tu6_emit_fs_inputs(cs, &dummy_variant);
17317ec681f3Smrg      tu6_emit_fs_outputs(cs, &dummy_variant, mrt_count,
17327ec681f3Smrg                          builder->use_dual_src_blend,
17337ec681f3Smrg                          render_components,
17347ec681f3Smrg                          no_earlyz,
17357ec681f3Smrg                          NULL);
17367ec681f3Smrg   }
17377ec681f3Smrg
17387ec681f3Smrg   if (gs || hs) {
17397ec681f3Smrg      tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
17407ec681f3Smrg   }
1741361fc4cbSmaya}
1742361fc4cbSmaya
1743361fc4cbSmayastatic void
17447ec681f3Smrgtu6_emit_vertex_input(struct tu_pipeline *pipeline,
17457ec681f3Smrg                      struct tu_cs *cs,
1746361fc4cbSmaya                      const struct ir3_shader_variant *vs,
17477ec681f3Smrg                      const VkPipelineVertexInputStateCreateInfo *info)
1748361fc4cbSmaya{
1749361fc4cbSmaya   uint32_t vfd_decode_idx = 0;
17507ec681f3Smrg   uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
17517ec681f3Smrg   uint32_t step_rate[MAX_VBS];
1752361fc4cbSmaya
17537ec681f3Smrg   for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
17547ec681f3Smrg      const VkVertexInputBindingDescription *binding =
17557ec681f3Smrg         &info->pVertexBindingDescriptions[i];
17567ec681f3Smrg
17577ec681f3Smrg      if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
17587ec681f3Smrg         tu_cs_emit_regs(cs,
17597ec681f3Smrg                        A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
17607ec681f3Smrg      }
1761361fc4cbSmaya
17627ec681f3Smrg      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
17637ec681f3Smrg         binding_instanced |= 1 << binding->binding;
1764361fc4cbSmaya
17657ec681f3Smrg      step_rate[binding->binding] = 1;
17667ec681f3Smrg   }
1767361fc4cbSmaya
17687ec681f3Smrg   const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state =
17697ec681f3Smrg      vk_find_struct_const(info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
17707ec681f3Smrg   if (div_state) {
17717ec681f3Smrg      for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) {
17727ec681f3Smrg         const VkVertexInputBindingDivisorDescriptionEXT *desc =
17737ec681f3Smrg            &div_state->pVertexBindingDivisors[i];
17747ec681f3Smrg         step_rate[desc->binding] = desc->divisor;
17757ec681f3Smrg      }
17767ec681f3Smrg   }
1777361fc4cbSmaya
17787ec681f3Smrg   /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
1779361fc4cbSmaya
17807ec681f3Smrg   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
17817ec681f3Smrg      const VkVertexInputAttributeDescription *attr =
17827ec681f3Smrg         &info->pVertexAttributeDescriptions[i];
17837ec681f3Smrg      uint32_t input_idx;
1784361fc4cbSmaya
17857ec681f3Smrg      for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
17867ec681f3Smrg         if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
17877ec681f3Smrg            break;
17887ec681f3Smrg      }
1789361fc4cbSmaya
17907ec681f3Smrg      /* attribute not used, skip it */
17917ec681f3Smrg      if (input_idx == vs->inputs_count)
17927ec681f3Smrg         continue;
1793361fc4cbSmaya
17947ec681f3Smrg      const struct tu_native_format format = tu6_format_vtx(attr->format);
17957ec681f3Smrg      tu_cs_emit_regs(cs,
17967ec681f3Smrg                      A6XX_VFD_DECODE_INSTR(vfd_decode_idx,
17977ec681f3Smrg                        .idx = attr->binding,
17987ec681f3Smrg                        .offset = attr->offset,
17997ec681f3Smrg                        .instanced = binding_instanced & (1 << attr->binding),
18007ec681f3Smrg                        .format = format.fmt,
18017ec681f3Smrg                        .swap = format.swap,
18027ec681f3Smrg                        .unk30 = 1,
18037ec681f3Smrg                        ._float = !vk_format_is_int(attr->format)),
18047ec681f3Smrg                      A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, step_rate[attr->binding]));
18057ec681f3Smrg
18067ec681f3Smrg      tu_cs_emit_regs(cs,
18077ec681f3Smrg                      A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
18087ec681f3Smrg                        .writemask = vs->inputs[input_idx].compmask,
18097ec681f3Smrg                        .regid = vs->inputs[input_idx].regid));
1810361fc4cbSmaya
1811361fc4cbSmaya      vfd_decode_idx++;
1812361fc4cbSmaya   }
1813361fc4cbSmaya
18147ec681f3Smrg   tu_cs_emit_regs(cs,
18157ec681f3Smrg                   A6XX_VFD_CONTROL_0(
18167ec681f3Smrg                     .fetch_cnt = vfd_decode_idx, /* decode_cnt for binning pass ? */
18177ec681f3Smrg                     .decode_cnt = vfd_decode_idx));
1818361fc4cbSmaya}
1819361fc4cbSmaya
1820361fc4cbSmayavoid
18217ec681f3Smrgtu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewports, uint32_t num_viewport)
18227ec681f3Smrg{
18237ec681f3Smrg   VkExtent2D guardband = {511, 511};
18247ec681f3Smrg
18257ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), num_viewport * 6);
18267ec681f3Smrg   for (uint32_t i = 0; i < num_viewport; i++) {
18277ec681f3Smrg      const VkViewport *viewport = &viewports[i];
18287ec681f3Smrg      float offsets[3];
18297ec681f3Smrg      float scales[3];
18307ec681f3Smrg      scales[0] = viewport->width / 2.0f;
18317ec681f3Smrg      scales[1] = viewport->height / 2.0f;
18327ec681f3Smrg      scales[2] = viewport->maxDepth - viewport->minDepth;
18337ec681f3Smrg      offsets[0] = viewport->x + scales[0];
18347ec681f3Smrg      offsets[1] = viewport->y + scales[1];
18357ec681f3Smrg      offsets[2] = viewport->minDepth;
18367ec681f3Smrg      for (uint32_t j = 0; j < 3; j++) {
18377ec681f3Smrg         tu_cs_emit(cs, fui(offsets[j]));
18387ec681f3Smrg         tu_cs_emit(cs, fui(scales[j]));
18397ec681f3Smrg      }
18407ec681f3Smrg
18417ec681f3Smrg      guardband.width =
18427ec681f3Smrg         MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false));
18437ec681f3Smrg      guardband.height =
18447ec681f3Smrg         MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false));
18457ec681f3Smrg   }
1846361fc4cbSmaya
18477ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), num_viewport * 2);
18487ec681f3Smrg   for (uint32_t i = 0; i < num_viewport; i++) {
18497ec681f3Smrg      const VkViewport *viewport = &viewports[i];
18507ec681f3Smrg      VkOffset2D min;
18517ec681f3Smrg      VkOffset2D max;
18527ec681f3Smrg      min.x = (int32_t) viewport->x;
18537ec681f3Smrg      max.x = (int32_t) ceilf(viewport->x + viewport->width);
18547ec681f3Smrg      if (viewport->height >= 0.0f) {
18557ec681f3Smrg         min.y = (int32_t) viewport->y;
18567ec681f3Smrg         max.y = (int32_t) ceilf(viewport->y + viewport->height);
18577ec681f3Smrg      } else {
18587ec681f3Smrg         min.y = (int32_t)(viewport->y + viewport->height);
18597ec681f3Smrg         max.y = (int32_t) ceilf(viewport->y);
18607ec681f3Smrg      }
18617ec681f3Smrg      /* the spec allows viewport->height to be 0.0f */
18627ec681f3Smrg      if (min.y == max.y)
18637ec681f3Smrg         max.y++;
18647ec681f3Smrg      /* allow viewport->width = 0.0f for un-initialized viewports: */
18657ec681f3Smrg      if (min.x == max.x)
18667ec681f3Smrg         max.x++;
18677ec681f3Smrg
18687ec681f3Smrg      min.x = MAX2(min.x, 0);
18697ec681f3Smrg      min.y = MAX2(min.y, 0);
18707ec681f3Smrg
18717ec681f3Smrg      assert(min.x < max.x);
18727ec681f3Smrg      assert(min.y < max.y);
18737ec681f3Smrg      tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) |
18747ec681f3Smrg                     A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y));
18757ec681f3Smrg      tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(max.x - 1) |
18767ec681f3Smrg                     A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(max.y - 1));
18777ec681f3Smrg   }
18787ec681f3Smrg
18797ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), num_viewport * 2);
18807ec681f3Smrg   for (uint32_t i = 0; i < num_viewport; i++) {
18817ec681f3Smrg      const VkViewport *viewport = &viewports[i];
18827ec681f3Smrg      tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth)));
18837ec681f3Smrg      tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth)));
18847ec681f3Smrg   }
1885361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
18867ec681f3Smrg   tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) |
18877ec681f3Smrg                  A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height));
18887ec681f3Smrg
18897ec681f3Smrg   /* TODO: what to do about this and multi viewport ? */
18907ec681f3Smrg   float z_clamp_min = num_viewport ? MIN2(viewports[0].minDepth, viewports[0].maxDepth) : 0;
18917ec681f3Smrg   float z_clamp_max = num_viewport ? MAX2(viewports[0].minDepth, viewports[0].maxDepth) : 0;
18927ec681f3Smrg
18937ec681f3Smrg   tu_cs_emit_regs(cs,
18947ec681f3Smrg                   A6XX_RB_Z_CLAMP_MIN(z_clamp_min),
18957ec681f3Smrg                   A6XX_RB_Z_CLAMP_MAX(z_clamp_max));
1896361fc4cbSmaya}
1897361fc4cbSmaya
1898361fc4cbSmayavoid
18997ec681f3Smrgtu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissors, uint32_t scissor_count)
19007ec681f3Smrg{
19017ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), scissor_count * 2);
19027ec681f3Smrg
19037ec681f3Smrg   for (uint32_t i = 0; i < scissor_count; i++) {
19047ec681f3Smrg      const VkRect2D *scissor = &scissors[i];
19057ec681f3Smrg
19067ec681f3Smrg      uint32_t min_x = scissor->offset.x;
19077ec681f3Smrg      uint32_t min_y = scissor->offset.y;
19087ec681f3Smrg      uint32_t max_x = min_x + scissor->extent.width - 1;
19097ec681f3Smrg      uint32_t max_y = min_y + scissor->extent.height - 1;
19107ec681f3Smrg
19117ec681f3Smrg      if (!scissor->extent.width || !scissor->extent.height) {
19127ec681f3Smrg         min_x = min_y = 1;
19137ec681f3Smrg         max_x = max_y = 0;
19147ec681f3Smrg      } else {
19157ec681f3Smrg         /* avoid overflow */
19167ec681f3Smrg         uint32_t scissor_max = BITFIELD_MASK(15);
19177ec681f3Smrg         min_x = MIN2(scissor_max, min_x);
19187ec681f3Smrg         min_y = MIN2(scissor_max, min_y);
19197ec681f3Smrg         max_x = MIN2(scissor_max, max_x);
19207ec681f3Smrg         max_y = MIN2(scissor_max, max_y);
19217ec681f3Smrg      }
1922361fc4cbSmaya
19237ec681f3Smrg      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) |
19247ec681f3Smrg                     A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y));
19257ec681f3Smrg      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) |
19267ec681f3Smrg                     A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y));
19277ec681f3Smrg   }
1928361fc4cbSmaya}
1929361fc4cbSmaya
19307ec681f3Smrgvoid
19317ec681f3Smrgtu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc)
1932361fc4cbSmaya{
19337ec681f3Smrg   if (!samp_loc) {
19347ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1);
19357ec681f3Smrg      tu_cs_emit(cs, 0);
1936361fc4cbSmaya
19377ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1);
19387ec681f3Smrg      tu_cs_emit(cs, 0);
19397ec681f3Smrg
19407ec681f3Smrg      tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1);
19417ec681f3Smrg      tu_cs_emit(cs, 0);
19427ec681f3Smrg      return;
19437ec681f3Smrg   }
19447ec681f3Smrg
19457ec681f3Smrg   assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount);
19467ec681f3Smrg   assert(samp_loc->sampleLocationGridSize.width == 1);
19477ec681f3Smrg   assert(samp_loc->sampleLocationGridSize.height == 1);
19487ec681f3Smrg
19497ec681f3Smrg   uint32_t sample_config =
19507ec681f3Smrg      A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE;
19517ec681f3Smrg   uint32_t sample_locations = 0;
19527ec681f3Smrg   for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) {
19537ec681f3Smrg      sample_locations |=
19547ec681f3Smrg         (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) |
19557ec681f3Smrg          A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8;
19567ec681f3Smrg   }
19577ec681f3Smrg
19587ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2);
19597ec681f3Smrg   tu_cs_emit(cs, sample_config);
19607ec681f3Smrg   tu_cs_emit(cs, sample_locations);
19617ec681f3Smrg
19627ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2);
19637ec681f3Smrg   tu_cs_emit(cs, sample_config);
19647ec681f3Smrg   tu_cs_emit(cs, sample_locations);
19657ec681f3Smrg
19667ec681f3Smrg   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2);
19677ec681f3Smrg   tu_cs_emit(cs, sample_config);
19687ec681f3Smrg   tu_cs_emit(cs, sample_locations);
1969361fc4cbSmaya}
1970361fc4cbSmaya
1971361fc4cbSmayastatic uint32_t
1972361fc4cbSmayatu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
19737ec681f3Smrg                 enum a5xx_line_mode line_mode,
19747ec681f3Smrg                 bool multiview)
1975361fc4cbSmaya{
1976361fc4cbSmaya   uint32_t gras_su_cntl = 0;
1977361fc4cbSmaya
1978361fc4cbSmaya   if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT)
1979361fc4cbSmaya      gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
1980361fc4cbSmaya   if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT)
1981361fc4cbSmaya      gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
1982361fc4cbSmaya
1983361fc4cbSmaya   if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
1984361fc4cbSmaya      gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
1985361fc4cbSmaya
19867ec681f3Smrg   gras_su_cntl |=
19877ec681f3Smrg      A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
1988361fc4cbSmaya
1989361fc4cbSmaya   if (rast_info->depthBiasEnable)
1990361fc4cbSmaya      gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
1991361fc4cbSmaya
19927ec681f3Smrg   gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINE_MODE(line_mode);
1993361fc4cbSmaya
19947ec681f3Smrg   if (multiview) {
19957ec681f3Smrg      gras_su_cntl |=
19967ec681f3Smrg         A6XX_GRAS_SU_CNTL_UNK17 |
19977ec681f3Smrg         A6XX_GRAS_SU_CNTL_MULTIVIEW_ENABLE;
19987ec681f3Smrg   }
1999361fc4cbSmaya
20007ec681f3Smrg   return gras_su_cntl;
2001361fc4cbSmaya}
2002361fc4cbSmaya
2003361fc4cbSmayavoid
2004361fc4cbSmayatu6_emit_depth_bias(struct tu_cs *cs,
2005361fc4cbSmaya                    float constant_factor,
2006361fc4cbSmaya                    float clamp,
2007361fc4cbSmaya                    float slope_factor)
2008361fc4cbSmaya{
2009361fc4cbSmaya   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
20107ec681f3Smrg   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value);
20117ec681f3Smrg   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value);
20127ec681f3Smrg   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
2013361fc4cbSmaya}
2014361fc4cbSmaya
2015361fc4cbSmayastatic uint32_t
2016361fc4cbSmayatu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
2017361fc4cbSmaya                         bool has_alpha)
2018361fc4cbSmaya{
2019361fc4cbSmaya   const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp);
2020361fc4cbSmaya   const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor(
2021361fc4cbSmaya      has_alpha ? att->srcColorBlendFactor
2022361fc4cbSmaya                : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor));
2023361fc4cbSmaya   const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor(
2024361fc4cbSmaya      has_alpha ? att->dstColorBlendFactor
2025361fc4cbSmaya                : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor));
2026361fc4cbSmaya   const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp);
2027361fc4cbSmaya   const enum adreno_rb_blend_factor src_alpha_factor =
2028361fc4cbSmaya      tu6_blend_factor(att->srcAlphaBlendFactor);
2029361fc4cbSmaya   const enum adreno_rb_blend_factor dst_alpha_factor =
2030361fc4cbSmaya      tu6_blend_factor(att->dstAlphaBlendFactor);
2031361fc4cbSmaya
2032361fc4cbSmaya   return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) |
2033361fc4cbSmaya          A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) |
2034361fc4cbSmaya          A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) |
2035361fc4cbSmaya          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) |
2036361fc4cbSmaya          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) |
2037361fc4cbSmaya          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor);
2038361fc4cbSmaya}
2039361fc4cbSmaya
2040361fc4cbSmayastatic uint32_t
2041361fc4cbSmayatu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att,
2042361fc4cbSmaya                   uint32_t rb_mrt_control_rop,
2043361fc4cbSmaya                   bool has_alpha)
2044361fc4cbSmaya{
2045361fc4cbSmaya   uint32_t rb_mrt_control =
2046361fc4cbSmaya      A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask);
2047361fc4cbSmaya
2048361fc4cbSmaya   rb_mrt_control |= rb_mrt_control_rop;
2049361fc4cbSmaya
2050361fc4cbSmaya   if (att->blendEnable) {
2051361fc4cbSmaya      rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND;
2052361fc4cbSmaya
2053361fc4cbSmaya      if (has_alpha)
2054361fc4cbSmaya         rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2;
2055361fc4cbSmaya   }
2056361fc4cbSmaya
2057361fc4cbSmaya   return rb_mrt_control;
2058361fc4cbSmaya}
2059361fc4cbSmaya
2060361fc4cbSmayastatic void
2061361fc4cbSmayatu6_emit_rb_mrt_controls(struct tu_cs *cs,
2062361fc4cbSmaya                         const VkPipelineColorBlendStateCreateInfo *blend_info,
2063361fc4cbSmaya                         const VkFormat attachment_formats[MAX_RTS],
2064361fc4cbSmaya                         uint32_t *blend_enable_mask)
2065361fc4cbSmaya{
2066361fc4cbSmaya   *blend_enable_mask = 0;
2067361fc4cbSmaya
2068361fc4cbSmaya   bool rop_reads_dst = false;
2069361fc4cbSmaya   uint32_t rb_mrt_control_rop = 0;
2070361fc4cbSmaya   if (blend_info->logicOpEnable) {
2071361fc4cbSmaya      rop_reads_dst = tu_logic_op_reads_dst(blend_info->logicOp);
2072361fc4cbSmaya      rb_mrt_control_rop =
2073361fc4cbSmaya         A6XX_RB_MRT_CONTROL_ROP_ENABLE |
2074361fc4cbSmaya         A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info->logicOp));
2075361fc4cbSmaya   }
2076361fc4cbSmaya
2077361fc4cbSmaya   for (uint32_t i = 0; i < blend_info->attachmentCount; i++) {
2078361fc4cbSmaya      const VkPipelineColorBlendAttachmentState *att =
2079361fc4cbSmaya         &blend_info->pAttachments[i];
2080361fc4cbSmaya      const VkFormat format = attachment_formats[i];
2081361fc4cbSmaya
2082361fc4cbSmaya      uint32_t rb_mrt_control = 0;
2083361fc4cbSmaya      uint32_t rb_mrt_blend_control = 0;
2084361fc4cbSmaya      if (format != VK_FORMAT_UNDEFINED) {
2085361fc4cbSmaya         const bool has_alpha = vk_format_has_alpha(format);
2086361fc4cbSmaya
2087361fc4cbSmaya         rb_mrt_control =
20887ec681f3Smrg            tu6_rb_mrt_control(att, rb_mrt_control_rop, has_alpha);
2089361fc4cbSmaya         rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha);
2090361fc4cbSmaya
2091361fc4cbSmaya         if (att->blendEnable || rop_reads_dst)
2092361fc4cbSmaya            *blend_enable_mask |= 1 << i;
2093361fc4cbSmaya      }
2094361fc4cbSmaya
2095361fc4cbSmaya      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2);
2096361fc4cbSmaya      tu_cs_emit(cs, rb_mrt_control);
2097361fc4cbSmaya      tu_cs_emit(cs, rb_mrt_blend_control);
2098361fc4cbSmaya   }
2099361fc4cbSmaya}
2100361fc4cbSmaya
2101361fc4cbSmayastatic void
2102361fc4cbSmayatu6_emit_blend_control(struct tu_cs *cs,
2103361fc4cbSmaya                       uint32_t blend_enable_mask,
21047ec681f3Smrg                       bool dual_src_blend,
2105361fc4cbSmaya                       const VkPipelineMultisampleStateCreateInfo *msaa_info)
2106361fc4cbSmaya{
2107361fc4cbSmaya   const uint32_t sample_mask =
21087ec681f3Smrg      msaa_info->pSampleMask ? (*msaa_info->pSampleMask & 0xffff)
2109361fc4cbSmaya                             : ((1 << msaa_info->rasterizationSamples) - 1);
2110361fc4cbSmaya
21117ec681f3Smrg   tu_cs_emit_regs(cs,
21127ec681f3Smrg                   A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
21137ec681f3Smrg                                      .dual_color_in_enable = dual_src_blend,
21147ec681f3Smrg                                      .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
21157ec681f3Smrg                                      .unk8 = true));
21167ec681f3Smrg
2117361fc4cbSmaya   /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */
21187ec681f3Smrg   tu_cs_emit_regs(cs,
21197ec681f3Smrg                   A6XX_RB_BLEND_CNTL(.enable_blend = blend_enable_mask,
21207ec681f3Smrg                                      .independent_blend = true,
21217ec681f3Smrg                                      .sample_mask = sample_mask,
21227ec681f3Smrg                                      .dual_color_in_enable = dual_src_blend,
21237ec681f3Smrg                                      .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
21247ec681f3Smrg                                      .alpha_to_one = msaa_info->alphaToOneEnable));
21257ec681f3Smrg}
2126361fc4cbSmaya
21277ec681f3Smrgstatic uint32_t
21287ec681f3Smrgcalc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config,
21297ec681f3Smrg                 uint32_t pvtmem_bytes)
21307ec681f3Smrg{
21317ec681f3Smrg   uint32_t per_fiber_size = ALIGN(pvtmem_bytes, 512);
21327ec681f3Smrg   uint32_t per_sp_size =
21337ec681f3Smrg      ALIGN(per_fiber_size * dev->physical_device->info->a6xx.fibers_per_sp, 1 << 12);
2134361fc4cbSmaya
21357ec681f3Smrg   if (config) {
21367ec681f3Smrg      config->per_fiber_size = per_fiber_size;
21377ec681f3Smrg      config->per_sp_size = per_sp_size;
21387ec681f3Smrg   }
21397ec681f3Smrg
21407ec681f3Smrg   return dev->physical_device->info->num_sp_cores * per_sp_size;
2141361fc4cbSmaya}
2142361fc4cbSmaya
21437ec681f3Smrgstatic VkResult
21447ec681f3Smrgtu_setup_pvtmem(struct tu_device *dev,
21457ec681f3Smrg                struct tu_pipeline *pipeline,
21467ec681f3Smrg                struct tu_pvtmem_config *config,
21477ec681f3Smrg                uint32_t pvtmem_bytes, bool per_wave)
21487ec681f3Smrg{
21497ec681f3Smrg   if (!pvtmem_bytes) {
21507ec681f3Smrg      memset(config, 0, sizeof(*config));
21517ec681f3Smrg      return VK_SUCCESS;
21527ec681f3Smrg   }
21537ec681f3Smrg
21547ec681f3Smrg   uint32_t total_size = calc_pvtmem_size(dev, config, pvtmem_bytes);
21557ec681f3Smrg   config->per_wave = per_wave;
21567ec681f3Smrg
21577ec681f3Smrg   VkResult result =
21587ec681f3Smrg      tu_bo_init_new(dev, &pipeline->pvtmem_bo, total_size,
21597ec681f3Smrg                     TU_BO_ALLOC_NO_FLAGS);
21607ec681f3Smrg   if (result != VK_SUCCESS)
21617ec681f3Smrg      return result;
21627ec681f3Smrg
21637ec681f3Smrg   config->iova = pipeline->pvtmem_bo.iova;
21647ec681f3Smrg
21657ec681f3Smrg   return result;
2166361fc4cbSmaya}
2167361fc4cbSmaya
21687ec681f3Smrg
2169361fc4cbSmayastatic VkResult
21707ec681f3Smrgtu_pipeline_allocate_cs(struct tu_device *dev,
21717ec681f3Smrg                        struct tu_pipeline *pipeline,
21727ec681f3Smrg                        struct tu_pipeline_builder *builder,
21737ec681f3Smrg                        struct ir3_shader_variant *compute)
21747ec681f3Smrg{
21757ec681f3Smrg   uint32_t size = 2048 + tu6_load_state_size(pipeline, compute);
21767ec681f3Smrg
21777ec681f3Smrg   /* graphics case: */
21787ec681f3Smrg   if (builder) {
21797ec681f3Smrg      uint32_t pvtmem_bytes = 0;
21807ec681f3Smrg      for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
21817ec681f3Smrg         if (builder->variants[i]) {
21827ec681f3Smrg            size += builder->variants[i]->info.size / 4;
21837ec681f3Smrg            pvtmem_bytes = MAX2(pvtmem_bytes, builder->variants[i]->pvtmem_size);
21847ec681f3Smrg         }
21857ec681f3Smrg      }
2186361fc4cbSmaya
21877ec681f3Smrg      size += builder->binning_variant->info.size / 4;
21887ec681f3Smrg      pvtmem_bytes = MAX2(pvtmem_bytes, builder->binning_variant->pvtmem_size);
2189361fc4cbSmaya
21907ec681f3Smrg      size += calc_pvtmem_size(dev, NULL, pvtmem_bytes) / 4;
2191361fc4cbSmaya
21927ec681f3Smrg      builder->additional_cs_reserve_size = 0;
21937ec681f3Smrg      for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) {
21947ec681f3Smrg         struct ir3_shader_variant *variant = builder->variants[i];
21957ec681f3Smrg         if (variant) {
21967ec681f3Smrg            builder->additional_cs_reserve_size +=
21977ec681f3Smrg               tu_xs_get_additional_cs_size_dwords(variant);
21987ec681f3Smrg
21997ec681f3Smrg            if (variant->binning) {
22007ec681f3Smrg               builder->additional_cs_reserve_size +=
22017ec681f3Smrg                  tu_xs_get_additional_cs_size_dwords(variant->binning);
22027ec681f3Smrg            }
22037ec681f3Smrg         }
22047ec681f3Smrg      }
22057ec681f3Smrg
22067ec681f3Smrg      size += builder->additional_cs_reserve_size;
22077ec681f3Smrg   } else {
22087ec681f3Smrg      size += compute->info.size / 4;
22097ec681f3Smrg      size += calc_pvtmem_size(dev, NULL, compute->pvtmem_size) / 4;
22107ec681f3Smrg
22117ec681f3Smrg      size += tu_xs_get_additional_cs_size_dwords(compute);
2212361fc4cbSmaya   }
2213361fc4cbSmaya
22147ec681f3Smrg   tu_cs_init(&pipeline->cs, dev, TU_CS_MODE_SUB_STREAM, size);
2215361fc4cbSmaya
22167ec681f3Smrg   /* Reserve the space now such that tu_cs_begin_sub_stream never fails. Note
22177ec681f3Smrg    * that LOAD_STATE can potentially take up a large amount of space so we
22187ec681f3Smrg    * calculate its size explicitly.
22197ec681f3Smrg   */
22207ec681f3Smrg   return tu_cs_reserve_space(&pipeline->cs, size);
22217ec681f3Smrg}
22227ec681f3Smrg
22237ec681f3Smrgstatic void
22247ec681f3Smrgtu_pipeline_shader_key_init(struct ir3_shader_key *key,
22257ec681f3Smrg                            const struct tu_pipeline *pipeline,
22267ec681f3Smrg                            const VkGraphicsPipelineCreateInfo *pipeline_info)
22277ec681f3Smrg{
22287ec681f3Smrg   for (uint32_t i = 0; i < pipeline_info->stageCount; i++) {
22297ec681f3Smrg      if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
22307ec681f3Smrg         key->has_gs = true;
22317ec681f3Smrg         break;
22327ec681f3Smrg      }
22337ec681f3Smrg   }
22347ec681f3Smrg
22357ec681f3Smrg   if (pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
22367ec681f3Smrg       !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD)))
22377ec681f3Smrg      return;
22387ec681f3Smrg
22397ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
22407ec681f3Smrg   const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
22417ec681f3Smrg      vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
22427ec681f3Smrg   if (msaa_info->rasterizationSamples > 1 ||
22437ec681f3Smrg       /* also set msaa key when sample location is not the default
22447ec681f3Smrg        * since this affects varying interpolation */
22457ec681f3Smrg       (sample_locations && sample_locations->sampleLocationsEnable)) {
22467ec681f3Smrg      key->msaa = true;
22477ec681f3Smrg   }
22487ec681f3Smrg
22497ec681f3Smrg   /* note: not actually used by ir3, just checked in tu6_emit_fs_inputs */
22507ec681f3Smrg   if (msaa_info->sampleShadingEnable)
22517ec681f3Smrg      key->sample_shading = true;
22527ec681f3Smrg
22537ec681f3Smrg   /* We set this after we compile to NIR because we need the prim mode */
22547ec681f3Smrg   key->tessellation = IR3_TESS_NONE;
22557ec681f3Smrg}
22567ec681f3Smrg
22577ec681f3Smrgstatic uint32_t
22587ec681f3Smrgtu6_get_tessmode(struct tu_shader* shader)
22597ec681f3Smrg{
22607ec681f3Smrg   uint32_t primitive_mode = shader->ir3_shader->nir->info.tess.primitive_mode;
22617ec681f3Smrg   switch (primitive_mode) {
22627ec681f3Smrg   case GL_ISOLINES:
22637ec681f3Smrg      return IR3_TESS_ISOLINES;
22647ec681f3Smrg   case GL_TRIANGLES:
22657ec681f3Smrg      return IR3_TESS_TRIANGLES;
22667ec681f3Smrg   case GL_QUADS:
22677ec681f3Smrg      return IR3_TESS_QUADS;
22687ec681f3Smrg   case GL_NONE:
22697ec681f3Smrg      return IR3_TESS_NONE;
22707ec681f3Smrg   default:
22717ec681f3Smrg      unreachable("bad tessmode");
22727ec681f3Smrg   }
22737ec681f3Smrg}
22747ec681f3Smrg
22757ec681f3Smrgstatic uint64_t
22767ec681f3Smrgtu_upload_variant(struct tu_pipeline *pipeline,
22777ec681f3Smrg                  const struct ir3_shader_variant *variant)
22787ec681f3Smrg{
22797ec681f3Smrg   struct tu_cs_memory memory;
22807ec681f3Smrg
22817ec681f3Smrg   if (!variant)
22827ec681f3Smrg      return 0;
22837ec681f3Smrg
22847ec681f3Smrg   /* this expects to get enough alignment because shaders are allocated first
22857ec681f3Smrg    * and total size is always aligned correctly
22867ec681f3Smrg    * note: an assert in tu6_emit_xs_config validates the alignment
22877ec681f3Smrg    */
22887ec681f3Smrg   tu_cs_alloc(&pipeline->cs, variant->info.size / 4, 1, &memory);
22897ec681f3Smrg
22907ec681f3Smrg   memcpy(memory.map, variant->bin, variant->info.size);
22917ec681f3Smrg   return memory.iova;
22927ec681f3Smrg}
22937ec681f3Smrg
22947ec681f3Smrgstatic void
22957ec681f3Smrgtu_append_executable(struct tu_pipeline *pipeline, struct ir3_shader_variant *variant,
22967ec681f3Smrg                     char *nir_from_spirv)
22977ec681f3Smrg{
22987ec681f3Smrg   ralloc_steal(pipeline->executables_mem_ctx, variant->disasm_info.nir);
22997ec681f3Smrg   ralloc_steal(pipeline->executables_mem_ctx, variant->disasm_info.disasm);
23007ec681f3Smrg
23017ec681f3Smrg   struct tu_pipeline_executable exe = {
23027ec681f3Smrg      .stage = variant->shader->type,
23037ec681f3Smrg      .nir_from_spirv = nir_from_spirv,
23047ec681f3Smrg      .nir_final = variant->disasm_info.nir,
23057ec681f3Smrg      .disasm = variant->disasm_info.disasm,
23067ec681f3Smrg      .stats = variant->info,
23077ec681f3Smrg      .is_binning = variant->binning_pass,
23087ec681f3Smrg   };
23097ec681f3Smrg
23107ec681f3Smrg   util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
2311361fc4cbSmaya}
2312361fc4cbSmaya
2313361fc4cbSmayastatic VkResult
23147ec681f3Smrgtu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
23157ec681f3Smrg                                    struct tu_pipeline *pipeline)
2316361fc4cbSmaya{
23177ec681f3Smrg   const struct ir3_compiler *compiler = builder->device->compiler;
2318361fc4cbSmaya   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
2319361fc4cbSmaya      NULL
2320361fc4cbSmaya   };
2321361fc4cbSmaya   for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
2322361fc4cbSmaya      gl_shader_stage stage =
23237ec681f3Smrg         vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
2324361fc4cbSmaya      stage_infos[stage] = &builder->create_info->pStages[i];
2325361fc4cbSmaya   }
2326361fc4cbSmaya
23277ec681f3Smrg   struct ir3_shader_key key = {};
23287ec681f3Smrg   tu_pipeline_shader_key_init(&key, pipeline, builder->create_info);
23297ec681f3Smrg
23307ec681f3Smrg   nir_shader *nir[ARRAY_SIZE(builder->shaders)] = { NULL };
2331361fc4cbSmaya
23327ec681f3Smrg   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
23337ec681f3Smrg        stage < ARRAY_SIZE(nir); stage++) {
2334361fc4cbSmaya      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
2335361fc4cbSmaya      if (!stage_info)
2336361fc4cbSmaya         continue;
2337361fc4cbSmaya
23387ec681f3Smrg      nir[stage] = tu_spirv_to_nir(builder->device, stage_info, stage);
23397ec681f3Smrg      if (!nir[stage])
23407ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
23417ec681f3Smrg   }
23427ec681f3Smrg
23437ec681f3Smrg   if (!nir[MESA_SHADER_FRAGMENT]) {
23447ec681f3Smrg         const nir_shader_compiler_options *nir_options =
23457ec681f3Smrg            ir3_get_compiler_options(builder->device->compiler);
23467ec681f3Smrg         nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
23477ec681f3Smrg                                                           nir_options,
23487ec681f3Smrg                                                           "noop_fs");
23497ec681f3Smrg         nir[MESA_SHADER_FRAGMENT] = fs_b.shader;
23507ec681f3Smrg   }
23517ec681f3Smrg
23527ec681f3Smrg   const bool executable_info = builder->create_info->flags &
23537ec681f3Smrg      VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
23547ec681f3Smrg
23557ec681f3Smrg   char *nir_initial_disasm[ARRAY_SIZE(builder->shaders)] = { NULL };
23567ec681f3Smrg
23577ec681f3Smrg   if (executable_info) {
23587ec681f3Smrg      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
23597ec681f3Smrg            stage < ARRAY_SIZE(nir); stage++) {
23607ec681f3Smrg         if (!nir[stage])
23617ec681f3Smrg            continue;
23627ec681f3Smrg
23637ec681f3Smrg         nir_initial_disasm[stage] =
23647ec681f3Smrg            nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx);
23657ec681f3Smrg      }
23667ec681f3Smrg   }
23677ec681f3Smrg
23687ec681f3Smrg   /* TODO do intra-stage linking here */
23697ec681f3Smrg
23707ec681f3Smrg   uint32_t desc_sets = 0;
23717ec681f3Smrg   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
23727ec681f3Smrg        stage < ARRAY_SIZE(nir); stage++) {
23737ec681f3Smrg      if (!nir[stage])
23747ec681f3Smrg         continue;
23757ec681f3Smrg
2376361fc4cbSmaya      struct tu_shader *shader =
23777ec681f3Smrg         tu_shader_create(builder->device, nir[stage],
23787ec681f3Smrg                          builder->multiview_mask, builder->layout,
23797ec681f3Smrg                          builder->alloc);
2380361fc4cbSmaya      if (!shader)
2381361fc4cbSmaya         return VK_ERROR_OUT_OF_HOST_MEMORY;
2382361fc4cbSmaya
23837ec681f3Smrg      /* In SPIR-V generated from GLSL, the primitive mode is specified in the
23847ec681f3Smrg       * tessellation evaluation shader, but in SPIR-V generated from HLSL,
23857ec681f3Smrg       * the mode is specified in the tessellation control shader. */
23867ec681f3Smrg      if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) &&
23877ec681f3Smrg          key.tessellation == IR3_TESS_NONE) {
23887ec681f3Smrg         key.tessellation = tu6_get_tessmode(shader);
23897ec681f3Smrg      }
2390361fc4cbSmaya
23917ec681f3Smrg      if (stage > MESA_SHADER_TESS_CTRL) {
23927ec681f3Smrg         if (stage == MESA_SHADER_FRAGMENT) {
23937ec681f3Smrg            key.tcs_store_primid = key.tcs_store_primid ||
23947ec681f3Smrg               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
23957ec681f3Smrg         } else {
23967ec681f3Smrg            key.tcs_store_primid = key.tcs_store_primid ||
23977ec681f3Smrg               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
23987ec681f3Smrg         }
23997ec681f3Smrg      }
2400361fc4cbSmaya
24017ec681f3Smrg      /* Keep track of the status of each shader's active descriptor sets,
24027ec681f3Smrg       * which is set in tu_lower_io. */
24037ec681f3Smrg      desc_sets |= shader->active_desc_sets;
2404361fc4cbSmaya
24057ec681f3Smrg      builder->shaders[stage] = shader;
2406361fc4cbSmaya   }
24077ec681f3Smrg   pipeline->active_desc_sets = desc_sets;
2408361fc4cbSmaya
24097ec681f3Smrg   struct tu_shader *last_shader = builder->shaders[MESA_SHADER_GEOMETRY];
24107ec681f3Smrg   if (!last_shader)
24117ec681f3Smrg      last_shader = builder->shaders[MESA_SHADER_TESS_EVAL];
24127ec681f3Smrg   if (!last_shader)
24137ec681f3Smrg      last_shader = builder->shaders[MESA_SHADER_VERTEX];
2414361fc4cbSmaya
24157ec681f3Smrg   uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written;
2416361fc4cbSmaya
24177ec681f3Smrg   key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
24187ec681f3Smrg   key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
2419361fc4cbSmaya
24207ec681f3Smrg   pipeline->tess.patch_type = key.tessellation;
2421361fc4cbSmaya
24227ec681f3Smrg   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
24237ec681f3Smrg        stage < ARRAY_SIZE(builder->shaders); stage++) {
24247ec681f3Smrg      if (!builder->shaders[stage])
24257ec681f3Smrg         continue;
24267ec681f3Smrg
24277ec681f3Smrg      bool created;
24287ec681f3Smrg      builder->variants[stage] =
24297ec681f3Smrg         ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
24307ec681f3Smrg                                &key, false, executable_info, &created);
24317ec681f3Smrg      if (!builder->variants[stage])
24327ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
24337ec681f3Smrg   }
24347ec681f3Smrg
24357ec681f3Smrg   uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler);
24367ec681f3Smrg
24377ec681f3Smrg   key.safe_constlen = true;
24387ec681f3Smrg
24397ec681f3Smrg   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
24407ec681f3Smrg        stage < ARRAY_SIZE(builder->shaders); stage++) {
24417ec681f3Smrg      if (!builder->shaders[stage])
2442361fc4cbSmaya         continue;
2443361fc4cbSmaya
24447ec681f3Smrg      if (safe_constlens & (1 << stage)) {
24457ec681f3Smrg         bool created;
24467ec681f3Smrg         builder->variants[stage] =
24477ec681f3Smrg            ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
24487ec681f3Smrg                                   &key, false, executable_info, &created);
24497ec681f3Smrg         if (!builder->variants[stage])
24507ec681f3Smrg            return VK_ERROR_OUT_OF_HOST_MEMORY;
24517ec681f3Smrg      }
2452361fc4cbSmaya   }
2453361fc4cbSmaya
24547ec681f3Smrg   const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX];
24557ec681f3Smrg   struct ir3_shader_variant *variant;
24567ec681f3Smrg
24577ec681f3Smrg   if (vs->ir3_shader->stream_output.num_outputs ||
24587ec681f3Smrg       !ir3_has_binning_vs(&key)) {
24597ec681f3Smrg      variant = builder->variants[MESA_SHADER_VERTEX];
24607ec681f3Smrg   } else {
24617ec681f3Smrg      bool created;
24627ec681f3Smrg      key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
24637ec681f3Smrg      variant = ir3_shader_get_variant(vs->ir3_shader, &key,
24647ec681f3Smrg                                       true, executable_info, &created);
24657ec681f3Smrg      if (!variant)
24667ec681f3Smrg         return VK_ERROR_OUT_OF_HOST_MEMORY;
24677ec681f3Smrg   }
24687ec681f3Smrg
24697ec681f3Smrg   builder->binning_variant = variant;
24707ec681f3Smrg
24717ec681f3Smrg   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
24727ec681f3Smrg         stage < ARRAY_SIZE(nir); stage++) {
24737ec681f3Smrg      if (builder->variants[stage]) {
24747ec681f3Smrg         tu_append_executable(pipeline, builder->variants[stage],
24757ec681f3Smrg            nir_initial_disasm[stage]);
24767ec681f3Smrg      }
24777ec681f3Smrg   }
24787ec681f3Smrg
24797ec681f3Smrg   if (builder->binning_variant != builder->variants[MESA_SHADER_VERTEX]) {
24807ec681f3Smrg      tu_append_executable(pipeline, builder->binning_variant, NULL);
2481361fc4cbSmaya   }
2482361fc4cbSmaya
2483361fc4cbSmaya   return VK_SUCCESS;
2484361fc4cbSmaya}
2485361fc4cbSmaya
2486361fc4cbSmayastatic void
2487361fc4cbSmayatu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
2488361fc4cbSmaya                                  struct tu_pipeline *pipeline)
2489361fc4cbSmaya{
2490361fc4cbSmaya   const VkPipelineDynamicStateCreateInfo *dynamic_info =
2491361fc4cbSmaya      builder->create_info->pDynamicState;
2492361fc4cbSmaya
24937ec681f3Smrg   pipeline->gras_su_cntl_mask = ~0u;
24947ec681f3Smrg   pipeline->rb_depth_cntl_mask = ~0u;
24957ec681f3Smrg   pipeline->rb_stencil_cntl_mask = ~0u;
24967ec681f3Smrg   pipeline->pc_raster_cntl_mask = ~0u;
24977ec681f3Smrg   pipeline->vpc_unknown_9107_mask = ~0u;
24987ec681f3Smrg
2499361fc4cbSmaya   if (!dynamic_info)
2500361fc4cbSmaya      return;
2501361fc4cbSmaya
2502361fc4cbSmaya   for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
25037ec681f3Smrg      VkDynamicState state = dynamic_info->pDynamicStates[i];
25047ec681f3Smrg      switch (state) {
25057ec681f3Smrg      case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
25067ec681f3Smrg         if (state == VK_DYNAMIC_STATE_LINE_WIDTH)
25077ec681f3Smrg            pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
25087ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(state);
25097ec681f3Smrg         break;
25107ec681f3Smrg      case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
25117ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
25127ec681f3Smrg         break;
25137ec681f3Smrg      case VK_DYNAMIC_STATE_CULL_MODE_EXT:
25147ec681f3Smrg         pipeline->gras_su_cntl_mask &=
25157ec681f3Smrg            ~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT);
25167ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
25177ec681f3Smrg         break;
25187ec681f3Smrg      case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
25197ec681f3Smrg         pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
25207ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
25217ec681f3Smrg         break;
25227ec681f3Smrg      case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
25237ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY);
25247ec681f3Smrg         break;
25257ec681f3Smrg      case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
25267ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE);
25277ec681f3Smrg         break;
25287ec681f3Smrg      case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
25297ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT);
25307ec681f3Smrg         break;
25317ec681f3Smrg      case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
25327ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR);
25337ec681f3Smrg         break;
25347ec681f3Smrg      case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
25357ec681f3Smrg         pipeline->rb_depth_cntl_mask &=
25367ec681f3Smrg            ~(A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE);
25377ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
25387ec681f3Smrg         break;
25397ec681f3Smrg      case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
25407ec681f3Smrg         pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
25417ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
25427ec681f3Smrg         break;
25437ec681f3Smrg      case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
25447ec681f3Smrg         pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
25457ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
25467ec681f3Smrg         break;
25477ec681f3Smrg      case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
25487ec681f3Smrg         pipeline->rb_depth_cntl_mask &=
25497ec681f3Smrg            ~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE);
25507ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
25517ec681f3Smrg         break;
25527ec681f3Smrg      case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
25537ec681f3Smrg         pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
25547ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
25557ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
25567ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
25577ec681f3Smrg         break;
25587ec681f3Smrg      case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
25597ec681f3Smrg         pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_FUNC__MASK |
25607ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_FAIL__MASK |
25617ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_ZPASS__MASK |
25627ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK |
25637ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK |
25647ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK |
25657ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK |
25667ec681f3Smrg                                             A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK);
25677ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
25687ec681f3Smrg         break;
25697ec681f3Smrg      case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT:
25707ec681f3Smrg         pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET;
25717ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
25727ec681f3Smrg         break;
25737ec681f3Smrg      case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT:
25747ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE);
25757ec681f3Smrg         break;
25767ec681f3Smrg      case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT:
25777ec681f3Smrg         pipeline->pc_raster_cntl_mask &= ~A6XX_PC_RASTER_CNTL_DISCARD;
25787ec681f3Smrg         pipeline->vpc_unknown_9107_mask &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
25797ec681f3Smrg         pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD);
25807ec681f3Smrg         break;
25817ec681f3Smrg      default:
25827ec681f3Smrg         assert(!"unsupported dynamic state");
25837ec681f3Smrg         break;
25847ec681f3Smrg      }
2585361fc4cbSmaya   }
2586361fc4cbSmaya}
2587361fc4cbSmaya
25887ec681f3Smrgstatic void
25897ec681f3Smrgtu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
25907ec681f3Smrg                        struct tu_shader *shader,
25917ec681f3Smrg                        struct ir3_shader_variant *v)
25927ec681f3Smrg{
25937ec681f3Smrg   link->const_state = *ir3_const_state(v);
25947ec681f3Smrg   link->constlen = v->constlen;
25957ec681f3Smrg   link->push_consts = shader->push_consts;
25967ec681f3Smrg}
25977ec681f3Smrg
25987ec681f3Smrgstatic void
25997ec681f3Smrgtu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
26007ec681f3Smrg                                        struct tu_pipeline *pipeline)
26017ec681f3Smrg{
26027ec681f3Smrg   struct tu_cs prog_cs;
26037ec681f3Smrg
26047ec681f3Smrg   /* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
26057ec681f3Smrg    * else that could depend on that state (like push constants)
26067ec681f3Smrg    *
26077ec681f3Smrg    * Note also that this always uses the full VS even in binning pass.  The
26087ec681f3Smrg    * binning pass variant has the same const layout as the full VS, and
26097ec681f3Smrg    * the constlen for the VS will be the same or greater than the constlen
26107ec681f3Smrg    * for the binning pass variant.  It is required that the constlen state
26117ec681f3Smrg    * matches between binning and draw passes, as some parts of the push
26127ec681f3Smrg    * consts are emitted in state groups that are shared between the binning
26137ec681f3Smrg    * and draw passes.
26147ec681f3Smrg    */
26157ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
26167ec681f3Smrg   tu6_emit_program_config(&prog_cs, builder);
26177ec681f3Smrg   pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
26187ec681f3Smrg
26197ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs);
26207ec681f3Smrg   tu6_emit_program(&prog_cs, builder, false, pipeline);
26217ec681f3Smrg   pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
26227ec681f3Smrg
26237ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs);
26247ec681f3Smrg   tu6_emit_program(&prog_cs, builder, true, pipeline);
26257ec681f3Smrg   pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
26267ec681f3Smrg
26277ec681f3Smrg   VkShaderStageFlags stages = 0;
26287ec681f3Smrg   for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
26297ec681f3Smrg      stages |= builder->create_info->pStages[i].stage;
26307ec681f3Smrg   }
26317ec681f3Smrg   pipeline->active_stages = stages;
26327ec681f3Smrg
26337ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
26347ec681f3Smrg      if (!builder->shaders[i])
26357ec681f3Smrg         continue;
2636361fc4cbSmaya
26377ec681f3Smrg      tu_pipeline_set_linkage(&pipeline->program.link[i],
26387ec681f3Smrg                              builder->shaders[i],
26397ec681f3Smrg                              builder->variants[i]);
26407ec681f3Smrg   }
2641361fc4cbSmaya}
2642361fc4cbSmaya
2643361fc4cbSmayastatic void
2644361fc4cbSmayatu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
2645361fc4cbSmaya                                       struct tu_pipeline *pipeline)
2646361fc4cbSmaya{
2647361fc4cbSmaya   const VkPipelineVertexInputStateCreateInfo *vi_info =
2648361fc4cbSmaya      builder->create_info->pVertexInputState;
26497ec681f3Smrg   const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
26507ec681f3Smrg   const struct ir3_shader_variant *bs = builder->binning_variant;
2651361fc4cbSmaya
26527ec681f3Smrg   /* Bindings may contain holes */
26537ec681f3Smrg   for (unsigned i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
26547ec681f3Smrg      pipeline->num_vbs =
26557ec681f3Smrg         MAX2(pipeline->num_vbs, vi_info->pVertexBindingDescriptions[i].binding + 1);
26567ec681f3Smrg   }
2657361fc4cbSmaya
26587ec681f3Smrg   struct tu_cs vi_cs;
26597ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs,
26607ec681f3Smrg                          MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
26617ec681f3Smrg   tu6_emit_vertex_input(pipeline, &vi_cs, vs, vi_info);
26627ec681f3Smrg   pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
26637ec681f3Smrg
26647ec681f3Smrg   if (bs) {
26657ec681f3Smrg      tu_cs_begin_sub_stream(&pipeline->cs,
26667ec681f3Smrg                             MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
26677ec681f3Smrg      tu6_emit_vertex_input(pipeline, &vi_cs, bs, vi_info);
26687ec681f3Smrg      pipeline->vi.binning_state =
26697ec681f3Smrg         tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
2670361fc4cbSmaya   }
2671361fc4cbSmaya}
2672361fc4cbSmaya
2673361fc4cbSmayastatic void
2674361fc4cbSmayatu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
2675361fc4cbSmaya                                         struct tu_pipeline *pipeline)
2676361fc4cbSmaya{
2677361fc4cbSmaya   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2678361fc4cbSmaya      builder->create_info->pInputAssemblyState;
2679361fc4cbSmaya
2680361fc4cbSmaya   pipeline->ia.primtype = tu6_primtype(ia_info->topology);
2681361fc4cbSmaya   pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
2682361fc4cbSmaya}
2683361fc4cbSmaya
26847ec681f3Smrgstatic bool
26857ec681f3Smrgtu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
26867ec681f3Smrg                         uint32_t id, uint32_t size)
26877ec681f3Smrg{
26887ec681f3Smrg   assert(id < ARRAY_SIZE(pipeline->dynamic_state));
26897ec681f3Smrg
26907ec681f3Smrg   if (pipeline->dynamic_state_mask & BIT(id))
26917ec681f3Smrg      return false;
26927ec681f3Smrg
26937ec681f3Smrg   pipeline->dynamic_state[id] = tu_cs_draw_state(&pipeline->cs, cs, size);
26947ec681f3Smrg   return true;
26957ec681f3Smrg}
26967ec681f3Smrg
26977ec681f3Smrgstatic void
26987ec681f3Smrgtu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
26997ec681f3Smrg                                       struct tu_pipeline *pipeline)
27007ec681f3Smrg{
27017ec681f3Smrg   if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ||
27027ec681f3Smrg       !(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
27037ec681f3Smrg      return;
27047ec681f3Smrg
27057ec681f3Smrg   const VkPipelineTessellationStateCreateInfo *tess_info =
27067ec681f3Smrg      builder->create_info->pTessellationState;
27077ec681f3Smrg
27087ec681f3Smrg   assert(pipeline->ia.primtype == DI_PT_PATCHES0);
27097ec681f3Smrg   assert(tess_info->patchControlPoints <= 32);
27107ec681f3Smrg   pipeline->ia.primtype += tess_info->patchControlPoints;
27117ec681f3Smrg   const VkPipelineTessellationDomainOriginStateCreateInfo *domain_info =
27127ec681f3Smrg         vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
27137ec681f3Smrg   pipeline->tess.upper_left_domain_origin = !domain_info ||
27147ec681f3Smrg         domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
27157ec681f3Smrg   const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
27167ec681f3Smrg   const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
27177ec681f3Smrg   pipeline->tess.param_stride = hs->output_size * 4;
27187ec681f3Smrg   pipeline->tess.hs_bo_regid = hs->const_state->offsets.primitive_param + 1;
27197ec681f3Smrg   pipeline->tess.ds_bo_regid = ds->const_state->offsets.primitive_param + 1;
27207ec681f3Smrg}
27217ec681f3Smrg
2722361fc4cbSmayastatic void
2723361fc4cbSmayatu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
2724361fc4cbSmaya                                   struct tu_pipeline *pipeline)
2725361fc4cbSmaya{
2726361fc4cbSmaya   /* The spec says:
2727361fc4cbSmaya    *
2728361fc4cbSmaya    *    pViewportState is a pointer to an instance of the
2729361fc4cbSmaya    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
2730361fc4cbSmaya    *    pipeline has rasterization disabled."
2731361fc4cbSmaya    *
2732361fc4cbSmaya    * We leave the relevant registers stale in that case.
2733361fc4cbSmaya    */
2734361fc4cbSmaya   if (builder->rasterizer_discard)
2735361fc4cbSmaya      return;
2736361fc4cbSmaya
2737361fc4cbSmaya   const VkPipelineViewportStateCreateInfo *vp_info =
2738361fc4cbSmaya      builder->create_info->pViewportState;
2739361fc4cbSmaya
27407ec681f3Smrg   struct tu_cs cs;
2741361fc4cbSmaya
27427ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * vp_info->viewportCount))
27437ec681f3Smrg      tu6_emit_viewport(&cs, vp_info->pViewports, vp_info->viewportCount);
2744361fc4cbSmaya
27457ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * vp_info->scissorCount))
27467ec681f3Smrg      tu6_emit_scissor(&cs, vp_info->pScissors, vp_info->scissorCount);
2747361fc4cbSmaya}
2748361fc4cbSmaya
2749361fc4cbSmayastatic void
2750361fc4cbSmayatu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
2751361fc4cbSmaya                                        struct tu_pipeline *pipeline)
2752361fc4cbSmaya{
2753361fc4cbSmaya   const VkPipelineRasterizationStateCreateInfo *rast_info =
2754361fc4cbSmaya      builder->create_info->pRasterizationState;
2755361fc4cbSmaya
27567ec681f3Smrg   enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
27577ec681f3Smrg
27587ec681f3Smrg   bool depth_clip_disable = rast_info->depthClampEnable;
27597ec681f3Smrg
27607ec681f3Smrg   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
27617ec681f3Smrg      vk_find_struct_const(rast_info, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
27627ec681f3Smrg   if (depth_clip_state)
27637ec681f3Smrg      depth_clip_disable = !depth_clip_state->depthClipEnable;
2764361fc4cbSmaya
27657ec681f3Smrg   pipeline->line_mode = RECTANGULAR;
27667ec681f3Smrg
27677ec681f3Smrg   if (tu6_primtype_line(pipeline->ia.primtype)) {
27687ec681f3Smrg      const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state =
27697ec681f3Smrg         vk_find_struct_const(rast_info->pNext,
27707ec681f3Smrg                              PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
27717ec681f3Smrg
27727ec681f3Smrg      if (rast_line_state && rast_line_state->lineRasterizationMode ==
27737ec681f3Smrg               VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
27747ec681f3Smrg         pipeline->line_mode = BRESENHAM;
27757ec681f3Smrg      }
27767ec681f3Smrg   }
27777ec681f3Smrg
27787ec681f3Smrg   struct tu_cs cs;
27797ec681f3Smrg   uint32_t cs_size = 9 +
27807ec681f3Smrg      (builder->device->physical_device->info->a6xx.has_shading_rate ? 8 : 0) +
27817ec681f3Smrg      (builder->emit_msaa_state ? 11 : 0);
27827ec681f3Smrg   pipeline->rast_state = tu_cs_draw_state(&pipeline->cs, &cs, cs_size);
27837ec681f3Smrg
27847ec681f3Smrg   tu_cs_emit_regs(&cs,
27857ec681f3Smrg                   A6XX_GRAS_CL_CNTL(
27867ec681f3Smrg                     .znear_clip_disable = depth_clip_disable,
27877ec681f3Smrg                     .zfar_clip_disable = depth_clip_disable,
27887ec681f3Smrg                     /* TODO should this be depth_clip_disable instead? */
27897ec681f3Smrg                     .unk5 = rast_info->depthClampEnable,
27907ec681f3Smrg                     .zero_gb_scale_z = 1,
27917ec681f3Smrg                     .vp_clip_code_ignore = 1));
27927ec681f3Smrg
27937ec681f3Smrg   tu_cs_emit_regs(&cs,
27947ec681f3Smrg                   A6XX_VPC_POLYGON_MODE(mode));
27957ec681f3Smrg
27967ec681f3Smrg   tu_cs_emit_regs(&cs,
27977ec681f3Smrg                   A6XX_PC_POLYGON_MODE(mode));
2798361fc4cbSmaya
2799361fc4cbSmaya   /* move to hw ctx init? */
28007ec681f3Smrg   tu_cs_emit_regs(&cs,
28017ec681f3Smrg                   A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
28027ec681f3Smrg                   A6XX_GRAS_SU_POINT_SIZE(1.0f));
28037ec681f3Smrg
28047ec681f3Smrg   if (builder->device->physical_device->info->a6xx.has_shading_rate) {
28057ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A00());
28067ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A10());
28077ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A20());
28087ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A30());
28097ec681f3Smrg   }
28107ec681f3Smrg
28117ec681f3Smrg   /* If samples count couldn't be devised from the subpass, we should emit it here.
28127ec681f3Smrg    * It happens when subpass doesn't use any color/depth attachment.
28137ec681f3Smrg    */
28147ec681f3Smrg   if (builder->emit_msaa_state)
28157ec681f3Smrg      tu6_emit_msaa(&cs, builder->samples, pipeline->line_mode);
28167ec681f3Smrg
28177ec681f3Smrg   const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info =
28187ec681f3Smrg      vk_find_struct_const(rast_info->pNext,
28197ec681f3Smrg                           PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT);
28207ec681f3Smrg   unsigned stream = stream_info ? stream_info->rasterizationStream : 0;
28217ec681f3Smrg
28227ec681f3Smrg   pipeline->pc_raster_cntl = A6XX_PC_RASTER_CNTL_STREAM(stream);
28237ec681f3Smrg   pipeline->vpc_unknown_9107 = 0;
28247ec681f3Smrg   if (rast_info->rasterizerDiscardEnable) {
28257ec681f3Smrg      pipeline->pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD;
28267ec681f3Smrg      pipeline->vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
28277ec681f3Smrg   }
2828361fc4cbSmaya
28297ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4)) {
28307ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = pipeline->pc_raster_cntl));
28317ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = pipeline->vpc_unknown_9107));
28327ec681f3Smrg   }
28337ec681f3Smrg
28347ec681f3Smrg   pipeline->gras_su_cntl =
28357ec681f3Smrg      tu6_gras_su_cntl(rast_info, pipeline->line_mode, builder->multiview_mask != 0);
2836361fc4cbSmaya
28377ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2))
28387ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
2839361fc4cbSmaya
28407ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
28417ec681f3Smrg      tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
2842361fc4cbSmaya                          rast_info->depthBiasClamp,
2843361fc4cbSmaya                          rast_info->depthBiasSlopeFactor);
2844361fc4cbSmaya   }
2845361fc4cbSmaya
28467ec681f3Smrg   const struct VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_state =
28477ec681f3Smrg      vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
28487ec681f3Smrg   pipeline->provoking_vertex_last = provoking_vtx_state &&
28497ec681f3Smrg      provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
2850361fc4cbSmaya}
2851361fc4cbSmaya
2852361fc4cbSmayastatic void
2853361fc4cbSmayatu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
2854361fc4cbSmaya                                        struct tu_pipeline *pipeline)
2855361fc4cbSmaya{
2856361fc4cbSmaya   /* The spec says:
2857361fc4cbSmaya    *
2858361fc4cbSmaya    *    pDepthStencilState is a pointer to an instance of the
2859361fc4cbSmaya    *    VkPipelineDepthStencilStateCreateInfo structure, and is ignored if
2860361fc4cbSmaya    *    the pipeline has rasterization disabled or if the subpass of the
2861361fc4cbSmaya    *    render pass the pipeline is created against does not use a
2862361fc4cbSmaya    *    depth/stencil attachment.
2863361fc4cbSmaya    */
2864361fc4cbSmaya   const VkPipelineDepthStencilStateCreateInfo *ds_info =
28657ec681f3Smrg      builder->create_info->pDepthStencilState;
28667ec681f3Smrg   const VkPipelineRasterizationStateCreateInfo *rast_info =
28677ec681f3Smrg      builder->create_info->pRasterizationState;
28687ec681f3Smrg   uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
28697ec681f3Smrg   struct tu_cs cs;
28707ec681f3Smrg
28717ec681f3Smrg   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
28727ec681f3Smrg       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
28737ec681f3Smrg      if (ds_info->depthTestEnable) {
28747ec681f3Smrg         rb_depth_cntl |=
28757ec681f3Smrg            A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE |
28767ec681f3Smrg            A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
28777ec681f3Smrg            A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
28787ec681f3Smrg
28797ec681f3Smrg         if (rast_info->depthClampEnable)
28807ec681f3Smrg            rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
28817ec681f3Smrg
28827ec681f3Smrg         if (ds_info->depthWriteEnable)
28837ec681f3Smrg            rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
28847ec681f3Smrg      }
2885361fc4cbSmaya
28867ec681f3Smrg      if (ds_info->depthBoundsTestEnable)
28877ec681f3Smrg         rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
2888361fc4cbSmaya
28897ec681f3Smrg      if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable)
28907ec681f3Smrg         tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl);
28917ec681f3Smrg   } else {
28927ec681f3Smrg      /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
28937ec681f3Smrg       * to 0 when this pipeline is used, as enabling depth test when there
28947ec681f3Smrg       * is no depth attachment is a problem (at least for the S8_UINT case)
28957ec681f3Smrg       */
28967ec681f3Smrg      if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
28977ec681f3Smrg         pipeline->rb_depth_cntl_disable = true;
28987ec681f3Smrg   }
28997ec681f3Smrg
29007ec681f3Smrg   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
29017ec681f3Smrg      const VkStencilOpState *front = &ds_info->front;
29027ec681f3Smrg      const VkStencilOpState *back = &ds_info->back;
29037ec681f3Smrg
29047ec681f3Smrg      rb_stencil_cntl |=
29057ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
29067ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
29077ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
29087ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
29097ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
29107ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
29117ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
29127ec681f3Smrg         A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
29137ec681f3Smrg
29147ec681f3Smrg      if (ds_info->stencilTestEnable) {
29157ec681f3Smrg         rb_stencil_cntl |=
29167ec681f3Smrg            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
29177ec681f3Smrg            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
29187ec681f3Smrg            A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
29197ec681f3Smrg      }
29207ec681f3Smrg   }
29217ec681f3Smrg
29227ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
29237ec681f3Smrg      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
29247ec681f3Smrg      tu_cs_emit(&cs, rb_depth_cntl);
29257ec681f3Smrg   }
29267ec681f3Smrg   pipeline->rb_depth_cntl = rb_depth_cntl;
29277ec681f3Smrg
29287ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) {
29297ec681f3Smrg      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
29307ec681f3Smrg      tu_cs_emit(&cs, rb_stencil_cntl);
29317ec681f3Smrg   }
29327ec681f3Smrg   pipeline->rb_stencil_cntl = rb_stencil_cntl;
29337ec681f3Smrg
29347ec681f3Smrg   /* the remaining draw states arent used if there is no d/s, leave them empty */
29357ec681f3Smrg   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
29367ec681f3Smrg      return;
2937361fc4cbSmaya
29387ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
29397ec681f3Smrg      tu_cs_emit_regs(&cs,
29407ec681f3Smrg                      A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds),
29417ec681f3Smrg                      A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds));
29427ec681f3Smrg   }
2943361fc4cbSmaya
29447ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
29457ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
29467ec681f3Smrg                                               .bfmask = ds_info->back.compareMask & 0xff));
2947361fc4cbSmaya   }
29487ec681f3Smrg
29497ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) {
29507ec681f3Smrg      update_stencil_mask(&pipeline->stencil_wrmask,  VK_STENCIL_FACE_FRONT_BIT, ds_info->front.writeMask);
29517ec681f3Smrg      update_stencil_mask(&pipeline->stencil_wrmask,  VK_STENCIL_FACE_BACK_BIT, ds_info->back.writeMask);
29527ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = pipeline->stencil_wrmask));
2953361fc4cbSmaya   }
29547ec681f3Smrg
29557ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) {
29567ec681f3Smrg      tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff,
29577ec681f3Smrg                                              .bfref = ds_info->back.reference & 0xff));
2958361fc4cbSmaya   }
2959361fc4cbSmaya
29607ec681f3Smrg   if (builder->shaders[MESA_SHADER_FRAGMENT]) {
29617ec681f3Smrg      const struct ir3_shader_variant *fs = &builder->shaders[MESA_SHADER_FRAGMENT]->ir3_shader->variants[0];
29627ec681f3Smrg      if (fs->has_kill || fs->no_earlyz || fs->writes_pos) {
29637ec681f3Smrg         pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
29647ec681f3Smrg      }
29657ec681f3Smrg      if (fs->no_earlyz || fs->writes_pos) {
29667ec681f3Smrg         pipeline->lrz.force_disable_mask = TU_LRZ_FORCE_DISABLE_LRZ;
29677ec681f3Smrg      }
29687ec681f3Smrg   }
2969361fc4cbSmaya}
2970361fc4cbSmaya
2971361fc4cbSmayastatic void
2972361fc4cbSmayatu_pipeline_builder_parse_multisample_and_color_blend(
2973361fc4cbSmaya   struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
2974361fc4cbSmaya{
2975361fc4cbSmaya   /* The spec says:
2976361fc4cbSmaya    *
2977361fc4cbSmaya    *    pMultisampleState is a pointer to an instance of the
2978361fc4cbSmaya    *    VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline
2979361fc4cbSmaya    *    has rasterization disabled.
2980361fc4cbSmaya    *
2981361fc4cbSmaya    * Also,
2982361fc4cbSmaya    *
2983361fc4cbSmaya    *    pColorBlendState is a pointer to an instance of the
2984361fc4cbSmaya    *    VkPipelineColorBlendStateCreateInfo structure, and is ignored if the
2985361fc4cbSmaya    *    pipeline has rasterization disabled or if the subpass of the render
2986361fc4cbSmaya    *    pass the pipeline is created against does not use any color
2987361fc4cbSmaya    *    attachments.
2988361fc4cbSmaya    *
2989361fc4cbSmaya    * We leave the relevant registers stale when rasterization is disabled.
2990361fc4cbSmaya    */
2991361fc4cbSmaya   if (builder->rasterizer_discard)
2992361fc4cbSmaya      return;
2993361fc4cbSmaya
2994361fc4cbSmaya   static const VkPipelineColorBlendStateCreateInfo dummy_blend_info;
2995361fc4cbSmaya   const VkPipelineMultisampleStateCreateInfo *msaa_info =
2996361fc4cbSmaya      builder->create_info->pMultisampleState;
2997361fc4cbSmaya   const VkPipelineColorBlendStateCreateInfo *blend_info =
2998361fc4cbSmaya      builder->use_color_attachments ? builder->create_info->pColorBlendState
2999361fc4cbSmaya                                     : &dummy_blend_info;
3000361fc4cbSmaya
30017ec681f3Smrg   struct tu_cs cs;
30027ec681f3Smrg   pipeline->blend_state =
30037ec681f3Smrg      tu_cs_draw_state(&pipeline->cs, &cs, blend_info->attachmentCount * 3 + 4);
3004361fc4cbSmaya
3005361fc4cbSmaya   uint32_t blend_enable_mask;
30067ec681f3Smrg   tu6_emit_rb_mrt_controls(&cs, blend_info,
3007361fc4cbSmaya                            builder->color_attachment_formats,
3008361fc4cbSmaya                            &blend_enable_mask);
3009361fc4cbSmaya
30107ec681f3Smrg   tu6_emit_blend_control(&cs, blend_enable_mask,
30117ec681f3Smrg                          builder->use_dual_src_blend, msaa_info);
30127ec681f3Smrg
30137ec681f3Smrg   assert(cs.cur == cs.end); /* validate draw state size */
30147ec681f3Smrg
30157ec681f3Smrg   if (blend_enable_mask) {
30167ec681f3Smrg      for (int i = 0; i < blend_info->attachmentCount; i++) {
30177ec681f3Smrg         VkPipelineColorBlendAttachmentState blendAttachment = blend_info->pAttachments[i];
30187ec681f3Smrg         /* Disable LRZ writes when blend is enabled, since the
30197ec681f3Smrg          * resulting pixel value from the blend-draw
30207ec681f3Smrg          * depends on an earlier draw, which LRZ in the draw pass
30217ec681f3Smrg          * could early-reject if the previous blend-enabled draw wrote LRZ.
30227ec681f3Smrg          *
30237ec681f3Smrg          * From the PoV of LRZ, having masked color channels is
30247ec681f3Smrg          * the same as having blend enabled, in that the draw will
30257ec681f3Smrg          * care about the fragments from an earlier draw.
30267ec681f3Smrg          *
30277ec681f3Smrg          * TODO: We need to disable LRZ writes only for the binning pass.
30287ec681f3Smrg          * Therefore, we need to emit it in a separate draw state. We keep
30297ec681f3Smrg          * it disabled for sysmem path as well for the moment.
30307ec681f3Smrg          */
30317ec681f3Smrg         if (blendAttachment.blendEnable || blendAttachment.colorWriteMask != 0xf) {
30327ec681f3Smrg            pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
30337ec681f3Smrg         }
30347ec681f3Smrg      }
30357ec681f3Smrg   }
30367ec681f3Smrg
30377ec681f3Smrg   if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
30387ec681f3Smrg      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
30397ec681f3Smrg      tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4);
30407ec681f3Smrg   }
30417ec681f3Smrg
30427ec681f3Smrg   const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
30437ec681f3Smrg      vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
30447ec681f3Smrg   const VkSampleLocationsInfoEXT *samp_loc = NULL;
3045361fc4cbSmaya
30467ec681f3Smrg   if (sample_locations && sample_locations->sampleLocationsEnable)
30477ec681f3Smrg      samp_loc = &sample_locations->sampleLocationsInfo;
3048361fc4cbSmaya
30497ec681f3Smrg    if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS,
30507ec681f3Smrg                                 samp_loc ? 9 : 6)) {
30517ec681f3Smrg      tu6_emit_sample_locations(&cs, samp_loc);
30527ec681f3Smrg    }
3053361fc4cbSmaya}
3054361fc4cbSmaya
3055361fc4cbSmayastatic void
3056361fc4cbSmayatu_pipeline_finish(struct tu_pipeline *pipeline,
3057361fc4cbSmaya                   struct tu_device *dev,
3058361fc4cbSmaya                   const VkAllocationCallbacks *alloc)
3059361fc4cbSmaya{
30607ec681f3Smrg   tu_cs_finish(&pipeline->cs);
30617ec681f3Smrg
30627ec681f3Smrg   if (pipeline->pvtmem_bo.size)
30637ec681f3Smrg      tu_bo_finish(dev, &pipeline->pvtmem_bo);
3064361fc4cbSmaya
30657ec681f3Smrg   ralloc_free(pipeline->executables_mem_ctx);
3066361fc4cbSmaya}
3067361fc4cbSmaya
3068361fc4cbSmayastatic VkResult
3069361fc4cbSmayatu_pipeline_builder_build(struct tu_pipeline_builder *builder,
3070361fc4cbSmaya                          struct tu_pipeline **pipeline)
3071361fc4cbSmaya{
30727ec681f3Smrg   VkResult result;
30737ec681f3Smrg
30747ec681f3Smrg   *pipeline = vk_object_zalloc(&builder->device->vk, builder->alloc,
30757ec681f3Smrg                                sizeof(**pipeline), VK_OBJECT_TYPE_PIPELINE);
30767ec681f3Smrg   if (!*pipeline)
30777ec681f3Smrg      return VK_ERROR_OUT_OF_HOST_MEMORY;
30787ec681f3Smrg
30797ec681f3Smrg   (*pipeline)->layout = builder->layout;
30807ec681f3Smrg   (*pipeline)->executables_mem_ctx = ralloc_context(NULL);
30817ec681f3Smrg   util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
3082361fc4cbSmaya
3083361fc4cbSmaya   /* compile and upload shaders */
30847ec681f3Smrg   result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
30857ec681f3Smrg   if (result != VK_SUCCESS) {
30867ec681f3Smrg      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
30877ec681f3Smrg      return result;
30887ec681f3Smrg   }
30897ec681f3Smrg
30907ec681f3Smrg   result = tu_pipeline_allocate_cs(builder->device, *pipeline, builder, NULL);
3091361fc4cbSmaya   if (result != VK_SUCCESS) {
30927ec681f3Smrg      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
30937ec681f3Smrg      return result;
30947ec681f3Smrg   }
30957ec681f3Smrg
30967ec681f3Smrg   for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++)
30977ec681f3Smrg      builder->shader_iova[i] = tu_upload_variant(*pipeline, builder->variants[i]);
30987ec681f3Smrg
30997ec681f3Smrg   builder->binning_vs_iova =
31007ec681f3Smrg      tu_upload_variant(*pipeline, builder->binning_variant);
31017ec681f3Smrg
31027ec681f3Smrg   /* Setup private memory. Note that because we're sharing the same private
31037ec681f3Smrg    * memory for all stages, all stages must use the same config, or else
31047ec681f3Smrg    * fibers from one stage might overwrite fibers in another.
31057ec681f3Smrg    */
3106361fc4cbSmaya
31077ec681f3Smrg   uint32_t pvtmem_size = 0;
31087ec681f3Smrg   bool per_wave = true;
31097ec681f3Smrg   for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
31107ec681f3Smrg      if (builder->variants[i]) {
31117ec681f3Smrg         pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size);
31127ec681f3Smrg         if (!builder->variants[i]->pvtmem_per_wave)
31137ec681f3Smrg            per_wave = false;
31147ec681f3Smrg      }
31157ec681f3Smrg   }
31167ec681f3Smrg
31177ec681f3Smrg   if (builder->binning_variant) {
31187ec681f3Smrg      pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
31197ec681f3Smrg      if (!builder->binning_variant->pvtmem_per_wave)
31207ec681f3Smrg         per_wave = false;
31217ec681f3Smrg   }
31227ec681f3Smrg
31237ec681f3Smrg   result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
31247ec681f3Smrg                            pvtmem_size, per_wave);
31257ec681f3Smrg   if (result != VK_SUCCESS) {
31267ec681f3Smrg      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
3127361fc4cbSmaya      return result;
3128361fc4cbSmaya   }
3129361fc4cbSmaya
3130361fc4cbSmaya   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
3131361fc4cbSmaya   tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
3132361fc4cbSmaya   tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
3133361fc4cbSmaya   tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
31347ec681f3Smrg   tu_pipeline_builder_parse_tessellation(builder, *pipeline);
3135361fc4cbSmaya   tu_pipeline_builder_parse_viewport(builder, *pipeline);
3136361fc4cbSmaya   tu_pipeline_builder_parse_rasterization(builder, *pipeline);
3137361fc4cbSmaya   tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
3138361fc4cbSmaya   tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
31397ec681f3Smrg   tu6_emit_load_state(*pipeline, false);
3140361fc4cbSmaya
3141361fc4cbSmaya   /* we should have reserved enough space upfront such that the CS never
3142361fc4cbSmaya    * grows
3143361fc4cbSmaya    */
3144361fc4cbSmaya   assert((*pipeline)->cs.bo_count == 1);
3145361fc4cbSmaya
3146361fc4cbSmaya   return VK_SUCCESS;
3147361fc4cbSmaya}
3148361fc4cbSmaya
3149361fc4cbSmayastatic void
3150361fc4cbSmayatu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
3151361fc4cbSmaya{
31527ec681f3Smrg   for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
3153361fc4cbSmaya      if (!builder->shaders[i])
3154361fc4cbSmaya         continue;
3155361fc4cbSmaya      tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
3156361fc4cbSmaya   }
3157361fc4cbSmaya}
3158361fc4cbSmaya
3159361fc4cbSmayastatic void
3160361fc4cbSmayatu_pipeline_builder_init_graphics(
3161361fc4cbSmaya   struct tu_pipeline_builder *builder,
3162361fc4cbSmaya   struct tu_device *dev,
3163361fc4cbSmaya   struct tu_pipeline_cache *cache,
3164361fc4cbSmaya   const VkGraphicsPipelineCreateInfo *create_info,
3165361fc4cbSmaya   const VkAllocationCallbacks *alloc)
3166361fc4cbSmaya{
31677ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
31687ec681f3Smrg
3169361fc4cbSmaya   *builder = (struct tu_pipeline_builder) {
3170361fc4cbSmaya      .device = dev,
3171361fc4cbSmaya      .cache = cache,
3172361fc4cbSmaya      .create_info = create_info,
3173361fc4cbSmaya      .alloc = alloc,
31747ec681f3Smrg      .layout = layout,
3175361fc4cbSmaya   };
3176361fc4cbSmaya
31777ec681f3Smrg   bool rasterizer_discard_dynamic = false;
31787ec681f3Smrg   if (create_info->pDynamicState) {
31797ec681f3Smrg      for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) {
31807ec681f3Smrg         if (create_info->pDynamicState->pDynamicStates[i] ==
31817ec681f3Smrg               VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT) {
31827ec681f3Smrg            rasterizer_discard_dynamic = true;
31837ec681f3Smrg            break;
31847ec681f3Smrg         }
31857ec681f3Smrg      }
31867ec681f3Smrg   }
31877ec681f3Smrg
31887ec681f3Smrg   const struct tu_render_pass *pass =
31897ec681f3Smrg      tu_render_pass_from_handle(create_info->renderPass);
31907ec681f3Smrg   const struct tu_subpass *subpass =
31917ec681f3Smrg      &pass->subpasses[create_info->subpass];
31927ec681f3Smrg
31937ec681f3Smrg   builder->multiview_mask = subpass->multiview_mask;
31947ec681f3Smrg
3195361fc4cbSmaya   builder->rasterizer_discard =
31967ec681f3Smrg      builder->create_info->pRasterizationState->rasterizerDiscardEnable &&
31977ec681f3Smrg      !rasterizer_discard_dynamic;
31987ec681f3Smrg
31997ec681f3Smrg   /* variableMultisampleRate support */
32007ec681f3Smrg   builder->emit_msaa_state = (subpass->samples == 0) && !builder->rasterizer_discard;
3201361fc4cbSmaya
3202361fc4cbSmaya   if (builder->rasterizer_discard) {
3203361fc4cbSmaya      builder->samples = VK_SAMPLE_COUNT_1_BIT;
3204361fc4cbSmaya   } else {
3205361fc4cbSmaya      builder->samples = create_info->pMultisampleState->rasterizationSamples;
32067ec681f3Smrg      builder->alpha_to_coverage = create_info->pMultisampleState->alphaToCoverageEnable;
3207361fc4cbSmaya
32087ec681f3Smrg      const uint32_t a = subpass->depth_stencil_attachment.attachment;
32097ec681f3Smrg      builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
32107ec681f3Smrg         pass->attachments[a].format : VK_FORMAT_UNDEFINED;
3211361fc4cbSmaya
32127ec681f3Smrg      assert(subpass->color_count == 0 ||
32137ec681f3Smrg             !create_info->pColorBlendState ||
32147ec681f3Smrg             subpass->color_count == create_info->pColorBlendState->attachmentCount);
3215361fc4cbSmaya      builder->color_attachment_count = subpass->color_count;
3216361fc4cbSmaya      for (uint32_t i = 0; i < subpass->color_count; i++) {
3217361fc4cbSmaya         const uint32_t a = subpass->color_attachments[i].attachment;
3218361fc4cbSmaya         if (a == VK_ATTACHMENT_UNUSED)
3219361fc4cbSmaya            continue;
3220361fc4cbSmaya
3221361fc4cbSmaya         builder->color_attachment_formats[i] = pass->attachments[a].format;
3222361fc4cbSmaya         builder->use_color_attachments = true;
32237ec681f3Smrg         builder->render_components |= 0xf << (i * 4);
32247ec681f3Smrg      }
32257ec681f3Smrg
32267ec681f3Smrg      if (tu_blend_state_is_dual_src(create_info->pColorBlendState)) {
32277ec681f3Smrg         builder->color_attachment_count++;
32287ec681f3Smrg         builder->use_dual_src_blend = true;
32297ec681f3Smrg         /* dual source blending has an extra fs output in the 2nd slot */
32307ec681f3Smrg         if (subpass->color_attachments[0].attachment != VK_ATTACHMENT_UNUSED)
32317ec681f3Smrg            builder->render_components |= 0xf << 4;
3232361fc4cbSmaya      }
3233361fc4cbSmaya   }
3234361fc4cbSmaya}
3235361fc4cbSmaya
32367ec681f3Smrgstatic VkResult
32377ec681f3Smrgtu_graphics_pipeline_create(VkDevice device,
32387ec681f3Smrg                            VkPipelineCache pipelineCache,
32397ec681f3Smrg                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
32407ec681f3Smrg                            const VkAllocationCallbacks *pAllocator,
32417ec681f3Smrg                            VkPipeline *pPipeline)
32427ec681f3Smrg{
32437ec681f3Smrg   TU_FROM_HANDLE(tu_device, dev, device);
32447ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
32457ec681f3Smrg
32467ec681f3Smrg   struct tu_pipeline_builder builder;
32477ec681f3Smrg   tu_pipeline_builder_init_graphics(&builder, dev, cache,
32487ec681f3Smrg                                     pCreateInfo, pAllocator);
32497ec681f3Smrg
32507ec681f3Smrg   struct tu_pipeline *pipeline = NULL;
32517ec681f3Smrg   VkResult result = tu_pipeline_builder_build(&builder, &pipeline);
32527ec681f3Smrg   tu_pipeline_builder_finish(&builder);
32537ec681f3Smrg
32547ec681f3Smrg   if (result == VK_SUCCESS)
32557ec681f3Smrg      *pPipeline = tu_pipeline_to_handle(pipeline);
32567ec681f3Smrg   else
32577ec681f3Smrg      *pPipeline = VK_NULL_HANDLE;
32587ec681f3Smrg
32597ec681f3Smrg   return result;
32607ec681f3Smrg}
32617ec681f3Smrg
32627ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
3263361fc4cbSmayatu_CreateGraphicsPipelines(VkDevice device,
3264361fc4cbSmaya                           VkPipelineCache pipelineCache,
3265361fc4cbSmaya                           uint32_t count,
3266361fc4cbSmaya                           const VkGraphicsPipelineCreateInfo *pCreateInfos,
3267361fc4cbSmaya                           const VkAllocationCallbacks *pAllocator,
3268361fc4cbSmaya                           VkPipeline *pPipelines)
3269361fc4cbSmaya{
32707ec681f3Smrg   VkResult final_result = VK_SUCCESS;
3271361fc4cbSmaya
3272361fc4cbSmaya   for (uint32_t i = 0; i < count; i++) {
32737ec681f3Smrg      VkResult result = tu_graphics_pipeline_create(device, pipelineCache,
32747ec681f3Smrg                                                    &pCreateInfos[i], pAllocator,
32757ec681f3Smrg                                                    &pPipelines[i]);
3276361fc4cbSmaya
32777ec681f3Smrg      if (result != VK_SUCCESS)
32787ec681f3Smrg         final_result = result;
3279361fc4cbSmaya   }
3280361fc4cbSmaya
32817ec681f3Smrg   return final_result;
3282361fc4cbSmaya}
3283361fc4cbSmaya
3284361fc4cbSmayastatic VkResult
32857ec681f3Smrgtu_compute_pipeline_create(VkDevice device,
3286361fc4cbSmaya                           VkPipelineCache _cache,
3287361fc4cbSmaya                           const VkComputePipelineCreateInfo *pCreateInfo,
3288361fc4cbSmaya                           const VkAllocationCallbacks *pAllocator,
3289361fc4cbSmaya                           VkPipeline *pPipeline)
3290361fc4cbSmaya{
32917ec681f3Smrg   TU_FROM_HANDLE(tu_device, dev, device);
32927ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
32937ec681f3Smrg   const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
32947ec681f3Smrg   VkResult result;
32957ec681f3Smrg
32967ec681f3Smrg   struct tu_pipeline *pipeline;
32977ec681f3Smrg
32987ec681f3Smrg   *pPipeline = VK_NULL_HANDLE;
32997ec681f3Smrg
33007ec681f3Smrg   pipeline = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pipeline),
33017ec681f3Smrg                               VK_OBJECT_TYPE_PIPELINE);
33027ec681f3Smrg   if (!pipeline)
33037ec681f3Smrg      return VK_ERROR_OUT_OF_HOST_MEMORY;
33047ec681f3Smrg
33057ec681f3Smrg   pipeline->layout = layout;
33067ec681f3Smrg
33077ec681f3Smrg   pipeline->executables_mem_ctx = ralloc_context(NULL);
33087ec681f3Smrg   util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx);
33097ec681f3Smrg
33107ec681f3Smrg   struct ir3_shader_key key = {};
33117ec681f3Smrg
33127ec681f3Smrg   nir_shader *nir = tu_spirv_to_nir(dev, stage_info, MESA_SHADER_COMPUTE);
33137ec681f3Smrg
33147ec681f3Smrg   const bool executable_info = pCreateInfo->flags &
33157ec681f3Smrg      VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
33167ec681f3Smrg
33177ec681f3Smrg   char *nir_initial_disasm = executable_info ?
33187ec681f3Smrg      nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
33197ec681f3Smrg
33207ec681f3Smrg   struct tu_shader *shader =
33217ec681f3Smrg      tu_shader_create(dev, nir, 0, layout, pAllocator);
33227ec681f3Smrg   if (!shader) {
33237ec681f3Smrg      result = VK_ERROR_OUT_OF_HOST_MEMORY;
33247ec681f3Smrg      goto fail;
33257ec681f3Smrg   }
33267ec681f3Smrg
33277ec681f3Smrg   pipeline->active_desc_sets = shader->active_desc_sets;
33287ec681f3Smrg
33297ec681f3Smrg   bool created;
33307ec681f3Smrg   struct ir3_shader_variant *v =
33317ec681f3Smrg      ir3_shader_get_variant(shader->ir3_shader, &key, false, executable_info, &created);
33327ec681f3Smrg   if (!v) {
33337ec681f3Smrg      result = VK_ERROR_OUT_OF_HOST_MEMORY;
33347ec681f3Smrg      goto fail;
33357ec681f3Smrg   }
33367ec681f3Smrg
33377ec681f3Smrg   tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE],
33387ec681f3Smrg                           shader, v);
33397ec681f3Smrg
33407ec681f3Smrg   result = tu_pipeline_allocate_cs(dev, pipeline, NULL, v);
33417ec681f3Smrg   if (result != VK_SUCCESS)
33427ec681f3Smrg      goto fail;
33437ec681f3Smrg
33447ec681f3Smrg   uint64_t shader_iova = tu_upload_variant(pipeline, v);
33457ec681f3Smrg
33467ec681f3Smrg   struct tu_pvtmem_config pvtmem;
33477ec681f3Smrg   tu_setup_pvtmem(dev, pipeline, &pvtmem, v->pvtmem_size, v->pvtmem_per_wave);
33487ec681f3Smrg
33497ec681f3Smrg   for (int i = 0; i < 3; i++)
33507ec681f3Smrg      pipeline->compute.local_size[i] = v->local_size[i];
33517ec681f3Smrg
33527ec681f3Smrg   pipeline->compute.subgroup_size = v->info.double_threadsize ? 128 : 64;
33537ec681f3Smrg
33547ec681f3Smrg   struct tu_cs prog_cs;
33557ec681f3Smrg   uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v);
33567ec681f3Smrg   tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs);
33577ec681f3Smrg   tu6_emit_cs_config(&prog_cs, shader, v, &pvtmem, shader_iova);
33587ec681f3Smrg   pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
33597ec681f3Smrg
33607ec681f3Smrg   tu6_emit_load_state(pipeline, true);
33617ec681f3Smrg
33627ec681f3Smrg   tu_append_executable(pipeline, v, nir_initial_disasm);
33637ec681f3Smrg
33647ec681f3Smrg   tu_shader_destroy(dev, shader, pAllocator);
33657ec681f3Smrg
33667ec681f3Smrg   *pPipeline = tu_pipeline_to_handle(pipeline);
33677ec681f3Smrg
3368361fc4cbSmaya   return VK_SUCCESS;
33697ec681f3Smrg
33707ec681f3Smrgfail:
33717ec681f3Smrg   if (shader)
33727ec681f3Smrg      tu_shader_destroy(dev, shader, pAllocator);
33737ec681f3Smrg
33747ec681f3Smrg   vk_object_free(&dev->vk, pAllocator, pipeline);
33757ec681f3Smrg
33767ec681f3Smrg   return result;
3377361fc4cbSmaya}
3378361fc4cbSmaya
33797ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
33807ec681f3Smrgtu_CreateComputePipelines(VkDevice device,
3381361fc4cbSmaya                          VkPipelineCache pipelineCache,
3382361fc4cbSmaya                          uint32_t count,
3383361fc4cbSmaya                          const VkComputePipelineCreateInfo *pCreateInfos,
3384361fc4cbSmaya                          const VkAllocationCallbacks *pAllocator,
3385361fc4cbSmaya                          VkPipeline *pPipelines)
3386361fc4cbSmaya{
33877ec681f3Smrg   VkResult final_result = VK_SUCCESS;
3388361fc4cbSmaya
33897ec681f3Smrg   for (uint32_t i = 0; i < count; i++) {
33907ec681f3Smrg      VkResult result = tu_compute_pipeline_create(device, pipelineCache,
33917ec681f3Smrg                                                   &pCreateInfos[i],
33927ec681f3Smrg                                                   pAllocator, &pPipelines[i]);
33937ec681f3Smrg      if (result != VK_SUCCESS)
33947ec681f3Smrg         final_result = result;
3395361fc4cbSmaya   }
3396361fc4cbSmaya
33977ec681f3Smrg   return final_result;
3398361fc4cbSmaya}
3399361fc4cbSmaya
34007ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
3401361fc4cbSmayatu_DestroyPipeline(VkDevice _device,
3402361fc4cbSmaya                   VkPipeline _pipeline,
3403361fc4cbSmaya                   const VkAllocationCallbacks *pAllocator)
3404361fc4cbSmaya{
3405361fc4cbSmaya   TU_FROM_HANDLE(tu_device, dev, _device);
3406361fc4cbSmaya   TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
3407361fc4cbSmaya
3408361fc4cbSmaya   if (!_pipeline)
3409361fc4cbSmaya      return;
3410361fc4cbSmaya
3411361fc4cbSmaya   tu_pipeline_finish(pipeline, dev, pAllocator);
34127ec681f3Smrg   vk_object_free(&dev->vk, pAllocator, pipeline);
34137ec681f3Smrg}
34147ec681f3Smrg
/* Format a string into `field`, which must be a char array (sizeof is taken
 * on it).  The whole array is zeroed first, then filled with snprintf; the
 * assert checks that the output is non-empty and was not truncated.
 */
#define WRITE_STR(field, ...) do {                              \
   memset(field, 0, sizeof(field));                             \
   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(_i > 0 && _i < sizeof(field));                        \
} while (0)
34207ec681f3Smrg
34217ec681f3Smrgstatic const struct tu_pipeline_executable *
34227ec681f3Smrgtu_pipeline_get_executable(struct tu_pipeline *pipeline, uint32_t index)
34237ec681f3Smrg{
34247ec681f3Smrg   assert(index < util_dynarray_num_elements(&pipeline->executables,
34257ec681f3Smrg                                             struct tu_pipeline_executable));
34267ec681f3Smrg   return util_dynarray_element(
34277ec681f3Smrg      &pipeline->executables, struct tu_pipeline_executable, index);
34287ec681f3Smrg}
34297ec681f3Smrg
34307ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
34317ec681f3Smrgtu_GetPipelineExecutablePropertiesKHR(
34327ec681f3Smrg      VkDevice _device,
34337ec681f3Smrg      const VkPipelineInfoKHR* pPipelineInfo,
34347ec681f3Smrg      uint32_t* pExecutableCount,
34357ec681f3Smrg      VkPipelineExecutablePropertiesKHR* pProperties)
34367ec681f3Smrg{
34377ec681f3Smrg   TU_FROM_HANDLE(tu_device, dev, _device);
34387ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline, pipeline, pPipelineInfo->pipeline);
34397ec681f3Smrg   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
34407ec681f3Smrg
34417ec681f3Smrg   util_dynarray_foreach (&pipeline->executables, struct tu_pipeline_executable, exe) {
34427ec681f3Smrg      vk_outarray_append(&out, props) {
34437ec681f3Smrg         gl_shader_stage stage = exe->stage;
34447ec681f3Smrg         props->stages = mesa_to_vk_shader_stage(stage);
34457ec681f3Smrg
34467ec681f3Smrg         if (!exe->is_binning)
34477ec681f3Smrg            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_abbrev(stage));
34487ec681f3Smrg         else
34497ec681f3Smrg            WRITE_STR(props->name, "Binning VS");
34507ec681f3Smrg
34517ec681f3Smrg         WRITE_STR(props->description, "%s", _mesa_shader_stage_to_string(stage));
34527ec681f3Smrg
34537ec681f3Smrg         props->subgroupSize =
34547ec681f3Smrg            dev->compiler->threadsize_base * (exe->stats.double_threadsize ? 2 : 1);
34557ec681f3Smrg      }
34567ec681f3Smrg   }
34577ec681f3Smrg
34587ec681f3Smrg   return vk_outarray_status(&out);
34597ec681f3Smrg}
34607ec681f3Smrg
34617ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
34627ec681f3Smrgtu_GetPipelineExecutableStatisticsKHR(
34637ec681f3Smrg      VkDevice _device,
34647ec681f3Smrg      const VkPipelineExecutableInfoKHR* pExecutableInfo,
34657ec681f3Smrg      uint32_t* pStatisticCount,
34667ec681f3Smrg      VkPipelineExecutableStatisticKHR* pStatistics)
34677ec681f3Smrg{
34687ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
34697ec681f3Smrg   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
34707ec681f3Smrg
34717ec681f3Smrg   const struct tu_pipeline_executable *exe =
34727ec681f3Smrg      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
34737ec681f3Smrg
34747ec681f3Smrg   vk_outarray_append(&out, stat) {
34757ec681f3Smrg      WRITE_STR(stat->name, "Max Waves Per Core");
34767ec681f3Smrg      WRITE_STR(stat->description,
34777ec681f3Smrg                "Maximum number of simultaneous waves per core.");
34787ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
34797ec681f3Smrg      stat->value.u64 = exe->stats.max_waves;
34807ec681f3Smrg   }
34817ec681f3Smrg
34827ec681f3Smrg   vk_outarray_append(&out, stat) {
34837ec681f3Smrg      WRITE_STR(stat->name, "Instruction Count");
34847ec681f3Smrg      WRITE_STR(stat->description,
34857ec681f3Smrg                "Total number of IR3 instructions in the final generated "
34867ec681f3Smrg                "shader executable.");
34877ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
34887ec681f3Smrg      stat->value.u64 = exe->stats.instrs_count;
34897ec681f3Smrg   }
34907ec681f3Smrg
34917ec681f3Smrg   vk_outarray_append(&out, stat) {
34927ec681f3Smrg      WRITE_STR(stat->name, "NOPs Count");
34937ec681f3Smrg      WRITE_STR(stat->description,
34947ec681f3Smrg                "Number of NOP instructions in the final generated "
34957ec681f3Smrg                "shader executable.");
34967ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
34977ec681f3Smrg      stat->value.u64 = exe->stats.nops_count;
34987ec681f3Smrg   }
34997ec681f3Smrg
35007ec681f3Smrg   vk_outarray_append(&out, stat) {
35017ec681f3Smrg      WRITE_STR(stat->name, "MOV Count");
35027ec681f3Smrg      WRITE_STR(stat->description,
35037ec681f3Smrg                "Number of MOV instructions in the final generated "
35047ec681f3Smrg                "shader executable.");
35057ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35067ec681f3Smrg      stat->value.u64 = exe->stats.mov_count;
35077ec681f3Smrg   }
35087ec681f3Smrg
35097ec681f3Smrg   vk_outarray_append(&out, stat) {
35107ec681f3Smrg      WRITE_STR(stat->name, "COV Count");
35117ec681f3Smrg      WRITE_STR(stat->description,
35127ec681f3Smrg                "Number of COV instructions in the final generated "
35137ec681f3Smrg                "shader executable.");
35147ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35157ec681f3Smrg      stat->value.u64 = exe->stats.cov_count;
35167ec681f3Smrg   }
35177ec681f3Smrg
35187ec681f3Smrg   vk_outarray_append(&out, stat) {
35197ec681f3Smrg      WRITE_STR(stat->name, "Registers used");
35207ec681f3Smrg      WRITE_STR(stat->description,
35217ec681f3Smrg                "Number of registers used in the final generated "
35227ec681f3Smrg                "shader executable.");
35237ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35247ec681f3Smrg      stat->value.u64 = exe->stats.max_reg + 1;
35257ec681f3Smrg   }
35267ec681f3Smrg
35277ec681f3Smrg   vk_outarray_append(&out, stat) {
35287ec681f3Smrg      WRITE_STR(stat->name, "Half-registers used");
35297ec681f3Smrg      WRITE_STR(stat->description,
35307ec681f3Smrg                "Number of half-registers used in the final generated "
35317ec681f3Smrg                "shader executable.");
35327ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35337ec681f3Smrg      stat->value.u64 = exe->stats.max_half_reg + 1;
35347ec681f3Smrg   }
35357ec681f3Smrg
35367ec681f3Smrg   vk_outarray_append(&out, stat) {
35377ec681f3Smrg      WRITE_STR(stat->name, "Instructions with SS sync bit");
35387ec681f3Smrg      WRITE_STR(stat->description,
35397ec681f3Smrg                "SS bit is set for instructions which depend on a result "
35407ec681f3Smrg                "of \"long\" instructions to prevent RAW hazard.");
35417ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35427ec681f3Smrg      stat->value.u64 = exe->stats.ss;
35437ec681f3Smrg   }
35447ec681f3Smrg
35457ec681f3Smrg   vk_outarray_append(&out, stat) {
35467ec681f3Smrg      WRITE_STR(stat->name, "Instructions with SY sync bit");
35477ec681f3Smrg      WRITE_STR(stat->description,
35487ec681f3Smrg                "SY bit is set for instructions which depend on a result "
35497ec681f3Smrg                "of loads from global memory to prevent RAW hazard.");
35507ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35517ec681f3Smrg      stat->value.u64 = exe->stats.sy;
35527ec681f3Smrg   }
35537ec681f3Smrg
35547ec681f3Smrg   vk_outarray_append(&out, stat) {
35557ec681f3Smrg      WRITE_STR(stat->name, "Estimated cycles stalled on SS");
35567ec681f3Smrg      WRITE_STR(stat->description,
35577ec681f3Smrg                "A better metric to estimate the impact of SS syncs.");
35587ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35597ec681f3Smrg      stat->value.u64 = exe->stats.sstall;
35607ec681f3Smrg   }
35617ec681f3Smrg
35627ec681f3Smrg   for (int i = 0; i < ARRAY_SIZE(exe->stats.instrs_per_cat); i++) {
35637ec681f3Smrg      vk_outarray_append(&out, stat) {
35647ec681f3Smrg         WRITE_STR(stat->name, "cat%d instructions", i);
35657ec681f3Smrg         WRITE_STR(stat->description,
35667ec681f3Smrg                  "Number of cat%d instructions.", i);
35677ec681f3Smrg         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35687ec681f3Smrg         stat->value.u64 = exe->stats.instrs_per_cat[i];
35697ec681f3Smrg      }
35707ec681f3Smrg   }
35717ec681f3Smrg
35727ec681f3Smrg   vk_outarray_append(&out, stat) {
35737ec681f3Smrg      WRITE_STR(stat->name, "STP Count");
35747ec681f3Smrg      WRITE_STR(stat->description,
35757ec681f3Smrg                "Number of STore Private instructions in the final generated "
35767ec681f3Smrg                "shader executable.");
35777ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35787ec681f3Smrg      stat->value.u64 = exe->stats.stp_count;
35797ec681f3Smrg   }
35807ec681f3Smrg
35817ec681f3Smrg   vk_outarray_append(&out, stat) {
35827ec681f3Smrg      WRITE_STR(stat->name, "LDP Count");
35837ec681f3Smrg      WRITE_STR(stat->description,
35847ec681f3Smrg                "Number of LoaD Private instructions in the final generated "
35857ec681f3Smrg                "shader executable.");
35867ec681f3Smrg      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
35877ec681f3Smrg      stat->value.u64 = exe->stats.ldp_count;
35887ec681f3Smrg   }
35897ec681f3Smrg
35907ec681f3Smrg   return vk_outarray_status(&out);
35917ec681f3Smrg}
35927ec681f3Smrg
35937ec681f3Smrgstatic bool
35947ec681f3Smrgwrite_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
35957ec681f3Smrg              const char *data)
35967ec681f3Smrg{
35977ec681f3Smrg   ir->isText = VK_TRUE;
35987ec681f3Smrg
35997ec681f3Smrg   size_t data_len = strlen(data) + 1;
36007ec681f3Smrg
36017ec681f3Smrg   if (ir->pData == NULL) {
36027ec681f3Smrg      ir->dataSize = data_len;
36037ec681f3Smrg      return true;
36047ec681f3Smrg   }
36057ec681f3Smrg
36067ec681f3Smrg   strncpy(ir->pData, data, ir->dataSize);
36077ec681f3Smrg   if (ir->dataSize < data_len)
36087ec681f3Smrg      return false;
36097ec681f3Smrg
36107ec681f3Smrg   ir->dataSize = data_len;
36117ec681f3Smrg   return true;
36127ec681f3Smrg}
36137ec681f3Smrg
36147ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
36157ec681f3Smrgtu_GetPipelineExecutableInternalRepresentationsKHR(
36167ec681f3Smrg    VkDevice _device,
36177ec681f3Smrg    const VkPipelineExecutableInfoKHR* pExecutableInfo,
36187ec681f3Smrg    uint32_t* pInternalRepresentationCount,
36197ec681f3Smrg    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
36207ec681f3Smrg{
36217ec681f3Smrg   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
36227ec681f3Smrg   VK_OUTARRAY_MAKE(out, pInternalRepresentations, pInternalRepresentationCount);
36237ec681f3Smrg   bool incomplete_text = false;
36247ec681f3Smrg
36257ec681f3Smrg   const struct tu_pipeline_executable *exe =
36267ec681f3Smrg      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
36277ec681f3Smrg
36287ec681f3Smrg   if (exe->nir_from_spirv) {
36297ec681f3Smrg      vk_outarray_append(&out, ir) {
36307ec681f3Smrg         WRITE_STR(ir->name, "NIR from SPIRV");
36317ec681f3Smrg         WRITE_STR(ir->description,
36327ec681f3Smrg                   "Initial NIR before any optimizations");
36337ec681f3Smrg
36347ec681f3Smrg         if (!write_ir_text(ir, exe->nir_from_spirv))
36357ec681f3Smrg            incomplete_text = true;
36367ec681f3Smrg      }
36377ec681f3Smrg   }
36387ec681f3Smrg
36397ec681f3Smrg   if (exe->nir_final) {
36407ec681f3Smrg      vk_outarray_append(&out, ir) {
36417ec681f3Smrg         WRITE_STR(ir->name, "Final NIR");
36427ec681f3Smrg         WRITE_STR(ir->description,
36437ec681f3Smrg                   "Final NIR before going into the back-end compiler");
36447ec681f3Smrg
36457ec681f3Smrg         if (!write_ir_text(ir, exe->nir_final))
36467ec681f3Smrg            incomplete_text = true;
36477ec681f3Smrg      }
36487ec681f3Smrg   }
36497ec681f3Smrg
36507ec681f3Smrg   if (exe->disasm) {
36517ec681f3Smrg      vk_outarray_append(&out, ir) {
36527ec681f3Smrg         WRITE_STR(ir->name, "IR3 Assembly");
36537ec681f3Smrg         WRITE_STR(ir->description,
36547ec681f3Smrg                   "Final IR3 assembly for the generated shader binary");
36557ec681f3Smrg
36567ec681f3Smrg         if (!write_ir_text(ir, exe->disasm))
36577ec681f3Smrg            incomplete_text = true;
36587ec681f3Smrg      }
36597ec681f3Smrg   }
36607ec681f3Smrg
36617ec681f3Smrg   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3662361fc4cbSmaya}
3663