1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016 Red Hat. 3b8e80941Smrg * Copyright © 2016 Bas Nieuwenhuizen 4b8e80941Smrg * 5b8e80941Smrg * based in part on anv driver which is: 6b8e80941Smrg * Copyright © 2015 Intel Corporation 7b8e80941Smrg * 8b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 9b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 10b8e80941Smrg * to deal in the Software without restriction, including without limitation 11b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 13b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 14b8e80941Smrg * 15b8e80941Smrg * The above copyright notice and this permission notice (including the next 16b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 17b8e80941Smrg * Software. 18b8e80941Smrg * 19b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25b8e80941Smrg * DEALINGS IN THE SOFTWARE. 26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#include "tu_private.h" 29b8e80941Smrg 30b8e80941Smrg#include "main/menums.h" 31b8e80941Smrg#include "nir/nir.h" 32b8e80941Smrg#include "nir/nir_builder.h" 33b8e80941Smrg#include "spirv/nir_spirv.h" 34b8e80941Smrg#include "util/debug.h" 35b8e80941Smrg#include "util/mesa-sha1.h" 36b8e80941Smrg#include "util/u_atomic.h" 37b8e80941Smrg#include "vk_format.h" 38b8e80941Smrg#include "vk_util.h" 39b8e80941Smrg 40b8e80941Smrg#include "tu_cs.h" 41b8e80941Smrg 42b8e80941Smrgstruct tu_pipeline_builder 43b8e80941Smrg{ 44b8e80941Smrg struct tu_device *device; 45b8e80941Smrg struct tu_pipeline_cache *cache; 46b8e80941Smrg const VkAllocationCallbacks *alloc; 47b8e80941Smrg const VkGraphicsPipelineCreateInfo *create_info; 48b8e80941Smrg 49b8e80941Smrg struct tu_shader *shaders[MESA_SHADER_STAGES]; 50b8e80941Smrg uint32_t shader_offsets[MESA_SHADER_STAGES]; 51b8e80941Smrg uint32_t binning_vs_offset; 52b8e80941Smrg uint32_t shader_total_size; 53b8e80941Smrg 54b8e80941Smrg bool rasterizer_discard; 55b8e80941Smrg /* these states are affectd by rasterizer_discard */ 56b8e80941Smrg VkSampleCountFlagBits samples; 57b8e80941Smrg bool use_depth_stencil_attachment; 58b8e80941Smrg bool use_color_attachments; 59b8e80941Smrg uint32_t color_attachment_count; 60b8e80941Smrg VkFormat color_attachment_formats[MAX_RTS]; 61b8e80941Smrg}; 62b8e80941Smrg 63b8e80941Smrgstatic enum tu_dynamic_state_bits 64b8e80941Smrgtu_dynamic_state_bit(VkDynamicState state) 65b8e80941Smrg{ 66b8e80941Smrg switch (state) { 67b8e80941Smrg case VK_DYNAMIC_STATE_VIEWPORT: 68b8e80941Smrg return TU_DYNAMIC_VIEWPORT; 69b8e80941Smrg case VK_DYNAMIC_STATE_SCISSOR: 70b8e80941Smrg return TU_DYNAMIC_SCISSOR; 71b8e80941Smrg case VK_DYNAMIC_STATE_LINE_WIDTH: 72b8e80941Smrg return TU_DYNAMIC_LINE_WIDTH; 73b8e80941Smrg case VK_DYNAMIC_STATE_DEPTH_BIAS: 74b8e80941Smrg return TU_DYNAMIC_DEPTH_BIAS; 75b8e80941Smrg case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 76b8e80941Smrg return TU_DYNAMIC_BLEND_CONSTANTS; 77b8e80941Smrg case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 78b8e80941Smrg return TU_DYNAMIC_DEPTH_BOUNDS; 79b8e80941Smrg case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 80b8e80941Smrg return TU_DYNAMIC_STENCIL_COMPARE_MASK; 81b8e80941Smrg case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 82b8e80941Smrg return TU_DYNAMIC_STENCIL_WRITE_MASK; 83b8e80941Smrg case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 84b8e80941Smrg return TU_DYNAMIC_STENCIL_REFERENCE; 85b8e80941Smrg default: 86b8e80941Smrg unreachable("invalid dynamic state"); 87b8e80941Smrg return 0; 88b8e80941Smrg } 89b8e80941Smrg} 90b8e80941Smrg 91b8e80941Smrgstatic gl_shader_stage 92b8e80941Smrgtu_shader_stage(VkShaderStageFlagBits stage) 93b8e80941Smrg{ 94b8e80941Smrg switch (stage) { 95b8e80941Smrg case VK_SHADER_STAGE_VERTEX_BIT: 96b8e80941Smrg return MESA_SHADER_VERTEX; 97b8e80941Smrg case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: 98b8e80941Smrg return MESA_SHADER_TESS_CTRL; 99b8e80941Smrg case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: 100b8e80941Smrg return MESA_SHADER_TESS_EVAL; 101b8e80941Smrg case VK_SHADER_STAGE_GEOMETRY_BIT: 102b8e80941Smrg return MESA_SHADER_GEOMETRY; 103b8e80941Smrg case VK_SHADER_STAGE_FRAGMENT_BIT: 104b8e80941Smrg return MESA_SHADER_FRAGMENT; 105b8e80941Smrg case VK_SHADER_STAGE_COMPUTE_BIT: 106b8e80941Smrg return MESA_SHADER_COMPUTE; 107b8e80941Smrg default: 108b8e80941Smrg unreachable("invalid VkShaderStageFlagBits"); 109b8e80941Smrg return MESA_SHADER_NONE; 110b8e80941Smrg } 111b8e80941Smrg} 112b8e80941Smrg 113b8e80941Smrgstatic const VkVertexInputAttributeDescription * 114b8e80941Smrgtu_find_vertex_input_attribute( 115b8e80941Smrg const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot) 116b8e80941Smrg{ 117b8e80941Smrg assert(slot >= VERT_ATTRIB_GENERIC0); 118b8e80941Smrg slot -= VERT_ATTRIB_GENERIC0; 119b8e80941Smrg for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { 120b8e80941Smrg if (vi_info->pVertexAttributeDescriptions[i].location == slot) 121b8e80941Smrg return &vi_info->pVertexAttributeDescriptions[i]; 122b8e80941Smrg } 123b8e80941Smrg return NULL; 124b8e80941Smrg} 125b8e80941Smrg 126b8e80941Smrgstatic const VkVertexInputBindingDescription * 127b8e80941Smrgtu_find_vertex_input_binding( 128b8e80941Smrg const VkPipelineVertexInputStateCreateInfo *vi_info, 129b8e80941Smrg const VkVertexInputAttributeDescription *vi_attr) 130b8e80941Smrg{ 131b8e80941Smrg assert(vi_attr); 132b8e80941Smrg for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { 133b8e80941Smrg if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding) 134b8e80941Smrg return &vi_info->pVertexBindingDescriptions[i]; 135b8e80941Smrg } 136b8e80941Smrg return NULL; 137b8e80941Smrg} 138b8e80941Smrg 139b8e80941Smrgstatic bool 140b8e80941Smrgtu_logic_op_reads_dst(VkLogicOp op) 141b8e80941Smrg{ 142b8e80941Smrg switch (op) { 143b8e80941Smrg case VK_LOGIC_OP_CLEAR: 144b8e80941Smrg case VK_LOGIC_OP_COPY: 145b8e80941Smrg case VK_LOGIC_OP_COPY_INVERTED: 146b8e80941Smrg case VK_LOGIC_OP_SET: 147b8e80941Smrg return false; 148b8e80941Smrg default: 149b8e80941Smrg return true; 150b8e80941Smrg } 151b8e80941Smrg} 152b8e80941Smrg 153b8e80941Smrgstatic VkBlendFactor 154b8e80941Smrgtu_blend_factor_no_dst_alpha(VkBlendFactor factor) 155b8e80941Smrg{ 156b8e80941Smrg /* treat dst alpha as 1.0 and avoid reading it */ 157b8e80941Smrg switch (factor) { 158b8e80941Smrg case VK_BLEND_FACTOR_DST_ALPHA: 159b8e80941Smrg return VK_BLEND_FACTOR_ONE; 160b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 161b8e80941Smrg return VK_BLEND_FACTOR_ZERO; 162b8e80941Smrg default: 163b8e80941Smrg return factor; 164b8e80941Smrg } 165b8e80941Smrg} 166b8e80941Smrg 167b8e80941Smrgstatic enum pc_di_primtype 168b8e80941Smrgtu6_primtype(VkPrimitiveTopology topology) 169b8e80941Smrg{ 170b8e80941Smrg switch (topology) { 171b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: 172b8e80941Smrg return DI_PT_POINTLIST; 173b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: 174b8e80941Smrg return DI_PT_LINELIST; 175b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: 176b8e80941Smrg return DI_PT_LINESTRIP; 177b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: 178b8e80941Smrg return DI_PT_TRILIST; 179b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: 180b8e80941Smrg return DI_PT_TRILIST; 181b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: 182b8e80941Smrg return DI_PT_TRIFAN; 183b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: 184b8e80941Smrg return DI_PT_LINE_ADJ; 185b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: 186b8e80941Smrg return DI_PT_LINESTRIP_ADJ; 187b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: 188b8e80941Smrg return DI_PT_TRI_ADJ; 189b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: 190b8e80941Smrg return DI_PT_TRISTRIP_ADJ; 191b8e80941Smrg case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: 192b8e80941Smrg default: 193b8e80941Smrg unreachable("invalid primitive topology"); 194b8e80941Smrg return DI_PT_NONE; 195b8e80941Smrg } 196b8e80941Smrg} 197b8e80941Smrg 198b8e80941Smrgstatic enum adreno_compare_func 199b8e80941Smrgtu6_compare_func(VkCompareOp op) 200b8e80941Smrg{ 201b8e80941Smrg switch (op) { 202b8e80941Smrg case VK_COMPARE_OP_NEVER: 203b8e80941Smrg return FUNC_NEVER; 204b8e80941Smrg case VK_COMPARE_OP_LESS: 205b8e80941Smrg return FUNC_LESS; 206b8e80941Smrg case VK_COMPARE_OP_EQUAL: 207b8e80941Smrg return FUNC_EQUAL; 208b8e80941Smrg case VK_COMPARE_OP_LESS_OR_EQUAL: 209b8e80941Smrg return FUNC_LEQUAL; 210b8e80941Smrg case VK_COMPARE_OP_GREATER: 211b8e80941Smrg return FUNC_GREATER; 212b8e80941Smrg case VK_COMPARE_OP_NOT_EQUAL: 213b8e80941Smrg return FUNC_NOTEQUAL; 214b8e80941Smrg case VK_COMPARE_OP_GREATER_OR_EQUAL: 215b8e80941Smrg return FUNC_GEQUAL; 216b8e80941Smrg case VK_COMPARE_OP_ALWAYS: 217b8e80941Smrg return FUNC_ALWAYS; 218b8e80941Smrg default: 219b8e80941Smrg unreachable("invalid VkCompareOp"); 220b8e80941Smrg return FUNC_NEVER; 221b8e80941Smrg } 222b8e80941Smrg} 223b8e80941Smrg 224b8e80941Smrgstatic enum adreno_stencil_op 225b8e80941Smrgtu6_stencil_op(VkStencilOp op) 226b8e80941Smrg{ 227b8e80941Smrg switch (op) { 228b8e80941Smrg case VK_STENCIL_OP_KEEP: 229b8e80941Smrg return STENCIL_KEEP; 230b8e80941Smrg case VK_STENCIL_OP_ZERO: 231b8e80941Smrg return STENCIL_ZERO; 232b8e80941Smrg case VK_STENCIL_OP_REPLACE: 233b8e80941Smrg return STENCIL_REPLACE; 234b8e80941Smrg case VK_STENCIL_OP_INCREMENT_AND_CLAMP: 235b8e80941Smrg return STENCIL_INCR_CLAMP; 236b8e80941Smrg case VK_STENCIL_OP_DECREMENT_AND_CLAMP: 237b8e80941Smrg return STENCIL_DECR_CLAMP; 238b8e80941Smrg case VK_STENCIL_OP_INVERT: 239b8e80941Smrg return STENCIL_INVERT; 240b8e80941Smrg case VK_STENCIL_OP_INCREMENT_AND_WRAP: 241b8e80941Smrg return STENCIL_INCR_WRAP; 242b8e80941Smrg case VK_STENCIL_OP_DECREMENT_AND_WRAP: 243b8e80941Smrg return STENCIL_DECR_WRAP; 244b8e80941Smrg default: 245b8e80941Smrg unreachable("invalid VkStencilOp"); 246b8e80941Smrg return STENCIL_KEEP; 247b8e80941Smrg } 248b8e80941Smrg} 249b8e80941Smrg 250b8e80941Smrgstatic enum a3xx_rop_code 251b8e80941Smrgtu6_rop(VkLogicOp op) 252b8e80941Smrg{ 253b8e80941Smrg switch (op) { 254b8e80941Smrg case VK_LOGIC_OP_CLEAR: 255b8e80941Smrg return ROP_CLEAR; 256b8e80941Smrg case VK_LOGIC_OP_AND: 257b8e80941Smrg return ROP_AND; 258b8e80941Smrg case VK_LOGIC_OP_AND_REVERSE: 259b8e80941Smrg return ROP_AND_REVERSE; 260b8e80941Smrg case VK_LOGIC_OP_COPY: 261b8e80941Smrg return ROP_COPY; 262b8e80941Smrg case VK_LOGIC_OP_AND_INVERTED: 263b8e80941Smrg return ROP_AND_INVERTED; 264b8e80941Smrg case VK_LOGIC_OP_NO_OP: 265b8e80941Smrg return ROP_NOOP; 266b8e80941Smrg case VK_LOGIC_OP_XOR: 267b8e80941Smrg return ROP_XOR; 268b8e80941Smrg case VK_LOGIC_OP_OR: 269b8e80941Smrg return ROP_OR; 270b8e80941Smrg case VK_LOGIC_OP_NOR: 271b8e80941Smrg return ROP_NOR; 272b8e80941Smrg case VK_LOGIC_OP_EQUIVALENT: 273b8e80941Smrg return ROP_EQUIV; 274b8e80941Smrg case VK_LOGIC_OP_INVERT: 275b8e80941Smrg return ROP_INVERT; 276b8e80941Smrg case VK_LOGIC_OP_OR_REVERSE: 277b8e80941Smrg return ROP_OR_REVERSE; 278b8e80941Smrg case VK_LOGIC_OP_COPY_INVERTED: 279b8e80941Smrg return ROP_COPY_INVERTED; 280b8e80941Smrg case VK_LOGIC_OP_OR_INVERTED: 281b8e80941Smrg return ROP_OR_INVERTED; 282b8e80941Smrg case VK_LOGIC_OP_NAND: 283b8e80941Smrg return ROP_NAND; 284b8e80941Smrg case VK_LOGIC_OP_SET: 285b8e80941Smrg return ROP_SET; 286b8e80941Smrg default: 287b8e80941Smrg unreachable("invalid VkLogicOp"); 288b8e80941Smrg return ROP_NOOP; 289b8e80941Smrg } 290b8e80941Smrg} 291b8e80941Smrg 292b8e80941Smrgstatic enum adreno_rb_blend_factor 293b8e80941Smrgtu6_blend_factor(VkBlendFactor factor) 294b8e80941Smrg{ 295b8e80941Smrg switch (factor) { 296b8e80941Smrg case VK_BLEND_FACTOR_ZERO: 297b8e80941Smrg return FACTOR_ZERO; 298b8e80941Smrg case VK_BLEND_FACTOR_ONE: 299b8e80941Smrg return FACTOR_ONE; 300b8e80941Smrg case VK_BLEND_FACTOR_SRC_COLOR: 301b8e80941Smrg return FACTOR_SRC_COLOR; 302b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 303b8e80941Smrg return FACTOR_ONE_MINUS_SRC_COLOR; 304b8e80941Smrg case VK_BLEND_FACTOR_DST_COLOR: 305b8e80941Smrg return FACTOR_DST_COLOR; 306b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: 307b8e80941Smrg return FACTOR_ONE_MINUS_DST_COLOR; 308b8e80941Smrg case VK_BLEND_FACTOR_SRC_ALPHA: 309b8e80941Smrg return FACTOR_SRC_ALPHA; 310b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: 311b8e80941Smrg return FACTOR_ONE_MINUS_SRC_ALPHA; 312b8e80941Smrg case VK_BLEND_FACTOR_DST_ALPHA: 313b8e80941Smrg return FACTOR_DST_ALPHA; 314b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 315b8e80941Smrg return FACTOR_ONE_MINUS_DST_ALPHA; 316b8e80941Smrg case VK_BLEND_FACTOR_CONSTANT_COLOR: 317b8e80941Smrg return FACTOR_CONSTANT_COLOR; 318b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: 319b8e80941Smrg return FACTOR_ONE_MINUS_CONSTANT_COLOR; 320b8e80941Smrg case VK_BLEND_FACTOR_CONSTANT_ALPHA: 321b8e80941Smrg return FACTOR_CONSTANT_ALPHA; 322b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: 323b8e80941Smrg return FACTOR_ONE_MINUS_CONSTANT_ALPHA; 324b8e80941Smrg case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 325b8e80941Smrg return FACTOR_SRC_ALPHA_SATURATE; 326b8e80941Smrg case VK_BLEND_FACTOR_SRC1_COLOR: 327b8e80941Smrg return FACTOR_SRC1_COLOR; 328b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 329b8e80941Smrg return FACTOR_ONE_MINUS_SRC1_COLOR; 330b8e80941Smrg case VK_BLEND_FACTOR_SRC1_ALPHA: 331b8e80941Smrg return FACTOR_SRC1_ALPHA; 332b8e80941Smrg case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 333b8e80941Smrg return FACTOR_ONE_MINUS_SRC1_ALPHA; 334b8e80941Smrg default: 335b8e80941Smrg unreachable("invalid VkBlendFactor"); 336b8e80941Smrg return FACTOR_ZERO; 337b8e80941Smrg } 338b8e80941Smrg} 339b8e80941Smrg 340b8e80941Smrgstatic enum a3xx_rb_blend_opcode 341b8e80941Smrgtu6_blend_op(VkBlendOp op) 342b8e80941Smrg{ 343b8e80941Smrg switch (op) { 344b8e80941Smrg case VK_BLEND_OP_ADD: 345b8e80941Smrg return BLEND_DST_PLUS_SRC; 346b8e80941Smrg case VK_BLEND_OP_SUBTRACT: 347b8e80941Smrg return BLEND_SRC_MINUS_DST; 348b8e80941Smrg case VK_BLEND_OP_REVERSE_SUBTRACT: 349b8e80941Smrg return BLEND_DST_MINUS_SRC; 350b8e80941Smrg case VK_BLEND_OP_MIN: 351b8e80941Smrg return BLEND_MIN_DST_SRC; 352b8e80941Smrg case VK_BLEND_OP_MAX: 353b8e80941Smrg return BLEND_MAX_DST_SRC; 354b8e80941Smrg default: 355b8e80941Smrg unreachable("invalid VkBlendOp"); 356b8e80941Smrg return BLEND_DST_PLUS_SRC; 357b8e80941Smrg } 358b8e80941Smrg} 359b8e80941Smrg 360b8e80941Smrgstatic void 361b8e80941Smrgtu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) 362b8e80941Smrg{ 363b8e80941Smrg uint32_t sp_vs_ctrl = 364b8e80941Smrg A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 365b8e80941Smrg A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | 366b8e80941Smrg A6XX_SP_VS_CTRL_REG0_MERGEDREGS | 367b8e80941Smrg A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack); 368b8e80941Smrg if (vs->num_samp) 369b8e80941Smrg sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE; 370b8e80941Smrg 371b8e80941Smrg uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) | 372b8e80941Smrg A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp); 373b8e80941Smrg if (vs->instrlen) 374b8e80941Smrg sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED; 375b8e80941Smrg 376b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CTRL_REG0, 1); 377b8e80941Smrg tu_cs_emit(cs, sp_vs_ctrl); 378b8e80941Smrg 379b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CONFIG, 2); 380b8e80941Smrg tu_cs_emit(cs, sp_vs_config); 381b8e80941Smrg tu_cs_emit(cs, vs->instrlen); 382b8e80941Smrg 383b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_VS_CNTL, 1); 384b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(vs->constlen, 4)) | 0x100); 385b8e80941Smrg} 386b8e80941Smrg 387b8e80941Smrgstatic void 388b8e80941Smrgtu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs) 389b8e80941Smrg{ 390b8e80941Smrg uint32_t sp_hs_config = 0; 391b8e80941Smrg if (hs->instrlen) 392b8e80941Smrg sp_hs_config |= A6XX_SP_HS_CONFIG_ENABLED; 393b8e80941Smrg 394b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1); 395b8e80941Smrg tu_cs_emit(cs, 0); 396b8e80941Smrg 397b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CONFIG, 2); 398b8e80941Smrg tu_cs_emit(cs, sp_hs_config); 399b8e80941Smrg tu_cs_emit(cs, hs->instrlen); 400b8e80941Smrg 401b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_HS_CNTL, 1); 402b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_HS_CNTL_CONSTLEN(align(hs->constlen, 4))); 403b8e80941Smrg} 404b8e80941Smrg 405b8e80941Smrgstatic void 406b8e80941Smrgtu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds) 407b8e80941Smrg{ 408b8e80941Smrg uint32_t sp_ds_config = 0; 409b8e80941Smrg if (ds->instrlen) 410b8e80941Smrg sp_ds_config |= A6XX_SP_DS_CONFIG_ENABLED; 411b8e80941Smrg 412b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_CONFIG, 2); 413b8e80941Smrg tu_cs_emit(cs, sp_ds_config); 414b8e80941Smrg tu_cs_emit(cs, ds->instrlen); 415b8e80941Smrg 416b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_DS_CNTL, 1); 417b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_DS_CNTL_CONSTLEN(align(ds->constlen, 4))); 418b8e80941Smrg} 419b8e80941Smrg 420b8e80941Smrgstatic void 421b8e80941Smrgtu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs) 422b8e80941Smrg{ 423b8e80941Smrg uint32_t sp_gs_config = 0; 424b8e80941Smrg if (gs->instrlen) 425b8e80941Smrg sp_gs_config |= A6XX_SP_GS_CONFIG_ENABLED; 426b8e80941Smrg 427b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1); 428b8e80941Smrg tu_cs_emit(cs, 0); 429b8e80941Smrg 430b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2); 431b8e80941Smrg tu_cs_emit(cs, sp_gs_config); 432b8e80941Smrg tu_cs_emit(cs, gs->instrlen); 433b8e80941Smrg 434b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_GS_CNTL, 1); 435b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_GS_CNTL_CONSTLEN(align(gs->constlen, 4))); 436b8e80941Smrg} 437b8e80941Smrg 438b8e80941Smrgstatic void 439b8e80941Smrgtu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) 440b8e80941Smrg{ 441b8e80941Smrg uint32_t sp_fs_ctrl = 442b8e80941Smrg A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 | 443b8e80941Smrg A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | 444b8e80941Smrg A6XX_SP_FS_CTRL_REG0_MERGEDREGS | 445b8e80941Smrg A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack); 446b8e80941Smrg if (fs->total_in > 0 || fs->frag_coord) 447b8e80941Smrg sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING; 448b8e80941Smrg if (fs->num_samp > 0) 449b8e80941Smrg sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE; 450b8e80941Smrg 451b8e80941Smrg uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) | 452b8e80941Smrg A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp); 453b8e80941Smrg if (fs->instrlen) 454b8e80941Smrg sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; 455b8e80941Smrg 456b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A99E, 1); 457b8e80941Smrg tu_cs_emit(cs, 0x7fc0); 458b8e80941Smrg 459b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A9A8, 1); 460b8e80941Smrg tu_cs_emit(cs, 0); 461b8e80941Smrg 462b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_AB00, 1); 463b8e80941Smrg tu_cs_emit(cs, 0x5); 464b8e80941Smrg 465b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CTRL_REG0, 1); 466b8e80941Smrg tu_cs_emit(cs, sp_fs_ctrl); 467b8e80941Smrg 468b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CONFIG, 2); 469b8e80941Smrg tu_cs_emit(cs, sp_fs_config); 470b8e80941Smrg tu_cs_emit(cs, fs->instrlen); 471b8e80941Smrg 472b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL, 1); 473b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) | 0x100); 474b8e80941Smrg} 475b8e80941Smrg 476b8e80941Smrgstatic void 477b8e80941Smrgtu6_emit_vs_system_values(struct tu_cs *cs, 478b8e80941Smrg const struct ir3_shader_variant *vs) 479b8e80941Smrg{ 480b8e80941Smrg const uint32_t vertexid_regid = 481b8e80941Smrg ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); 482b8e80941Smrg const uint32_t instanceid_regid = 483b8e80941Smrg ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); 484b8e80941Smrg 485b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6); 486b8e80941Smrg tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) | 487b8e80941Smrg A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) | 488b8e80941Smrg 0xfcfc0000); 489b8e80941Smrg tu_cs_emit(cs, 0x0000fcfc); /* VFD_CONTROL_2 */ 490b8e80941Smrg tu_cs_emit(cs, 0xfcfcfcfc); /* VFD_CONTROL_3 */ 491b8e80941Smrg tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */ 492b8e80941Smrg tu_cs_emit(cs, 0x0000fcfc); /* VFD_CONTROL_5 */ 493b8e80941Smrg tu_cs_emit(cs, 0x00000000); /* VFD_CONTROL_6 */ 494b8e80941Smrg} 495b8e80941Smrg 496b8e80941Smrgstatic void 497b8e80941Smrgtu6_emit_vpc(struct tu_cs *cs, 498b8e80941Smrg const struct ir3_shader_variant *vs, 499b8e80941Smrg const struct ir3_shader_variant *fs, 500b8e80941Smrg bool binning_pass) 501b8e80941Smrg{ 502b8e80941Smrg struct ir3_shader_linkage linkage = { 0 }; 503b8e80941Smrg ir3_link_shaders(&linkage, vs, fs); 504b8e80941Smrg 505b8e80941Smrg if (vs->shader->stream_output.num_outputs && !binning_pass) 506b8e80941Smrg tu_finishme("stream output"); 507b8e80941Smrg 508b8e80941Smrg BITSET_DECLARE(vpc_var_enables, 128) = { 0 }; 509b8e80941Smrg for (uint32_t i = 0; i < linkage.cnt; i++) { 510b8e80941Smrg const uint32_t comp_count = util_last_bit(linkage.var[i].compmask); 511b8e80941Smrg for (uint32_t j = 0; j < comp_count; j++) 512b8e80941Smrg BITSET_SET(vpc_var_enables, linkage.var[i].loc + j); 513b8e80941Smrg } 514b8e80941Smrg 515b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); 516b8e80941Smrg tu_cs_emit(cs, ~vpc_var_enables[0]); 517b8e80941Smrg tu_cs_emit(cs, ~vpc_var_enables[1]); 518b8e80941Smrg tu_cs_emit(cs, ~vpc_var_enables[2]); 519b8e80941Smrg tu_cs_emit(cs, ~vpc_var_enables[3]); 520b8e80941Smrg 521b8e80941Smrg /* a6xx finds position/pointsize at the end */ 522b8e80941Smrg const uint32_t position_regid = 523b8e80941Smrg ir3_find_output_regid(vs, VARYING_SLOT_POS); 524b8e80941Smrg const uint32_t pointsize_regid = 525b8e80941Smrg ir3_find_output_regid(vs, VARYING_SLOT_PSIZ); 526b8e80941Smrg uint32_t pointsize_loc = 0xff; 527b8e80941Smrg if (position_regid != regid(63, 0)) 528b8e80941Smrg ir3_link_add(&linkage, position_regid, 0xf, linkage.max_loc); 529b8e80941Smrg if (pointsize_regid != regid(63, 0)) { 530b8e80941Smrg pointsize_loc = linkage.max_loc; 531b8e80941Smrg ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc); 532b8e80941Smrg } 533b8e80941Smrg 534b8e80941Smrg /* map vs outputs to VPC */ 535b8e80941Smrg assert(linkage.cnt <= 32); 536b8e80941Smrg const uint32_t sp_vs_out_count = (linkage.cnt + 1) / 2; 537b8e80941Smrg const uint32_t sp_vs_vpc_dst_count = (linkage.cnt + 3) / 4; 538b8e80941Smrg uint32_t sp_vs_out[16]; 539b8e80941Smrg uint32_t sp_vs_vpc_dst[8]; 540b8e80941Smrg sp_vs_out[sp_vs_out_count - 1] = 0; 541b8e80941Smrg sp_vs_vpc_dst[sp_vs_vpc_dst_count - 1] = 0; 542b8e80941Smrg for (uint32_t i = 0; i < linkage.cnt; i++) { 543b8e80941Smrg ((uint16_t *) sp_vs_out)[i] = 544b8e80941Smrg A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) | 545b8e80941Smrg A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask); 546b8e80941Smrg ((uint8_t *) sp_vs_vpc_dst)[i] = 547b8e80941Smrg A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); 548b8e80941Smrg } 549b8e80941Smrg 550b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_vs_out_count); 551b8e80941Smrg tu_cs_emit_array(cs, sp_vs_out, sp_vs_out_count); 552b8e80941Smrg 553b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vs_vpc_dst_count); 554b8e80941Smrg tu_cs_emit_array(cs, sp_vs_vpc_dst, sp_vs_vpc_dst_count); 555b8e80941Smrg 556b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); 557b8e80941Smrg tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) | 558b8e80941Smrg (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) | 559b8e80941Smrg 0xff00ff00); 560b8e80941Smrg 561b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1); 562b8e80941Smrg tu_cs_emit(cs, A6XX_VPC_PACK_NUMNONPOSVAR(fs->total_in) | 563b8e80941Smrg A6XX_VPC_PACK_PSIZELOC(pointsize_loc) | 564b8e80941Smrg A6XX_VPC_PACK_STRIDE_IN_VPC(linkage.max_loc)); 565b8e80941Smrg 566b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_SIV_CNTL, 1); 567b8e80941Smrg tu_cs_emit(cs, 0x0000ffff); /* XXX */ 568b8e80941Smrg 569b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_PRIMITIVE_CNTL, 1); 570b8e80941Smrg tu_cs_emit(cs, A6XX_SP_PRIMITIVE_CNTL_VSOUT(linkage.cnt)); 571b8e80941Smrg 572b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_1, 1); 573b8e80941Smrg tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(linkage.max_loc) | 574b8e80941Smrg (vs->writes_psize ? A6XX_PC_PRIMITIVE_CNTL_1_PSIZE : 0)); 575b8e80941Smrg} 576b8e80941Smrg 577b8e80941Smrgstatic int 578b8e80941Smrgtu6_vpc_varying_mode(const struct ir3_shader_variant *fs, 579b8e80941Smrg uint32_t index, 580b8e80941Smrg uint8_t *interp_mode, 581b8e80941Smrg uint8_t *ps_repl_mode) 582b8e80941Smrg{ 583b8e80941Smrg enum 584b8e80941Smrg { 585b8e80941Smrg INTERP_SMOOTH = 0, 586b8e80941Smrg INTERP_FLAT = 1, 587b8e80941Smrg INTERP_ZERO = 2, 588b8e80941Smrg INTERP_ONE = 3, 589b8e80941Smrg }; 590b8e80941Smrg enum 591b8e80941Smrg { 592b8e80941Smrg PS_REPL_NONE = 0, 593b8e80941Smrg PS_REPL_S = 1, 594b8e80941Smrg PS_REPL_T = 2, 595b8e80941Smrg PS_REPL_ONE_MINUS_T = 3, 596b8e80941Smrg }; 597b8e80941Smrg 598b8e80941Smrg const uint32_t compmask = fs->inputs[index].compmask; 599b8e80941Smrg 600b8e80941Smrg /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and 601b8e80941Smrg * fourth component occupy three consecutive varying slots 602b8e80941Smrg */ 603b8e80941Smrg int shift = 0; 604b8e80941Smrg *interp_mode = 0; 605b8e80941Smrg *ps_repl_mode = 0; 606b8e80941Smrg if (fs->inputs[index].slot == VARYING_SLOT_PNTC) { 607b8e80941Smrg if (compmask & 0x1) { 608b8e80941Smrg *ps_repl_mode |= PS_REPL_S << shift; 609b8e80941Smrg shift += 2; 610b8e80941Smrg } 611b8e80941Smrg if (compmask & 0x2) { 612b8e80941Smrg *ps_repl_mode |= PS_REPL_T << shift; 613b8e80941Smrg shift += 2; 614b8e80941Smrg } 615b8e80941Smrg if (compmask & 0x4) { 616b8e80941Smrg *interp_mode |= INTERP_ZERO << shift; 617b8e80941Smrg shift += 2; 618b8e80941Smrg } 619b8e80941Smrg if (compmask & 0x8) { 620b8e80941Smrg *interp_mode |= INTERP_ONE << 6; 621b8e80941Smrg shift += 2; 622b8e80941Smrg } 623b8e80941Smrg } else if ((fs->inputs[index].interpolate == INTERP_MODE_FLAT) || 624b8e80941Smrg fs->inputs[index].rasterflat) { 625b8e80941Smrg for (int i = 0; i < 4; i++) { 626b8e80941Smrg if (compmask & (1 << i)) { 627b8e80941Smrg *interp_mode |= INTERP_FLAT << shift; 628b8e80941Smrg shift += 2; 629b8e80941Smrg } 630b8e80941Smrg } 631b8e80941Smrg } 632b8e80941Smrg 633b8e80941Smrg return shift; 634b8e80941Smrg} 635b8e80941Smrg 636b8e80941Smrgstatic void 637b8e80941Smrgtu6_emit_vpc_varying_modes(struct tu_cs *cs, 638b8e80941Smrg const struct ir3_shader_variant *fs, 639b8e80941Smrg bool binning_pass) 640b8e80941Smrg{ 641b8e80941Smrg uint32_t interp_modes[8] = { 0 }; 642b8e80941Smrg uint32_t ps_repl_modes[8] = { 0 }; 643b8e80941Smrg 644b8e80941Smrg if (!binning_pass) { 645b8e80941Smrg for (int i = -1; 646b8e80941Smrg (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) { 647b8e80941Smrg 648b8e80941Smrg /* get the mode for input i */ 649b8e80941Smrg uint8_t interp_mode; 650b8e80941Smrg uint8_t ps_repl_mode; 651b8e80941Smrg const int bits = 652b8e80941Smrg tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode); 653b8e80941Smrg 654b8e80941Smrg /* OR the mode into the array */ 655b8e80941Smrg const uint32_t inloc = fs->inputs[i].inloc * 2; 656b8e80941Smrg uint32_t n = inloc / 32; 657b8e80941Smrg uint32_t shift = inloc % 32; 658b8e80941Smrg interp_modes[n] |= interp_mode << shift; 659b8e80941Smrg ps_repl_modes[n] |= ps_repl_mode << shift; 660b8e80941Smrg if (shift + bits > 32) { 661b8e80941Smrg n++; 662b8e80941Smrg shift = 32 - shift; 663b8e80941Smrg 664b8e80941Smrg interp_modes[n] |= interp_mode >> shift; 665b8e80941Smrg ps_repl_modes[n] |= ps_repl_mode >> shift; 666b8e80941Smrg } 667b8e80941Smrg } 668b8e80941Smrg } 669b8e80941Smrg 670b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); 671b8e80941Smrg tu_cs_emit_array(cs, interp_modes, 8); 672b8e80941Smrg 673b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); 674b8e80941Smrg tu_cs_emit_array(cs, ps_repl_modes, 8); 675b8e80941Smrg} 676b8e80941Smrg 677b8e80941Smrgstatic void 678b8e80941Smrgtu6_emit_fs_system_values(struct tu_cs *cs, 679b8e80941Smrg const struct ir3_shader_variant *fs) 680b8e80941Smrg{ 681b8e80941Smrg const uint32_t frontfacing_regid = 682b8e80941Smrg ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE); 683b8e80941Smrg const uint32_t sampleid_regid = 684b8e80941Smrg ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID); 685b8e80941Smrg const uint32_t samplemaskin_regid = 686b8e80941Smrg ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN); 687b8e80941Smrg const uint32_t fragcoord_xy_regid = 688b8e80941Smrg ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD); 689b8e80941Smrg const uint32_t fragcoord_zw_regid = (fragcoord_xy_regid != regid(63, 0)) 690b8e80941Smrg ? (fragcoord_xy_regid + 2) 691b8e80941Smrg : fragcoord_xy_regid; 692b8e80941Smrg const uint32_t varyingcoord_regid = 693b8e80941Smrg ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PIXEL); 694b8e80941Smrg 695b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5); 696b8e80941Smrg tu_cs_emit(cs, 0x7); 697b8e80941Smrg tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(frontfacing_regid) | 698b8e80941Smrg A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(sampleid_regid) | 699b8e80941Smrg A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samplemaskin_regid) | 700b8e80941Smrg A6XX_HLSQ_CONTROL_2_REG_SIZE(regid(63, 0))); 701b8e80941Smrg tu_cs_emit(cs, 702b8e80941Smrg A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(varyingcoord_regid) | 703b8e80941Smrg A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(regid(63, 0)) | 704b8e80941Smrg 0xfc00fc00); 705b8e80941Smrg tu_cs_emit(cs, 706b8e80941Smrg A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(fragcoord_xy_regid) | 707b8e80941Smrg A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(fragcoord_zw_regid) | 708b8e80941Smrg A6XX_HLSQ_CONTROL_4_REG_BARY_IJ_PIXEL_PERSAMP(regid(63, 0)) | 709b8e80941Smrg 0x0000fc00); 710b8e80941Smrg tu_cs_emit(cs, 0xfc); 711b8e80941Smrg} 712b8e80941Smrg 713b8e80941Smrgstatic void 714b8e80941Smrgtu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) 715b8e80941Smrg{ 716b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UNKNOWN_B980, 1); 717b8e80941Smrg tu_cs_emit(cs, fs->total_in > 0 ? 3 : 1); 718b8e80941Smrg 719b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A982, 1); 720b8e80941Smrg tu_cs_emit(cs, 0); /* XXX */ 721b8e80941Smrg 722b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); 723b8e80941Smrg tu_cs_emit(cs, 0xff); /* XXX */ 724b8e80941Smrg 725b8e80941Smrg uint32_t gras_cntl = 0; 726b8e80941Smrg if (fs->total_in > 0) 727b8e80941Smrg gras_cntl |= A6XX_GRAS_CNTL_VARYING; 728b8e80941Smrg if (fs->frag_coord) { 729b8e80941Smrg gras_cntl |= A6XX_GRAS_CNTL_SIZE | A6XX_GRAS_CNTL_XCOORD | 730b8e80941Smrg A6XX_GRAS_CNTL_YCOORD | A6XX_GRAS_CNTL_ZCOORD | 731b8e80941Smrg A6XX_GRAS_CNTL_WCOORD; 732b8e80941Smrg } 733b8e80941Smrg 734b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1); 735b8e80941Smrg tu_cs_emit(cs, gras_cntl); 736b8e80941Smrg 737b8e80941Smrg uint32_t rb_render_control = 0; 738b8e80941Smrg if (fs->total_in > 0) { 739b8e80941Smrg rb_render_control = 740b8e80941Smrg A6XX_RB_RENDER_CONTROL0_VARYING | A6XX_RB_RENDER_CONTROL0_UNK10; 741b8e80941Smrg } 742b8e80941Smrg if (fs->frag_coord) { 743b8e80941Smrg rb_render_control |= 744b8e80941Smrg A6XX_RB_RENDER_CONTROL0_SIZE | A6XX_RB_RENDER_CONTROL0_XCOORD | 745b8e80941Smrg A6XX_RB_RENDER_CONTROL0_YCOORD | A6XX_RB_RENDER_CONTROL0_ZCOORD | 746b8e80941Smrg A6XX_RB_RENDER_CONTROL0_WCOORD; 747b8e80941Smrg } 748b8e80941Smrg 749b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2); 750b8e80941Smrg tu_cs_emit(cs, rb_render_control); 751b8e80941Smrg tu_cs_emit(cs, (fs->frag_face ? A6XX_RB_RENDER_CONTROL1_FACENESS : 0)); 752b8e80941Smrg} 753b8e80941Smrg 754b8e80941Smrgstatic void 755b8e80941Smrgtu6_emit_fs_outputs(struct tu_cs *cs, 756b8e80941Smrg const struct ir3_shader_variant *fs, 757b8e80941Smrg uint32_t mrt_count) 758b8e80941Smrg{ 759b8e80941Smrg const uint32_t fragdepth_regid = 760b8e80941Smrg ir3_find_output_regid(fs, FRAG_RESULT_DEPTH); 761b8e80941Smrg uint32_t fragdata_regid[8]; 762b8e80941Smrg if (fs->color0_mrt) { 763b8e80941Smrg fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR); 764b8e80941Smrg for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++) 765b8e80941Smrg fragdata_regid[i] = fragdata_regid[0]; 766b8e80941Smrg } else { 767b8e80941Smrg for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) 768b8e80941Smrg fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i); 769b8e80941Smrg } 770b8e80941Smrg 771b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); 772b8e80941Smrg tu_cs_emit( 773b8e80941Smrg cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(fragdepth_regid) | 0xfcfc0000); 774b8e80941Smrg tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); 775b8e80941Smrg 776b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); 777b8e80941Smrg for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) { 778b8e80941Smrg // TODO we could have a mix of half and full precision outputs, 779b8e80941Smrg // we really need to figure out half-precision from IR3_REG_HALF 780b8e80941Smrg tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) | 781b8e80941Smrg (false ? A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION : 0)); 782b8e80941Smrg } 783b8e80941Smrg 784b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); 785b8e80941Smrg tu_cs_emit(cs, fs->writes_pos ? A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z : 0); 786b8e80941Smrg tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count)); 787b8e80941Smrg 788b8e80941Smrg uint32_t gras_su_depth_plane_cntl = 0; 789b8e80941Smrg uint32_t rb_depth_plane_cntl = 0; 790b8e80941Smrg if (fs->no_earlyz | fs->writes_pos) { 791b8e80941Smrg gras_su_depth_plane_cntl |= A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; 792b8e80941Smrg rb_depth_plane_cntl |= A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; 793b8e80941Smrg } 794b8e80941Smrg 795b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); 796b8e80941Smrg tu_cs_emit(cs, gras_su_depth_plane_cntl); 797b8e80941Smrg 798b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); 799b8e80941Smrg tu_cs_emit(cs, rb_depth_plane_cntl); 800b8e80941Smrg} 801b8e80941Smrg 802b8e80941Smrgstatic void 803b8e80941Smrgtu6_emit_shader_object(struct tu_cs *cs, 804b8e80941Smrg gl_shader_stage stage, 805b8e80941Smrg const struct ir3_shader_variant *variant, 806b8e80941Smrg const struct tu_bo *binary_bo, 807b8e80941Smrg uint32_t binary_offset) 808b8e80941Smrg{ 809b8e80941Smrg uint16_t reg; 810b8e80941Smrg uint8_t opcode; 811b8e80941Smrg enum a6xx_state_block sb; 812b8e80941Smrg switch (stage) { 813b8e80941Smrg case MESA_SHADER_VERTEX: 814b8e80941Smrg reg = REG_A6XX_SP_VS_OBJ_START_LO; 815b8e80941Smrg opcode = CP_LOAD_STATE6_GEOM; 816b8e80941Smrg sb = SB6_VS_SHADER; 817b8e80941Smrg break; 818b8e80941Smrg case MESA_SHADER_TESS_CTRL: 819b8e80941Smrg reg = REG_A6XX_SP_HS_OBJ_START_LO; 820b8e80941Smrg opcode = CP_LOAD_STATE6_GEOM; 821b8e80941Smrg sb = SB6_HS_SHADER; 822b8e80941Smrg break; 823b8e80941Smrg case MESA_SHADER_TESS_EVAL: 824b8e80941Smrg reg = REG_A6XX_SP_DS_OBJ_START_LO; 825b8e80941Smrg opcode = CP_LOAD_STATE6_GEOM; 826b8e80941Smrg sb = SB6_DS_SHADER; 827b8e80941Smrg break; 828b8e80941Smrg case MESA_SHADER_GEOMETRY: 829b8e80941Smrg reg = REG_A6XX_SP_GS_OBJ_START_LO; 830b8e80941Smrg opcode = CP_LOAD_STATE6_GEOM; 831b8e80941Smrg sb = SB6_GS_SHADER; 832b8e80941Smrg break; 833b8e80941Smrg case MESA_SHADER_FRAGMENT: 834b8e80941Smrg reg = REG_A6XX_SP_FS_OBJ_START_LO; 835b8e80941Smrg opcode = CP_LOAD_STATE6_FRAG; 836b8e80941Smrg sb = SB6_FS_SHADER; 837b8e80941Smrg break; 838b8e80941Smrg case MESA_SHADER_COMPUTE: 839b8e80941Smrg reg = REG_A6XX_SP_CS_OBJ_START_LO; 840b8e80941Smrg opcode = CP_LOAD_STATE6_FRAG; 841b8e80941Smrg sb = SB6_CS_SHADER; 842b8e80941Smrg break; 843b8e80941Smrg default: 844b8e80941Smrg unreachable("invalid gl_shader_stage"); 845b8e80941Smrg opcode = CP_LOAD_STATE6_GEOM; 846b8e80941Smrg sb = SB6_VS_SHADER; 847b8e80941Smrg break; 848b8e80941Smrg } 849b8e80941Smrg 850b8e80941Smrg if (!variant->instrlen) { 851b8e80941Smrg tu_cs_emit_pkt4(cs, reg, 2); 852b8e80941Smrg tu_cs_emit_qw(cs, 0); 853b8e80941Smrg return; 854b8e80941Smrg } 855b8e80941Smrg 856b8e80941Smrg assert(variant->type == stage); 857b8e80941Smrg 858b8e80941Smrg const uint64_t binary_iova = binary_bo->iova + binary_offset; 859b8e80941Smrg assert((binary_iova & 0x3) == 0); 860b8e80941Smrg 861b8e80941Smrg tu_cs_emit_pkt4(cs, reg, 2); 862b8e80941Smrg tu_cs_emit_qw(cs, binary_iova); 863b8e80941Smrg 864b8e80941Smrg /* always indirect */ 865b8e80941Smrg const bool indirect = true; 866b8e80941Smrg if (indirect) { 867b8e80941Smrg tu_cs_emit_pkt7(cs, opcode, 3); 868b8e80941Smrg tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 869b8e80941Smrg CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | 870b8e80941Smrg CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 871b8e80941Smrg CP_LOAD_STATE6_0_STATE_BLOCK(sb) | 872b8e80941Smrg CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); 873b8e80941Smrg tu_cs_emit_qw(cs, binary_iova); 874b8e80941Smrg } else { 875b8e80941Smrg const void *binary = binary_bo->map + binary_offset; 876b8e80941Smrg 877b8e80941Smrg tu_cs_emit_pkt7(cs, opcode, 3 + variant->info.sizedwords); 878b8e80941Smrg tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 879b8e80941Smrg CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | 880b8e80941Smrg CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 881b8e80941Smrg CP_LOAD_STATE6_0_STATE_BLOCK(sb) | 882b8e80941Smrg CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); 883b8e80941Smrg tu_cs_emit_qw(cs, 0); 884b8e80941Smrg tu_cs_emit_array(cs, binary, variant->info.sizedwords); 885b8e80941Smrg } 886b8e80941Smrg} 887b8e80941Smrg 888b8e80941Smrgstatic void 889b8e80941Smrgtu6_emit_program(struct tu_cs *cs, 890b8e80941Smrg const struct tu_pipeline_builder *builder, 891b8e80941Smrg const struct tu_bo *binary_bo, 892b8e80941Smrg bool binning_pass) 893b8e80941Smrg{ 894b8e80941Smrg static const struct ir3_shader_variant dummy_variant = { 895b8e80941Smrg .type = MESA_SHADER_NONE 896b8e80941Smrg }; 897b8e80941Smrg assert(builder->shaders[MESA_SHADER_VERTEX]); 898b8e80941Smrg const struct ir3_shader_variant *vs = 899b8e80941Smrg &builder->shaders[MESA_SHADER_VERTEX]->variants[0]; 900b8e80941Smrg const struct ir3_shader_variant *hs = 901b8e80941Smrg builder->shaders[MESA_SHADER_TESS_CTRL] 902b8e80941Smrg ? &builder->shaders[MESA_SHADER_TESS_CTRL]->variants[0] 903b8e80941Smrg : &dummy_variant; 904b8e80941Smrg const struct ir3_shader_variant *ds = 905b8e80941Smrg builder->shaders[MESA_SHADER_TESS_EVAL] 906b8e80941Smrg ? &builder->shaders[MESA_SHADER_TESS_EVAL]->variants[0] 907b8e80941Smrg : &dummy_variant; 908b8e80941Smrg const struct ir3_shader_variant *gs = 909b8e80941Smrg builder->shaders[MESA_SHADER_GEOMETRY] 910b8e80941Smrg ? &builder->shaders[MESA_SHADER_GEOMETRY]->variants[0] 911b8e80941Smrg : &dummy_variant; 912b8e80941Smrg const struct ir3_shader_variant *fs = 913b8e80941Smrg builder->shaders[MESA_SHADER_FRAGMENT] 914b8e80941Smrg ? &builder->shaders[MESA_SHADER_FRAGMENT]->variants[0] 915b8e80941Smrg : &dummy_variant; 916b8e80941Smrg 917b8e80941Smrg if (binning_pass) { 918b8e80941Smrg vs = &builder->shaders[MESA_SHADER_VERTEX]->variants[1]; 919b8e80941Smrg fs = &dummy_variant; 920b8e80941Smrg } 921b8e80941Smrg 922b8e80941Smrg tu6_emit_vs_config(cs, vs); 923b8e80941Smrg tu6_emit_hs_config(cs, hs); 924b8e80941Smrg tu6_emit_ds_config(cs, ds); 925b8e80941Smrg tu6_emit_gs_config(cs, gs); 926b8e80941Smrg tu6_emit_fs_config(cs, fs); 927b8e80941Smrg 928b8e80941Smrg tu6_emit_vs_system_values(cs, vs); 929b8e80941Smrg tu6_emit_vpc(cs, vs, fs, binning_pass); 930b8e80941Smrg tu6_emit_vpc_varying_modes(cs, fs, binning_pass); 931b8e80941Smrg tu6_emit_fs_system_values(cs, fs); 932b8e80941Smrg tu6_emit_fs_inputs(cs, fs); 933b8e80941Smrg tu6_emit_fs_outputs(cs, fs, builder->color_attachment_count); 934b8e80941Smrg 935b8e80941Smrg tu6_emit_shader_object(cs, MESA_SHADER_VERTEX, vs, binary_bo, 936b8e80941Smrg builder->shader_offsets[MESA_SHADER_VERTEX]); 937b8e80941Smrg 938b8e80941Smrg tu6_emit_shader_object(cs, MESA_SHADER_FRAGMENT, fs, binary_bo, 939b8e80941Smrg builder->shader_offsets[MESA_SHADER_FRAGMENT]); 940b8e80941Smrg} 941b8e80941Smrg 942b8e80941Smrgstatic void 943b8e80941Smrgtu6_emit_vertex_input(struct tu_cs *cs, 944b8e80941Smrg const struct ir3_shader_variant *vs, 945b8e80941Smrg const VkPipelineVertexInputStateCreateInfo *vi_info, 946b8e80941Smrg uint8_t bindings[MAX_VERTEX_ATTRIBS], 947b8e80941Smrg uint16_t strides[MAX_VERTEX_ATTRIBS], 948b8e80941Smrg uint16_t offsets[MAX_VERTEX_ATTRIBS], 949b8e80941Smrg uint32_t *count) 950b8e80941Smrg{ 951b8e80941Smrg uint32_t vfd_decode_idx = 0; 952b8e80941Smrg 953b8e80941Smrg /* why do we go beyond inputs_count? */ 954b8e80941Smrg assert(vs->inputs_count + 1 <= MAX_VERTEX_ATTRIBS); 955b8e80941Smrg for (uint32_t i = 0; i <= vs->inputs_count; i++) { 956b8e80941Smrg if (vs->inputs[i].sysval || !vs->inputs[i].compmask) 957b8e80941Smrg continue; 958b8e80941Smrg 959b8e80941Smrg const VkVertexInputAttributeDescription *vi_attr = 960b8e80941Smrg tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot); 961b8e80941Smrg const VkVertexInputBindingDescription *vi_binding = 962b8e80941Smrg tu_find_vertex_input_binding(vi_info, vi_attr); 963b8e80941Smrg assert(vi_attr && vi_binding); 964b8e80941Smrg 965b8e80941Smrg const struct tu_native_format *format = 966b8e80941Smrg tu6_get_native_format(vi_attr->format); 967b8e80941Smrg assert(format && format->vtx >= 0); 968b8e80941Smrg 969b8e80941Smrg uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) | 970b8e80941Smrg A6XX_VFD_DECODE_INSTR_FORMAT(format->vtx) | 971b8e80941Smrg A6XX_VFD_DECODE_INSTR_SWAP(format->swap) | 972b8e80941Smrg A6XX_VFD_DECODE_INSTR_UNK30; 973b8e80941Smrg if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) 974b8e80941Smrg vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED; 975b8e80941Smrg if (!vk_format_is_int(vi_attr->format)) 976b8e80941Smrg vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT; 977b8e80941Smrg 978b8e80941Smrg const uint32_t vfd_decode_step_rate = 1; 979b8e80941Smrg 980b8e80941Smrg const uint32_t vfd_dest_cntl = 981b8e80941Smrg A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) | 982b8e80941Smrg A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid); 983b8e80941Smrg 984b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2); 985b8e80941Smrg tu_cs_emit(cs, vfd_decode); 986b8e80941Smrg tu_cs_emit(cs, vfd_decode_step_rate); 987b8e80941Smrg 988b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1); 989b8e80941Smrg tu_cs_emit(cs, vfd_dest_cntl); 990b8e80941Smrg 991b8e80941Smrg bindings[vfd_decode_idx] = vi_binding->binding; 992b8e80941Smrg strides[vfd_decode_idx] = vi_binding->stride; 993b8e80941Smrg offsets[vfd_decode_idx] = vi_attr->offset; 994b8e80941Smrg 995b8e80941Smrg vfd_decode_idx++; 996b8e80941Smrg } 997b8e80941Smrg 998b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1); 999b8e80941Smrg tu_cs_emit( 1000b8e80941Smrg cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8)); 1001b8e80941Smrg 1002b8e80941Smrg *count = vfd_decode_idx; 1003b8e80941Smrg} 1004b8e80941Smrg 1005b8e80941Smrgstatic uint32_t 1006b8e80941Smrgtu6_guardband_adj(uint32_t v) 1007b8e80941Smrg{ 1008b8e80941Smrg if (v > 256) 1009b8e80941Smrg return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0)); 1010b8e80941Smrg else 1011b8e80941Smrg return 511; 1012b8e80941Smrg} 1013b8e80941Smrg 1014b8e80941Smrgvoid 1015b8e80941Smrgtu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport) 1016b8e80941Smrg{ 1017b8e80941Smrg float offsets[3]; 1018b8e80941Smrg float scales[3]; 1019b8e80941Smrg scales[0] = viewport->width / 2.0f; 1020b8e80941Smrg scales[1] = viewport->height / 2.0f; 1021b8e80941Smrg scales[2] = viewport->maxDepth - viewport->minDepth; 1022b8e80941Smrg offsets[0] = viewport->x + scales[0]; 1023b8e80941Smrg offsets[1] = viewport->y + scales[1]; 1024b8e80941Smrg offsets[2] = viewport->minDepth; 1025b8e80941Smrg 1026b8e80941Smrg VkOffset2D min; 1027b8e80941Smrg VkOffset2D max; 1028b8e80941Smrg min.x = (int32_t) viewport->x; 1029b8e80941Smrg max.x = (int32_t) ceilf(viewport->x + viewport->width); 1030b8e80941Smrg if (viewport->height >= 0.0f) { 1031b8e80941Smrg min.y = (int32_t) viewport->y; 1032b8e80941Smrg max.y = (int32_t) ceilf(viewport->y + viewport->height); 1033b8e80941Smrg } else { 1034b8e80941Smrg min.y = (int32_t)(viewport->y + viewport->height); 1035b8e80941Smrg max.y = (int32_t) ceilf(viewport->y); 1036b8e80941Smrg } 1037b8e80941Smrg /* the spec allows viewport->height to be 0.0f */ 1038b8e80941Smrg if (min.y == max.y) 1039b8e80941Smrg max.y++; 1040b8e80941Smrg assert(min.x >= 0 && min.x < max.x); 1041b8e80941Smrg assert(min.y >= 0 && min.y < max.y); 1042b8e80941Smrg 1043b8e80941Smrg VkExtent2D guardband_adj; 1044b8e80941Smrg guardband_adj.width = tu6_guardband_adj(max.x - min.x); 1045b8e80941Smrg guardband_adj.height = tu6_guardband_adj(max.y - min.y); 1046b8e80941Smrg 1047b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); 1048b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0])); 1049b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0])); 1050b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1])); 1051b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1])); 1052b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2])); 1053b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2])); 1054b8e80941Smrg 1055b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); 1056b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) | 1057b8e80941Smrg A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y)); 1058b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) | 1059b8e80941Smrg A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1)); 1060b8e80941Smrg 1061b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); 1062b8e80941Smrg tu_cs_emit(cs, 1063b8e80941Smrg A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) | 1064b8e80941Smrg A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height)); 1065b8e80941Smrg} 1066b8e80941Smrg 1067b8e80941Smrgvoid 1068b8e80941Smrgtu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) 1069b8e80941Smrg{ 1070b8e80941Smrg const VkOffset2D min = scissor->offset; 1071b8e80941Smrg const VkOffset2D max = { 1072b8e80941Smrg scissor->offset.x + scissor->extent.width, 1073b8e80941Smrg scissor->offset.y + scissor->extent.height, 1074b8e80941Smrg }; 1075b8e80941Smrg 1076b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); 1077b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) | 1078b8e80941Smrg A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y)); 1079b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) | 1080b8e80941Smrg A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1)); 1081b8e80941Smrg} 1082b8e80941Smrg 1083b8e80941Smrgstatic void 1084b8e80941Smrgtu6_emit_gras_unknowns(struct tu_cs *cs) 1085b8e80941Smrg{ 1086b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1); 1087b8e80941Smrg tu_cs_emit(cs, 0x80); 1088b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1); 1089b8e80941Smrg tu_cs_emit(cs, 0x0); 1090b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1); 1091b8e80941Smrg tu_cs_emit(cs, 0x0); 1092b8e80941Smrg} 1093b8e80941Smrg 1094b8e80941Smrgstatic void 1095b8e80941Smrgtu6_emit_point_size(struct tu_cs *cs) 1096b8e80941Smrg{ 1097b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2); 1098b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) | 1099b8e80941Smrg A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f)); 1100b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f)); 1101b8e80941Smrg} 1102b8e80941Smrg 1103b8e80941Smrgstatic uint32_t 1104b8e80941Smrgtu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, 1105b8e80941Smrg VkSampleCountFlagBits samples) 1106b8e80941Smrg{ 1107b8e80941Smrg uint32_t gras_su_cntl = 0; 1108b8e80941Smrg 1109b8e80941Smrg if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT) 1110b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; 1111b8e80941Smrg if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT) 1112b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; 1113b8e80941Smrg 1114b8e80941Smrg if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) 1115b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; 1116b8e80941Smrg 1117b8e80941Smrg /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */ 1118b8e80941Smrg 1119b8e80941Smrg if (rast_info->depthBiasEnable) 1120b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; 1121b8e80941Smrg 1122b8e80941Smrg if (samples > VK_SAMPLE_COUNT_1_BIT) 1123b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_MSAA_ENABLE; 1124b8e80941Smrg 1125b8e80941Smrg return gras_su_cntl; 1126b8e80941Smrg} 1127b8e80941Smrg 1128b8e80941Smrgvoid 1129b8e80941Smrgtu6_emit_gras_su_cntl(struct tu_cs *cs, 1130b8e80941Smrg uint32_t gras_su_cntl, 1131b8e80941Smrg float line_width) 1132b8e80941Smrg{ 1133b8e80941Smrg assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0); 1134b8e80941Smrg gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f); 1135b8e80941Smrg 1136b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1); 1137b8e80941Smrg tu_cs_emit(cs, gras_su_cntl); 1138b8e80941Smrg} 1139b8e80941Smrg 1140b8e80941Smrgvoid 1141b8e80941Smrgtu6_emit_depth_bias(struct tu_cs *cs, 1142b8e80941Smrg float constant_factor, 1143b8e80941Smrg float clamp, 1144b8e80941Smrg float slope_factor) 1145b8e80941Smrg{ 1146b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); 1147b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor)); 1148b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor)); 1149b8e80941Smrg tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp)); 1150b8e80941Smrg} 1151b8e80941Smrg 1152b8e80941Smrgstatic void 1153b8e80941Smrgtu6_emit_alpha_control_disable(struct tu_cs *cs) 1154b8e80941Smrg{ 1155b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1); 1156b8e80941Smrg tu_cs_emit(cs, 0); 1157b8e80941Smrg} 1158b8e80941Smrg 1159b8e80941Smrgstatic void 1160b8e80941Smrgtu6_emit_depth_control(struct tu_cs *cs, 1161b8e80941Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info) 1162b8e80941Smrg{ 1163b8e80941Smrg assert(!ds_info->depthBoundsTestEnable); 1164b8e80941Smrg 1165b8e80941Smrg uint32_t rb_depth_cntl = 0; 1166b8e80941Smrg if (ds_info->depthTestEnable) { 1167b8e80941Smrg rb_depth_cntl |= 1168b8e80941Smrg A6XX_RB_DEPTH_CNTL_Z_ENABLE | 1169b8e80941Smrg A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | 1170b8e80941Smrg A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; 1171b8e80941Smrg 1172b8e80941Smrg if (ds_info->depthWriteEnable) 1173b8e80941Smrg rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; 1174b8e80941Smrg } 1175b8e80941Smrg 1176b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1); 1177b8e80941Smrg tu_cs_emit(cs, rb_depth_cntl); 1178b8e80941Smrg} 1179b8e80941Smrg 1180b8e80941Smrgstatic void 1181b8e80941Smrgtu6_emit_stencil_control(struct tu_cs *cs, 1182b8e80941Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info) 1183b8e80941Smrg{ 1184b8e80941Smrg uint32_t rb_stencil_control = 0; 1185b8e80941Smrg if (ds_info->stencilTestEnable) { 1186b8e80941Smrg const VkStencilOpState *front = &ds_info->front; 1187b8e80941Smrg const VkStencilOpState *back = &ds_info->back; 1188b8e80941Smrg rb_stencil_control |= 1189b8e80941Smrg A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | 1190b8e80941Smrg A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | 1191b8e80941Smrg A6XX_RB_STENCIL_CONTROL_STENCIL_READ | 1192b8e80941Smrg A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | 1193b8e80941Smrg A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | 1194b8e80941Smrg A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | 1195b8e80941Smrg A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | 1196b8e80941Smrg A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | 1197b8e80941Smrg A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | 1198b8e80941Smrg A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | 1199b8e80941Smrg A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); 1200b8e80941Smrg } 1201b8e80941Smrg 1202b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1); 1203b8e80941Smrg tu_cs_emit(cs, rb_stencil_control); 1204b8e80941Smrg} 1205b8e80941Smrg 1206b8e80941Smrgvoid 1207b8e80941Smrgtu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back) 1208b8e80941Smrg{ 1209b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1); 1210b8e80941Smrg tu_cs_emit( 1211b8e80941Smrg cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back)); 1212b8e80941Smrg} 1213b8e80941Smrg 1214b8e80941Smrgvoid 1215b8e80941Smrgtu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back) 1216b8e80941Smrg{ 1217b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1); 1218b8e80941Smrg tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) | 1219b8e80941Smrg A6XX_RB_STENCILWRMASK_BFWRMASK(back)); 1220b8e80941Smrg} 1221b8e80941Smrg 1222b8e80941Smrgvoid 1223b8e80941Smrgtu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back) 1224b8e80941Smrg{ 1225b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1); 1226b8e80941Smrg tu_cs_emit(cs, 1227b8e80941Smrg A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back)); 1228b8e80941Smrg} 1229b8e80941Smrg 1230b8e80941Smrgstatic uint32_t 1231b8e80941Smrgtu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, 1232b8e80941Smrg bool has_alpha) 1233b8e80941Smrg{ 1234b8e80941Smrg const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp); 1235b8e80941Smrg const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor( 1236b8e80941Smrg has_alpha ? att->srcColorBlendFactor 1237b8e80941Smrg : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor)); 1238b8e80941Smrg const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor( 1239b8e80941Smrg has_alpha ? att->dstColorBlendFactor 1240b8e80941Smrg : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor)); 1241b8e80941Smrg const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp); 1242b8e80941Smrg const enum adreno_rb_blend_factor src_alpha_factor = 1243b8e80941Smrg tu6_blend_factor(att->srcAlphaBlendFactor); 1244b8e80941Smrg const enum adreno_rb_blend_factor dst_alpha_factor = 1245b8e80941Smrg tu6_blend_factor(att->dstAlphaBlendFactor); 1246b8e80941Smrg 1247b8e80941Smrg return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) | 1248b8e80941Smrg A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) | 1249b8e80941Smrg A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) | 1250b8e80941Smrg A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) | 1251b8e80941Smrg A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) | 1252b8e80941Smrg A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor); 1253b8e80941Smrg} 1254b8e80941Smrg 1255b8e80941Smrgstatic uint32_t 1256b8e80941Smrgtu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att, 1257b8e80941Smrg uint32_t rb_mrt_control_rop, 1258b8e80941Smrg bool is_int, 1259b8e80941Smrg bool has_alpha) 1260b8e80941Smrg{ 1261b8e80941Smrg uint32_t rb_mrt_control = 1262b8e80941Smrg A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask); 1263b8e80941Smrg 1264b8e80941Smrg /* ignore blending and logic op for integer attachments */ 1265b8e80941Smrg if (is_int) { 1266b8e80941Smrg rb_mrt_control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); 1267b8e80941Smrg return rb_mrt_control; 1268b8e80941Smrg } 1269b8e80941Smrg 1270b8e80941Smrg rb_mrt_control |= rb_mrt_control_rop; 1271b8e80941Smrg 1272b8e80941Smrg if (att->blendEnable) { 1273b8e80941Smrg rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND; 1274b8e80941Smrg 1275b8e80941Smrg if (has_alpha) 1276b8e80941Smrg rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2; 1277b8e80941Smrg } 1278b8e80941Smrg 1279b8e80941Smrg return rb_mrt_control; 1280b8e80941Smrg} 1281b8e80941Smrg 1282b8e80941Smrgstatic void 1283b8e80941Smrgtu6_emit_rb_mrt_controls(struct tu_cs *cs, 1284b8e80941Smrg const VkPipelineColorBlendStateCreateInfo *blend_info, 1285b8e80941Smrg const VkFormat attachment_formats[MAX_RTS], 1286b8e80941Smrg uint32_t *blend_enable_mask) 1287b8e80941Smrg{ 1288b8e80941Smrg *blend_enable_mask = 0; 1289b8e80941Smrg 1290b8e80941Smrg bool rop_reads_dst = false; 1291b8e80941Smrg uint32_t rb_mrt_control_rop = 0; 1292b8e80941Smrg if (blend_info->logicOpEnable) { 1293b8e80941Smrg rop_reads_dst = tu_logic_op_reads_dst(blend_info->logicOp); 1294b8e80941Smrg rb_mrt_control_rop = 1295b8e80941Smrg A6XX_RB_MRT_CONTROL_ROP_ENABLE | 1296b8e80941Smrg A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info->logicOp)); 1297b8e80941Smrg } 1298b8e80941Smrg 1299b8e80941Smrg for (uint32_t i = 0; i < blend_info->attachmentCount; i++) { 1300b8e80941Smrg const VkPipelineColorBlendAttachmentState *att = 1301b8e80941Smrg &blend_info->pAttachments[i]; 1302b8e80941Smrg const VkFormat format = attachment_formats[i]; 1303b8e80941Smrg 1304b8e80941Smrg uint32_t rb_mrt_control = 0; 1305b8e80941Smrg uint32_t rb_mrt_blend_control = 0; 1306b8e80941Smrg if (format != VK_FORMAT_UNDEFINED) { 1307b8e80941Smrg const bool is_int = vk_format_is_int(format); 1308b8e80941Smrg const bool has_alpha = vk_format_has_alpha(format); 1309b8e80941Smrg 1310b8e80941Smrg rb_mrt_control = 1311b8e80941Smrg tu6_rb_mrt_control(att, rb_mrt_control_rop, is_int, has_alpha); 1312b8e80941Smrg rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha); 1313b8e80941Smrg 1314b8e80941Smrg if (att->blendEnable || rop_reads_dst) 1315b8e80941Smrg *blend_enable_mask |= 1 << i; 1316b8e80941Smrg } 1317b8e80941Smrg 1318b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); 1319b8e80941Smrg tu_cs_emit(cs, rb_mrt_control); 1320b8e80941Smrg tu_cs_emit(cs, rb_mrt_blend_control); 1321b8e80941Smrg } 1322b8e80941Smrg 1323b8e80941Smrg for (uint32_t i = blend_info->attachmentCount; i < MAX_RTS; i++) { 1324b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); 1325b8e80941Smrg tu_cs_emit(cs, 0); 1326b8e80941Smrg tu_cs_emit(cs, 0); 1327b8e80941Smrg } 1328b8e80941Smrg} 1329b8e80941Smrg 1330b8e80941Smrgstatic void 1331b8e80941Smrgtu6_emit_blend_control(struct tu_cs *cs, 1332b8e80941Smrg uint32_t blend_enable_mask, 1333b8e80941Smrg const VkPipelineMultisampleStateCreateInfo *msaa_info) 1334b8e80941Smrg{ 1335b8e80941Smrg assert(!msaa_info->sampleShadingEnable); 1336b8e80941Smrg assert(!msaa_info->alphaToOneEnable); 1337b8e80941Smrg 1338b8e80941Smrg uint32_t sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8; 1339b8e80941Smrg if (blend_enable_mask) 1340b8e80941Smrg sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ENABLED; 1341b8e80941Smrg if (msaa_info->alphaToCoverageEnable) 1342b8e80941Smrg sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE; 1343b8e80941Smrg 1344b8e80941Smrg const uint32_t sample_mask = 1345b8e80941Smrg msaa_info->pSampleMask ? *msaa_info->pSampleMask 1346b8e80941Smrg : ((1 << msaa_info->rasterizationSamples) - 1); 1347b8e80941Smrg 1348b8e80941Smrg /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */ 1349b8e80941Smrg uint32_t rb_blend_cntl = 1350b8e80941Smrg A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) | 1351b8e80941Smrg A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND | 1352b8e80941Smrg A6XX_RB_BLEND_CNTL_SAMPLE_MASK(sample_mask); 1353b8e80941Smrg if (msaa_info->alphaToCoverageEnable) 1354b8e80941Smrg rb_blend_cntl |= A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE; 1355b8e80941Smrg 1356b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_SP_BLEND_CNTL, 1); 1357b8e80941Smrg tu_cs_emit(cs, sp_blend_cntl); 1358b8e80941Smrg 1359b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_CNTL, 1); 1360b8e80941Smrg tu_cs_emit(cs, rb_blend_cntl); 1361b8e80941Smrg} 1362b8e80941Smrg 1363b8e80941Smrgvoid 1364b8e80941Smrgtu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]) 1365b8e80941Smrg{ 1366b8e80941Smrg tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4); 1367b8e80941Smrg tu_cs_emit_array(cs, (const uint32_t *) constants, 4); 1368b8e80941Smrg} 1369b8e80941Smrg 1370b8e80941Smrgstatic VkResult 1371b8e80941Smrgtu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder, 1372b8e80941Smrg struct tu_pipeline **out_pipeline) 1373b8e80941Smrg{ 1374b8e80941Smrg struct tu_device *dev = builder->device; 1375b8e80941Smrg 1376b8e80941Smrg struct tu_pipeline *pipeline = 1377b8e80941Smrg vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8, 1378b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1379b8e80941Smrg if (!pipeline) 1380b8e80941Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 1381b8e80941Smrg 1382b8e80941Smrg tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048); 1383b8e80941Smrg 1384b8e80941Smrg /* reserve the space now such that tu_cs_begin_sub_stream never fails */ 1385b8e80941Smrg VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048); 1386b8e80941Smrg if (result != VK_SUCCESS) { 1387b8e80941Smrg vk_free2(&dev->alloc, builder->alloc, pipeline); 1388b8e80941Smrg return result; 1389b8e80941Smrg } 1390b8e80941Smrg 1391b8e80941Smrg *out_pipeline = pipeline; 1392b8e80941Smrg 1393b8e80941Smrg return VK_SUCCESS; 1394b8e80941Smrg} 1395b8e80941Smrg 1396b8e80941Smrgstatic VkResult 1397b8e80941Smrgtu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) 1398b8e80941Smrg{ 1399b8e80941Smrg const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { 1400b8e80941Smrg NULL 1401b8e80941Smrg }; 1402b8e80941Smrg for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { 1403b8e80941Smrg gl_shader_stage stage = 1404b8e80941Smrg tu_shader_stage(builder->create_info->pStages[i].stage); 1405b8e80941Smrg stage_infos[stage] = &builder->create_info->pStages[i]; 1406b8e80941Smrg } 1407b8e80941Smrg 1408b8e80941Smrg struct tu_shader_compile_options options; 1409b8e80941Smrg tu_shader_compile_options_init(&options, builder->create_info); 1410b8e80941Smrg 1411b8e80941Smrg /* compile shaders in reverse order */ 1412b8e80941Smrg struct tu_shader *next_stage_shader = NULL; 1413b8e80941Smrg for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; 1414b8e80941Smrg stage > MESA_SHADER_NONE; stage--) { 1415b8e80941Smrg const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; 1416b8e80941Smrg if (!stage_info) 1417b8e80941Smrg continue; 1418b8e80941Smrg 1419b8e80941Smrg struct tu_shader *shader = 1420b8e80941Smrg tu_shader_create(builder->device, stage, stage_info, builder->alloc); 1421b8e80941Smrg if (!shader) 1422b8e80941Smrg return VK_ERROR_OUT_OF_HOST_MEMORY; 1423b8e80941Smrg 1424b8e80941Smrg VkResult result = 1425b8e80941Smrg tu_shader_compile(builder->device, shader, next_stage_shader, 1426b8e80941Smrg &options, builder->alloc); 1427b8e80941Smrg if (result != VK_SUCCESS) 1428b8e80941Smrg return result; 1429b8e80941Smrg 1430b8e80941Smrg builder->shaders[stage] = shader; 1431b8e80941Smrg builder->shader_offsets[stage] = builder->shader_total_size; 1432b8e80941Smrg builder->shader_total_size += 1433b8e80941Smrg sizeof(uint32_t) * shader->variants[0].info.sizedwords; 1434b8e80941Smrg 1435b8e80941Smrg next_stage_shader = shader; 1436b8e80941Smrg } 1437b8e80941Smrg 1438b8e80941Smrg if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { 1439b8e80941Smrg const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; 1440b8e80941Smrg builder->binning_vs_offset = builder->shader_total_size; 1441b8e80941Smrg builder->shader_total_size += 1442b8e80941Smrg sizeof(uint32_t) * vs->variants[1].info.sizedwords; 1443b8e80941Smrg } 1444b8e80941Smrg 1445b8e80941Smrg return VK_SUCCESS; 1446b8e80941Smrg} 1447b8e80941Smrg 1448b8e80941Smrgstatic VkResult 1449b8e80941Smrgtu_pipeline_builder_upload_shaders(struct tu_pipeline_builder *builder, 1450b8e80941Smrg struct tu_pipeline *pipeline) 1451b8e80941Smrg{ 1452b8e80941Smrg struct tu_bo *bo = &pipeline->program.binary_bo; 1453b8e80941Smrg 1454b8e80941Smrg VkResult result = 1455b8e80941Smrg tu_bo_init_new(builder->device, bo, builder->shader_total_size); 1456b8e80941Smrg if (result != VK_SUCCESS) 1457b8e80941Smrg return result; 1458b8e80941Smrg 1459b8e80941Smrg result = tu_bo_map(builder->device, bo); 1460b8e80941Smrg if (result != VK_SUCCESS) 1461b8e80941Smrg return result; 1462b8e80941Smrg 1463b8e80941Smrg for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { 1464b8e80941Smrg const struct tu_shader *shader = builder->shaders[i]; 1465b8e80941Smrg if (!shader) 1466b8e80941Smrg continue; 1467b8e80941Smrg 1468b8e80941Smrg memcpy(bo->map + builder->shader_offsets[i], shader->binary, 1469b8e80941Smrg sizeof(uint32_t) * shader->variants[0].info.sizedwords); 1470b8e80941Smrg } 1471b8e80941Smrg 1472b8e80941Smrg if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { 1473b8e80941Smrg const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; 1474b8e80941Smrg memcpy(bo->map + builder->binning_vs_offset, vs->binning_binary, 1475b8e80941Smrg sizeof(uint32_t) * vs->variants[1].info.sizedwords); 1476b8e80941Smrg } 1477b8e80941Smrg 1478b8e80941Smrg return VK_SUCCESS; 1479b8e80941Smrg} 1480b8e80941Smrg 1481b8e80941Smrgstatic void 1482b8e80941Smrgtu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, 1483b8e80941Smrg struct tu_pipeline *pipeline) 1484b8e80941Smrg{ 1485b8e80941Smrg const VkPipelineDynamicStateCreateInfo *dynamic_info = 1486b8e80941Smrg builder->create_info->pDynamicState; 1487b8e80941Smrg 1488b8e80941Smrg if (!dynamic_info) 1489b8e80941Smrg return; 1490b8e80941Smrg 1491b8e80941Smrg for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { 1492b8e80941Smrg pipeline->dynamic_state.mask |= 1493b8e80941Smrg tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]); 1494b8e80941Smrg } 1495b8e80941Smrg} 1496b8e80941Smrg 1497b8e80941Smrgstatic void 1498b8e80941Smrgtu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, 1499b8e80941Smrg struct tu_pipeline *pipeline) 1500b8e80941Smrg{ 1501b8e80941Smrg struct tu_cs prog_cs; 1502b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); 1503b8e80941Smrg tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, false); 1504b8e80941Smrg pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); 1505b8e80941Smrg 1506b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); 1507b8e80941Smrg tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, true); 1508b8e80941Smrg pipeline->program.binning_state_ib = 1509b8e80941Smrg tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); 1510b8e80941Smrg} 1511b8e80941Smrg 1512b8e80941Smrgstatic void 1513b8e80941Smrgtu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, 1514b8e80941Smrg struct tu_pipeline *pipeline) 1515b8e80941Smrg{ 1516b8e80941Smrg const VkPipelineVertexInputStateCreateInfo *vi_info = 1517b8e80941Smrg builder->create_info->pVertexInputState; 1518b8e80941Smrg const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; 1519b8e80941Smrg 1520b8e80941Smrg struct tu_cs vi_cs; 1521b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 1522b8e80941Smrg MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); 1523b8e80941Smrg tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info, 1524b8e80941Smrg pipeline->vi.bindings, pipeline->vi.strides, 1525b8e80941Smrg pipeline->vi.offsets, &pipeline->vi.count); 1526b8e80941Smrg pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); 1527b8e80941Smrg 1528b8e80941Smrg if (vs->has_binning_pass) { 1529b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 1530b8e80941Smrg MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); 1531b8e80941Smrg tu6_emit_vertex_input( 1532b8e80941Smrg &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings, 1533b8e80941Smrg pipeline->vi.binning_strides, pipeline->vi.binning_offsets, 1534b8e80941Smrg &pipeline->vi.binning_count); 1535b8e80941Smrg pipeline->vi.binning_state_ib = 1536b8e80941Smrg tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); 1537b8e80941Smrg } 1538b8e80941Smrg} 1539b8e80941Smrg 1540b8e80941Smrgstatic void 1541b8e80941Smrgtu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, 1542b8e80941Smrg struct tu_pipeline *pipeline) 1543b8e80941Smrg{ 1544b8e80941Smrg const VkPipelineInputAssemblyStateCreateInfo *ia_info = 1545b8e80941Smrg builder->create_info->pInputAssemblyState; 1546b8e80941Smrg 1547b8e80941Smrg pipeline->ia.primtype = tu6_primtype(ia_info->topology); 1548b8e80941Smrg pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; 1549b8e80941Smrg} 1550b8e80941Smrg 1551b8e80941Smrgstatic void 1552b8e80941Smrgtu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, 1553b8e80941Smrg struct tu_pipeline *pipeline) 1554b8e80941Smrg{ 1555b8e80941Smrg /* The spec says: 1556b8e80941Smrg * 1557b8e80941Smrg * pViewportState is a pointer to an instance of the 1558b8e80941Smrg * VkPipelineViewportStateCreateInfo structure, and is ignored if the 1559b8e80941Smrg * pipeline has rasterization disabled." 1560b8e80941Smrg * 1561b8e80941Smrg * We leave the relevant registers stale in that case. 1562b8e80941Smrg */ 1563b8e80941Smrg if (builder->rasterizer_discard) 1564b8e80941Smrg return; 1565b8e80941Smrg 1566b8e80941Smrg const VkPipelineViewportStateCreateInfo *vp_info = 1567b8e80941Smrg builder->create_info->pViewportState; 1568b8e80941Smrg 1569b8e80941Smrg struct tu_cs vp_cs; 1570b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs); 1571b8e80941Smrg 1572b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) { 1573b8e80941Smrg assert(vp_info->viewportCount == 1); 1574b8e80941Smrg tu6_emit_viewport(&vp_cs, vp_info->pViewports); 1575b8e80941Smrg } 1576b8e80941Smrg 1577b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) { 1578b8e80941Smrg assert(vp_info->scissorCount == 1); 1579b8e80941Smrg tu6_emit_scissor(&vp_cs, vp_info->pScissors); 1580b8e80941Smrg } 1581b8e80941Smrg 1582b8e80941Smrg pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs); 1583b8e80941Smrg} 1584b8e80941Smrg 1585b8e80941Smrgstatic void 1586b8e80941Smrgtu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, 1587b8e80941Smrg struct tu_pipeline *pipeline) 1588b8e80941Smrg{ 1589b8e80941Smrg const VkPipelineRasterizationStateCreateInfo *rast_info = 1590b8e80941Smrg builder->create_info->pRasterizationState; 1591b8e80941Smrg 1592b8e80941Smrg assert(!rast_info->depthClampEnable); 1593b8e80941Smrg assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL); 1594b8e80941Smrg 1595b8e80941Smrg struct tu_cs rast_cs; 1596b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs); 1597b8e80941Smrg 1598b8e80941Smrg /* move to hw ctx init? */ 1599b8e80941Smrg tu6_emit_gras_unknowns(&rast_cs); 1600b8e80941Smrg tu6_emit_point_size(&rast_cs); 1601b8e80941Smrg 1602b8e80941Smrg const uint32_t gras_su_cntl = 1603b8e80941Smrg tu6_gras_su_cntl(rast_info, builder->samples); 1604b8e80941Smrg 1605b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) 1606b8e80941Smrg tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth); 1607b8e80941Smrg 1608b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) { 1609b8e80941Smrg tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor, 1610b8e80941Smrg rast_info->depthBiasClamp, 1611b8e80941Smrg rast_info->depthBiasSlopeFactor); 1612b8e80941Smrg } 1613b8e80941Smrg 1614b8e80941Smrg pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs); 1615b8e80941Smrg 1616b8e80941Smrg pipeline->rast.gras_su_cntl = gras_su_cntl; 1617b8e80941Smrg} 1618b8e80941Smrg 1619b8e80941Smrgstatic void 1620b8e80941Smrgtu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, 1621b8e80941Smrg struct tu_pipeline *pipeline) 1622b8e80941Smrg{ 1623b8e80941Smrg /* The spec says: 1624b8e80941Smrg * 1625b8e80941Smrg * pDepthStencilState is a pointer to an instance of the 1626b8e80941Smrg * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if 1627b8e80941Smrg * the pipeline has rasterization disabled or if the subpass of the 1628b8e80941Smrg * render pass the pipeline is created against does not use a 1629b8e80941Smrg * depth/stencil attachment. 1630b8e80941Smrg * 1631b8e80941Smrg * We disable both depth and stenil tests in those cases. 1632b8e80941Smrg */ 1633b8e80941Smrg static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info; 1634b8e80941Smrg const VkPipelineDepthStencilStateCreateInfo *ds_info = 1635b8e80941Smrg builder->use_depth_stencil_attachment 1636b8e80941Smrg ? builder->create_info->pDepthStencilState 1637b8e80941Smrg : &dummy_ds_info; 1638b8e80941Smrg 1639b8e80941Smrg struct tu_cs ds_cs; 1640b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 12, &ds_cs); 1641b8e80941Smrg 1642b8e80941Smrg /* move to hw ctx init? */ 1643b8e80941Smrg tu6_emit_alpha_control_disable(&ds_cs); 1644b8e80941Smrg 1645b8e80941Smrg tu6_emit_depth_control(&ds_cs, ds_info); 1646b8e80941Smrg tu6_emit_stencil_control(&ds_cs, ds_info); 1647b8e80941Smrg 1648b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { 1649b8e80941Smrg tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask, 1650b8e80941Smrg ds_info->back.compareMask); 1651b8e80941Smrg } 1652b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { 1653b8e80941Smrg tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask, 1654b8e80941Smrg ds_info->back.writeMask); 1655b8e80941Smrg } 1656b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { 1657b8e80941Smrg tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference, 1658b8e80941Smrg ds_info->back.reference); 1659b8e80941Smrg } 1660b8e80941Smrg 1661b8e80941Smrg pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs); 1662b8e80941Smrg} 1663b8e80941Smrg 1664b8e80941Smrgstatic void 1665b8e80941Smrgtu_pipeline_builder_parse_multisample_and_color_blend( 1666b8e80941Smrg struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) 1667b8e80941Smrg{ 1668b8e80941Smrg /* The spec says: 1669b8e80941Smrg * 1670b8e80941Smrg * pMultisampleState is a pointer to an instance of the 1671b8e80941Smrg * VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline 1672b8e80941Smrg * has rasterization disabled. 1673b8e80941Smrg * 1674b8e80941Smrg * Also, 1675b8e80941Smrg * 1676b8e80941Smrg * pColorBlendState is a pointer to an instance of the 1677b8e80941Smrg * VkPipelineColorBlendStateCreateInfo structure, and is ignored if the 1678b8e80941Smrg * pipeline has rasterization disabled or if the subpass of the render 1679b8e80941Smrg * pass the pipeline is created against does not use any color 1680b8e80941Smrg * attachments. 1681b8e80941Smrg * 1682b8e80941Smrg * We leave the relevant registers stale when rasterization is disabled. 1683b8e80941Smrg */ 1684b8e80941Smrg if (builder->rasterizer_discard) 1685b8e80941Smrg return; 1686b8e80941Smrg 1687b8e80941Smrg static const VkPipelineColorBlendStateCreateInfo dummy_blend_info; 1688b8e80941Smrg const VkPipelineMultisampleStateCreateInfo *msaa_info = 1689b8e80941Smrg builder->create_info->pMultisampleState; 1690b8e80941Smrg const VkPipelineColorBlendStateCreateInfo *blend_info = 1691b8e80941Smrg builder->use_color_attachments ? builder->create_info->pColorBlendState 1692b8e80941Smrg : &dummy_blend_info; 1693b8e80941Smrg 1694b8e80941Smrg struct tu_cs blend_cs; 1695b8e80941Smrg tu_cs_begin_sub_stream(builder->device, &pipeline->cs, MAX_RTS * 3 + 9, 1696b8e80941Smrg &blend_cs); 1697b8e80941Smrg 1698b8e80941Smrg uint32_t blend_enable_mask; 1699b8e80941Smrg tu6_emit_rb_mrt_controls(&blend_cs, blend_info, 1700b8e80941Smrg builder->color_attachment_formats, 1701b8e80941Smrg &blend_enable_mask); 1702b8e80941Smrg 1703b8e80941Smrg if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) 1704b8e80941Smrg tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); 1705b8e80941Smrg 1706b8e80941Smrg tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info); 1707b8e80941Smrg 1708b8e80941Smrg pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs); 1709b8e80941Smrg} 1710b8e80941Smrg 1711b8e80941Smrgstatic void 1712b8e80941Smrgtu_pipeline_finish(struct tu_pipeline *pipeline, 1713b8e80941Smrg struct tu_device *dev, 1714b8e80941Smrg const VkAllocationCallbacks *alloc) 1715b8e80941Smrg{ 1716b8e80941Smrg tu_cs_finish(dev, &pipeline->cs); 1717b8e80941Smrg 1718b8e80941Smrg if (pipeline->program.binary_bo.gem_handle) 1719b8e80941Smrg tu_bo_finish(dev, &pipeline->program.binary_bo); 1720b8e80941Smrg} 1721b8e80941Smrg 1722b8e80941Smrgstatic VkResult 1723b8e80941Smrgtu_pipeline_builder_build(struct tu_pipeline_builder *builder, 1724b8e80941Smrg struct tu_pipeline **pipeline) 1725b8e80941Smrg{ 1726b8e80941Smrg VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline); 1727b8e80941Smrg if (result != VK_SUCCESS) 1728b8e80941Smrg return result; 1729b8e80941Smrg 1730b8e80941Smrg /* compile and upload shaders */ 1731b8e80941Smrg result = tu_pipeline_builder_compile_shaders(builder); 1732b8e80941Smrg if (result == VK_SUCCESS) 1733b8e80941Smrg result = tu_pipeline_builder_upload_shaders(builder, *pipeline); 1734b8e80941Smrg if (result != VK_SUCCESS) { 1735b8e80941Smrg tu_pipeline_finish(*pipeline, builder->device, builder->alloc); 1736b8e80941Smrg vk_free2(&builder->device->alloc, builder->alloc, *pipeline); 1737b8e80941Smrg *pipeline = VK_NULL_HANDLE; 1738b8e80941Smrg 1739b8e80941Smrg return result; 1740b8e80941Smrg } 1741b8e80941Smrg 1742b8e80941Smrg tu_pipeline_builder_parse_dynamic(builder, *pipeline); 1743b8e80941Smrg tu_pipeline_builder_parse_shader_stages(builder, *pipeline); 1744b8e80941Smrg tu_pipeline_builder_parse_vertex_input(builder, *pipeline); 1745b8e80941Smrg tu_pipeline_builder_parse_input_assembly(builder, *pipeline); 1746b8e80941Smrg tu_pipeline_builder_parse_viewport(builder, *pipeline); 1747b8e80941Smrg tu_pipeline_builder_parse_rasterization(builder, *pipeline); 1748b8e80941Smrg tu_pipeline_builder_parse_depth_stencil(builder, *pipeline); 1749b8e80941Smrg tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline); 1750b8e80941Smrg 1751b8e80941Smrg /* we should have reserved enough space upfront such that the CS never 1752b8e80941Smrg * grows 1753b8e80941Smrg */ 1754b8e80941Smrg assert((*pipeline)->cs.bo_count == 1); 1755b8e80941Smrg 1756b8e80941Smrg return VK_SUCCESS; 1757b8e80941Smrg} 1758b8e80941Smrg 1759b8e80941Smrgstatic void 1760b8e80941Smrgtu_pipeline_builder_finish(struct tu_pipeline_builder *builder) 1761b8e80941Smrg{ 1762b8e80941Smrg for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { 1763b8e80941Smrg if (!builder->shaders[i]) 1764b8e80941Smrg continue; 1765b8e80941Smrg tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc); 1766b8e80941Smrg } 1767b8e80941Smrg} 1768b8e80941Smrg 1769b8e80941Smrgstatic void 1770b8e80941Smrgtu_pipeline_builder_init_graphics( 1771b8e80941Smrg struct tu_pipeline_builder *builder, 1772b8e80941Smrg struct tu_device *dev, 1773b8e80941Smrg struct tu_pipeline_cache *cache, 1774b8e80941Smrg const VkGraphicsPipelineCreateInfo *create_info, 1775b8e80941Smrg const VkAllocationCallbacks *alloc) 1776b8e80941Smrg{ 1777b8e80941Smrg *builder = (struct tu_pipeline_builder) { 1778b8e80941Smrg .device = dev, 1779b8e80941Smrg .cache = cache, 1780b8e80941Smrg .create_info = create_info, 1781b8e80941Smrg .alloc = alloc, 1782b8e80941Smrg }; 1783b8e80941Smrg 1784b8e80941Smrg builder->rasterizer_discard = 1785b8e80941Smrg create_info->pRasterizationState->rasterizerDiscardEnable; 1786b8e80941Smrg 1787b8e80941Smrg if (builder->rasterizer_discard) { 1788b8e80941Smrg builder->samples = VK_SAMPLE_COUNT_1_BIT; 1789b8e80941Smrg } else { 1790b8e80941Smrg builder->samples = create_info->pMultisampleState->rasterizationSamples; 1791b8e80941Smrg 1792b8e80941Smrg const struct tu_render_pass *pass = 1793b8e80941Smrg tu_render_pass_from_handle(create_info->renderPass); 1794b8e80941Smrg const struct tu_subpass *subpass = 1795b8e80941Smrg &pass->subpasses[create_info->subpass]; 1796b8e80941Smrg 1797b8e80941Smrg builder->use_depth_stencil_attachment = 1798b8e80941Smrg subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED; 1799b8e80941Smrg 1800b8e80941Smrg assert(subpass->color_count == 1801b8e80941Smrg create_info->pColorBlendState->attachmentCount); 1802b8e80941Smrg builder->color_attachment_count = subpass->color_count; 1803b8e80941Smrg for (uint32_t i = 0; i < subpass->color_count; i++) { 1804b8e80941Smrg const uint32_t a = subpass->color_attachments[i].attachment; 1805b8e80941Smrg if (a == VK_ATTACHMENT_UNUSED) 1806b8e80941Smrg continue; 1807b8e80941Smrg 1808b8e80941Smrg builder->color_attachment_formats[i] = pass->attachments[a].format; 1809b8e80941Smrg builder->use_color_attachments = true; 1810b8e80941Smrg } 1811b8e80941Smrg } 1812b8e80941Smrg} 1813b8e80941Smrg 1814b8e80941SmrgVkResult 1815b8e80941Smrgtu_CreateGraphicsPipelines(VkDevice device, 1816b8e80941Smrg VkPipelineCache pipelineCache, 1817b8e80941Smrg uint32_t count, 1818b8e80941Smrg const VkGraphicsPipelineCreateInfo *pCreateInfos, 1819b8e80941Smrg const VkAllocationCallbacks *pAllocator, 1820b8e80941Smrg VkPipeline *pPipelines) 1821b8e80941Smrg{ 1822b8e80941Smrg TU_FROM_HANDLE(tu_device, dev, device); 1823b8e80941Smrg TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache); 1824b8e80941Smrg 1825b8e80941Smrg for (uint32_t i = 0; i < count; i++) { 1826b8e80941Smrg struct tu_pipeline_builder builder; 1827b8e80941Smrg tu_pipeline_builder_init_graphics(&builder, dev, cache, 1828b8e80941Smrg &pCreateInfos[i], pAllocator); 1829b8e80941Smrg 1830b8e80941Smrg struct tu_pipeline *pipeline; 1831b8e80941Smrg VkResult result = tu_pipeline_builder_build(&builder, &pipeline); 1832b8e80941Smrg tu_pipeline_builder_finish(&builder); 1833b8e80941Smrg 1834b8e80941Smrg if (result != VK_SUCCESS) { 1835b8e80941Smrg for (uint32_t j = 0; j < i; j++) { 1836b8e80941Smrg tu_DestroyPipeline(device, pPipelines[j], pAllocator); 1837b8e80941Smrg pPipelines[j] = VK_NULL_HANDLE; 1838b8e80941Smrg } 1839b8e80941Smrg 1840b8e80941Smrg return result; 1841b8e80941Smrg } 1842b8e80941Smrg 1843b8e80941Smrg pPipelines[i] = tu_pipeline_to_handle(pipeline); 1844b8e80941Smrg } 1845b8e80941Smrg 1846b8e80941Smrg return VK_SUCCESS; 1847b8e80941Smrg} 1848b8e80941Smrg 1849b8e80941Smrgstatic VkResult 1850b8e80941Smrgtu_compute_pipeline_create(VkDevice _device, 1851b8e80941Smrg VkPipelineCache _cache, 1852b8e80941Smrg const VkComputePipelineCreateInfo *pCreateInfo, 1853b8e80941Smrg const VkAllocationCallbacks *pAllocator, 1854b8e80941Smrg VkPipeline *pPipeline) 1855b8e80941Smrg{ 1856b8e80941Smrg return VK_SUCCESS; 1857b8e80941Smrg} 1858b8e80941Smrg 1859b8e80941SmrgVkResult 1860b8e80941Smrgtu_CreateComputePipelines(VkDevice _device, 1861b8e80941Smrg VkPipelineCache pipelineCache, 1862b8e80941Smrg uint32_t count, 1863b8e80941Smrg const VkComputePipelineCreateInfo *pCreateInfos, 1864b8e80941Smrg const VkAllocationCallbacks *pAllocator, 1865b8e80941Smrg VkPipeline *pPipelines) 1866b8e80941Smrg{ 1867b8e80941Smrg VkResult result = VK_SUCCESS; 1868b8e80941Smrg 1869b8e80941Smrg unsigned i = 0; 1870b8e80941Smrg for (; i < count; i++) { 1871b8e80941Smrg VkResult r; 1872b8e80941Smrg r = tu_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], 1873b8e80941Smrg pAllocator, &pPipelines[i]); 1874b8e80941Smrg if (r != VK_SUCCESS) { 1875b8e80941Smrg result = r; 1876b8e80941Smrg pPipelines[i] = VK_NULL_HANDLE; 1877b8e80941Smrg } 1878b8e80941Smrg } 1879b8e80941Smrg 1880b8e80941Smrg return result; 1881b8e80941Smrg} 1882b8e80941Smrg 1883b8e80941Smrgvoid 1884b8e80941Smrgtu_DestroyPipeline(VkDevice _device, 1885b8e80941Smrg VkPipeline _pipeline, 1886b8e80941Smrg const VkAllocationCallbacks *pAllocator) 1887b8e80941Smrg{ 1888b8e80941Smrg TU_FROM_HANDLE(tu_device, dev, _device); 1889b8e80941Smrg TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); 1890b8e80941Smrg 1891b8e80941Smrg if (!_pipeline) 1892b8e80941Smrg return; 1893b8e80941Smrg 1894b8e80941Smrg tu_pipeline_finish(pipeline, dev, pAllocator); 1895b8e80941Smrg vk_free2(&dev->alloc, pAllocator, pipeline); 1896b8e80941Smrg} 1897