/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "common/gen_l3_config.h"
#include "common/gen_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format_info.h"

/* Pick the 3D_Vertex_Component_Control value for component `comp` (0..3) of
 * a vertex element with the given isl format.  Components actually present
 * in the format are sourced from the vertex buffer; missing components are
 * padded per the hardware rules documented below.
 */
static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take in account hardware restrictions when dealing with 64-bit floats.
    *
    * From Broadwell spec, command reference structures, page 586:
    *  "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *   64-bit components are stored * in the URB without any conversion. In
    *   this case, vertex elements must be written as 128 or 256 bits, with
    *   VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *   R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *   Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *   set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *   Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *   a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *   Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *   256-bit vertex element."
    */
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due hardware restrictions
       * (see comment above) */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

/* Emit 3DSTATE_VERTEX_ELEMENTS (plus, on gen8+, 3DSTATE_VF_INSTANCING and
 * 3DSTATE_VF_SGVS) for the pipeline's vertex input state.  Slots are
 * allocated from the VS prog data's inputs_read bitfield; 64-bit (double)
 * inputs consume two elements each, hence the popcount/2 adjustments below.
 */
static void
emit_vertex_input(struct anv_pipeline *pipeline,
                  const VkPipelineVertexInputStateCreateInfo *info)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

   uint32_t elem_count = __builtin_popcount(elements) -
      __builtin_popcount(elements_double) / 2;

   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zero.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];
      enum isl_format format = anv_get_isl_format(&pipeline->device->info,
                                                  desc->format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);

      assert(desc->binding < MAX_VBS);

      if ((elements & (1 << desc->location)) == 0)
         continue; /* Binding unused */

      /* Slot index = number of (non-double-adjusted) elements below this
       * location; each double input below us consumed one extra element.
       */
      uint32_t slot =
         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                        ((1 << desc->location) -1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = desc->binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = desc->offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GEN_GEN >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.InstancingEnable = pipeline->vb[desc->binding].instanced;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate =
            pipeline->vb[desc->binding].instance_divisor;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means, that if we have BaseInstance, we need BaseVertex as
       * well.  Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GEN_GEN >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         /* Pre-gen8 has no 3DSTATE_VF_SGVS; VertexID/InstanceID are stored
          * directly into components 2/3 of this element.
          */
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
   }

#if GEN_GEN >= 8
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GEN_GEN >= 8
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

/* Program the URB partitioning (3DSTATE_URB_{VS,HS,DS,GS}) for the given
 * L3 configuration and per-stage entry sizes.  entry_size[] is indexed by
 * MESA_SHADER_VERTEX..MESA_SHADER_GEOMETRY.
 */
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct gen_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4])
{
   const struct gen_device_info *devinfo = &device->info;
#if GEN_IS_HASWELL
   const unsigned push_constant_kb = devinfo->gt == 3 ? 32 : 16;
#else
   const unsigned push_constant_kb = GEN_GEN >= 8 ? 32 : 16;
#endif

   const unsigned urb_size_kb = gen_get_l3_config_urb_size(devinfo, l3_config);

   unsigned entries[4];
   unsigned start[4];
   gen_get_urb_config(devinfo,
                      1024 * push_constant_kb, 1024 * urb_size_kb,
                      active_stages &
                         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                      active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                      entry_size, entries, start);

#if GEN_GEN == 7 && !GEN_IS_HASWELL
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GEN7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = (struct anv_address) { &device->workaround_bo, 0 };
   }
#endif

   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         /* The four URB packets share one layout; bumping the sub-opcode
          * selects VS, HS, DS or GS respectively.
          */
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = start[i];
         urb.VSURBEntryAllocationSize = entry_size[i] - 1;
         urb.VSNumberofURBEntries = entries[i];
      }
   }
}

/* Gather per-stage URB entry sizes from the pipeline's shaders (1 for
 * unused stages) and emit the URB setup into the pipeline batch.
 */
static void
emit_urb_setup(struct anv_pipeline *pipeline)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct brw_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->device, &pipeline->batch,
                        pipeline->urb.l3_config,
                        pipeline->active_stages, entry_size);
}

/* Emit 3DSTATE_SBE (and 3DSTATE_SBE_SWIZ on gen8+), mapping fragment shader
 * inputs to the VUE slots written by the last pre-rasterization stage.
 */
static void
emit_3dstate_sbe(struct anv_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
#if GEN_GEN >= 8
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   const struct brw_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = true,
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GEN_GEN >= 9
   for (unsigned i = 0; i < 32; i++)
      sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif

#if GEN_GEN >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   /* Skip the VUE header and position slots by default */
   unsigned urb_entry_read_offset = 1;
   int max_source_attr = 0;
   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      int input_index = wm_prog_data->urb_setup[attr];

      if (input_index < 0)
         continue;

      /* gl_Layer is stored in the VUE header */
      if (attr == VARYING_SLOT_LAYER) {
         urb_entry_read_offset = 0;
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      /* Only 16 swizzle entries exist in the packet. */
      if (input_index >= 16)
         continue;

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it.  It could be that it's a
          * regular varying read by the fragment shader but not written by
          * the vertex shader or it's gl_PrimitiveID. In the first case the
          * value is undefined, in the second it needs to be
          * gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
      } else {
         /* We have to subtract two slots to accout for the URB entry output
          * read offset in the VS and GS stages.
          */
         const int source_attr = slot - 2 * urb_entry_read_offset;
         assert(source_attr >= 0 && source_attr < 32);
         max_source_attr = MAX2(max_source_attr, source_attr);
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      }
   }

   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GEN_GEN >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe);

#if GEN_GEN >= 8
   dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
#endif
}

static const uint32_t vk_to_gen_cullmode[] = {
   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
};

static const uint32_t vk_to_gen_fillmode[] = {
   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
};

static const uint32_t vk_to_gen_front_face[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
   [VK_FRONT_FACE_CLOCKWISE]                 = 0
};

/* Fill the pipeline's static 3DSTATE_SF / 3DSTATE_RASTER dwords from the
 * Vulkan rasterization and multisample create-info.  On gen7 the two
 * packets are one (`raster` aliases `sf` via #define).
 */
static void
emit_rs_state(struct anv_pipeline *pipeline,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              const struct anv_render_pass *pass,
              const struct anv_subpass *subpass)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.TriangleStripListProvokingVertexSelect = 0;
   sf.LineStripListProvokingVertexSelect = 0;
   sf.TriangleFanProvokingVertexSelect = 1;
   sf.VertexSubPixelPrecisionSelect = _8Bit;

   const struct brw_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GEN_GEN >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GEN_GEN >= 8
   raster.DXMultisampleRasterizationEnable = true;
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations.  If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#else
   raster.MultisampleRasterizationMode =
      (ms_info && ms_info->rasterizationSamples > 1) ?
      MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
#endif

   raster.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
   raster.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
   raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
   raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
   raster.ScissorRectangleEnable = true;

#if GEN_GEN >= 9
   /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
   raster.ViewportZFarClipTestEnable = pipeline->depth_clip_enable;
   raster.ViewportZNearClipTestEnable = pipeline->depth_clip_enable;
#elif GEN_GEN >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

   raster.GlobalDepthOffsetEnableSolid = rs_info->depthBiasEnable;
   raster.GlobalDepthOffsetEnableWireframe = rs_info->depthBiasEnable;
   raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable;

#if GEN_GEN == 7
   /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (subpass->depth_stencil_attachment) {
      VkFormat vk_format =
         pass->attachments[subpass->depth_stencil_attachment->attachment].format;
      assert(vk_format_is_depth_or_stencil(vk_format));
      if (vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_DEPTH_BIT) {
         enum isl_format isl_format =
            anv_get_isl_format(&pipeline->device->info, vk_format,
                               VK_IMAGE_ASPECT_DEPTH_BIT,
                               VK_IMAGE_TILING_OPTIMAL);
         sf.DepthBufferSurfaceFormat =
            isl_format_get_depth_format(isl_format, false);
      }
   }
#endif

#if GEN_GEN >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
#endif
}

/* Emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK from the Vulkan
 * multisample create-info (`info` may be NULL, meaning single-sampled).
 */
static void
emit_ms_state(struct anv_pipeline *pipeline,
              const VkPipelineMultisampleStateCreateInfo *info)
{
   uint32_t samples = 1;
   uint32_t log2_samples = 0;

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GEN_GEN >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif

   if (info) {
      samples = info->rasterizationSamples;
      log2_samples = __builtin_ffs(samples) - 1;
   }

   if (info && info->pSampleMask)
      sample_mask &= info->pSampleMask[0];

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
      ms.NumberofMultisamples = log2_samples;

      ms.PixelLocation = CENTER;
#if GEN_GEN >= 8
      /* The PRM says that this bit is valid only for DX9:
       *
       *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
       *    should not have any effect by setting or not setting this bit.
       */
      ms.PixelPositionOffsetEnable = false;
#else
      /* Pre-gen8: program the standard sample positions directly in the
       * 3DSTATE_MULTISAMPLE packet.
       */
      switch (samples) {
      case 1:
         GEN_SAMPLE_POS_1X(ms.Sample);
         break;
      case 2:
         GEN_SAMPLE_POS_2X(ms.Sample);
         break;
      case 4:
         GEN_SAMPLE_POS_4X(ms.Sample);
         break;
      case 8:
         GEN_SAMPLE_POS_8X(ms.Sample);
         break;
      default:
         break;
      }
#endif
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

static const uint32_t vk_to_gen_logic_op[] = {
   [VK_LOGIC_OP_COPY]                        = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]                       = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]                         = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]                 = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]                = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]                       = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]                         = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]                          = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]                         = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]                  = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]                      = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]                  = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED]               = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]                 = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]                        = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]                         = LOGICOP_SET,
};

static const uint32_t vk_to_gen_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                    = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                     = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]               = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]     = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]               = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]     = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]               = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]     = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]               = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]     = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]          = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]          = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]      = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]              = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]    = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]              = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]    = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_gen_blend_op[] = {
   [VK_BLEND_OP_ADD]                         = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]                    = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT]            = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]                         = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]                         = BLENDFUNCTION_MAX,
};

static const uint32_t vk_to_gen_compare_op[] = {
   [VK_COMPARE_OP_NEVER]                     = PREFILTEROPNEVER,
   [VK_COMPARE_OP_LESS]                      = PREFILTEROPLESS,
   [VK_COMPARE_OP_EQUAL]                     = PREFILTEROPEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]             = PREFILTEROPLEQUAL,
   [VK_COMPARE_OP_GREATER]                   = PREFILTEROPGREATER,
   [VK_COMPARE_OP_NOT_EQUAL]                 = PREFILTEROPNOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]          = PREFILTEROPGEQUAL,
   [VK_COMPARE_OP_ALWAYS]                    = PREFILTEROPALWAYS,
};

static const uint32_t vk_to_gen_stencil_op[] = {
   [VK_STENCIL_OP_KEEP]                      = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                      = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                   = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP]       = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP]       = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]                    = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]        = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]        = STENCILOP_DECR,
};

/* This function sanitizes the VkStencilOpState by looking at the compare ops
 * and trying to determine whether or not a given stencil op can ever actually
 * occur.  Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
 * This function returns true if, after sanitation, any of the stencil ops are
 * set to something other than VK_STENCIL_OP_KEEP.
 */
static bool
sanitize_stencil_face(VkStencilOpState *face,
                      VkCompareOp depthCompareOp)
{
   /* If compareOp is ALWAYS then the stencil test will never fail and failOp
    * will never happen.  Set failOp to KEEP in this case.
    */
   if (face->compareOp == VK_COMPARE_OP_ALWAYS)
      face->failOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth
    * or stencil tests will fail and passOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_NEVER)
      face->passOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is ALWAYS then either the
    * stencil test will fail or the depth test will pass.  In either case,
    * depthFailOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_ALWAYS)
      face->depthFailOp = VK_STENCIL_OP_KEEP;

   return face->failOp != VK_STENCIL_OP_KEEP ||
          face->depthFailOp != VK_STENCIL_OP_KEEP ||
          face->passOp != VK_STENCIL_OP_KEEP;
}

/* Intel hardware is fairly sensitive to whether or not depth/stencil writes
 * are enabled.  In the presence of discards, it's fairly easy to get into the
 * non-promoted case which means a fairly big performance hit.  From the Iron
 * Lake PRM, Vol 2, pt. 1, section 8.4.3.2, "Early Depth Test Cases":
 *
 *    "Non-promoted depth (N) is active whenever the depth test can be done
 *    early but it cannot determine whether or not to write source depth to
 *    the depth buffer, therefore the depth write must be performed post pixel
 *    shader.  This includes cases where the pixel shader can kill pixels,
 *    including via sampler chroma key, as well as cases where the alpha test
 *    function is enabled, which kills pixels based on a programmable alpha
 *    test.  In this case, even if the depth test fails, the pixel cannot be
 *    killed if a stencil write is indicated.  Whether or not the stencil write
 *    happens depends on whether or not the pixel is killed later.  In these
 *    cases if stencil test fails and stencil writes are off, the pixels can
 *    also be killed early.  If stencil writes are enabled, the pixels must be
 *    treated as Computed depth (described above).
738b8e80941Smrg * 739b8e80941Smrg * The same thing as mentioned in the stencil case can happen in the depth 740b8e80941Smrg * case as well if it thinks it writes depth but, thanks to the depth test 741b8e80941Smrg * being GL_EQUAL, the write doesn't actually matter. A little extra work 742b8e80941Smrg * up-front to try and disable depth and stencil writes can make a big 743b8e80941Smrg * difference. 744b8e80941Smrg * 745b8e80941Smrg * Unfortunately, the way depth and stencil testing is specified, there are 746b8e80941Smrg * many case where, regardless of depth/stencil writes being enabled, nothing 747b8e80941Smrg * actually gets written due to some other bit of state being set. This 748b8e80941Smrg * function attempts to "sanitize" the depth stencil state and disable writes 749b8e80941Smrg * and sometimes even testing whenever possible. 750b8e80941Smrg */ 751b8e80941Smrgstatic void 752b8e80941Smrgsanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state, 753b8e80941Smrg bool *stencilWriteEnable, 754b8e80941Smrg VkImageAspectFlags ds_aspects) 755b8e80941Smrg{ 756b8e80941Smrg *stencilWriteEnable = state->stencilTestEnable; 757b8e80941Smrg 758b8e80941Smrg /* If the depth test is disabled, we won't be writing anything. Make sure we 759b8e80941Smrg * treat the test as always passing later on as well. 760b8e80941Smrg * 761b8e80941Smrg * Also, the Vulkan spec requires that if either depth or stencil is not 762b8e80941Smrg * present, the pipeline is to act as if the test silently passes. In that 763b8e80941Smrg * case we won't write either. 
764b8e80941Smrg */ 765b8e80941Smrg if (!state->depthTestEnable || !(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { 766b8e80941Smrg state->depthWriteEnable = false; 767b8e80941Smrg state->depthCompareOp = VK_COMPARE_OP_ALWAYS; 768b8e80941Smrg } 769b8e80941Smrg 770b8e80941Smrg if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) { 771b8e80941Smrg *stencilWriteEnable = false; 772b8e80941Smrg state->front.compareOp = VK_COMPARE_OP_ALWAYS; 773b8e80941Smrg state->back.compareOp = VK_COMPARE_OP_ALWAYS; 774b8e80941Smrg } 775b8e80941Smrg 776b8e80941Smrg /* If the stencil test is enabled and always fails, then we will never get 777b8e80941Smrg * to the depth test so we can just disable the depth test entirely. 778b8e80941Smrg */ 779b8e80941Smrg if (state->stencilTestEnable && 780b8e80941Smrg state->front.compareOp == VK_COMPARE_OP_NEVER && 781b8e80941Smrg state->back.compareOp == VK_COMPARE_OP_NEVER) { 782b8e80941Smrg state->depthTestEnable = false; 783b8e80941Smrg state->depthWriteEnable = false; 784b8e80941Smrg } 785b8e80941Smrg 786b8e80941Smrg /* If depthCompareOp is EQUAL then the value we would be writing to the 787b8e80941Smrg * depth buffer is the same as the value that's already there so there's no 788b8e80941Smrg * point in writing it. 789b8e80941Smrg */ 790b8e80941Smrg if (state->depthCompareOp == VK_COMPARE_OP_EQUAL) 791b8e80941Smrg state->depthWriteEnable = false; 792b8e80941Smrg 793b8e80941Smrg /* If the stencil ops are such that we don't actually ever modify the 794b8e80941Smrg * stencil buffer, we should disable writes. 795b8e80941Smrg */ 796b8e80941Smrg if (!sanitize_stencil_face(&state->front, state->depthCompareOp) && 797b8e80941Smrg !sanitize_stencil_face(&state->back, state->depthCompareOp)) 798b8e80941Smrg *stencilWriteEnable = false; 799b8e80941Smrg 800b8e80941Smrg /* If the depth test always passes and we never write out depth, that's the 801b8e80941Smrg * same as if the depth test is disabled entirely. 
802b8e80941Smrg */ 803b8e80941Smrg if (state->depthCompareOp == VK_COMPARE_OP_ALWAYS && 804b8e80941Smrg !state->depthWriteEnable) 805b8e80941Smrg state->depthTestEnable = false; 806b8e80941Smrg 807b8e80941Smrg /* If the stencil test always passes and we never write out stencil, that's 808b8e80941Smrg * the same as if the stencil test is disabled entirely. 809b8e80941Smrg */ 810b8e80941Smrg if (state->front.compareOp == VK_COMPARE_OP_ALWAYS && 811b8e80941Smrg state->back.compareOp == VK_COMPARE_OP_ALWAYS && 812b8e80941Smrg !*stencilWriteEnable) 813b8e80941Smrg state->stencilTestEnable = false; 814b8e80941Smrg} 815b8e80941Smrg 816b8e80941Smrgstatic void 817b8e80941Smrgemit_ds_state(struct anv_pipeline *pipeline, 818b8e80941Smrg const VkPipelineDepthStencilStateCreateInfo *pCreateInfo, 819b8e80941Smrg const struct anv_render_pass *pass, 820b8e80941Smrg const struct anv_subpass *subpass) 821b8e80941Smrg{ 822b8e80941Smrg#if GEN_GEN == 7 823b8e80941Smrg# define depth_stencil_dw pipeline->gen7.depth_stencil_state 824b8e80941Smrg#elif GEN_GEN == 8 825b8e80941Smrg# define depth_stencil_dw pipeline->gen8.wm_depth_stencil 826b8e80941Smrg#else 827b8e80941Smrg# define depth_stencil_dw pipeline->gen9.wm_depth_stencil 828b8e80941Smrg#endif 829b8e80941Smrg 830b8e80941Smrg if (pCreateInfo == NULL) { 831b8e80941Smrg /* We're going to OR this together with the dynamic state. We need 832b8e80941Smrg * to make sure it's initialized to something useful. 
833b8e80941Smrg */ 834b8e80941Smrg pipeline->writes_stencil = false; 835b8e80941Smrg pipeline->stencil_test_enable = false; 836b8e80941Smrg pipeline->writes_depth = false; 837b8e80941Smrg pipeline->depth_test_enable = false; 838b8e80941Smrg memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw)); 839b8e80941Smrg return; 840b8e80941Smrg } 841b8e80941Smrg 842b8e80941Smrg VkImageAspectFlags ds_aspects = 0; 843b8e80941Smrg if (subpass->depth_stencil_attachment) { 844b8e80941Smrg VkFormat depth_stencil_format = 845b8e80941Smrg pass->attachments[subpass->depth_stencil_attachment->attachment].format; 846b8e80941Smrg ds_aspects = vk_format_aspects(depth_stencil_format); 847b8e80941Smrg } 848b8e80941Smrg 849b8e80941Smrg VkPipelineDepthStencilStateCreateInfo info = *pCreateInfo; 850b8e80941Smrg sanitize_ds_state(&info, &pipeline->writes_stencil, ds_aspects); 851b8e80941Smrg pipeline->stencil_test_enable = info.stencilTestEnable; 852b8e80941Smrg pipeline->writes_depth = info.depthWriteEnable; 853b8e80941Smrg pipeline->depth_test_enable = info.depthTestEnable; 854b8e80941Smrg 855b8e80941Smrg /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ 856b8e80941Smrg 857b8e80941Smrg#if GEN_GEN <= 7 858b8e80941Smrg struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { 859b8e80941Smrg#else 860b8e80941Smrg struct GENX(3DSTATE_WM_DEPTH_STENCIL) depth_stencil = { 861b8e80941Smrg#endif 862b8e80941Smrg .DepthTestEnable = info.depthTestEnable, 863b8e80941Smrg .DepthBufferWriteEnable = info.depthWriteEnable, 864b8e80941Smrg .DepthTestFunction = vk_to_gen_compare_op[info.depthCompareOp], 865b8e80941Smrg .DoubleSidedStencilEnable = true, 866b8e80941Smrg 867b8e80941Smrg .StencilTestEnable = info.stencilTestEnable, 868b8e80941Smrg .StencilFailOp = vk_to_gen_stencil_op[info.front.failOp], 869b8e80941Smrg .StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp], 870b8e80941Smrg .StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp], 871b8e80941Smrg 
.StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp], 872b8e80941Smrg .BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp], 873b8e80941Smrg .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp], 874b8e80941Smrg .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info.back.depthFailOp], 875b8e80941Smrg .BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp], 876b8e80941Smrg }; 877b8e80941Smrg 878b8e80941Smrg#if GEN_GEN <= 7 879b8e80941Smrg GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); 880b8e80941Smrg#else 881b8e80941Smrg GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil); 882b8e80941Smrg#endif 883b8e80941Smrg} 884b8e80941Smrg 885b8e80941SmrgMAYBE_UNUSED static bool 886b8e80941Smrgis_dual_src_blend_factor(VkBlendFactor factor) 887b8e80941Smrg{ 888b8e80941Smrg return factor == VK_BLEND_FACTOR_SRC1_COLOR || 889b8e80941Smrg factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR || 890b8e80941Smrg factor == VK_BLEND_FACTOR_SRC1_ALPHA || 891b8e80941Smrg factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; 892b8e80941Smrg} 893b8e80941Smrg 894b8e80941Smrgstatic void 895b8e80941Smrgemit_cb_state(struct anv_pipeline *pipeline, 896b8e80941Smrg const VkPipelineColorBlendStateCreateInfo *info, 897b8e80941Smrg const VkPipelineMultisampleStateCreateInfo *ms_info) 898b8e80941Smrg{ 899b8e80941Smrg struct anv_device *device = pipeline->device; 900b8e80941Smrg const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 901b8e80941Smrg 902b8e80941Smrg struct GENX(BLEND_STATE) blend_state = { 903b8e80941Smrg#if GEN_GEN >= 8 904b8e80941Smrg .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, 905b8e80941Smrg .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, 906b8e80941Smrg#endif 907b8e80941Smrg }; 908b8e80941Smrg 909b8e80941Smrg uint32_t surface_count = 0; 910b8e80941Smrg struct anv_pipeline_bind_map *map; 911b8e80941Smrg if 
(anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 912b8e80941Smrg map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; 913b8e80941Smrg surface_count = map->surface_count; 914b8e80941Smrg } 915b8e80941Smrg 916b8e80941Smrg const uint32_t num_dwords = GENX(BLEND_STATE_length) + 917b8e80941Smrg GENX(BLEND_STATE_ENTRY_length) * surface_count; 918b8e80941Smrg pipeline->blend_state = 919b8e80941Smrg anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); 920b8e80941Smrg 921b8e80941Smrg bool has_writeable_rt = false; 922b8e80941Smrg uint32_t *state_pos = pipeline->blend_state.map; 923b8e80941Smrg state_pos += GENX(BLEND_STATE_length); 924b8e80941Smrg#if GEN_GEN >= 8 925b8e80941Smrg struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 }; 926b8e80941Smrg#endif 927b8e80941Smrg for (unsigned i = 0; i < surface_count; i++) { 928b8e80941Smrg struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; 929b8e80941Smrg 930b8e80941Smrg /* All color attachments are at the beginning of the binding table */ 931b8e80941Smrg if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) 932b8e80941Smrg break; 933b8e80941Smrg 934b8e80941Smrg /* We can have at most 8 attachments */ 935b8e80941Smrg assert(i < 8); 936b8e80941Smrg 937b8e80941Smrg if (info == NULL || binding->index >= info->attachmentCount) { 938b8e80941Smrg /* Default everything to disabled */ 939b8e80941Smrg struct GENX(BLEND_STATE_ENTRY) entry = { 940b8e80941Smrg .WriteDisableAlpha = true, 941b8e80941Smrg .WriteDisableRed = true, 942b8e80941Smrg .WriteDisableGreen = true, 943b8e80941Smrg .WriteDisableBlue = true, 944b8e80941Smrg }; 945b8e80941Smrg GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry); 946b8e80941Smrg state_pos += GENX(BLEND_STATE_ENTRY_length); 947b8e80941Smrg continue; 948b8e80941Smrg } 949b8e80941Smrg 950b8e80941Smrg assert(binding->binding == 0); 951b8e80941Smrg const VkPipelineColorBlendAttachmentState *a = 952b8e80941Smrg &info->pAttachments[binding->index]; 953b8e80941Smrg 
954b8e80941Smrg struct GENX(BLEND_STATE_ENTRY) entry = { 955b8e80941Smrg#if GEN_GEN < 8 956b8e80941Smrg .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, 957b8e80941Smrg .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, 958b8e80941Smrg#endif 959b8e80941Smrg .LogicOpEnable = info->logicOpEnable, 960b8e80941Smrg .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], 961b8e80941Smrg .ColorBufferBlendEnable = a->blendEnable, 962b8e80941Smrg .ColorClampRange = COLORCLAMP_RTFORMAT, 963b8e80941Smrg .PreBlendColorClampEnable = true, 964b8e80941Smrg .PostBlendColorClampEnable = true, 965b8e80941Smrg .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], 966b8e80941Smrg .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], 967b8e80941Smrg .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], 968b8e80941Smrg .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], 969b8e80941Smrg .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], 970b8e80941Smrg .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], 971b8e80941Smrg .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), 972b8e80941Smrg .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), 973b8e80941Smrg .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), 974b8e80941Smrg .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), 975b8e80941Smrg }; 976b8e80941Smrg 977b8e80941Smrg if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || 978b8e80941Smrg a->dstColorBlendFactor != a->dstAlphaBlendFactor || 979b8e80941Smrg a->colorBlendOp != a->alphaBlendOp) { 980b8e80941Smrg#if GEN_GEN >= 8 981b8e80941Smrg blend_state.IndependentAlphaBlendEnable = true; 982b8e80941Smrg#else 983b8e80941Smrg entry.IndependentAlphaBlendEnable = true; 984b8e80941Smrg#endif 985b8e80941Smrg } 986b8e80941Smrg 987b8e80941Smrg /* The Dual Source Blending documentation says: 988b8e80941Smrg * 989b8e80941Smrg * "If SRC1 
is included in a src/dst blend factor and 990b8e80941Smrg * a DualSource RT Write message is not used, results 991b8e80941Smrg * are UNDEFINED. (This reflects the same restriction in DX APIs, 992b8e80941Smrg * where undefined results are produced if “o1” is not written 993b8e80941Smrg * by a PS – there are no default values defined)." 994b8e80941Smrg * 995b8e80941Smrg * There is no way to gracefully fix this undefined situation 996b8e80941Smrg * so we just disable the blending to prevent possible issues. 997b8e80941Smrg */ 998b8e80941Smrg if (!wm_prog_data->dual_src_blend && 999b8e80941Smrg (is_dual_src_blend_factor(a->srcColorBlendFactor) || 1000b8e80941Smrg is_dual_src_blend_factor(a->dstColorBlendFactor) || 1001b8e80941Smrg is_dual_src_blend_factor(a->srcAlphaBlendFactor) || 1002b8e80941Smrg is_dual_src_blend_factor(a->dstAlphaBlendFactor))) { 1003b8e80941Smrg vk_debug_report(&device->instance->debug_report_callbacks, 1004b8e80941Smrg VK_DEBUG_REPORT_WARNING_BIT_EXT, 1005b8e80941Smrg VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, 1006b8e80941Smrg (uint64_t)(uintptr_t)device, 1007b8e80941Smrg 0, 0, "anv", 1008b8e80941Smrg "Enabled dual-src blend factors without writing both targets " 1009b8e80941Smrg "in the shader. Disabling blending to avoid GPU hangs."); 1010b8e80941Smrg entry.ColorBufferBlendEnable = false; 1011b8e80941Smrg } 1012b8e80941Smrg 1013b8e80941Smrg if (a->colorWriteMask != 0) 1014b8e80941Smrg has_writeable_rt = true; 1015b8e80941Smrg 1016b8e80941Smrg /* Our hardware applies the blend factor prior to the blend function 1017b8e80941Smrg * regardless of what function is used. Technically, this means the 1018b8e80941Smrg * hardware can do MORE than GL or Vulkan specify. However, it also 1019b8e80941Smrg * means that, for MIN and MAX, we have to stomp the blend factor to 1020b8e80941Smrg * ONE to make it a no-op. 
1021b8e80941Smrg */ 1022b8e80941Smrg if (a->colorBlendOp == VK_BLEND_OP_MIN || 1023b8e80941Smrg a->colorBlendOp == VK_BLEND_OP_MAX) { 1024b8e80941Smrg entry.SourceBlendFactor = BLENDFACTOR_ONE; 1025b8e80941Smrg entry.DestinationBlendFactor = BLENDFACTOR_ONE; 1026b8e80941Smrg } 1027b8e80941Smrg if (a->alphaBlendOp == VK_BLEND_OP_MIN || 1028b8e80941Smrg a->alphaBlendOp == VK_BLEND_OP_MAX) { 1029b8e80941Smrg entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE; 1030b8e80941Smrg entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; 1031b8e80941Smrg } 1032b8e80941Smrg GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry); 1033b8e80941Smrg state_pos += GENX(BLEND_STATE_ENTRY_length); 1034b8e80941Smrg#if GEN_GEN >= 8 1035b8e80941Smrg if (i == 0) 1036b8e80941Smrg bs0 = entry; 1037b8e80941Smrg#endif 1038b8e80941Smrg } 1039b8e80941Smrg 1040b8e80941Smrg#if GEN_GEN >= 8 1041b8e80941Smrg anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) { 1042b8e80941Smrg blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable; 1043b8e80941Smrg blend.HasWriteableRT = has_writeable_rt; 1044b8e80941Smrg blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable; 1045b8e80941Smrg blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor; 1046b8e80941Smrg blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor; 1047b8e80941Smrg blend.SourceBlendFactor = bs0.SourceBlendFactor; 1048b8e80941Smrg blend.DestinationBlendFactor = bs0.DestinationBlendFactor; 1049b8e80941Smrg blend.AlphaTestEnable = false; 1050b8e80941Smrg blend.IndependentAlphaBlendEnable = 1051b8e80941Smrg blend_state.IndependentAlphaBlendEnable; 1052b8e80941Smrg } 1053b8e80941Smrg#else 1054b8e80941Smrg (void)has_writeable_rt; 1055b8e80941Smrg#endif 1056b8e80941Smrg 1057b8e80941Smrg GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); 1058b8e80941Smrg 1059b8e80941Smrg anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { 1060b8e80941Smrg bsp.BlendStatePointer = 
pipeline->blend_state.offset; 1061b8e80941Smrg#if GEN_GEN >= 8 1062b8e80941Smrg bsp.BlendStatePointerValid = true; 1063b8e80941Smrg#endif 1064b8e80941Smrg } 1065b8e80941Smrg} 1066b8e80941Smrg 1067b8e80941Smrgstatic void 1068b8e80941Smrgemit_3dstate_clip(struct anv_pipeline *pipeline, 1069b8e80941Smrg const VkPipelineViewportStateCreateInfo *vp_info, 1070b8e80941Smrg const VkPipelineRasterizationStateCreateInfo *rs_info) 1071b8e80941Smrg{ 1072b8e80941Smrg const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1073b8e80941Smrg (void) wm_prog_data; 1074b8e80941Smrg anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { 1075b8e80941Smrg clip.ClipEnable = true; 1076b8e80941Smrg clip.StatisticsEnable = true; 1077b8e80941Smrg clip.EarlyCullEnable = true; 1078b8e80941Smrg clip.APIMode = APIMODE_D3D; 1079b8e80941Smrg clip.ViewportXYClipTestEnable = true; 1080b8e80941Smrg 1081b8e80941Smrg#if GEN_GEN >= 8 1082b8e80941Smrg clip.VertexSubPixelPrecisionSelect = _8Bit; 1083b8e80941Smrg#endif 1084b8e80941Smrg 1085b8e80941Smrg clip.ClipMode = CLIPMODE_NORMAL; 1086b8e80941Smrg 1087b8e80941Smrg clip.TriangleStripListProvokingVertexSelect = 0; 1088b8e80941Smrg clip.LineStripListProvokingVertexSelect = 0; 1089b8e80941Smrg clip.TriangleFanProvokingVertexSelect = 1; 1090b8e80941Smrg 1091b8e80941Smrg clip.MinimumPointWidth = 0.125; 1092b8e80941Smrg clip.MaximumPointWidth = 255.875; 1093b8e80941Smrg 1094b8e80941Smrg const struct brw_vue_prog_data *last = 1095b8e80941Smrg anv_pipeline_get_last_vue_prog_data(pipeline); 1096b8e80941Smrg 1097b8e80941Smrg /* From the Vulkan 1.0.45 spec: 1098b8e80941Smrg * 1099b8e80941Smrg * "If the last active vertex processing stage shader entry point's 1100b8e80941Smrg * interface does not include a variable decorated with 1101b8e80941Smrg * ViewportIndex, then the first viewport is used." 
1102b8e80941Smrg */ 1103b8e80941Smrg if (vp_info && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) { 1104b8e80941Smrg clip.MaximumVPIndex = vp_info->viewportCount - 1; 1105b8e80941Smrg } else { 1106b8e80941Smrg clip.MaximumVPIndex = 0; 1107b8e80941Smrg } 1108b8e80941Smrg 1109b8e80941Smrg /* From the Vulkan 1.0.45 spec: 1110b8e80941Smrg * 1111b8e80941Smrg * "If the last active vertex processing stage shader entry point's 1112b8e80941Smrg * interface does not include a variable decorated with Layer, then 1113b8e80941Smrg * the first layer is used." 1114b8e80941Smrg */ 1115b8e80941Smrg clip.ForceZeroRTAIndexEnable = 1116b8e80941Smrg !(last->vue_map.slots_valid & VARYING_BIT_LAYER); 1117b8e80941Smrg 1118b8e80941Smrg#if GEN_GEN == 7 1119b8e80941Smrg clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; 1120b8e80941Smrg clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; 1121b8e80941Smrg clip.ViewportZClipTestEnable = pipeline->depth_clip_enable; 1122b8e80941Smrg clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; 1123b8e80941Smrg clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; 1124b8e80941Smrg#else 1125b8e80941Smrg clip.NonPerspectiveBarycentricEnable = wm_prog_data ? 
1126b8e80941Smrg (wm_prog_data->barycentric_interp_modes & 1127b8e80941Smrg BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0; 1128b8e80941Smrg#endif 1129b8e80941Smrg } 1130b8e80941Smrg} 1131b8e80941Smrg 1132b8e80941Smrgstatic void 1133b8e80941Smrgemit_3dstate_streamout(struct anv_pipeline *pipeline, 1134b8e80941Smrg const VkPipelineRasterizationStateCreateInfo *rs_info) 1135b8e80941Smrg{ 1136b8e80941Smrg#if GEN_GEN >= 8 1137b8e80941Smrg const struct brw_vue_prog_data *prog_data = 1138b8e80941Smrg anv_pipeline_get_last_vue_prog_data(pipeline); 1139b8e80941Smrg const struct brw_vue_map *vue_map = &prog_data->vue_map; 1140b8e80941Smrg#endif 1141b8e80941Smrg 1142b8e80941Smrg nir_xfb_info *xfb_info; 1143b8e80941Smrg if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 1144b8e80941Smrg xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info; 1145b8e80941Smrg else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) 1146b8e80941Smrg xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info; 1147b8e80941Smrg else 1148b8e80941Smrg xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info; 1149b8e80941Smrg 1150b8e80941Smrg pipeline->xfb_used = xfb_info ? xfb_info->buffers_written : 0; 1151b8e80941Smrg 1152b8e80941Smrg anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) { 1153b8e80941Smrg so.RenderingDisable = rs_info->rasterizerDiscardEnable; 1154b8e80941Smrg 1155b8e80941Smrg#if GEN_GEN >= 8 1156b8e80941Smrg if (xfb_info) { 1157b8e80941Smrg so.SOFunctionEnable = true; 1158b8e80941Smrg so.SOStatisticsEnable = true; 1159b8e80941Smrg 1160b8e80941Smrg const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = 1161b8e80941Smrg vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); 1162b8e80941Smrg so.RenderStreamSelect = stream_info ? 
1163b8e80941Smrg stream_info->rasterizationStream : 0; 1164b8e80941Smrg 1165b8e80941Smrg so.Buffer0SurfacePitch = xfb_info->buffers[0].stride; 1166b8e80941Smrg so.Buffer1SurfacePitch = xfb_info->buffers[1].stride; 1167b8e80941Smrg so.Buffer2SurfacePitch = xfb_info->buffers[2].stride; 1168b8e80941Smrg so.Buffer3SurfacePitch = xfb_info->buffers[3].stride; 1169b8e80941Smrg 1170b8e80941Smrg int urb_entry_read_offset = 0; 1171b8e80941Smrg int urb_entry_read_length = 1172b8e80941Smrg (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset; 1173b8e80941Smrg 1174b8e80941Smrg /* We always read the whole vertex. This could be reduced at some 1175b8e80941Smrg * point by reading less and offsetting the register index in the 1176b8e80941Smrg * SO_DECLs. 1177b8e80941Smrg */ 1178b8e80941Smrg so.Stream0VertexReadOffset = urb_entry_read_offset; 1179b8e80941Smrg so.Stream0VertexReadLength = urb_entry_read_length - 1; 1180b8e80941Smrg so.Stream1VertexReadOffset = urb_entry_read_offset; 1181b8e80941Smrg so.Stream1VertexReadLength = urb_entry_read_length - 1; 1182b8e80941Smrg so.Stream2VertexReadOffset = urb_entry_read_offset; 1183b8e80941Smrg so.Stream2VertexReadLength = urb_entry_read_length - 1; 1184b8e80941Smrg so.Stream3VertexReadOffset = urb_entry_read_offset; 1185b8e80941Smrg so.Stream3VertexReadLength = urb_entry_read_length - 1; 1186b8e80941Smrg } 1187b8e80941Smrg#endif /* GEN_GEN >= 8 */ 1188b8e80941Smrg } 1189b8e80941Smrg 1190b8e80941Smrg#if GEN_GEN >= 8 1191b8e80941Smrg if (xfb_info) { 1192b8e80941Smrg struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128]; 1193b8e80941Smrg int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0}; 1194b8e80941Smrg int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0}; 1195b8e80941Smrg 1196b8e80941Smrg memset(so_decl, 0, sizeof(so_decl)); 1197b8e80941Smrg 1198b8e80941Smrg for (unsigned i = 0; i < xfb_info->output_count; i++) { 1199b8e80941Smrg const nir_xfb_output_info *output = &xfb_info->outputs[i]; 1200b8e80941Smrg unsigned buffer = output->buffer; 
1201b8e80941Smrg unsigned stream = xfb_info->buffer_to_stream[buffer]; 1202b8e80941Smrg 1203b8e80941Smrg /* Our hardware is unusual in that it requires us to program SO_DECLs 1204b8e80941Smrg * for fake "hole" components, rather than simply taking the offset 1205b8e80941Smrg * for each real varying. Each hole can have size 1, 2, 3, or 4; we 1206b8e80941Smrg * program as many size = 4 holes as we can, then a final hole to 1207b8e80941Smrg * accommodate the final 1, 2, or 3 remaining. 1208b8e80941Smrg */ 1209b8e80941Smrg int hole_dwords = (output->offset - next_offset[buffer]) / 4; 1210b8e80941Smrg while (hole_dwords > 0) { 1211b8e80941Smrg so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { 1212b8e80941Smrg .HoleFlag = 1, 1213b8e80941Smrg .OutputBufferSlot = buffer, 1214b8e80941Smrg .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1, 1215b8e80941Smrg }; 1216b8e80941Smrg hole_dwords -= 4; 1217b8e80941Smrg } 1218b8e80941Smrg 1219b8e80941Smrg int varying = output->location; 1220b8e80941Smrg uint8_t component_mask = output->component_mask; 1221b8e80941Smrg /* VARYING_SLOT_PSIZ contains three scalar fields packed together: 1222b8e80941Smrg * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y 1223b8e80941Smrg * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z 1224b8e80941Smrg * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w 1225b8e80941Smrg */ 1226b8e80941Smrg if (varying == VARYING_SLOT_LAYER) { 1227b8e80941Smrg varying = VARYING_SLOT_PSIZ; 1228b8e80941Smrg component_mask = 1 << 1; // SO_DECL_COMPMASK_Y 1229b8e80941Smrg } else if (varying == VARYING_SLOT_VIEWPORT) { 1230b8e80941Smrg varying = VARYING_SLOT_PSIZ; 1231b8e80941Smrg component_mask = 1 << 2; // SO_DECL_COMPMASK_Z 1232b8e80941Smrg } else if (varying == VARYING_SLOT_PSIZ) { 1233b8e80941Smrg component_mask = 1 << 3; // SO_DECL_COMPMASK_W 1234b8e80941Smrg } 1235b8e80941Smrg 1236b8e80941Smrg next_offset[buffer] = output->offset + 1237b8e80941Smrg __builtin_popcount(component_mask) * 4; 1238b8e80941Smrg 1239b8e80941Smrg 
so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { 1240b8e80941Smrg .OutputBufferSlot = buffer, 1241b8e80941Smrg .RegisterIndex = vue_map->varying_to_slot[varying], 1242b8e80941Smrg .ComponentMask = component_mask, 1243b8e80941Smrg }; 1244b8e80941Smrg } 1245b8e80941Smrg 1246b8e80941Smrg int max_decls = 0; 1247b8e80941Smrg for (unsigned s = 0; s < MAX_XFB_STREAMS; s++) 1248b8e80941Smrg max_decls = MAX2(max_decls, decls[s]); 1249b8e80941Smrg 1250b8e80941Smrg uint8_t sbs[MAX_XFB_STREAMS] = { }; 1251b8e80941Smrg for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) { 1252b8e80941Smrg if (xfb_info->buffers_written & (1 << b)) 1253b8e80941Smrg sbs[xfb_info->buffer_to_stream[b]] |= 1 << b; 1254b8e80941Smrg } 1255b8e80941Smrg 1256b8e80941Smrg uint32_t *dw = anv_batch_emitn(&pipeline->batch, 3 + 2 * max_decls, 1257b8e80941Smrg GENX(3DSTATE_SO_DECL_LIST), 1258b8e80941Smrg .StreamtoBufferSelects0 = sbs[0], 1259b8e80941Smrg .StreamtoBufferSelects1 = sbs[1], 1260b8e80941Smrg .StreamtoBufferSelects2 = sbs[2], 1261b8e80941Smrg .StreamtoBufferSelects3 = sbs[3], 1262b8e80941Smrg .NumEntries0 = decls[0], 1263b8e80941Smrg .NumEntries1 = decls[1], 1264b8e80941Smrg .NumEntries2 = decls[2], 1265b8e80941Smrg .NumEntries3 = decls[3]); 1266b8e80941Smrg 1267b8e80941Smrg for (int i = 0; i < max_decls; i++) { 1268b8e80941Smrg GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2, 1269b8e80941Smrg &(struct GENX(SO_DECL_ENTRY)) { 1270b8e80941Smrg .Stream0Decl = so_decl[0][i], 1271b8e80941Smrg .Stream1Decl = so_decl[1][i], 1272b8e80941Smrg .Stream2Decl = so_decl[2][i], 1273b8e80941Smrg .Stream3Decl = so_decl[3][i], 1274b8e80941Smrg }); 1275b8e80941Smrg } 1276b8e80941Smrg } 1277b8e80941Smrg#endif /* GEN_GEN >= 8 */ 1278b8e80941Smrg} 1279b8e80941Smrg 1280b8e80941Smrgstatic uint32_t 1281b8e80941Smrgget_sampler_count(const struct anv_shader_bin *bin) 1282b8e80941Smrg{ 1283b8e80941Smrg uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4); 1284b8e80941Smrg 1285b8e80941Smrg /* We can 
potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
   return MIN2(count_by_4, 4);
}

/* Number of 32-entry chunks of the shader's binding table to pre-fetch.
 * Fed to the 3DSTATE_XS::BindingTableEntryCount fields below, which count
 * in units of 32 entries.
 */
static uint32_t
get_binding_table_entry_count(const struct anv_shader_bin *bin)
{
   return DIV_ROUND_UP(bin->bind_map.surface_count, 32);
}

/* Return the scratch buffer address for the given stage, sized by the
 * compiled shader's total_scratch requirement.  The BO comes from the
 * device's shared scratch pool; the address always starts at offset 0.
 */
static struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

/* Encode total_scratch for the 3DSTATE_XS::PerThreadScratchSpace fields:
 * ffs(total_scratch / 2048) yields 0 when no scratch is used and a
 * power-of-two exponent otherwise.  NOTE(review): assumes total_scratch is
 * zero or a power of two >= 2KB, as guaranteed by the compiler backend —
 * confirm against brw_prog_data setup.
 */
static uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
   return ffs(bin->prog_data->total_scratch / 2048);
}

/* Emit 3DSTATE_VS for the pipeline's vertex shader.  A vertex shader is
 * mandatory in a graphics pipeline, so there is no disabled variant here.
 */
static void
emit_3dstate_vs(struct anv_pipeline *pipeline)
{
   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
#if GEN_GEN >= 8
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
#if GEN_GEN < 11
      vs.SingleVertexDispatch = false;
#endif
      vs.VectorMaskEnable = false;
      /* WA_1606682166:
       * Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes.
       * Disable the Sampler state prefetch functionality in the SARB by
       * programming 0xB000[30] to '1'.
       */
      vs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(vs_bin);
      /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to
       * disable prefetching of binding tables on A0 and B0 steppings.
       * TODO: Revisit this WA on newer steppings.
       */
      vs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(vs_bin);
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

      if (GEN_GEN == 9 && devinfo->gt == 4 &&
          anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
         /* On Sky Lake GT4, we have experienced some hangs related to the VS
          * cache and tessellation.  It is unknown exactly what is happening
          * but the Haswell docs for the "VS Reference Count Full Force Miss
          * Enable" field of the "Thread Mode" register refer to a HSW bug in
          * which the VUE handle reference count would overflow resulting in
          * internal reference counting bugs.  My (Jason's) best guess is that
          * this bug cropped back up on SKL GT4 when we suddenly had more
          * threads in play than any previous gen9 hardware.
          *
          * What we do know for sure is that setting this bit when
          * tessellation shaders are in use fixes a GPU hang in Batman: Arkham
          * City when playing with DXVK (https://bugs.freedesktop.org/107280).
          * Disabling the vertex cache with tessellation shaders should only
          * have a minor performance impact as the tessellation shaders are
          * likely generating and processing far more geometry than the vertex
          * stage.
          */
         vs.VertexCacheDisable = true;
      }

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_VERTEX, vs_bin);
   }
}

/* Emit 3DSTATE_HS, 3DSTATE_TE and 3DSTATE_DS for the tessellation stages.
 * When the pipeline has no tessellation, empty packets are emitted, which
 * leaves all three stages disabled (zero-initialized packet contents).
 */
static void
emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline,
                      const VkPipelineTessellationStateCreateInfo *tess_info)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;
      /* WA_1606682166 */
      hs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tcs_bin);
      /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
      hs.BindingTableEntryCount = GEN_GEN == 11 ?
                                  0 : get_binding_table_entry_count(tcs_bin);
      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   /* The domain origin (upper-left vs. lower-left) comes from an optional
    * pNext extension struct; Vulkan's default is upper-left.
    */
   const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
      tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO) : NULL;

   VkTessellationDomainOrigin uv_origin =
      domain_origin_state ? domain_origin_state->domainOrigin :
                            VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;

      if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         te.OutputTopology = tes_prog_data->output_topology;
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            te.OutputTopology = OUTPUT_TRI_CW;
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            te.OutputTopology = OUTPUT_TRI_CCW;
         } else {
            te.OutputTopology = tes_prog_data->output_topology;
         }
      }

      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      /* WA_1606682166 */
      ds.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tes_bin);
      /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
      ds.BindingTableEntryCount = GEN_GEN == 11 ?
                                  0 : get_binding_table_entry_count(tes_bin);
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      /* The W coordinate only exists for triangular domains. */
      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
#if GEN_GEN < 11
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
            DISPATCH_MODE_SIMD8_SINGLE_PATCH :
            DISPATCH_MODE_SIMD4X2;
#else
      /* Gen11+ only supports SIMD8 dispatch for the DS stage. */
      assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8);
      ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
#endif

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}

/* Emit 3DSTATE_GS for the geometry shader, or an empty (disabling) packet
 * when the pipeline has no geometry stage.
 */
static void
emit_3dstate_gs(struct anv_pipeline *pipeline)
{
   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
      gs.Enable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      /* WA_1606682166 */
      gs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(gs_bin);
      /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
      gs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(gs_bin);
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      if (GEN_GEN == 8) {
         /* Broadwell is weird.  It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology = gs_prog_data->output_topology;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.ControlDataFormat = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
      gs.ReorderMode = TRAILING;

#if GEN_GEN >= 8
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      /* static_vertex_count < 0 means the count is not known at compile
       * time, so StaticOutput must be left disabled.
       */
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
                                   gs_prog_data->static_vertex_count : 0;
#endif

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;
#endif

      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_GEOMETRY, gs_bin);
   }
}

/* Return true if the fragment shader binds at least one color attachment
 * with a non-zero colorWriteMask.  Used by the WM/PS setup below to decide
 * whether pixel-shader thread dispatch must be forced on.
 */
static bool
has_color_buffer_write_enabled(const struct anv_pipeline *pipeline,
                               const VkPipelineColorBlendStateCreateInfo *blend)
{
   const struct anv_shader_bin *shader_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];
   if (!shader_bin)
      return false;

   const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
   for (int i = 0; i < bind_map->surface_count; i++) {
      struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i];

      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         continue;

      /* UINT32_MAX marks an unused color-attachment slot. */
      if (binding->index == UINT32_MAX)
         continue;

      if (blend && blend->pAttachments[binding->index].colorWriteMask != 0)
         return true;
   }

   return false;
}

/* Emit 3DSTATE_WM: fixed-function windower/masker state, including early-Z
 * policy and (pre-gen8) pixel-shader dispatch and multisample modes.
 */
static void
emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
                const VkPipelineColorBlendStateCreateInfo *blend,
                const VkPipelineMultisampleStateCreateInfo *multisample)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* Only consumed inside the GEN_GEN < 8 branch, hence MAYBE_UNUSED. */
   MAYBE_UNUSED uint32_t samples =
      multisample ? multisample->rasterizationSamples : 1;

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
      wm.StatisticsEnable = true;
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
      wm.LineAntialiasingRegionWidth = _10pixels;
      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
         if (wm_prog_data->early_fragment_tests) {
            wm.EarlyDepthStencilControl = EDSC_PREPS;
         } else if (wm_prog_data->has_side_effects) {
            wm.EarlyDepthStencilControl = EDSC_PSEXEC;
         } else {
            wm.EarlyDepthStencilControl = EDSC_NORMAL;
         }

#if GEN_GEN >= 8
         /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
          * doesn't take into account KillPixels when no depth or stencil
          * writes are enabled.  In order for occlusion queries to work
          * correctly with no attachments, we need to force-enable PS thread
          * dispatch.
          *
          * The BDW docs are pretty clear that this bit isn't validated
          * and probably shouldn't be used in production:
          *
          *    "This must always be set to Normal.  This field should not be
          *    tested for functional validation."
          *
          * Unfortunately, however, the other mechanism we have for doing this
          * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
          * Given two bad options, we choose the one which works.
          */
         if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
             !has_color_buffer_write_enabled(pipeline, blend))
            wm.ForceThreadDispatchEnable = ForceON;
#endif

         wm.BarycentricInterpolationMode =
            wm_prog_data->barycentric_interp_modes;

#if GEN_GEN < 8
         wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
         wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
         wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
         wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

         /* If the subpass has a depth or stencil self-dependency, then we
          * need to force the hardware to do the depth/stencil write *after*
          * fragment shader execution.  Otherwise, the writes may hit memory
          * before we get around to fetching from the input attachment and we
          * may get the depth or stencil value from the current draw rather
          * than the previous one.
          */
         wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
                                    wm_prog_data->uses_kill;

         if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
             wm_prog_data->has_side_effects ||
             wm.PixelShaderKillsPixel ||
             has_color_buffer_write_enabled(pipeline, blend))
            wm.ThreadDispatchEnable = true;

         if (samples > 1) {
            wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
            if (wm_prog_data->persample_dispatch) {
               wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
            } else {
               wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
            }
         } else {
            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         }
#endif
      }
   }
}

/* Emit 3DSTATE_PS: fragment-shader kernel pointers, dispatch widths, and
 * related state.  With no fragment stage a mostly-empty packet is emitted
 * (gen7 still requires MaximumNumberofThreads to avoid a hang).
 */
static void
emit_3dstate_ps(struct anv_pipeline *pipeline,
                const VkPipelineColorBlendStateCreateInfo *blend,
                const VkPipelineMultisampleStateCreateInfo *multisample)
{
   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
#if GEN_GEN == 7
         /* Even if no fragments are ever dispatched, gen7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GEN_GEN < 8
   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && blend) {
      for (uint32_t i = 0; i < blend->attachmentCount; i++) {
         const VkPipelineColorBlendAttachmentState *bstate =
            &blend->pAttachments[i];

         if (bstate->blendEnable &&
             (is_dual_src_blend_factor(bstate->srcColorBlendFactor) ||
              is_dual_src_blend_factor(bstate->dstColorBlendFactor) ||
              is_dual_src_blend_factor(bstate->srcAlphaBlendFactor) ||
              is_dual_src_blend_factor(bstate->dstAlphaBlendFactor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
#endif

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
      ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
      ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
      ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;

      /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
       *
       *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
       *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
       *
       * Since 16x MSAA is first introduced on SKL, we don't need to apply
       * the workaround on any older hardware.
       */
      if (GEN_GEN >= 9 && !wm_prog_data->persample_dispatch &&
          multisample && multisample->rasterizationSamples == 16) {
         assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
         ps._32PixelDispatchEnable = false;
      }

      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = GEN_GEN >= 8;
      /* WA_1606682166 */
      ps.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(fs_bin);
      /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
      ps.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(fs_bin);
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                                  POSOFFSET_SAMPLE: POSOFFSET_NONE;
#if GEN_GEN < 8
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable = dual_src_blend;
#endif

#if GEN_IS_HASWELL
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask = 0xff;
#endif

#if GEN_GEN >= 9
      ps.MaximumNumberofThreadsPerPSD = 64 - 1;
#elif GEN_GEN >= 8
      ps.MaximumNumberofThreadsPerPSD = 64 - 2;
#else
      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin);
   }
}

#if GEN_GEN >= 8
/* Emit 3DSTATE_PS_EXTRA (gen8+): auxiliary fragment-shader state such as
 * kill-pixel, computed depth/stencil, and input-coverage handling.
 */
static void
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
                      struct anv_subpass *subpass,
                      const VkPipelineColorBlendStateCreateInfo *blend)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution.  Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
                                 wm_prog_data->uses_kill;

#if GEN_GEN >= 9
      ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
      ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;

      ps.InputCoverageMaskState = ICMS_NONE;
      if (wm_prog_data->uses_sample_mask) {
         if (wm_prog_data->post_depth_coverage)
            ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
         else
            ps.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
      }
#else
      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif
   }
}

/* Emit 3DSTATE_VF_TOPOLOGY (gen8+) with the pipeline's fixed primitive
 * topology.
 */
static void
emit_3dstate_vf_topology(struct anv_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
      vft.PrimitiveTopologyType = pipeline->topology;
   }
}
#endif

/* Enable vertex-fetch statistics gathering for pipeline statistics queries. */
static void
emit_3dstate_vf_statistics(struct anv_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

/* Compute and cache pipeline->kill_pixel, used later when deciding whether
 * the gen8/gen9 PMA fix is needed.
 */
static void
compute_kill_pixel(struct anv_pipeline *pipeline,
                   const VkPipelineMultisampleStateCreateInfo *ms_info,
                   const struct anv_subpass *subpass)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gen8 or gen9.  It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      subpass->has_ds_self_dep || wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms_info && ms_info->alphaToCoverageEnable);
}

/* Create a graphics pipeline: allocate and initialize the anv_pipeline, then
 * bake all static 3DSTATE_* packets into its batch.  Returns the batch status
 * so that batch-emission errors surface to the caller.
 */
static VkResult
genX(graphics_pipeline_create)(
    VkDevice                                     _device,
    struct anv_pipeline_cache *                  cache,
    const VkGraphicsPipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                 pAllocator,
    VkPipeline*                                  pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
   struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   /* Use the default pipeline cache if none is specified */
   if (cache == NULL && device->instance->pipeline_cache_enabled)
      cache = &device->default_pipeline_cache;

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(pipeline, device, cache,
                              pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   assert(pCreateInfo->pVertexInputState);
   emit_vertex_input(pipeline, pCreateInfo->pVertexInputState);
   assert(pCreateInfo->pRasterizationState);
   emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
                 pCreateInfo->pMultisampleState, pass, subpass);
   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
   emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
   emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                 pCreateInfo->pMultisampleState);
   compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, subpass);

   emit_urb_setup(pipeline);

   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
                     pCreateInfo->pRasterizationState);
   emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);

#if 0
   /* From gen7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *    "Note: Because of corruption in IVB:GT2, software needs to flush the
    *    whole fixed function pipeline when the GS enable changes value in
    *    the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (!device->info.is_haswell && !device->info.is_baytrail)
      gen7_emit_vs_workaround_flush(brw);
#endif

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState);
   emit_3dstate_gs(pipeline);
   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState,
                   pCreateInfo->pMultisampleState);
   emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState,
                   pCreateInfo->pMultisampleState);
#if GEN_GEN >= 8
   emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState);
   emit_3dstate_vf_topology(pipeline);
#endif
   emit_3dstate_vf_statistics(pipeline);

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return pipeline->batch.status;
}

/* Create a compute pipeline.  Unlike the graphics path, the pipeline's batch
 * and reloc list are initialized by hand here rather than via
 * anv_pipeline_init.
 */
static VkResult
compute_pipeline_create(
    VkDevice                                     _device,
    struct anv_pipeline_cache *                  cache,
    const VkComputePipelineCreateInfo*           pCreateInfo,
    const VkAllocationCallbacks*                 pAllocator,
    VkPipeline*                                  pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *physical_device =
      &device->instance->physicalDevice;
   const struct gen_device_info *devinfo = &physical_device->info;
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   /* Use the default pipeline cache if none is specified */
   if (cache == NULL && device->instance->pipeline_cache_enabled)
      cache = &device->default_pipeline_cache;

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
2034b8e80941Smrg */ 2035b8e80941Smrg memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); 2036b8e80941Smrg 2037b8e80941Smrg pipeline->needs_data_cache = false; 2038b8e80941Smrg 2039b8e80941Smrg assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); 2040b8e80941Smrg pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 2041b8e80941Smrg ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); 2042b8e80941Smrg result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, 2043b8e80941Smrg pCreateInfo->stage.pName, 2044b8e80941Smrg pCreateInfo->stage.pSpecializationInfo); 2045b8e80941Smrg if (result != VK_SUCCESS) { 2046b8e80941Smrg vk_free2(&device->alloc, pAllocator, pipeline); 2047b8e80941Smrg return result; 2048b8e80941Smrg } 2049b8e80941Smrg 2050b8e80941Smrg const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); 2051b8e80941Smrg 2052b8e80941Smrg anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0); 2053b8e80941Smrg 2054b8e80941Smrg uint32_t group_size = cs_prog_data->local_size[0] * 2055b8e80941Smrg cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; 2056b8e80941Smrg uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); 2057b8e80941Smrg 2058b8e80941Smrg if (remainder > 0) 2059b8e80941Smrg pipeline->cs_right_mask = ~0u >> (32 - remainder); 2060b8e80941Smrg else 2061b8e80941Smrg pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); 2062b8e80941Smrg 2063b8e80941Smrg const uint32_t vfe_curbe_allocation = 2064b8e80941Smrg ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads + 2065b8e80941Smrg cs_prog_data->push.cross_thread.regs, 2); 2066b8e80941Smrg 2067b8e80941Smrg const uint32_t subslices = MAX2(physical_device->subslice_total, 1); 2068b8e80941Smrg 2069b8e80941Smrg const struct anv_shader_bin *cs_bin = 2070b8e80941Smrg pipeline->shaders[MESA_SHADER_COMPUTE]; 2071b8e80941Smrg 2072b8e80941Smrg anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), 
vfe) { 2073b8e80941Smrg#if GEN_GEN > 7 2074b8e80941Smrg vfe.StackSize = 0; 2075b8e80941Smrg#else 2076b8e80941Smrg vfe.GPGPUMode = true; 2077b8e80941Smrg#endif 2078b8e80941Smrg vfe.MaximumNumberofThreads = 2079b8e80941Smrg devinfo->max_cs_threads * subslices - 1; 2080b8e80941Smrg vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2; 2081b8e80941Smrg#if GEN_GEN < 11 2082b8e80941Smrg vfe.ResetGatewayTimer = true; 2083b8e80941Smrg#endif 2084b8e80941Smrg#if GEN_GEN <= 8 2085b8e80941Smrg vfe.BypassGatewayControl = true; 2086b8e80941Smrg#endif 2087b8e80941Smrg vfe.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2; 2088b8e80941Smrg vfe.CURBEAllocationSize = vfe_curbe_allocation; 2089b8e80941Smrg 2090b8e80941Smrg if (cs_bin->prog_data->total_scratch) { 2091b8e80941Smrg if (GEN_GEN >= 8) { 2092b8e80941Smrg /* Broadwell's Per Thread Scratch Space is in the range [0, 11] 2093b8e80941Smrg * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. 2094b8e80941Smrg */ 2095b8e80941Smrg vfe.PerThreadScratchSpace = 2096b8e80941Smrg ffs(cs_bin->prog_data->total_scratch) - 11; 2097b8e80941Smrg } else if (GEN_IS_HASWELL) { 2098b8e80941Smrg /* Haswell's Per Thread Scratch Space is in the range [0, 10] 2099b8e80941Smrg * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. 2100b8e80941Smrg */ 2101b8e80941Smrg vfe.PerThreadScratchSpace = 2102b8e80941Smrg ffs(cs_bin->prog_data->total_scratch) - 12; 2103b8e80941Smrg } else { 2104b8e80941Smrg /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB] 2105b8e80941Smrg * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. 
2106b8e80941Smrg */ 2107b8e80941Smrg vfe.PerThreadScratchSpace = 2108b8e80941Smrg cs_bin->prog_data->total_scratch / 1024 - 1; 2109b8e80941Smrg } 2110b8e80941Smrg vfe.ScratchSpaceBasePointer = 2111b8e80941Smrg get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin); 2112b8e80941Smrg } 2113b8e80941Smrg } 2114b8e80941Smrg 2115b8e80941Smrg struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { 2116b8e80941Smrg .KernelStartPointer = cs_bin->kernel.offset, 2117b8e80941Smrg /* WA_1606682166 */ 2118b8e80941Smrg .SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin), 2119b8e80941Smrg /* Gen 11 workarounds table #2056 WABTPPrefetchDisable 2120b8e80941Smrg * 2121b8e80941Smrg * We add 1 because the CS indirect parameters buffer isn't accounted 2122b8e80941Smrg * for in bind_map.surface_count. 2123b8e80941Smrg */ 2124b8e80941Smrg .BindingTableEntryCount = GEN_GEN == 11 ? 0 : 1 + MIN2(cs_bin->bind_map.surface_count, 30), 2125b8e80941Smrg .BarrierEnable = cs_prog_data->uses_barrier, 2126b8e80941Smrg .SharedLocalMemorySize = 2127b8e80941Smrg encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared), 2128b8e80941Smrg 2129b8e80941Smrg#if !GEN_IS_HASWELL 2130b8e80941Smrg .ConstantURBEntryReadOffset = 0, 2131b8e80941Smrg#endif 2132b8e80941Smrg .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs, 2133b8e80941Smrg#if GEN_GEN >= 8 || GEN_IS_HASWELL 2134b8e80941Smrg .CrossThreadConstantDataReadLength = 2135b8e80941Smrg cs_prog_data->push.cross_thread.regs, 2136b8e80941Smrg#endif 2137b8e80941Smrg 2138b8e80941Smrg .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads, 2139b8e80941Smrg }; 2140b8e80941Smrg GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, 2141b8e80941Smrg pipeline->interface_descriptor_data, 2142b8e80941Smrg &desc); 2143b8e80941Smrg 2144b8e80941Smrg *pPipeline = anv_pipeline_to_handle(pipeline); 2145b8e80941Smrg 2146b8e80941Smrg return pipeline->batch.status; 2147b8e80941Smrg} 2148b8e80941Smrg 2149b8e80941SmrgVkResult genX(CreateGraphicsPipelines)( 
2150b8e80941Smrg VkDevice _device, 2151b8e80941Smrg VkPipelineCache pipelineCache, 2152b8e80941Smrg uint32_t count, 2153b8e80941Smrg const VkGraphicsPipelineCreateInfo* pCreateInfos, 2154b8e80941Smrg const VkAllocationCallbacks* pAllocator, 2155b8e80941Smrg VkPipeline* pPipelines) 2156b8e80941Smrg{ 2157b8e80941Smrg ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); 2158b8e80941Smrg 2159b8e80941Smrg VkResult result = VK_SUCCESS; 2160b8e80941Smrg 2161b8e80941Smrg unsigned i; 2162b8e80941Smrg for (i = 0; i < count; i++) { 2163b8e80941Smrg result = genX(graphics_pipeline_create)(_device, 2164b8e80941Smrg pipeline_cache, 2165b8e80941Smrg &pCreateInfos[i], 2166b8e80941Smrg pAllocator, &pPipelines[i]); 2167b8e80941Smrg 2168b8e80941Smrg /* Bail out on the first error as it is not obvious what error should be 2169b8e80941Smrg * report upon 2 different failures. */ 2170b8e80941Smrg if (result != VK_SUCCESS) 2171b8e80941Smrg break; 2172b8e80941Smrg } 2173b8e80941Smrg 2174b8e80941Smrg for (; i < count; i++) 2175b8e80941Smrg pPipelines[i] = VK_NULL_HANDLE; 2176b8e80941Smrg 2177b8e80941Smrg return result; 2178b8e80941Smrg} 2179b8e80941Smrg 2180b8e80941SmrgVkResult genX(CreateComputePipelines)( 2181b8e80941Smrg VkDevice _device, 2182b8e80941Smrg VkPipelineCache pipelineCache, 2183b8e80941Smrg uint32_t count, 2184b8e80941Smrg const VkComputePipelineCreateInfo* pCreateInfos, 2185b8e80941Smrg const VkAllocationCallbacks* pAllocator, 2186b8e80941Smrg VkPipeline* pPipelines) 2187b8e80941Smrg{ 2188b8e80941Smrg ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); 2189b8e80941Smrg 2190b8e80941Smrg VkResult result = VK_SUCCESS; 2191b8e80941Smrg 2192b8e80941Smrg unsigned i; 2193b8e80941Smrg for (i = 0; i < count; i++) { 2194b8e80941Smrg result = compute_pipeline_create(_device, pipeline_cache, 2195b8e80941Smrg &pCreateInfos[i], 2196b8e80941Smrg pAllocator, &pPipelines[i]); 2197b8e80941Smrg 2198b8e80941Smrg /* Bail out on the first error as it is not obvious 
what error should be 2199b8e80941Smrg * report upon 2 different failures. */ 2200b8e80941Smrg if (result != VK_SUCCESS) 2201b8e80941Smrg break; 2202b8e80941Smrg } 2203b8e80941Smrg 2204b8e80941Smrg for (; i < count; i++) 2205b8e80941Smrg pPipelines[i] = VK_NULL_HANDLE; 2206b8e80941Smrg 2207b8e80941Smrg return result; 2208b8e80941Smrg} 2209