genX_pipeline.c revision 993e1d59
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "anv_private.h" 25 26#include "genxml/gen_macros.h" 27#include "genxml/genX_pack.h" 28 29#include "common/gen_l3_config.h" 30#include "common/gen_sample_positions.h" 31#include "vk_util.h" 32#include "vk_format_info.h" 33 34static uint32_t 35vertex_element_comp_control(enum isl_format format, unsigned comp) 36{ 37 uint8_t bits; 38 switch (comp) { 39 case 0: bits = isl_format_layouts[format].channels.r.bits; break; 40 case 1: bits = isl_format_layouts[format].channels.g.bits; break; 41 case 2: bits = isl_format_layouts[format].channels.b.bits; break; 42 case 3: bits = isl_format_layouts[format].channels.a.bits; break; 43 default: unreachable("Invalid component"); 44 } 45 46 /* 47 * Take into account hardware restrictions when dealing with 64-bit floats. 48 * 49 * From Broadwell spec, command reference structures, page 586: 50 * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, 51 * 64-bit components are stored in the URB without any conversion. In 52 * this case, vertex elements must be written as 128 or 256 bits, with 53 * VFCOMP_STORE_0 being used to pad the output as required. E.g., if 54 * R64_PASSTHRU is used to copy a 64-bit Red component into the URB, 55 * Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3 56 * set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or 57 * Components 1-3 must be specified as VFCOMP_STORE_0 in order to output 58 * a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires 59 * Component 3 to be specified as VFCOMP_STORE_0 in order to output a 60 * 256-bit vertex element."
61 */ 62 if (bits) { 63 return VFCOMP_STORE_SRC; 64 } else if (comp >= 2 && 65 !isl_format_layouts[format].channels.b.bits && 66 isl_format_layouts[format].channels.r.type == ISL_RAW) { 67 /* When emitting 64-bit attributes, we need to write either 128 or 256 68 * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and 69 * VFCOMP_STORE_0 to pad the written chunk */ 70 return VFCOMP_NOSTORE; 71 } else if (comp < 3 || 72 isl_format_layouts[format].channels.r.type == ISL_RAW) { 73 /* Note we need to pad with value 0, not 1, due to hardware restrictions 74 * (see comment above) */ 75 return VFCOMP_STORE_0; 76 } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || 77 isl_format_layouts[format].channels.r.type == ISL_SINT) { 78 assert(comp == 3); 79 return VFCOMP_STORE_1_INT; 80 } else { 81 assert(comp == 3); 82 return VFCOMP_STORE_1_FP; 83 } 84} 85 86static void 87emit_vertex_input(struct anv_pipeline *pipeline, 88 const VkPipelineVertexInputStateCreateInfo *info) 89{ 90 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); 91 92 /* Pull inputs_read out of the VS prog data */ 93 const uint64_t inputs_read = vs_prog_data->inputs_read; 94 const uint64_t double_inputs_read = 95 vs_prog_data->double_inputs_read & inputs_read; 96 assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); 97 const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0; 98 const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0; 99 const bool needs_svgs_elem = vs_prog_data->uses_vertexid || 100 vs_prog_data->uses_instanceid || 101 vs_prog_data->uses_firstvertex || 102 vs_prog_data->uses_baseinstance; 103 104 uint32_t elem_count = __builtin_popcount(elements) - 105 __builtin_popcount(elements_double) / 2; 106 107 const uint32_t total_elems = 108 elem_count + needs_svgs_elem + vs_prog_data->uses_drawid; 109 if (total_elems == 0) 110 return; 111 112 uint32_t *p; 113 114 const uint32_t num_dwords = 1 + total_elems * 2; 115 p = anv_batch_emitn(&pipeline->batch, num_dwords, 116 GENX(3DSTATE_VERTEX_ELEMENTS)); 117 if (!p) 118 return; 119 120 for (uint32_t i = 0; i < total_elems; i++) { 121 /* The SKL docs for VERTEX_ELEMENT_STATE say: 122 * 123 * "All elements must be valid from Element[0] to the last valid 124 * element. (I.e. if Element[2] is valid then Element[1] and 125 * Element[0] must also be valid)." 126 * 127 * The SKL docs for 3D_Vertex_Component_Control say: 128 * 129 * "Don't store this component. (Not valid for Component 0, but can 130 * be used for Component 1-3)." 131 * 132 * So we can't just leave a vertex element blank and hope for the best. 133 * We have to tell the VF hardware to put something in it; so we just 134 * store a bunch of zeros. 135 * 136 * TODO: Compact vertex elements so we never end up with holes.
137 */ 138 struct GENX(VERTEX_ELEMENT_STATE) element = { 139 .Valid = true, 140 .Component0Control = VFCOMP_STORE_0, 141 .Component1Control = VFCOMP_STORE_0, 142 .Component2Control = VFCOMP_STORE_0, 143 .Component3Control = VFCOMP_STORE_0, 144 }; 145 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element); 146 } 147 148 for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { 149 const VkVertexInputAttributeDescription *desc = 150 &info->pVertexAttributeDescriptions[i]; 151 enum isl_format format = anv_get_isl_format(&pipeline->device->info, 152 desc->format, 153 VK_IMAGE_ASPECT_COLOR_BIT, 154 VK_IMAGE_TILING_LINEAR); 155 156 assert(desc->binding < MAX_VBS); 157 158 if ((elements & (1 << desc->location)) == 0) 159 continue; /* Binding unused */ 160 161 uint32_t slot = 162 __builtin_popcount(elements & ((1 << desc->location) - 1)) - 163 DIV_ROUND_UP(__builtin_popcount(elements_double & 164 ((1 << desc->location) - 1)), 2); 165 166 struct GENX(VERTEX_ELEMENT_STATE) element = { 167 .VertexBufferIndex = desc->binding, 168 .Valid = true, 169 .SourceElementFormat = format, 170 .EdgeFlagEnable = false, 171 .SourceElementOffset = desc->offset, 172 .Component0Control = vertex_element_comp_control(format, 0), 173 .Component1Control = vertex_element_comp_control(format, 1), 174 .Component2Control = vertex_element_comp_control(format, 2), 175 .Component3Control = vertex_element_comp_control(format, 3), 176 }; 177 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); 178 179#if GEN_GEN >= 8 180 /* On Broadwell and later, we have a separate VF_INSTANCING packet 181 * that controls instancing. On Haswell and prior, that's part of 182 * VERTEX_BUFFER_STATE which we emit later. 183 */ 184 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) { 185 vfi.InstancingEnable = pipeline->vb[desc->binding].instanced; 186 vfi.VertexElementIndex = slot; 187 vfi.InstanceDataStepRate = 188 pipeline->vb[desc->binding].instance_divisor; 189 } 190#endif 191 } 192 193 const uint32_t id_slot = elem_count; 194 if (needs_svgs_elem) { 195 /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: 196 * "Within a VERTEX_ELEMENT_STATE structure, if a Component 197 * Control field is set to something other than VFCOMP_STORE_SRC, 198 * no higher-numbered Component Control fields may be set to 199 * VFCOMP_STORE_SRC" 200 * 201 * This means that if we have BaseInstance, we need BaseVertex as 202 * well. Just do all or nothing. 203 */ 204 uint32_t base_ctrl = (vs_prog_data->uses_firstvertex || 205 vs_prog_data->uses_baseinstance) ?
206 VFCOMP_STORE_SRC : VFCOMP_STORE_0; 207 208 struct GENX(VERTEX_ELEMENT_STATE) element = { 209 .VertexBufferIndex = ANV_SVGS_VB_INDEX, 210 .Valid = true, 211 .SourceElementFormat = ISL_FORMAT_R32G32_UINT, 212 .Component0Control = base_ctrl, 213 .Component1Control = base_ctrl, 214#if GEN_GEN >= 8 215 .Component2Control = VFCOMP_STORE_0, 216 .Component3Control = VFCOMP_STORE_0, 217#else 218 .Component2Control = VFCOMP_STORE_VID, 219 .Component3Control = VFCOMP_STORE_IID, 220#endif 221 }; 222 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); 223 } 224 225#if GEN_GEN >= 8 226 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), sgvs) { 227 sgvs.VertexIDEnable = vs_prog_data->uses_vertexid; 228 sgvs.VertexIDComponentNumber = 2; 229 sgvs.VertexIDElementOffset = id_slot; 230 sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid; 231 sgvs.InstanceIDComponentNumber = 3; 232 sgvs.InstanceIDElementOffset = id_slot; 233 } 234#endif 235 236 const uint32_t drawid_slot = elem_count + needs_svgs_elem; 237 if (vs_prog_data->uses_drawid) { 238 struct GENX(VERTEX_ELEMENT_STATE) element = { 239 .VertexBufferIndex = ANV_DRAWID_VB_INDEX, 240 .Valid = true, 241 .SourceElementFormat = ISL_FORMAT_R32_UINT, 242 .Component0Control = VFCOMP_STORE_SRC, 243 .Component1Control = VFCOMP_STORE_0, 244 .Component2Control = VFCOMP_STORE_0, 245 .Component3Control = VFCOMP_STORE_0, 246 }; 247 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, 248 &p[1 + drawid_slot * 2], 249 &element); 250 251#if GEN_GEN >= 8 252 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) { 253 vfi.VertexElementIndex = drawid_slot; 254 } 255#endif 256 } 257} 258 259void 260genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, 261 const struct gen_l3_config *l3_config, 262 VkShaderStageFlags active_stages, 263 const unsigned entry_size[4]) 264{ 265 const struct gen_device_info *devinfo = &device->info; 266#if GEN_IS_HASWELL 267 const unsigned push_constant_kb = devinfo->gt == 3 ? 32 : 16; 268#else 269 const unsigned push_constant_kb = GEN_GEN >= 8 ? 32 : 16; 270#endif 271 272 const unsigned urb_size_kb = gen_get_l3_config_urb_size(devinfo, l3_config); 273 274 unsigned entries[4]; 275 unsigned start[4]; 276 gen_get_urb_config(devinfo, 277 1024 * push_constant_kb, 1024 * urb_size_kb, 278 active_stages & 279 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, 280 active_stages & VK_SHADER_STAGE_GEOMETRY_BIT, 281 entry_size, entries, start); 282 283#if GEN_GEN == 7 && !GEN_IS_HASWELL 284 /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: 285 * 286 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall 287 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, 288 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, 289 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL 290 * needs to be sent before any combination of VS associated 3DSTATE." 
291 */ 292 anv_batch_emit(batch, GEN7_PIPE_CONTROL, pc) { 293 pc.DepthStallEnable = true; 294 pc.PostSyncOperation = WriteImmediateData; 295 pc.Address = (struct anv_address) { &device->workaround_bo, 0 }; 296 } 297#endif 298 299 for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { 300 anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) { 301 urb._3DCommandSubOpcode += i; 302 urb.VSURBStartingAddress = start[i]; 303 urb.VSURBEntryAllocationSize = entry_size[i] - 1; 304 urb.VSNumberofURBEntries = entries[i]; 305 } 306 } 307} 308 309static void 310emit_urb_setup(struct anv_pipeline *pipeline) 311{ 312 unsigned entry_size[4]; 313 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 314 const struct brw_vue_prog_data *prog_data = 315 !anv_pipeline_has_stage(pipeline, i) ? NULL : 316 (const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data; 317 318 entry_size[i] = prog_data ? prog_data->urb_entry_size : 1; 319 } 320 321 genX(emit_urb_setup)(pipeline->device, &pipeline->batch, 322 pipeline->urb.l3_config, 323 pipeline->active_stages, entry_size); 324} 325 326static void 327emit_3dstate_sbe(struct anv_pipeline *pipeline) 328{ 329 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 330 331 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 332 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe); 333#if GEN_GEN >= 8 334 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ), sbe); 335#endif 336 return; 337 } 338 339 const struct brw_vue_map *fs_input_map = 340 &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map; 341 342 struct GENX(3DSTATE_SBE) sbe = { 343 GENX(3DSTATE_SBE_header), 344 .AttributeSwizzleEnable = true, 345 .PointSpriteTextureCoordinateOrigin = UPPERLEFT, 346 .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, 347 .ConstantInterpolationEnable = wm_prog_data->flat_inputs, 348 }; 349 350#if GEN_GEN >= 9 351 for (unsigned i = 0; i < 32; i++) 352 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; 353#endif 354 355#if GEN_GEN >= 8 356 /* On Broadwell, they broke 3DSTATE_SBE into two packets */ 357 struct GENX(3DSTATE_SBE_SWIZ) swiz = { 358 GENX(3DSTATE_SBE_SWIZ_header), 359 }; 360#else 361# define swiz sbe 362#endif 363 364 /* Skip the VUE header and position slots by default */ 365 unsigned urb_entry_read_offset = 1; 366 int max_source_attr = 0; 367 for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { 368 int input_index = wm_prog_data->urb_setup[attr]; 369 370 if (input_index < 0) 371 continue; 372 373 /* gl_Layer is stored in the VUE header */ 374 if (attr == VARYING_SLOT_LAYER) { 375 urb_entry_read_offset = 0; 376 continue; 377 } 378 379 if (attr == VARYING_SLOT_PNTC) { 380 sbe.PointSpriteTextureCoordinateEnable = 1 << input_index; 381 continue; 382 } 383 384 const int slot = fs_input_map->varying_to_slot[attr]; 385 386 if (input_index >= 16) 387 continue; 388 389 if (slot == -1) { 390 /* This attribute does not exist in the VUE--that means that the 391 * vertex shader did not write to it. It could be that it's a 392 * regular varying read by the fragment shader but not written by 393 * the vertex shader or it's gl_PrimitiveID. In the first case the 394 * value is undefined, in the second it needs to be 395 * gl_PrimitiveID. 
396 */ 397 swiz.Attribute[input_index].ConstantSource = PRIM_ID; 398 swiz.Attribute[input_index].ComponentOverrideX = true; 399 swiz.Attribute[input_index].ComponentOverrideY = true; 400 swiz.Attribute[input_index].ComponentOverrideZ = true; 401 swiz.Attribute[input_index].ComponentOverrideW = true; 402 } else { 403 /* We have to subtract two slots to account for the URB entry output 404 * read offset in the VS and GS stages. 405 */ 406 const int source_attr = slot - 2 * urb_entry_read_offset; 407 assert(source_attr >= 0 && source_attr < 32); 408 max_source_attr = MAX2(max_source_attr, source_attr); 409 swiz.Attribute[input_index].SourceAttribute = source_attr; 410 } 411 } 412 413 sbe.VertexURBEntryReadOffset = urb_entry_read_offset; 414 sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2); 415#if GEN_GEN >= 8 416 sbe.ForceVertexURBEntryReadOffset = true; 417 sbe.ForceVertexURBEntryReadLength = true; 418#endif 419 420 uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, 421 GENX(3DSTATE_SBE_length)); 422 if (!dw) 423 return; 424 GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe); 425 426#if GEN_GEN >= 8 427 dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length)); 428 if (!dw) 429 return; 430 GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); 431#endif 432} 433 434static const uint32_t vk_to_gen_cullmode[] = { 435 [VK_CULL_MODE_NONE] = CULLMODE_NONE, 436 [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, 437 [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, 438 [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH 439}; 440 441static const uint32_t vk_to_gen_fillmode[] = { 442 [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, 443 [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, 444 [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, 445}; 446 447static const uint32_t vk_to_gen_front_face[] = { 448 [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, 449 [VK_FRONT_FACE_CLOCKWISE] = 0 450}; 451 452static void 453emit_rs_state(struct anv_pipeline *pipeline, 454 const VkPipelineRasterizationStateCreateInfo *rs_info, 455 const VkPipelineMultisampleStateCreateInfo *ms_info, 456 const struct anv_render_pass *pass, 457 const struct anv_subpass *subpass) 458{ 459 struct GENX(3DSTATE_SF) sf = { 460 GENX(3DSTATE_SF_header), 461 }; 462 463 sf.ViewportTransformEnable = true; 464 sf.StatisticsEnable = true; 465 sf.TriangleStripListProvokingVertexSelect = 0; 466 sf.LineStripListProvokingVertexSelect = 0; 467 sf.TriangleFanProvokingVertexSelect = 1; 468 sf.VertexSubPixelPrecisionSelect = _8Bit; 469 470 const struct brw_vue_prog_data *last_vue_prog_data = 471 anv_pipeline_get_last_vue_prog_data(pipeline); 472 473 if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { 474 sf.PointWidthSource = Vertex; 475 } else { 476 sf.PointWidthSource = State; 477 sf.PointWidth = 1.0; 478 } 479 480#if GEN_GEN >= 8 481 struct GENX(3DSTATE_RASTER) raster = { 482 GENX(3DSTATE_RASTER_header), 483 }; 484#else 485# define raster sf 486#endif 487 488 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table 489 * "Multisample Modes State". 490 */ 491#if GEN_GEN >= 8 492 raster.DXMultisampleRasterizationEnable = true; 493 /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix 494 * computations. If we ever set this bit to a different value, they will 495 * need to be updated accordingly. 496 */ 497 raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0; 498 raster.ForceMultisampling = false; 499#else 500 raster.MultisampleRasterizationMode = 501 (ms_info && ms_info->rasterizationSamples > 1) ?
502 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; 503#endif 504 505 raster.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; 506 raster.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; 507 raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode]; 508 raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode]; 509 raster.ScissorRectangleEnable = true; 510 511#if GEN_GEN >= 9 512 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ 513 raster.ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable; 514 raster.ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable; 515#elif GEN_GEN >= 8 516 raster.ViewportZClipTestEnable = !pipeline->depth_clamp_enable; 517#endif 518 519 raster.GlobalDepthOffsetEnableSolid = rs_info->depthBiasEnable; 520 raster.GlobalDepthOffsetEnableWireframe = rs_info->depthBiasEnable; 521 raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable; 522 523#if GEN_GEN == 7 524 /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it 525 * can get the depth offsets correct. 526 */ 527 if (subpass->depth_stencil_attachment) { 528 VkFormat vk_format = 529 pass->attachments[subpass->depth_stencil_attachment->attachment].format; 530 assert(vk_format_is_depth_or_stencil(vk_format)); 531 if (vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_DEPTH_BIT) { 532 enum isl_format isl_format = 533 anv_get_isl_format(&pipeline->device->info, vk_format, 534 VK_IMAGE_ASPECT_DEPTH_BIT, 535 VK_IMAGE_TILING_OPTIMAL); 536 sf.DepthBufferSurfaceFormat = 537 isl_format_get_depth_format(isl_format, false); 538 } 539 } 540#endif 541 542#if GEN_GEN >= 8 543 GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); 544 GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); 545#else 546# undef raster 547 GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); 548#endif 549} 550 551static void 552emit_ms_state(struct anv_pipeline *pipeline, 553 const VkPipelineMultisampleStateCreateInfo *info) 554{ 555 uint32_t samples = 1; 556 uint32_t log2_samples = 0; 557 558 /* From the Vulkan 1.0 spec: 559 * If pSampleMask is NULL, it is treated as if the mask has all bits 560 * enabled, i.e. no coverage is removed from fragments. 561 * 562 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. 563 */ 564#if GEN_GEN >= 8 565 uint32_t sample_mask = 0xffff; 566#else 567 uint32_t sample_mask = 0xff; 568#endif 569 570 if (info) { 571 samples = info->rasterizationSamples; 572 log2_samples = __builtin_ffs(samples) - 1; 573 } 574 575 if (info && info->pSampleMask) 576 sample_mask &= info->pSampleMask[0]; 577 578 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) { 579 ms.NumberofMultisamples = log2_samples; 580 581 ms.PixelLocation = CENTER; 582#if GEN_GEN >= 8 583 /* The PRM says that this bit is valid only for DX9: 584 * 585 * SW can choose to set this bit only for DX9 API. DX10/OGL API's 586 * should not have any effect by setting or not setting this bit. 
587 */ 588 ms.PixelPositionOffsetEnable = false; 589#else 590 591 switch (samples) { 592 case 1: 593 GEN_SAMPLE_POS_1X(ms.Sample); 594 break; 595 case 2: 596 GEN_SAMPLE_POS_2X(ms.Sample); 597 break; 598 case 4: 599 GEN_SAMPLE_POS_4X(ms.Sample); 600 break; 601 case 8: 602 GEN_SAMPLE_POS_8X(ms.Sample); 603 break; 604 default: 605 break; 606 } 607#endif 608 } 609 610 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) { 611 sm.SampleMask = sample_mask; 612 } 613} 614 615static const uint32_t vk_to_gen_logic_op[] = { 616 [VK_LOGIC_OP_COPY] = LOGICOP_COPY, 617 [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, 618 [VK_LOGIC_OP_AND] = LOGICOP_AND, 619 [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, 620 [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, 621 [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, 622 [VK_LOGIC_OP_XOR] = LOGICOP_XOR, 623 [VK_LOGIC_OP_OR] = LOGICOP_OR, 624 [VK_LOGIC_OP_NOR] = LOGICOP_NOR, 625 [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, 626 [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, 627 [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, 628 [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, 629 [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, 630 [VK_LOGIC_OP_NAND] = LOGICOP_NAND, 631 [VK_LOGIC_OP_SET] = LOGICOP_SET, 632}; 633 634static const uint32_t vk_to_gen_blend[] = { 635 [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, 636 [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, 637 [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, 638 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, 639 [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, 640 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, 641 [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, 642 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, 643 [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, 644 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, 645 [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, 646 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, 647 [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, 648 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, 649 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, 650 [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, 651 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, 652 [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, 653 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, 654}; 655 656static const uint32_t vk_to_gen_blend_op[] = { 657 [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, 658 [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, 659 [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, 660 [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, 661 [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, 662}; 663 664static const uint32_t vk_to_gen_compare_op[] = { 665 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, 666 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, 667 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, 668 [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, 669 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, 670 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, 671 [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, 672 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, 673}; 674 675static const uint32_t vk_to_gen_stencil_op[] = { 676 [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, 677 [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, 678 [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, 679 [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, 680 
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, 681 [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, 682 [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, 683 [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, 684}; 685 686/* This function sanitizes the VkStencilOpState by looking at the compare ops 687 * and trying to determine whether or not a given stencil op can ever actually 688 * occur. Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP. 689 * This function returns true if, after sanitization, any of the stencil ops are 690 * set to something other than VK_STENCIL_OP_KEEP. 691 */ 692static bool 693sanitize_stencil_face(VkStencilOpState *face, 694 VkCompareOp depthCompareOp) 695{ 696 /* If compareOp is ALWAYS then the stencil test will never fail and failOp 697 * will never happen. Set failOp to KEEP in this case. 698 */ 699 if (face->compareOp == VK_COMPARE_OP_ALWAYS) 700 face->failOp = VK_STENCIL_OP_KEEP; 701 702 /* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth 703 * or stencil tests will fail and passOp will never happen. 704 */ 705 if (face->compareOp == VK_COMPARE_OP_NEVER || 706 depthCompareOp == VK_COMPARE_OP_NEVER) 707 face->passOp = VK_STENCIL_OP_KEEP; 708 709 /* If compareOp is NEVER or depthCompareOp is ALWAYS then either the 710 * stencil test will fail or the depth test will pass. In either case, 711 * depthFailOp will never happen. 712 */ 713 if (face->compareOp == VK_COMPARE_OP_NEVER || 714 depthCompareOp == VK_COMPARE_OP_ALWAYS) 715 face->depthFailOp = VK_STENCIL_OP_KEEP; 716 717 return face->failOp != VK_STENCIL_OP_KEEP || 718 face->depthFailOp != VK_STENCIL_OP_KEEP || 719 face->passOp != VK_STENCIL_OP_KEEP; 720} 721 722/* Intel hardware is fairly sensitive to whether or not depth/stencil writes 723 * are enabled. In the presence of discards, it's fairly easy to get into the 724 * non-promoted case which means a fairly big performance hit. From the Iron 725 * Lake PRM, Vol 2, pt. 1, section 8.4.3.2, "Early Depth Test Cases": 726 * 727 * "Non-promoted depth (N) is active whenever the depth test can be done 728 * early but it cannot determine whether or not to write source depth to 729 * the depth buffer, therefore the depth write must be performed post pixel 730 * shader. This includes cases where the pixel shader can kill pixels, 731 * including via sampler chroma key, as well as cases where the alpha test 732 * function is enabled, which kills pixels based on a programmable alpha 733 * test. In this case, even if the depth test fails, the pixel cannot be 734 * killed if a stencil write is indicated. Whether or not the stencil write 735 * happens depends on whether or not the pixel is killed later. In these 736 * cases if stencil test fails and stencil writes are off, the pixels can 737 * also be killed early. If stencil writes are enabled, the pixels must be 738 * treated as Computed depth (described above)." 739 * 740 * The same thing as mentioned in the stencil case can happen in the depth 741 * case as well if it thinks it writes depth but, thanks to the depth test 742 * being GL_EQUAL, the write doesn't actually matter. A little extra work 743 * up-front to try and disable depth and stencil writes can make a big 744 * difference. 745 * 746 * Unfortunately, the way depth and stencil testing is specified, there are 747 * many cases where, regardless of depth/stencil writes being enabled, nothing 748 * actually gets written due to some other bit of state being set.
This 749 * function attempts to "sanitize" the depth stencil state and disable writes 750 * and sometimes even testing whenever possible. 751 */ 752static void 753sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state, 754 bool *stencilWriteEnable, 755 VkImageAspectFlags ds_aspects) 756{ 757 *stencilWriteEnable = state->stencilTestEnable; 758 759 /* If the depth test is disabled, we won't be writing anything. Make sure we 760 * treat the test as always passing later on as well. 761 * 762 * Also, the Vulkan spec requires that if either depth or stencil is not 763 * present, the pipeline is to act as if the test silently passes. In that 764 * case we won't write either. 765 */ 766 if (!state->depthTestEnable || !(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { 767 state->depthWriteEnable = false; 768 state->depthCompareOp = VK_COMPARE_OP_ALWAYS; 769 } 770 771 if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) { 772 *stencilWriteEnable = false; 773 state->front.compareOp = VK_COMPARE_OP_ALWAYS; 774 state->back.compareOp = VK_COMPARE_OP_ALWAYS; 775 } 776 777 /* If the stencil test is enabled and always fails, then we will never get 778 * to the depth test so we can just disable the depth test entirely. 779 */ 780 if (state->stencilTestEnable && 781 state->front.compareOp == VK_COMPARE_OP_NEVER && 782 state->back.compareOp == VK_COMPARE_OP_NEVER) { 783 state->depthTestEnable = false; 784 state->depthWriteEnable = false; 785 } 786 787 /* If depthCompareOp is EQUAL then the value we would be writing to the 788 * depth buffer is the same as the value that's already there so there's no 789 * point in writing it. 790 */ 791 if (state->depthCompareOp == VK_COMPARE_OP_EQUAL) 792 state->depthWriteEnable = false; 793 794 /* If the stencil ops are such that we don't actually ever modify the 795 * stencil buffer, we should disable writes. 796 */ 797 if (!sanitize_stencil_face(&state->front, state->depthCompareOp) && 798 !sanitize_stencil_face(&state->back, state->depthCompareOp)) 799 *stencilWriteEnable = false; 800 801 /* If the depth test always passes and we never write out depth, that's the 802 * same as if the depth test is disabled entirely. 803 */ 804 if (state->depthCompareOp == VK_COMPARE_OP_ALWAYS && 805 !state->depthWriteEnable) 806 state->depthTestEnable = false; 807 808 /* If the stencil test always passes and we never write out stencil, that's 809 * the same as if the stencil test is disabled entirely. 810 */ 811 if (state->front.compareOp == VK_COMPARE_OP_ALWAYS && 812 state->back.compareOp == VK_COMPARE_OP_ALWAYS && 813 !*stencilWriteEnable) 814 state->stencilTestEnable = false; 815} 816 817static void 818emit_ds_state(struct anv_pipeline *pipeline, 819 const VkPipelineDepthStencilStateCreateInfo *pCreateInfo, 820 const struct anv_render_pass *pass, 821 const struct anv_subpass *subpass) 822{ 823#if GEN_GEN == 7 824# define depth_stencil_dw pipeline->gen7.depth_stencil_state 825#elif GEN_GEN == 8 826# define depth_stencil_dw pipeline->gen8.wm_depth_stencil 827#else 828# define depth_stencil_dw pipeline->gen9.wm_depth_stencil 829#endif 830 831 if (pCreateInfo == NULL) { 832 /* We're going to OR this together with the dynamic state. We need 833 * to make sure it's initialized to something useful. 
834 */ 835 pipeline->writes_stencil = false; 836 pipeline->stencil_test_enable = false; 837 pipeline->writes_depth = false; 838 pipeline->depth_test_enable = false; 839 memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw)); 840 return; 841 } 842 843 VkImageAspectFlags ds_aspects = 0; 844 if (subpass->depth_stencil_attachment) { 845 VkFormat depth_stencil_format = 846 pass->attachments[subpass->depth_stencil_attachment->attachment].format; 847 ds_aspects = vk_format_aspects(depth_stencil_format); 848 } 849 850 VkPipelineDepthStencilStateCreateInfo info = *pCreateInfo; 851 sanitize_ds_state(&info, &pipeline->writes_stencil, ds_aspects); 852 pipeline->stencil_test_enable = info.stencilTestEnable; 853 pipeline->writes_depth = info.depthWriteEnable; 854 pipeline->depth_test_enable = info.depthTestEnable; 855 856 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ 857 858#if GEN_GEN <= 7 859 struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { 860#else 861 struct GENX(3DSTATE_WM_DEPTH_STENCIL) depth_stencil = { 862#endif 863 .DepthTestEnable = info.depthTestEnable, 864 .DepthBufferWriteEnable = info.depthWriteEnable, 865 .DepthTestFunction = vk_to_gen_compare_op[info.depthCompareOp], 866 .DoubleSidedStencilEnable = true, 867 868 .StencilTestEnable = info.stencilTestEnable, 869 .StencilFailOp = vk_to_gen_stencil_op[info.front.failOp], 870 .StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp], 871 .StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp], 872 .StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp], 873 .BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp], 874 .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp], 875 .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info.back.depthFailOp], 876 .BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp], 877 }; 878 879#if GEN_GEN <= 7 880 GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); 881#else 882 GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil); 883#endif 884} 885 886MAYBE_UNUSED static bool 887is_dual_src_blend_factor(VkBlendFactor factor) 888{ 889 return factor == VK_BLEND_FACTOR_SRC1_COLOR || 890 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR || 891 factor == VK_BLEND_FACTOR_SRC1_ALPHA || 892 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; 893} 894 895static void 896emit_cb_state(struct anv_pipeline *pipeline, 897 const VkPipelineColorBlendStateCreateInfo *info, 898 const VkPipelineMultisampleStateCreateInfo *ms_info) 899{ 900 struct anv_device *device = pipeline->device; 901 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 902 903 struct GENX(BLEND_STATE) blend_state = { 904#if GEN_GEN >= 8 905 .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, 906 .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, 907#endif 908 }; 909 910 uint32_t surface_count = 0; 911 struct anv_pipeline_bind_map *map; 912 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 913 map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; 914 surface_count = map->surface_count; 915 } 916 917 const uint32_t num_dwords = GENX(BLEND_STATE_length) + 918 GENX(BLEND_STATE_ENTRY_length) * surface_count; 919 pipeline->blend_state = 920 anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); 921 922 bool has_writeable_rt = false; 923 uint32_t *state_pos = pipeline->blend_state.map; 924 state_pos += GENX(BLEND_STATE_length); 925#if GEN_GEN >= 8 926
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 }; 927#endif 928 for (unsigned i = 0; i < surface_count; i++) { 929 struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; 930 931 /* All color attachments are at the beginning of the binding table */ 932 if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) 933 break; 934 935 /* We can have at most 8 attachments */ 936 assert(i < 8); 937 938 if (info == NULL || binding->index >= info->attachmentCount) { 939 /* Default everything to disabled */ 940 struct GENX(BLEND_STATE_ENTRY) entry = { 941 .WriteDisableAlpha = true, 942 .WriteDisableRed = true, 943 .WriteDisableGreen = true, 944 .WriteDisableBlue = true, 945 }; 946 GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry); 947 state_pos += GENX(BLEND_STATE_ENTRY_length); 948 continue; 949 } 950 951 assert(binding->binding == 0); 952 const VkPipelineColorBlendAttachmentState *a = 953 &info->pAttachments[binding->index]; 954 955 struct GENX(BLEND_STATE_ENTRY) entry = { 956#if GEN_GEN < 8 957 .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, 958 .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, 959#endif 960 .LogicOpEnable = info->logicOpEnable, 961 .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], 962 .ColorBufferBlendEnable = a->blendEnable, 963 .ColorClampRange = COLORCLAMP_RTFORMAT, 964 .PreBlendColorClampEnable = true, 965 .PostBlendColorClampEnable = true, 966 .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], 967 .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], 968 .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], 969 .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], 970 .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], 971 .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], 972 .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), 973 .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), 974 .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), 975 .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), 976 }; 977 978 if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || 979 a->dstColorBlendFactor != a->dstAlphaBlendFactor || 980 a->colorBlendOp != a->alphaBlendOp) { 981#if GEN_GEN >= 8 982 blend_state.IndependentAlphaBlendEnable = true; 983#else 984 entry.IndependentAlphaBlendEnable = true; 985#endif 986 } 987 988 /* The Dual Source Blending documentation says: 989 * 990 * "If SRC1 is included in a src/dst blend factor and 991 * a DualSource RT Write message is not used, results 992 * are UNDEFINED. (This reflects the same restriction in DX APIs, 993 * where undefined results are produced if “o1” is not written 994 * by a PS – there are no default values defined)." 995 * 996 * There is no way to gracefully fix this undefined situation 997 * so we just disable the blending to prevent possible issues. 998 */ 999 if (!wm_prog_data->dual_src_blend && 1000 (is_dual_src_blend_factor(a->srcColorBlendFactor) || 1001 is_dual_src_blend_factor(a->dstColorBlendFactor) || 1002 is_dual_src_blend_factor(a->srcAlphaBlendFactor) || 1003 is_dual_src_blend_factor(a->dstAlphaBlendFactor))) { 1004 vk_debug_report(&device->instance->debug_report_callbacks, 1005 VK_DEBUG_REPORT_WARNING_BIT_EXT, 1006 VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, 1007 (uint64_t)(uintptr_t)device, 1008 0, 0, "anv", 1009 "Enabled dual-src blend factors without writing both targets " 1010 "in the shader. 
Disabling blending to avoid GPU hangs."); 1011 entry.ColorBufferBlendEnable = false; 1012 } 1013 1014 if (a->colorWriteMask != 0) 1015 has_writeable_rt = true; 1016 1017 /* Our hardware applies the blend factor prior to the blend function 1018 * regardless of what function is used. Technically, this means the 1019 * hardware can do MORE than GL or Vulkan specify. However, it also 1020 * means that, for MIN and MAX, we have to stomp the blend factor to 1021 * ONE to make it a no-op. 1022 */ 1023 if (a->colorBlendOp == VK_BLEND_OP_MIN || 1024 a->colorBlendOp == VK_BLEND_OP_MAX) { 1025 entry.SourceBlendFactor = BLENDFACTOR_ONE; 1026 entry.DestinationBlendFactor = BLENDFACTOR_ONE; 1027 } 1028 if (a->alphaBlendOp == VK_BLEND_OP_MIN || 1029 a->alphaBlendOp == VK_BLEND_OP_MAX) { 1030 entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE; 1031 entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; 1032 } 1033 GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry); 1034 state_pos += GENX(BLEND_STATE_ENTRY_length); 1035#if GEN_GEN >= 8 1036 if (i == 0) 1037 bs0 = entry; 1038#endif 1039 } 1040 1041#if GEN_GEN >= 8 1042 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) { 1043 blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable; 1044 blend.HasWriteableRT = has_writeable_rt; 1045 blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable; 1046 blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor; 1047 blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor; 1048 blend.SourceBlendFactor = bs0.SourceBlendFactor; 1049 blend.DestinationBlendFactor = bs0.DestinationBlendFactor; 1050 blend.AlphaTestEnable = false; 1051 blend.IndependentAlphaBlendEnable = 1052 blend_state.IndependentAlphaBlendEnable; 1053 } 1054#else 1055 (void)has_writeable_rt; 1056#endif 1057 1058 GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); 1059 anv_state_flush(device, pipeline->blend_state); 1060 1061 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { 1062 bsp.BlendStatePointer = pipeline->blend_state.offset; 1063#if GEN_GEN >= 8 1064 bsp.BlendStatePointerValid = true; 1065#endif 1066 } 1067} 1068 1069static void 1070emit_3dstate_clip(struct anv_pipeline *pipeline, 1071 const VkPipelineViewportStateCreateInfo *vp_info, 1072 const VkPipelineRasterizationStateCreateInfo *rs_info) 1073{ 1074 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1075 (void) wm_prog_data; 1076 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { 1077 clip.ClipEnable = true; 1078 clip.StatisticsEnable = true; 1079 clip.EarlyCullEnable = true; 1080 clip.APIMode = APIMODE_D3D, 1081 clip.ViewportXYClipTestEnable = true; 1082 1083#if GEN_GEN >= 8 1084 clip.VertexSubPixelPrecisionSelect = _8Bit; 1085#endif 1086 1087 clip.ClipMode = CLIPMODE_NORMAL; 1088 1089 clip.TriangleStripListProvokingVertexSelect = 0; 1090 clip.LineStripListProvokingVertexSelect = 0; 1091 clip.TriangleFanProvokingVertexSelect = 1; 1092 1093 clip.MinimumPointWidth = 0.125; 1094 clip.MaximumPointWidth = 255.875; 1095 1096 const struct brw_vue_prog_data *last = 1097 anv_pipeline_get_last_vue_prog_data(pipeline); 1098 1099 /* From the Vulkan 1.0.45 spec: 1100 * 1101 * "If the last active vertex processing stage shader entry point's 1102 * interface does not include a variable decorated with 1103 * ViewportIndex, then the first viewport is used." 
1104 */ 1105 if (vp_info && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) { 1106 clip.MaximumVPIndex = vp_info->viewportCount - 1; 1107 } else { 1108 clip.MaximumVPIndex = 0; 1109 } 1110 1111 /* From the Vulkan 1.0.45 spec: 1112 * 1113 * "If the last active vertex processing stage shader entry point's 1114 * interface does not include a variable decorated with Layer, then 1115 * the first layer is used." 1116 */ 1117 clip.ForceZeroRTAIndexEnable = 1118 !(last->vue_map.slots_valid & VARYING_BIT_LAYER); 1119 1120#if GEN_GEN == 7 1121 clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; 1122 clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; 1123 clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable; 1124 if (last) { 1125 clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; 1126 clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; 1127 } 1128#else 1129 clip.NonPerspectiveBarycentricEnable = wm_prog_data ? 1130 (wm_prog_data->barycentric_interp_modes & 1131 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0; 1132#endif 1133 } 1134} 1135 1136static void 1137emit_3dstate_streamout(struct anv_pipeline *pipeline, 1138 const VkPipelineRasterizationStateCreateInfo *rs_info) 1139{ 1140 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) { 1141 so.RenderingDisable = rs_info->rasterizerDiscardEnable; 1142 } 1143} 1144 1145static uint32_t 1146get_sampler_count(const struct anv_shader_bin *bin) 1147{ 1148 uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4); 1149 1150 /* We can potentially have way more than 32 samplers and that's ok. 1151 * However, the 3DSTATE_XS packets only have 3 bits to specify how 1152 * many to pre-fetch and all values above 4 are marked reserved. 1153 */ 1154 return MIN2(count_by_4, 4); 1155} 1156 1157static uint32_t 1158get_binding_table_entry_count(const struct anv_shader_bin *bin) 1159{ 1160 return DIV_ROUND_UP(bin->bind_map.surface_count, 32); 1161} 1162 1163static struct anv_address 1164get_scratch_address(struct anv_pipeline *pipeline, 1165 gl_shader_stage stage, 1166 const struct anv_shader_bin *bin) 1167{ 1168 return (struct anv_address) { 1169 .bo = anv_scratch_pool_alloc(pipeline->device, 1170 &pipeline->device->scratch_pool, 1171 stage, bin->prog_data->total_scratch), 1172 .offset = 0, 1173 }; 1174} 1175 1176static uint32_t 1177get_scratch_space(const struct anv_shader_bin *bin) 1178{ 1179 return ffs(bin->prog_data->total_scratch / 2048); 1180} 1181 1182static void 1183emit_3dstate_vs(struct anv_pipeline *pipeline) 1184{ 1185 const struct gen_device_info *devinfo = &pipeline->device->info; 1186 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); 1187 const struct anv_shader_bin *vs_bin = 1188 pipeline->shaders[MESA_SHADER_VERTEX]; 1189 1190 assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)); 1191 1192 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) { 1193 vs.Enable = true; 1194 vs.StatisticsEnable = true; 1195 vs.KernelStartPointer = vs_bin->kernel.offset; 1196#if GEN_GEN >= 8 1197 vs.SIMD8DispatchEnable = 1198 vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; 1199#endif 1200 1201 assert(!vs_prog_data->base.base.use_alt_mode); 1202#if GEN_GEN < 11 1203 vs.SingleVertexDispatch = false; 1204#endif 1205 vs.VectorMaskEnable = false; 1206 vs.SamplerCount = get_sampler_count(vs_bin); 1207 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to 1208 * disable prefetching of binding tables on A0 and B0 steppings. 
1209 * TODO: Revisit this WA on newer steppings. 1210 */ 1211 vs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(vs_bin); 1212 vs.FloatingPointMode = IEEE754; 1213 vs.IllegalOpcodeExceptionEnable = false; 1214 vs.SoftwareExceptionEnable = false; 1215 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; 1216 1217 if (GEN_GEN == 9 && devinfo->gt == 4 && 1218 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { 1219 /* On Sky Lake GT4, we have experienced some hangs related to the VS 1220 * cache and tessellation. It is unknown exactly what is happening 1221 * but the Haswell docs for the "VS Reference Count Full Force Miss 1222 * Enable" field of the "Thread Mode" register refer to a HSW bug in 1223 * which the VUE handle reference count would overflow resulting in 1224 * internal reference counting bugs. My (Jason's) best guess is that 1225 * this bug cropped back up on SKL GT4 when we suddenly had more 1226 * threads in play than any previous gen9 hardware. 1227 * 1228 * What we do know for sure is that setting this bit when 1229 * tessellation shaders are in use fixes a GPU hang in Batman: Arkham 1230 * City when playing with DXVK (https://bugs.freedesktop.org/107280). 1231 * Disabling the vertex cache with tessellation shaders should only 1232 * have a minor performance impact as the tessellation shaders are 1233 * likely generating and processing far more geometry than the vertex 1234 * stage. 1235 */ 1236 vs.VertexCacheDisable = true; 1237 } 1238 1239 vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length; 1240 vs.VertexURBEntryReadOffset = 0; 1241 vs.DispatchGRFStartRegisterForURBData = 1242 vs_prog_data->base.base.dispatch_grf_start_reg; 1243 1244#if GEN_GEN >= 8 1245 vs.UserClipDistanceClipTestEnableBitmask = 1246 vs_prog_data->base.clip_distance_mask; 1247 vs.UserClipDistanceCullTestEnableBitmask = 1248 vs_prog_data->base.cull_distance_mask; 1249#endif 1250 1251 vs.PerThreadScratchSpace = get_scratch_space(vs_bin); 1252 vs.ScratchSpaceBasePointer = 1253 get_scratch_address(pipeline, MESA_SHADER_VERTEX, vs_bin); 1254 } 1255} 1256 1257static void 1258emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline, 1259 const VkPipelineTessellationStateCreateInfo *tess_info) 1260{ 1261 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { 1262 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs); 1263 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te); 1264 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds); 1265 return; 1266 } 1267 1268 const struct gen_device_info *devinfo = &pipeline->device->info; 1269 const struct anv_shader_bin *tcs_bin = 1270 pipeline->shaders[MESA_SHADER_TESS_CTRL]; 1271 const struct anv_shader_bin *tes_bin = 1272 pipeline->shaders[MESA_SHADER_TESS_EVAL]; 1273 1274 const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline); 1275 const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); 1276 1277 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs) { 1278 hs.Enable = true; 1279 hs.StatisticsEnable = true; 1280 hs.KernelStartPointer = tcs_bin->kernel.offset; 1281 1282 hs.SamplerCount = get_sampler_count(tcs_bin); 1283 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ 1284 hs.BindingTableEntryCount = GEN_GEN == 11 ? 
0 : get_binding_table_entry_count(tcs_bin); 1285 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; 1286 hs.IncludeVertexHandles = true; 1287 hs.InstanceCount = tcs_prog_data->instances - 1; 1288 1289 hs.VertexURBEntryReadLength = 0; 1290 hs.VertexURBEntryReadOffset = 0; 1291 hs.DispatchGRFStartRegisterForURBData = 1292 tcs_prog_data->base.base.dispatch_grf_start_reg; 1293 1294 hs.PerThreadScratchSpace = get_scratch_space(tcs_bin); 1295 hs.ScratchSpaceBasePointer = 1296 get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); 1297 } 1298 1299 const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = 1300 tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR) : NULL; 1301 1302 VkTessellationDomainOriginKHR uv_origin = 1303 domain_origin_state ? domain_origin_state->domainOrigin : 1304 VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR; 1305 1306 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) { 1307 te.Partitioning = tes_prog_data->partitioning; 1308 1309 if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR) { 1310 te.OutputTopology = tes_prog_data->output_topology; 1311 } else { 1312 /* When the origin is upper-left, we have to flip the winding order */ 1313 if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) { 1314 te.OutputTopology = OUTPUT_TRI_CW; 1315 } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) { 1316 te.OutputTopology = OUTPUT_TRI_CCW; 1317 } else { 1318 te.OutputTopology = tes_prog_data->output_topology; 1319 } 1320 } 1321 1322 te.TEDomain = tes_prog_data->domain; 1323 te.TEEnable = true; 1324 te.MaximumTessellationFactorOdd = 63.0; 1325 te.MaximumTessellationFactorNotOdd = 64.0; 1326 } 1327 1328 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds) { 1329 ds.Enable = true; 1330 ds.StatisticsEnable = true; 1331 ds.KernelStartPointer = tes_bin->kernel.offset; 1332 1333 ds.SamplerCount = get_sampler_count(tes_bin); 1334 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ 1335 ds.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(tes_bin); 1336 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; 1337 1338 ds.ComputeWCoordinateEnable = 1339 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; 1340 1341 ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length; 1342 ds.PatchURBEntryReadOffset = 0; 1343 ds.DispatchGRFStartRegisterForURBData = 1344 tes_prog_data->base.base.dispatch_grf_start_reg; 1345 1346#if GEN_GEN >= 8 1347#if GEN_GEN < 11 1348 ds.DispatchMode = 1349 tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ? 
1350 DISPATCH_MODE_SIMD8_SINGLE_PATCH : 1351 DISPATCH_MODE_SIMD4X2; 1352#else 1353 assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8); 1354 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; 1355#endif 1356 1357 ds.UserClipDistanceClipTestEnableBitmask = 1358 tes_prog_data->base.clip_distance_mask; 1359 ds.UserClipDistanceCullTestEnableBitmask = 1360 tes_prog_data->base.cull_distance_mask; 1361#endif 1362 1363 ds.PerThreadScratchSpace = get_scratch_space(tes_bin); 1364 ds.ScratchSpaceBasePointer = 1365 get_scratch_address(pipeline, MESA_SHADER_TESS_EVAL, tes_bin); 1366 } 1367} 1368 1369static void 1370emit_3dstate_gs(struct anv_pipeline *pipeline) 1371{ 1372 const struct gen_device_info *devinfo = &pipeline->device->info; 1373 const struct anv_shader_bin *gs_bin = 1374 pipeline->shaders[MESA_SHADER_GEOMETRY]; 1375 1376 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 1377 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs); 1378 return; 1379 } 1380 1381 const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); 1382 1383 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) { 1384 gs.Enable = true; 1385 gs.StatisticsEnable = true; 1386 gs.KernelStartPointer = gs_bin->kernel.offset; 1387 gs.DispatchMode = gs_prog_data->base.dispatch_mode; 1388 1389 gs.SingleProgramFlow = false; 1390 gs.VectorMaskEnable = false; 1391 gs.SamplerCount = get_sampler_count(gs_bin); 1392 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ 1393 gs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(gs_bin); 1394 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles; 1395 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 1396 1397 if (GEN_GEN == 8) { 1398 /* Broadwell is weird. It needs us to divide by 2. */ 1399 gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1; 1400 } else { 1401 gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1; 1402 } 1403 1404 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 1405 gs.OutputTopology = gs_prog_data->output_topology; 1406 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length; 1407 gs.ControlDataFormat = gs_prog_data->control_data_format; 1408 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; 1409 gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1; 1410 gs.ReorderMode = TRAILING; 1411 1412#if GEN_GEN >= 8 1413 gs.ExpectedVertexCount = gs_prog_data->vertices_in; 1414 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0; 1415 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ? 
1416 gs_prog_data->static_vertex_count : 0; 1417#endif 1418 1419 gs.VertexURBEntryReadOffset = 0; 1420 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length; 1421 gs.DispatchGRFStartRegisterForURBData = 1422 gs_prog_data->base.base.dispatch_grf_start_reg; 1423 1424#if GEN_GEN >= 8 1425 gs.UserClipDistanceClipTestEnableBitmask = 1426 gs_prog_data->base.clip_distance_mask; 1427 gs.UserClipDistanceCullTestEnableBitmask = 1428 gs_prog_data->base.cull_distance_mask; 1429#endif 1430 1431 gs.PerThreadScratchSpace = get_scratch_space(gs_bin); 1432 gs.ScratchSpaceBasePointer = 1433 get_scratch_address(pipeline, MESA_SHADER_GEOMETRY, gs_bin); 1434 } 1435} 1436 1437static bool 1438has_color_buffer_write_enabled(const struct anv_pipeline *pipeline, 1439 const VkPipelineColorBlendStateCreateInfo *blend) 1440{ 1441 const struct anv_shader_bin *shader_bin = 1442 pipeline->shaders[MESA_SHADER_FRAGMENT]; 1443 if (!shader_bin) 1444 return false; 1445 1446 const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map; 1447 for (int i = 0; i < bind_map->surface_count; i++) { 1448 struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i]; 1449 1450 if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) 1451 continue; 1452 1453 if (binding->index == UINT32_MAX) 1454 continue; 1455 1456 if (blend && blend->pAttachments[binding->index].colorWriteMask != 0) 1457 return true; 1458 } 1459 1460 return false; 1461} 1462 1463static void 1464emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass, 1465 const VkPipelineColorBlendStateCreateInfo *blend, 1466 const VkPipelineMultisampleStateCreateInfo *multisample) 1467{ 1468 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1469 1470 MAYBE_UNUSED uint32_t samples = 1471 multisample ? multisample->rasterizationSamples : 1; 1472 1473 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { 1474 wm.StatisticsEnable = true; 1475 wm.LineEndCapAntialiasingRegionWidth = _05pixels; 1476 wm.LineAntialiasingRegionWidth = _10pixels; 1477 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 1478 1479 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 1480 if (wm_prog_data->early_fragment_tests) { 1481 wm.EarlyDepthStencilControl = EDSC_PREPS; 1482 } else if (wm_prog_data->has_side_effects) { 1483 wm.EarlyDepthStencilControl = EDSC_PSEXEC; 1484 } else { 1485 wm.EarlyDepthStencilControl = EDSC_NORMAL; 1486 } 1487 1488#if GEN_GEN >= 8 1489 /* Gen8 hardware tries to compute ThreadDispatchEnable for us but 1490 * doesn't take into account KillPixels when no depth or stencil 1491 * writes are enabled. In order for occlusion queries to work 1492 * correctly with no attachments, we need to force-enable PS thread 1493 * dispatch. 1494 * 1495 * The BDW docs are pretty clear that this bit isn't validated 1496 * and probably shouldn't be used in production: 1497 * 1498 * "This must always be set to Normal. This field should not be 1499 * tested for functional validation." 1500 * 1501 * Unfortunately, however, the other mechanism we have for doing this 1502 * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW. 1503 * Given two bad options, we choose the one which works.
1504 */ 1505 if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && 1506 !has_color_buffer_write_enabled(pipeline, blend)) 1507 wm.ForceThreadDispatchEnable = ForceON; 1508#endif 1509 1510 wm.BarycentricInterpolationMode = 1511 wm_prog_data->barycentric_interp_modes; 1512 1513#if GEN_GEN < 8 1514 wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 1515 wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 1516 wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 1517 wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 1518 1519 /* If the subpass has a depth or stencil self-dependency, then we 1520 * need to force the hardware to do the depth/stencil write *after* 1521 * fragment shader execution. Otherwise, the writes may hit memory 1522 * before we get around to fetching from the input attachment and we 1523 * may get the depth or stencil value from the current draw rather 1524 * than the previous one. 1525 */ 1526 wm.PixelShaderKillsPixel = subpass->has_ds_self_dep || 1527 wm_prog_data->uses_kill; 1528 1529 if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF || 1530 wm_prog_data->has_side_effects || 1531 wm.PixelShaderKillsPixel || 1532 has_color_buffer_write_enabled(pipeline, blend)) 1533 wm.ThreadDispatchEnable = true; 1534 1535 if (samples > 1) { 1536 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 1537 if (wm_prog_data->persample_dispatch) { 1538 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 1539 } else { 1540 wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; 1541 } 1542 } else { 1543 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 1544 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 1545 } 1546#endif 1547 } 1548 } 1549} 1550 1551static void 1552emit_3dstate_ps(struct anv_pipeline *pipeline, 1553 const VkPipelineColorBlendStateCreateInfo *blend, 1554 const VkPipelineMultisampleStateCreateInfo *multisample) 1555{ 1556 MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info; 1557 const struct anv_shader_bin *fs_bin = 1558 pipeline->shaders[MESA_SHADER_FRAGMENT]; 1559 1560 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 1561 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { 1562#if GEN_GEN == 7 1563 /* Even if no fragments are ever dispatched, gen7 hardware hangs if 1564 * we don't at least set the maximum number of threads. 1565 */ 1566 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 1567#endif 1568 } 1569 return; 1570 } 1571 1572 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1573 1574#if GEN_GEN < 8 1575 /* The hardware wedges if you have this bit set but don't turn on any dual 1576 * source blend factors. 
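    *
    * The loop below therefore sets dual_src_blend only when at least one
    * enabled attachment actually uses a dual-source factor;
    * is_dual_src_blend_factor() is expected to match the
    * VK_BLEND_FACTOR_SRC1_* family of factors.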
1577 */ 1578 bool dual_src_blend = false; 1579 if (wm_prog_data->dual_src_blend && blend) { 1580 for (uint32_t i = 0; i < blend->attachmentCount; i++) { 1581 const VkPipelineColorBlendAttachmentState *bstate = 1582 &blend->pAttachments[i]; 1583 1584 if (bstate->blendEnable && 1585 (is_dual_src_blend_factor(bstate->srcColorBlendFactor) || 1586 is_dual_src_blend_factor(bstate->dstColorBlendFactor) || 1587 is_dual_src_blend_factor(bstate->srcAlphaBlendFactor) || 1588 is_dual_src_blend_factor(bstate->dstAlphaBlendFactor))) { 1589 dual_src_blend = true; 1590 break; 1591 } 1592 } 1593 } 1594#endif 1595 1596 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { 1597 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; 1598 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; 1599 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; 1600 1601 /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: 1602 * 1603 * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 1604 * Dispatch must not be enabled for PER_PIXEL dispatch mode." 1605 * 1606 * Since 16x MSAA is first introduced on SKL, we don't need to apply 1607 * the workaround on any older hardware. 1608 */ 1609 if (GEN_GEN >= 9 && !wm_prog_data->persample_dispatch && 1610 multisample && multisample->rasterizationSamples == 16) { 1611 assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); 1612 ps._32PixelDispatchEnable = false; 1613 } 1614 1615 ps.KernelStartPointer0 = fs_bin->kernel.offset + 1616 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); 1617 ps.KernelStartPointer1 = fs_bin->kernel.offset + 1618 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); 1619 ps.KernelStartPointer2 = fs_bin->kernel.offset + 1620 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); 1621 1622 ps.SingleProgramFlow = false; 1623 ps.VectorMaskEnable = true; 1624 ps.SamplerCount = get_sampler_count(fs_bin); 1625 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ 1626 ps.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(fs_bin); 1627 ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || 1628 wm_prog_data->base.ubo_ranges[0].length; 1629 ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 1630 POSOFFSET_SAMPLE: POSOFFSET_NONE; 1631#if GEN_GEN < 8 1632 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; 1633 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 1634 ps.DualSourceBlendEnable = dual_src_blend; 1635#endif 1636 1637#if GEN_IS_HASWELL 1638 /* Haswell requires the sample mask to be set in this packet as well 1639 * as in 3DSTATE_SAMPLE_MASK; the values should match. 
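       *
       * 0xff has all eight sample bits set; eight is the highest multisample
       * count Haswell supports.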
1640 */ 1641 ps.SampleMask = 0xff; 1642#endif 1643 1644#if GEN_GEN >= 9 1645 ps.MaximumNumberofThreadsPerPSD = 64 - 1; 1646#elif GEN_GEN >= 8 1647 ps.MaximumNumberofThreadsPerPSD = 64 - 2; 1648#else 1649 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 1650#endif 1651 1652 ps.DispatchGRFStartRegisterForConstantSetupData0 = 1653 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 1654 ps.DispatchGRFStartRegisterForConstantSetupData1 = 1655 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); 1656 ps.DispatchGRFStartRegisterForConstantSetupData2 = 1657 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); 1658 1659 ps.PerThreadScratchSpace = get_scratch_space(fs_bin); 1660 ps.ScratchSpaceBasePointer = 1661 get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin); 1662 } 1663} 1664 1665#if GEN_GEN >= 8 1666static void 1667emit_3dstate_ps_extra(struct anv_pipeline *pipeline, 1668 struct anv_subpass *subpass, 1669 const VkPipelineColorBlendStateCreateInfo *blend) 1670{ 1671 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1672 1673 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 1674 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps); 1675 return; 1676 } 1677 1678 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) { 1679 ps.PixelShaderValid = true; 1680 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; 1681 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 1682 ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; 1683 ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 1684 ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 1685 ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 1686 1687 /* If the subpass has a depth or stencil self-dependency, then we need 1688 * to force the hardware to do the depth/stencil write *after* fragment 1689 * shader execution. Otherwise, the writes may hit memory before we get 1690 * around to fetching from the input attachment and we may get the depth 1691 * or stencil value from the current draw rather than the previous one. 
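      *
      * This mirrors the PixelShaderKillsPixel handling in emit_3dstate_wm()
      * above: pre-gen8 the bit lives in 3DSTATE_WM, while on gen8+ it is
      * programmed here in 3DSTATE_PS_EXTRA.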
1692 */ 1693 ps.PixelShaderKillsPixel = subpass->has_ds_self_dep || 1694 wm_prog_data->uses_kill; 1695 1696#if GEN_GEN >= 9 1697 ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil; 1698 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary; 1699 1700 ps.InputCoverageMaskState = ICMS_NONE; 1701 if (wm_prog_data->uses_sample_mask) { 1702 if (wm_prog_data->post_depth_coverage) 1703 ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; 1704 else 1705 ps.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE; 1706 } 1707#else 1708 ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 1709#endif 1710 } 1711} 1712 1713static void 1714emit_3dstate_vf_topology(struct anv_pipeline *pipeline) 1715{ 1716 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { 1717 vft.PrimitiveTopologyType = pipeline->topology; 1718 } 1719} 1720#endif 1721 1722static void 1723emit_3dstate_vf_statistics(struct anv_pipeline *pipeline) 1724{ 1725 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), vfs) { 1726 vfs.StatisticsEnable = true; 1727 } 1728} 1729 1730static void 1731compute_kill_pixel(struct anv_pipeline *pipeline, 1732 const VkPipelineMultisampleStateCreateInfo *ms_info, 1733 const struct anv_subpass *subpass) 1734{ 1735 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 1736 pipeline->kill_pixel = false; 1737 return; 1738 } 1739 1740 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 1741 1742 /* This computes the KillPixel portion of the computation for whether or 1743 * not we want to enable the PMA fix on gen8 or gen9. It's given by this 1744 * chunk of the giant formula: 1745 * 1746 * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 1747 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 1748 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 1749 * 3DSTATE_PS_BLEND::AlphaTestEnable || 1750 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 1751 * 1752 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is 1753 * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept 1754 * of an alpha test. 
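    *
    * What remains, and what the assignment below evaluates, is effectively:
    *
    *    kill_pixel = has_ds_self_dep || uses_kill || uses_omask ||
    *                 (ms_info && ms_info->alphaToCoverageEnable)
    *
    * For example, a fragment shader that uses discard but writes no sample
    * mask and has alpha-to-coverage disabled still yields kill_pixel = true.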
1755 */ 1756 pipeline->kill_pixel = 1757 subpass->has_ds_self_dep || wm_prog_data->uses_kill || 1758 wm_prog_data->uses_omask || 1759 (ms_info && ms_info->alphaToCoverageEnable); 1760} 1761 1762static VkResult 1763genX(graphics_pipeline_create)( 1764 VkDevice _device, 1765 struct anv_pipeline_cache * cache, 1766 const VkGraphicsPipelineCreateInfo* pCreateInfo, 1767 const VkAllocationCallbacks* pAllocator, 1768 VkPipeline* pPipeline) 1769{ 1770 ANV_FROM_HANDLE(anv_device, device, _device); 1771 ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); 1772 struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 1773 struct anv_pipeline *pipeline; 1774 VkResult result; 1775 1776 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); 1777 1778 /* Use the default pipeline cache if none is specified */ 1779 if (cache == NULL && device->instance->pipeline_cache_enabled) 1780 cache = &device->default_pipeline_cache; 1781 1782 pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, 1783 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1784 if (pipeline == NULL) 1785 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1786 1787 result = anv_pipeline_init(pipeline, device, cache, 1788 pCreateInfo, pAllocator); 1789 if (result != VK_SUCCESS) { 1790 vk_free2(&device->alloc, pAllocator, pipeline); 1791 return result; 1792 } 1793 1794 assert(pCreateInfo->pVertexInputState); 1795 emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); 1796 assert(pCreateInfo->pRasterizationState); 1797 emit_rs_state(pipeline, pCreateInfo->pRasterizationState, 1798 pCreateInfo->pMultisampleState, pass, subpass); 1799 emit_ms_state(pipeline, pCreateInfo->pMultisampleState); 1800 emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass); 1801 emit_cb_state(pipeline, pCreateInfo->pColorBlendState, 1802 pCreateInfo->pMultisampleState); 1803 compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, subpass); 1804 1805 emit_urb_setup(pipeline); 1806 1807 emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, 1808 pCreateInfo->pRasterizationState); 1809 emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState); 1810 1811#if 0 1812 /* From gen7_vs_state.c */ 1813 1814 /** 1815 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > 1816 * Geometry > Geometry Shader > State: 1817 * 1818 * "Note: Because of corruption in IVB:GT2, software needs to flush the 1819 * whole fixed function pipeline when the GS enable changes value in 1820 * the 3DSTATE_GS." 1821 * 1822 * The hardware architects have clarified that in this context "flush the 1823 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS 1824 * Stall" bit set. 
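    *
    * Note that this whole block is compiled out by the #if 0 above; it is
    * retained as a reference from the GL driver's gen7_vs_state.c rather than
    * executed here.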
1825 */ 1826 if (!device->info.is_haswell && !device->info.is_baytrail) 1827 gen7_emit_vs_workaround_flush(brw); 1828#endif 1829 1830 emit_3dstate_vs(pipeline); 1831 emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState); 1832 emit_3dstate_gs(pipeline); 1833 emit_3dstate_sbe(pipeline); 1834 emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState, 1835 pCreateInfo->pMultisampleState); 1836 emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState, 1837 pCreateInfo->pMultisampleState); 1838#if GEN_GEN >= 8 1839 emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState); 1840 emit_3dstate_vf_topology(pipeline); 1841#endif 1842 emit_3dstate_vf_statistics(pipeline); 1843 1844 *pPipeline = anv_pipeline_to_handle(pipeline); 1845 1846 return pipeline->batch.status; 1847} 1848 1849static VkResult 1850compute_pipeline_create( 1851 VkDevice _device, 1852 struct anv_pipeline_cache * cache, 1853 const VkComputePipelineCreateInfo* pCreateInfo, 1854 const VkAllocationCallbacks* pAllocator, 1855 VkPipeline* pPipeline) 1856{ 1857 ANV_FROM_HANDLE(anv_device, device, _device); 1858 const struct anv_physical_device *physical_device = 1859 &device->instance->physicalDevice; 1860 const struct gen_device_info *devinfo = &physical_device->info; 1861 struct anv_pipeline *pipeline; 1862 VkResult result; 1863 1864 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); 1865 1866 /* Use the default pipeline cache if none is specified */ 1867 if (cache == NULL && device->instance->pipeline_cache_enabled) 1868 cache = &device->default_pipeline_cache; 1869 1870 pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, 1871 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1872 if (pipeline == NULL) 1873 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1874 1875 pipeline->device = device; 1876 1877 pipeline->blend_state.map = NULL; 1878 1879 result = anv_reloc_list_init(&pipeline->batch_relocs, 1880 pAllocator ? pAllocator : &device->alloc); 1881 if (result != VK_SUCCESS) { 1882 vk_free2(&device->alloc, pAllocator, pipeline); 1883 return result; 1884 } 1885 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; 1886 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); 1887 pipeline->batch.relocs = &pipeline->batch_relocs; 1888 pipeline->batch.status = VK_SUCCESS; 1889 1890 /* When we free the pipeline, we detect stages based on the NULL status 1891 * of various prog_data pointers. Make them NULL by default. 
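    *
    * The memset below zeroes the entire pipeline->shaders array, so every
    * MESA_SHADER_* slot starts out NULL even though only the compute stage
    * will be filled in.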
1892 */ 1893 memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); 1894 1895 pipeline->needs_data_cache = false; 1896 1897 assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); 1898 pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 1899 ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); 1900 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, 1901 pCreateInfo->stage.pName, 1902 pCreateInfo->stage.pSpecializationInfo); 1903 if (result != VK_SUCCESS) { 1904 vk_free2(&device->alloc, pAllocator, pipeline); 1905 return result; 1906 } 1907 1908 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); 1909 1910 anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0); 1911 1912 uint32_t group_size = cs_prog_data->local_size[0] * 1913 cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; 1914 uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); 1915 1916 if (remainder > 0) 1917 pipeline->cs_right_mask = ~0u >> (32 - remainder); 1918 else 1919 pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); 1920 1921 const uint32_t vfe_curbe_allocation = 1922 ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads + 1923 cs_prog_data->push.cross_thread.regs, 2); 1924 1925 const uint32_t subslices = MAX2(physical_device->subslice_total, 1); 1926 1927 const struct anv_shader_bin *cs_bin = 1928 pipeline->shaders[MESA_SHADER_COMPUTE]; 1929 1930 anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) { 1931#if GEN_GEN > 7 1932 vfe.StackSize = 0; 1933#else 1934 vfe.GPGPUMode = true; 1935#endif 1936 vfe.MaximumNumberofThreads = 1937 devinfo->max_cs_threads * subslices - 1; 1938 vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2; 1939#if GEN_GEN < 11 1940 vfe.ResetGatewayTimer = true; 1941#endif 1942#if GEN_GEN <= 8 1943 vfe.BypassGatewayControl = true; 1944#endif 1945 vfe.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2; 1946 vfe.CURBEAllocationSize = vfe_curbe_allocation; 1947 1948 vfe.PerThreadScratchSpace = get_scratch_space(cs_bin); 1949 vfe.ScratchSpaceBasePointer = 1950 get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin); 1951 } 1952 1953 struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { 1954 .KernelStartPointer = cs_bin->kernel.offset, 1955 1956 .SamplerCount = get_sampler_count(cs_bin), 1957 /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ 1958 .BindingTableEntryCount = GEN_GEN == 11 ? 
                                0 : get_binding_table_entry_count(cs_bin),
1959      .BarrierEnable = cs_prog_data->uses_barrier,
1960      .SharedLocalMemorySize =
1961         encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared),
1962
1963#if !GEN_IS_HASWELL
1964      .ConstantURBEntryReadOffset = 0,
1965#endif
1966      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
1967#if GEN_GEN >= 8 || GEN_IS_HASWELL
1968      .CrossThreadConstantDataReadLength =
1969         cs_prog_data->push.cross_thread.regs,
1970#endif
1971
1972      .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
1973   };
1974   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
1975                                        pipeline->interface_descriptor_data,
1976                                        &desc);
1977
1978   *pPipeline = anv_pipeline_to_handle(pipeline);
1979
1980   return pipeline->batch.status;
1981}
1982
1983VkResult genX(CreateGraphicsPipelines)(
1984    VkDevice                                    _device,
1985    VkPipelineCache                             pipelineCache,
1986    uint32_t                                    count,
1987    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
1988    const VkAllocationCallbacks*                pAllocator,
1989    VkPipeline*                                 pPipelines)
1990{
1991   ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
1992
1993   VkResult result = VK_SUCCESS;
1994
1995   unsigned i;
1996   for (i = 0; i < count; i++) {
1997      result = genX(graphics_pipeline_create)(_device,
1998                                              pipeline_cache,
1999                                              &pCreateInfos[i],
2000                                              pAllocator, &pPipelines[i]);
2001
2002      /* Bail out on the first error as it is not obvious which error should
2003       * be reported when two different failures occur. */
2004      if (result != VK_SUCCESS)
2005         break;
2006   }
2007
2008   for (; i < count; i++)
2009      pPipelines[i] = VK_NULL_HANDLE;
2010
2011   return result;
2012}
2013
2014VkResult genX(CreateComputePipelines)(
2015    VkDevice                                    _device,
2016    VkPipelineCache                             pipelineCache,
2017    uint32_t                                    count,
2018    const VkComputePipelineCreateInfo*          pCreateInfos,
2019    const VkAllocationCallbacks*                pAllocator,
2020    VkPipeline*                                 pPipelines)
2021{
2022   ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
2023
2024   VkResult result = VK_SUCCESS;
2025
2026   unsigned i;
2027   for (i = 0; i < count; i++) {
2028      result = compute_pipeline_create(_device, pipeline_cache,
2029                                       &pCreateInfos[i],
2030                                       pAllocator, &pPipelines[i]);
2031
2032      /* Bail out on the first error as it is not obvious which error should
2033       * be reported when two different failures occur. */
2034      if (result != VK_SUCCESS)
2035         break;
2036   }
2037
2038   for (; i < count; i++)
2039      pPipelines[i] = VK_NULL_HANDLE;
2040
2041   return result;
2042}
2043