/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/gen_rt_pack.h"

#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From the Broadwell spec, command reference structures, page 586:
    *
    *    "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *    64-bit components are stored in the URB without any conversion. In
    *    this case, vertex elements must be written as 128 or 256 bits, with
    *    VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *    R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *    Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *    set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *    Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *    a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *    Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *    256-bit vertex element."
    */
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk.
       */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware restrictions
       * (see comment above).
       */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const VkPipelineVertexInputStateCreateInfo *info)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

   uint32_t elem_count = __builtin_popcount(elements) -
      __builtin_popcount(elements_double) / 2;

   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->base.batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zeroes.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];
      enum isl_format format = anv_get_isl_format(&pipeline->base.device->info,
                                                  desc->format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);

      assert(desc->binding < MAX_VBS);

      if ((elements & (1 << desc->location)) == 0)
         continue; /* Binding unused */

      uint32_t slot =
         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << desc->location) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = desc->binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = desc->offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GFX_VER >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing. On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.InstancingEnable = pipeline->vb[desc->binding].instanced;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate =
            pipeline->vb[desc->binding].instance_divisor;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well. Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GFX_VER >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = id_slot;
      }
#endif
   }

#if GFX_VER >= 8
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = &device->info;

   unsigned entries[4];
   unsigned start[4];
   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        entry_size, entries, start, deref_block_size,
                        &constrained);

#if GFX_VERx10 == 70
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = device->workaround_address;
   }
#endif

   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = start[i];
         urb.VSURBEntryAllocationSize = entry_size[i] - 1;
         urb.VSNumberofURBEntries = entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct brw_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = true,
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GFX_VER >= 9
   for (unsigned i = 0; i < 32; i++)
      sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif

#if GFX_VER >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_map *fs_input_map =
         &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

      int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                           fs_input_map);
      assert(first_slot % 2 == 0);
      unsigned urb_entry_read_offset = first_slot / 2;
      int max_source_attr = 0;
      for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
         uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
         int input_index = wm_prog_data->urb_setup[attr];

         assert(0 <= input_index);

         /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
          * VUE header
          */
         if (attr == VARYING_SLOT_VIEWPORT ||
             attr == VARYING_SLOT_LAYER ||
             attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            continue;
         }

         if (attr == VARYING_SLOT_PNTC) {
            sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
            continue;
         }

         const int slot = fs_input_map->varying_to_slot[attr];

         if (slot == -1) {
            /* This attribute does not exist in the VUE--that means that the
             * vertex shader did not write to it. It could be that it's a
             * regular varying read by the fragment shader but not written by
             * the vertex shader, or it's gl_PrimitiveID.
             * In the first case the value is undefined, in the second it
             * needs to be gl_PrimitiveID.
             */
            swiz.Attribute[input_index].ConstantSource = PRIM_ID;
            swiz.Attribute[input_index].ComponentOverrideX = true;
            swiz.Attribute[input_index].ComponentOverrideY = true;
            swiz.Attribute[input_index].ComponentOverrideZ = true;
            swiz.Attribute[input_index].ComponentOverrideW = true;
            continue;
         }

         /* We have to subtract two slots to account for the URB entry output
          * read offset in the VS and GS stages.
          */
         const int source_attr = slot - 2 * urb_entry_read_offset;
         assert(source_attr >= 0 && source_attr < 32);
         max_source_attr = MAX2(max_source_attr, source_attr);

         /* The hardware can only apply overrides to the first 16 attributes;
          * the remaining (up to 16) have to be lined up so that the input
          * index equals the output index. We'll need to do some tweaking to
          * make sure that's the case.
          */
         if (input_index < 16)
            swiz.Attribute[input_index].SourceAttribute = source_attr;
         else
            assert(source_attr == input_index);
      }

      sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
      sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
      sbe.ForceVertexURBEntryReadOffset = true;
      sbe.ForceVertexURBEntryReadLength = true;
#endif
   }

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

#if GFX_VER >= 8
   dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case BRW_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case BRW_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported TCS output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return pipeline->polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
                            VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      switch (pipeline->line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         return MSRASTMODE_ON_PATTERN;

      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
         return MSRASTMODE_OFF_PIXEL;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      return pipeline->rasterization_samples > 1 ?
             MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
   }
#else
   unreachable("Only on gen7");
#endif
}

static VkProvokingVertexModeEXT
vk_provoking_vertex_mode(const VkPipelineRasterizationStateCreateInfo *rs_info)
{
   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info =
      vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);

   return rs_pv_info == NULL ?
          VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT :
          rs_pv_info->provokingVertexMode;
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE]           = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]      = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]       = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL]  = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]  = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE]         = 0
};

#if GFX_VER >= 9
static VkConservativeRasterizationModeEXT
vk_conservative_rasterization_mode(const VkPipelineRasterizationStateCreateInfo *rs_info)
{
   const VkPipelineRasterizationConservativeStateCreateInfoEXT *cr =
      vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);

   return cr ? cr->conservativeRasterizationMode :
               VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
}
#endif

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful. Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API. Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX100;
#if GFX_VER <= 9
         /* Prior to ICL, the algorithm the HW uses to draw wide lines
          * doesn't quite match what the CTS expects, at least for rectangular
          * lines, so we set this to false here, making it draw parallelograms
          * instead, which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
#else
         *msaa_rasterization_enable = true;
#endif
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX100;
      *msaa_rasterization_enable = true;
   }
#else
   unreachable("Invalid call");
#endif
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const VkPipelineInputAssemblyStateCreateInfo *ia_info,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
              const uint32_t dynamic_states,
              const struct anv_render_pass *pass,
              const struct anv_subpass *subpass,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

   switch (vk_provoking_vertex_mode(rs_info)) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 0;
      sf.LineStripListProvokingVertexSelect = 0;
      sf.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 2;
      sf.LineStripListProvokingVertexSelect = 1;
      sf.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

#if GFX_VERx10 == 75
   sf.LineStippleEnable = line_info && line_info->stippledLineEnable;
#endif

#if GFX_VER >= 12
   sf.DerefBlockSize = urb_deref_block_size;
#endif

   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_prog_data *last_vue_prog_data =
         anv_pipeline_get_last_vue_prog_data(pipeline);

      if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
         sf.PointWidthSource = Vertex;
      } else {
         sf.PointWidthSource = State;
         sf.PointWidth = 1.0;
      }
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   VkPolygonMode raster_mode =
      genX(raster_polygon_mode)(pipeline, ia_info->topology);
   bool dynamic_primitive_topology =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GFX_VER >= 8
   if (!dynamic_primitive_topology)
      genX(rasterization_mode)(raster_mode, pipeline->line_mode,
                               rs_info->lineWidth,
                               &raster.APIMode,
                               &raster.DXMultisampleRasterizationEnable);

   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations. If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#else
   uint32_t ms_rast_mode = 0;

   if (!dynamic_primitive_topology)
      ms_rast_mode = genX(ms_rasterization_mode)(pipeline, raster_mode);

   raster.MultisampleRasterizationMode = ms_rast_mode;
#endif

   raster.AntialiasingEnable =
      dynamic_primitive_topology ? 0 :
      anv_rasterization_aa_mode(raster_mode, pipeline->line_mode);

   raster.FrontWinding =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE ?
      0 : genX(vk_to_intel_front_face)[rs_info->frontFace];
   raster.CullMode =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE ?
      0 : genX(vk_to_intel_cullmode)[rs_info->cullMode];

   raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs_info->polygonMode];
   raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs_info->polygonMode];
   raster.ScissorRectangleEnable = true;

#if GFX_VER >= 9
   /* GFX9+ splits ViewportZClipTestEnable into near and far enable bits */
   raster.ViewportZFarClipTestEnable = pipeline->depth_clip_enable;
   raster.ViewportZNearClipTestEnable = pipeline->depth_clip_enable;
#elif GFX_VER >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

#if GFX_VER >= 9
   raster.ConservativeRasterizationEnable =
      vk_conservative_rasterization_mode(rs_info) !=
         VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
#endif

   bool depth_bias_enable =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE ?
      0 : rs_info->depthBiasEnable;

   raster.GlobalDepthOffsetEnableSolid = depth_bias_enable;
   raster.GlobalDepthOffsetEnableWireframe = depth_bias_enable;
   raster.GlobalDepthOffsetEnablePoint = depth_bias_enable;

#if GFX_VER == 7
   /* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (subpass->depth_stencil_attachment) {
      VkFormat vk_format =
         pass->attachments[subpass->depth_stencil_attachment->attachment].format;
      assert(vk_format_is_depth_or_stencil(vk_format));
      if (vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_DEPTH_BIT) {
         enum isl_format isl_format =
            anv_get_isl_format(&pipeline->base.device->info, vk_format,
                               VK_IMAGE_ASPECT_DEPTH_BIT,
                               VK_IMAGE_TILING_OPTIMAL);
         sf.DepthBufferSurfaceFormat =
            isl_format_get_depth_format(isl_format, false);
      }
   }
#endif

#if GFX_VER >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const VkPipelineMultisampleStateCreateInfo *info,
              uint32_t dynamic_states)
{
   /* Only look up locations if the extension is active, otherwise the default
    * ones will be used either at device initialization time or through
    * 3DSTATE_MULTISAMPLE on Gfx7/7.5 by passing NULL locations.
    */
   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations) {
      /* If the sample locations are dynamic, 3DSTATE_MULTISAMPLE on Gfx7/7.5
       * will be emitted dynamically, so skip it here. On Gfx8+
       * 3DSTATE_SAMPLE_PATTERN will be emitted dynamically, so skip it here.
       */
      if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)) {
#if GFX_VER >= 8
         genX(emit_sample_pattern)(&pipeline->base.batch,
                                   pipeline->dynamic_state.sample_locations.samples,
                                   pipeline->dynamic_state.sample_locations.locations);
#endif
      }

      genX(emit_multisample)(&pipeline->base.batch,
                             pipeline->dynamic_state.sample_locations.samples,
                             pipeline->dynamic_state.sample_locations.locations);
   } else {
      /* On Gfx8+ 3DSTATE_MULTISAMPLE does not hold anything we need to modify
       * for sample locations, so we don't have to emit it dynamically.
       */
#if GFX_VER >= 8
      genX(emit_multisample)(&pipeline->base.batch,
                             info ? info->rasterizationSamples : 1,
                             NULL);
#endif
   }

   /* From the Vulkan 1.0 spec:
    *
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GFX_VER >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif

   if (info && info->pSampleMask)
      sample_mask &= info->pSampleMask[0];

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }

   pipeline->cps_state = ANV_STATE_NULL;
#if GFX_VER >= 11
   if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) &&
       pipeline->base.device->vk.enabled_extensions.KHR_fragment_shading_rate) {
#if GFX_VER >= 12
      struct anv_device *device = pipeline->base.device;
      const uint32_t num_dwords =
         GENX(CPS_STATE_length) * 4 * pipeline->dynamic_state.viewport.count;
      pipeline->cps_state =
         anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords, 32);
#endif

      genX(emit_shading_rate)(&pipeline->base.batch,
                              pipeline,
                              pipeline->cps_state,
                              &pipeline->dynamic_state);
   }
#endif
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY]          = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]         = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]           = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]   = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]  = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]         = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]           = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]            = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]           = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]    = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]        = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]    = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]   = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]          = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]           = LOGICOP_SET,
};

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                     = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                      = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]                = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]      = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]                = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]      = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]                = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]      = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]                = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]      = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]           = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]           = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]       = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]               = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]     = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]               = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]     = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD]              = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]         = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]              = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]              = BLENDFUNCTION_MAX,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER]            = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS]             = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL]            = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]    = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER]          = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL]        = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS]           = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP]                = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]             = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]              = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]  = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]  = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

/* This function sanitizes the VkStencilOpState by looking at the compare ops
 * and trying to determine whether or not a given stencil op can ever actually
 * occur. Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
 * This function returns true if, after sanitization, any of the stencil ops
 * are set to something other than VK_STENCIL_OP_KEEP.
 */
static bool
sanitize_stencil_face(VkStencilOpState *face,
                      VkCompareOp depthCompareOp)
{
   /* If compareOp is ALWAYS then the stencil test will never fail and failOp
    * will never happen. Set failOp to KEEP in this case.
    */
   if (face->compareOp == VK_COMPARE_OP_ALWAYS)
      face->failOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth
    * or stencil tests will fail and passOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_NEVER)
      face->passOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is ALWAYS then either the
    * stencil test will fail or the depth test will pass. In either case,
    * depthFailOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_ALWAYS)
      face->depthFailOp = VK_STENCIL_OP_KEEP;

   return face->failOp != VK_STENCIL_OP_KEEP ||
          face->depthFailOp != VK_STENCIL_OP_KEEP ||
          face->passOp != VK_STENCIL_OP_KEEP;
}

/* Intel hardware is fairly sensitive to whether or not depth/stencil writes
 * are enabled. In the presence of discards, it's fairly easy to get into the
 * non-promoted case which means a fairly big performance hit. From the Iron
 * Lake PRM, Vol 2, pt. 1, section 8.4.3.2, "Early Depth Test Cases":
 *
 *    "Non-promoted depth (N) is active whenever the depth test can be done
 *    early but it cannot determine whether or not to write source depth to
 *    the depth buffer, therefore the depth write must be performed post pixel
 *    shader. This includes cases where the pixel shader can kill pixels,
 *    including via sampler chroma key, as well as cases where the alpha test
 *    function is enabled, which kills pixels based on a programmable alpha
 *    test. In this case, even if the depth test fails, the pixel cannot be
 *    killed if a stencil write is indicated. Whether or not the stencil write
 *    happens depends on whether or not the pixel is killed later. In these
 *    cases if stencil test fails and stencil writes are off, the pixels can
 *    also be killed early. If stencil writes are enabled, the pixels must be
 *    treated as Computed depth (described above)."
 *
 * The same thing as mentioned in the stencil case can happen in the depth
 * case as well if it thinks it writes depth but, thanks to the depth test
 * being GL_EQUAL, the write doesn't actually matter. A little extra work
 * up-front to try and disable depth and stencil writes can make a big
 * difference.
 *
 * Unfortunately, the way depth and stencil testing is specified, there are
 * many cases where, regardless of depth/stencil writes being enabled, nothing
 * actually gets written due to some other bit of state being set. This
 * function attempts to "sanitize" the depth stencil state and disable writes
 * and sometimes even testing whenever possible.
 */
static void
sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state,
                  bool *stencilWriteEnable,
                  VkImageAspectFlags ds_aspects)
{
   *stencilWriteEnable = state->stencilTestEnable;

   /* If the depth test is disabled, we won't be writing anything. Make sure
    * we treat the test as always passing later on as well.
    *
    * Also, the Vulkan spec requires that if either depth or stencil is not
    * present, the pipeline is to act as if the test silently passes. In that
    * case we won't write either.
    */
   if (!state->depthTestEnable || !(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
      state->depthWriteEnable = false;
      state->depthCompareOp = VK_COMPARE_OP_ALWAYS;
   }

   if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) {
      *stencilWriteEnable = false;
      state->front.compareOp = VK_COMPARE_OP_ALWAYS;
      state->back.compareOp = VK_COMPARE_OP_ALWAYS;
   }

   /* If the stencil test is enabled and always fails, then we will never get
    * to the depth test so we can just disable the depth test entirely.
    */
   if (state->stencilTestEnable &&
       state->front.compareOp == VK_COMPARE_OP_NEVER &&
       state->back.compareOp == VK_COMPARE_OP_NEVER) {
      state->depthTestEnable = false;
      state->depthWriteEnable = false;
   }

   /* If depthCompareOp is EQUAL then the value we would be writing to the
    * depth buffer is the same as the value that's already there so there's no
    * point in writing it.
    */
   if (state->depthCompareOp == VK_COMPARE_OP_EQUAL)
      state->depthWriteEnable = false;

   /* If the stencil ops are such that we don't actually ever modify the
    * stencil buffer, we should disable writes.
    */
   if (!sanitize_stencil_face(&state->front, state->depthCompareOp) &&
       !sanitize_stencil_face(&state->back, state->depthCompareOp))
      *stencilWriteEnable = false;

   /* If the depth test always passes and we never write out depth, that's the
    * same as if the depth test is disabled entirely.
    */
   if (state->depthCompareOp == VK_COMPARE_OP_ALWAYS &&
       !state->depthWriteEnable)
      state->depthTestEnable = false;

   /* If the stencil test always passes and we never write out stencil, that's
    * the same as if the stencil test is disabled entirely.
    */
   if (state->front.compareOp == VK_COMPARE_OP_ALWAYS &&
       state->back.compareOp == VK_COMPARE_OP_ALWAYS &&
       !*stencilWriteEnable)
      state->stencilTestEnable = false;
}

static void
emit_ds_state(struct anv_graphics_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *pCreateInfo,
              const uint32_t dynamic_states,
              const struct anv_render_pass *pass,
              const struct anv_subpass *subpass)
{
#if GFX_VER == 7
#  define depth_stencil_dw pipeline->gfx7.depth_stencil_state
#elif GFX_VER == 8
#  define depth_stencil_dw pipeline->gfx8.wm_depth_stencil
#else
#  define depth_stencil_dw pipeline->gfx9.wm_depth_stencil
#endif

   if (pCreateInfo == NULL) {
      /* We're going to OR this together with the dynamic state. We need
       * to make sure it's initialized to something useful.
       */
      pipeline->writes_stencil = false;
      pipeline->stencil_test_enable = false;
      pipeline->writes_depth = false;
      pipeline->depth_test_enable = false;
      pipeline->depth_bounds_test_enable = false;
      memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw));
      return;
   }

   VkImageAspectFlags ds_aspects = 0;
   if (subpass->depth_stencil_attachment) {
      VkFormat depth_stencil_format =
         pass->attachments[subpass->depth_stencil_attachment->attachment].format;
      ds_aspects = vk_format_aspects(depth_stencil_format);
   }

   VkPipelineDepthStencilStateCreateInfo info = *pCreateInfo;
   sanitize_ds_state(&info, &pipeline->writes_stencil, ds_aspects);
   pipeline->stencil_test_enable = info.stencilTestEnable;
   pipeline->writes_depth = info.depthWriteEnable;
   pipeline->depth_test_enable = info.depthTestEnable;
   pipeline->depth_bounds_test_enable = info.depthBoundsTestEnable;

   bool dynamic_stencil_op =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;

#if GFX_VER <= 7
   struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
#else
   struct GENX(3DSTATE_WM_DEPTH_STENCIL) depth_stencil = {
#endif
      .DepthTestEnable =
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE ?
            0 : info.depthTestEnable,

      .DepthBufferWriteEnable =
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE ?
            0 : info.depthWriteEnable,

      .DepthTestFunction =
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP ?
            0 : genX(vk_to_intel_compare_op)[info.depthCompareOp],

      .DoubleSidedStencilEnable = true,

      .StencilTestEnable =
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE ?
            0 : info.stencilTestEnable,

      .StencilFailOp = genX(vk_to_intel_stencil_op)[info.front.failOp],
      .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[info.front.passOp],
      .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[info.front.depthFailOp],
      .StencilTestFunction = genX(vk_to_intel_compare_op)[info.front.compareOp],
      .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[info.back.failOp],
      .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[info.back.passOp],
      .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[info.back.depthFailOp],
      .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[info.back.compareOp],
   };

   if (dynamic_stencil_op) {
      depth_stencil.StencilFailOp = 0;
      depth_stencil.StencilPassDepthPassOp = 0;
      depth_stencil.StencilPassDepthFailOp = 0;
      depth_stencil.StencilTestFunction = 0;
      depth_stencil.BackfaceStencilFailOp = 0;
      depth_stencil.BackfaceStencilPassDepthPassOp = 0;
      depth_stencil.BackfaceStencilPassDepthFailOp = 0;
      depth_stencil.BackfaceStencilTestFunction = 0;
   }

#if GFX_VER <= 7
   GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
#else
   GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
#endif
}

static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline uint32_t *
write_disabled_blend(uint32_t *state)
{
   struct GENX(BLEND_STATE_ENTRY) entry = {
      .WriteDisableAlpha = true,
      .WriteDisableRed = true,
      .WriteDisableGreen = true,
      .WriteDisableBlue = true,
   };
   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
   return state + GENX(BLEND_STATE_ENTRY_length);
}

static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
              const VkPipelineColorBlendStateCreateInfo *info,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              uint32_t dynamic_states)
{
   struct anv_device *device = pipeline->base.device;
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }

   const uint32_t num_dwords = GENX(BLEND_STATE_length) +
      GENX(BLEND_STATE_ENTRY_length) * surface_count;
   uint32_t *blend_state_start, *state_pos;

   if (dynamic_states & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
                         ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP)) {
      const struct intel_device_info *devinfo = &pipeline->base.device->info;
      blend_state_start = devinfo->ver >= 8 ?
         pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
      pipeline->blend_state = ANV_STATE_NULL;
   } else {
      pipeline->blend_state =
         anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
      blend_state_start = pipeline->blend_state.map;
   }
   state_pos = blend_state_start;

   bool has_writeable_rt = false;
   state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < MAX_RTS);

      if (info == NULL || binding->index >= info->attachmentCount) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      if ((pipeline->dynamic_state.color_writes & (1u << binding->index)) == 0) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      const VkPipelineColorBlendAttachmentState *a =
         &info->pAttachments[binding->index];

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
         .LogicOpEnable = info->logicOpEnable,
         .LogicOpFunction = dynamic_states & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP ?
                            0 : genX(vk_to_intel_logic_op)[info->logicOp],

         /* Vulkan specification 1.2.168, VkLogicOp:
          *
          *    "Logical operations are controlled by the logicOpEnable and
          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
          *    logicOpEnable is VK_TRUE, then a logical operation selected by
          *    logicOp is applied between each color attachment and the
          *    fragment’s corresponding output value, and blending of all
          *    attachments is treated as if it were disabled."
          *
          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
          * BLEND_STATE_ENTRY:
          *
          *    "Enabling LogicOp and Color Buffer Blending at the same time is
          *    UNDEFINED"
          */
         .ColorBufferBlendEnable = !info->logicOpEnable && a->blendEnable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_intel_blend[a->srcColorBlendFactor],
         .DestinationBlendFactor = vk_to_intel_blend[a->dstColorBlendFactor],
         .ColorBlendFunction = vk_to_intel_blend_op[a->colorBlendOp],
         .SourceAlphaBlendFactor = vk_to_intel_blend[a->srcAlphaBlendFactor],
         .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dstAlphaBlendFactor],
         .AlphaBlendFunction = vk_to_intel_blend_op[a->alphaBlendOp],
         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
      };

      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
          a->colorBlendOp != a->alphaBlendOp) {
#if GFX_VER >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      /* The Dual Source Blending documentation says:
       *
       *    "If SRC1 is included in a src/dst blend factor and
       *    a DualSource RT Write message is not used, results
       *    are UNDEFINED. (This reflects the same restriction in DX APIs,
       *    where undefined results are produced if “o1” is not written
       *    by a PS – there are no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation
       * so we just disable the blending to prevent possible issues.
       */
      if (!wm_prog_data->dual_src_blend &&
          (is_dual_src_blend_factor(a->srcColorBlendFactor) ||
           is_dual_src_blend_factor(a->dstColorBlendFactor) ||
           is_dual_src_blend_factor(a->srcAlphaBlendFactor) ||
           is_dual_src_blend_factor(a->dstAlphaBlendFactor))) {
         vk_logw(VK_LOG_OBJS(&device->vk.base),
                 "Enabled dual-src blend factors without writing both targets "
                 "in the shader. Disabling blending to avoid GPU hangs.");
         entry.ColorBufferBlendEnable = false;
      }

      if (a->colorWriteMask != 0)
         has_writeable_rt = true;

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used. Technically, this means the
       * hardware can do MORE than GL or Vulkan specify. However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
          a->colorBlendOp == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
          a->alphaBlendOp == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_PS_BLEND) blend = {
      GENX(3DSTATE_PS_BLEND_header),
   };
   blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
   blend.HasWriteableRT = has_writeable_rt;
   blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
   blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
   blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
   blend.SourceBlendFactor = bs0.SourceBlendFactor;
   blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
   blend.AlphaTestEnable = false;
   blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;

   if (dynamic_states & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
                         ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP)) {
      GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
   } else {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), _blend)
         _blend = blend;
   }
#else
   (void)has_writeable_rt;
#endif

   GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);

   if (!(dynamic_states & (ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
                           ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP))) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer = pipeline->blend_state.offset;
#if GFX_VER >= 8
         bsp.BlendStatePointerValid = true;
#endif
      }
   }
}

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const VkPipelineInputAssemblyStateCreateInfo *ia_info,
                  const VkPipelineViewportStateCreateInfo *vp_info,
                  const VkPipelineRasterizationStateCreateInfo *rs_info,
                  const uint32_t dynamic_states)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable = true;
   clip.StatisticsEnable = true;
   clip.EarlyCullEnable = true;
   clip.APIMode = APIMODE_D3D;
   clip.GuardbandClipTestEnable = true;

   /* Only enable the XY clip test when the final polygon rasterization
    * mode is VK_POLYGON_MODE_FILL. We want to leave it disabled for
    * points and lines so we get "pop-free" clipping.
    */
   VkPolygonMode raster_mode =
      genX(raster_polygon_mode)(pipeline, ia_info->topology);
   clip.ViewportXYClipTestEnable =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ?
      0 : (raster_mode == VK_POLYGON_MODE_FILL);

#if GFX_VER >= 8
   clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
   clip.ClipMode = CLIPMODE_NORMAL;

   switch (vk_provoking_vertex_mode(rs_info)) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 2;
      clip.LineStripListProvokingVertexSelect = 1;
      clip.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;

   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_prog_data *last =
         anv_pipeline_get_last_vue_prog_data(pipeline);

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with
       *    ViewportIndex, then the first viewport is used."
       */
      if (vp_info && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
         clip.MaximumVPIndex = vp_info->viewportCount > 0 ?
                               vp_info->viewportCount - 1 : 0;
      } else {
         clip.MaximumVPIndex = 0;
      }

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with Layer, then
       *    the first layer is used."
       */
      clip.ForceZeroRTAIndexEnable =
         !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GFX_VER == 7
      clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
      clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
#endif
   }

#if GFX_VER == 7
   clip.FrontWinding = genX(vk_to_intel_front_face)[rs_info->frontFace];
   clip.CullMode = genX(vk_to_intel_cullmode)[rs_info->cullMode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#else
   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      (wm_prog_data->barycentric_interp_modes &
       BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0;
#endif

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const VkPipelineRasterizationStateCreateInfo *rs_info,
                       const uint32_t dynamic_states)
{
   const struct brw_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct brw_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

#if GFX_VER == 7
#  define streamout_state_dw pipeline->gfx7.streamout_state
#else
#  define streamout_state_dw pipeline->gfx8.streamout_state
#endif

   struct GENX(3DSTATE_STREAMOUT) so = {
      GENX(3DSTATE_STREAMOUT_header),
      .RenderingDisable =
         (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) ?
1536 0 : rs_info->rasterizerDiscardEnable, 1537 }; 1538 1539 if (xfb_info) { 1540 so.SOFunctionEnable = true; 1541 so.SOStatisticsEnable = true; 1542 1543 switch (vk_provoking_vertex_mode(rs_info)) { 1544 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: 1545 so.ReorderMode = LEADING; 1546 break; 1547 1548 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: 1549 so.ReorderMode = TRAILING; 1550 break; 1551 1552 default: 1553 unreachable("Invalid provoking vertex mode"); 1554 } 1555 1556 const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = 1557 vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); 1558 so.RenderStreamSelect = stream_info ? 1559 stream_info->rasterizationStream : 0; 1560 1561#if GFX_VER >= 8 1562 so.Buffer0SurfacePitch = xfb_info->buffers[0].stride; 1563 so.Buffer1SurfacePitch = xfb_info->buffers[1].stride; 1564 so.Buffer2SurfacePitch = xfb_info->buffers[2].stride; 1565 so.Buffer3SurfacePitch = xfb_info->buffers[3].stride; 1566#else 1567 pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride; 1568 pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride; 1569 pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride; 1570 pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride; 1571 1572 /* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which 1573 * is a bit inconvenient because we don't know what buffers will 1574 * actually be enabled until draw time. We do our best here by 1575 * setting them based on buffers_written and we disable them 1576 * as-needed at draw time by setting EndAddress = BaseAddress. 1577 */ 1578 so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0); 1579 so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1); 1580 so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2); 1581 so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3); 1582#endif 1583 1584 int urb_entry_read_offset = 0; 1585 int urb_entry_read_length = 1586 (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset; 1587 1588 /* We always read the whole vertex. This could be reduced at some 1589 * point by reading less and offsetting the register index in the 1590 * SO_DECLs. 
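 *
 * As a rough illustration (assumed numbers, not taken from any particular
 * shader): a VUE map with num_slots = 9 gives urb_entry_read_length =
 * (9 + 1) / 2 = 5 pairs of 128-bit slots, so the Stream*VertexReadLength
 * fields below are programmed as 5 - 1 = 4.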
1591 */ 1592 so.Stream0VertexReadOffset = urb_entry_read_offset; 1593 so.Stream0VertexReadLength = urb_entry_read_length - 1; 1594 so.Stream1VertexReadOffset = urb_entry_read_offset; 1595 so.Stream1VertexReadLength = urb_entry_read_length - 1; 1596 so.Stream2VertexReadOffset = urb_entry_read_offset; 1597 so.Stream2VertexReadLength = urb_entry_read_length - 1; 1598 so.Stream3VertexReadOffset = urb_entry_read_offset; 1599 so.Stream3VertexReadLength = urb_entry_read_length - 1; 1600 } 1601 1602 if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { 1603 GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so); 1604 } else { 1605 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), _so) 1606 _so = so; 1607 } 1608 1609 if (xfb_info) { 1610 struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128]; 1611 int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0}; 1612 int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0}; 1613 1614 memset(so_decl, 0, sizeof(so_decl)); 1615 1616 for (unsigned i = 0; i < xfb_info->output_count; i++) { 1617 const nir_xfb_output_info *output = &xfb_info->outputs[i]; 1618 unsigned buffer = output->buffer; 1619 unsigned stream = xfb_info->buffer_to_stream[buffer]; 1620 1621 /* Our hardware is unusual in that it requires us to program SO_DECLs 1622 * for fake "hole" components, rather than simply taking the offset 1623 * for each real varying. Each hole can have size 1, 2, 3, or 4; we 1624 * program as many size = 4 holes as we can, then a final hole to 1625 * accommodate the final 1, 2, or 3 remaining. 1626 */ 1627 int hole_dwords = (output->offset - next_offset[buffer]) / 4; 1628 while (hole_dwords > 0) { 1629 so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { 1630 .HoleFlag = 1, 1631 .OutputBufferSlot = buffer, 1632 .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1, 1633 }; 1634 hole_dwords -= 4; 1635 } 1636 1637 int varying = output->location; 1638 uint8_t component_mask = output->component_mask; 1639 /* VARYING_SLOT_PSIZ contains four scalar fields packed together: 1640 * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x 1641 * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y 1642 * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z 1643 * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w 1644 */ 1645 if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) { 1646 varying = VARYING_SLOT_PSIZ; 1647 component_mask = 1 << 0; // SO_DECL_COMPMASK_X 1648 } else if (varying == VARYING_SLOT_LAYER) { 1649 varying = VARYING_SLOT_PSIZ; 1650 component_mask = 1 << 1; // SO_DECL_COMPMASK_Y 1651 } else if (varying == VARYING_SLOT_VIEWPORT) { 1652 varying = VARYING_SLOT_PSIZ; 1653 component_mask = 1 << 2; // SO_DECL_COMPMASK_Z 1654 } else if (varying == VARYING_SLOT_PSIZ) { 1655 component_mask = 1 << 3; // SO_DECL_COMPMASK_W 1656 } 1657 1658 next_offset[buffer] = output->offset + 1659 __builtin_popcount(component_mask) * 4; 1660 1661 const int slot = vue_map->varying_to_slot[varying]; 1662 if (slot < 0) { 1663 /* This can happen if the shader never writes to the varying. 1664 * Insert a hole instead of actual varying data. 
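 *
 * (Illustrative case, not tied to any specific application: a varying that
 * the XFB layout captures but the shader never actually writes ends up
 * here; emitting a hole of the same size keeps the offsets of the
 * remaining captured outputs intact.)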
1665 */ 1666 so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { 1667 .HoleFlag = true, 1668 .OutputBufferSlot = buffer, 1669 .ComponentMask = component_mask, 1670 }; 1671 } else { 1672 so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { 1673 .OutputBufferSlot = buffer, 1674 .RegisterIndex = slot, 1675 .ComponentMask = component_mask, 1676 }; 1677 } 1678 } 1679 1680 int max_decls = 0; 1681 for (unsigned s = 0; s < MAX_XFB_STREAMS; s++) 1682 max_decls = MAX2(max_decls, decls[s]); 1683 1684 uint8_t sbs[MAX_XFB_STREAMS] = { }; 1685 for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) { 1686 if (xfb_info->buffers_written & (1 << b)) 1687 sbs[xfb_info->buffer_to_stream[b]] |= 1 << b; 1688 } 1689 1690 uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls, 1691 GENX(3DSTATE_SO_DECL_LIST), 1692 .StreamtoBufferSelects0 = sbs[0], 1693 .StreamtoBufferSelects1 = sbs[1], 1694 .StreamtoBufferSelects2 = sbs[2], 1695 .StreamtoBufferSelects3 = sbs[3], 1696 .NumEntries0 = decls[0], 1697 .NumEntries1 = decls[1], 1698 .NumEntries2 = decls[2], 1699 .NumEntries3 = decls[3]); 1700 1701 for (int i = 0; i < max_decls; i++) { 1702 GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2, 1703 &(struct GENX(SO_DECL_ENTRY)) { 1704 .Stream0Decl = so_decl[0][i], 1705 .Stream1Decl = so_decl[1][i], 1706 .Stream2Decl = so_decl[2][i], 1707 .Stream3Decl = so_decl[3][i], 1708 }); 1709 } 1710 } 1711} 1712 1713static uint32_t 1714get_sampler_count(const struct anv_shader_bin *bin) 1715{ 1716 uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4); 1717 1718 /* We can potentially have way more than 32 samplers and that's ok. 1719 * However, the 3DSTATE_XS packets only have 3 bits to specify how 1720 * many to pre-fetch and all values above 4 are marked reserved. 
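 *
 * Worked example (illustrative counts, not from any particular shader): a
 * bind map with 9 samplers gives DIV_ROUND_UP(9, 4) = 3, i.e. "prefetch
 * between 9 and 12 samplers"; 20 samplers would give 5, which the MIN2
 * below clamps to 4.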
1721 */ 1722 return MIN2(count_by_4, 4); 1723} 1724 1725static UNUSED struct anv_address 1726get_scratch_address(struct anv_pipeline *pipeline, 1727 gl_shader_stage stage, 1728 const struct anv_shader_bin *bin) 1729{ 1730 return (struct anv_address) { 1731 .bo = anv_scratch_pool_alloc(pipeline->device, 1732 &pipeline->device->scratch_pool, 1733 stage, bin->prog_data->total_scratch), 1734 .offset = 0, 1735 }; 1736} 1737 1738static UNUSED uint32_t 1739get_scratch_space(const struct anv_shader_bin *bin) 1740{ 1741 return ffs(bin->prog_data->total_scratch / 2048); 1742} 1743 1744static UNUSED uint32_t 1745get_scratch_surf(struct anv_pipeline *pipeline, 1746 gl_shader_stage stage, 1747 const struct anv_shader_bin *bin) 1748{ 1749 if (bin->prog_data->total_scratch == 0) 1750 return 0; 1751 1752 struct anv_bo *bo = 1753 anv_scratch_pool_alloc(pipeline->device, 1754 &pipeline->device->scratch_pool, 1755 stage, bin->prog_data->total_scratch); 1756 anv_reloc_list_add_bo(pipeline->batch.relocs, 1757 pipeline->batch.alloc, bo); 1758 return anv_scratch_pool_get_surf(pipeline->device, 1759 &pipeline->device->scratch_pool, 1760 bin->prog_data->total_scratch) >> 4; 1761} 1762 1763static void 1764emit_3dstate_vs(struct anv_graphics_pipeline *pipeline) 1765{ 1766 const struct intel_device_info *devinfo = &pipeline->base.device->info; 1767 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); 1768 const struct anv_shader_bin *vs_bin = 1769 pipeline->shaders[MESA_SHADER_VERTEX]; 1770 1771 assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)); 1772 1773 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) { 1774 vs.Enable = true; 1775 vs.StatisticsEnable = true; 1776 vs.KernelStartPointer = vs_bin->kernel.offset; 1777#if GFX_VER >= 8 1778 vs.SIMD8DispatchEnable = 1779 vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; 1780#endif 1781 1782 assert(!vs_prog_data->base.base.use_alt_mode); 1783#if GFX_VER < 11 1784 vs.SingleVertexDispatch = false; 1785#endif 1786 vs.VectorMaskEnable = false; 1787 /* Wa_1606682166: 1788 * Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes. 1789 * Disable the Sampler state prefetch functionality in the SARB by 1790 * programming 0xB000[30] to '1'. 1791 */ 1792 vs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(vs_bin); 1793 vs.BindingTableEntryCount = vs_bin->bind_map.surface_count; 1794 vs.FloatingPointMode = IEEE754; 1795 vs.IllegalOpcodeExceptionEnable = false; 1796 vs.SoftwareExceptionEnable = false; 1797 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; 1798 1799 if (GFX_VER == 9 && devinfo->gt == 4 && 1800 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { 1801 /* On Sky Lake GT4, we have experienced some hangs related to the VS 1802 * cache and tessellation. It is unknown exactly what is happening 1803 * but the Haswell docs for the "VS Reference Count Full Force Miss 1804 * Enable" field of the "Thread Mode" register refer to a HSW bug in 1805 * which the VUE handle reference count would overflow resulting in 1806 * internal reference counting bugs. My (Jason's) best guess is that 1807 * this bug cropped back up on SKL GT4 when we suddenly had more 1808 * threads in play than any previous gfx9 hardware. 1809 * 1810 * What we do know for sure is that setting this bit when 1811 * tessellation shaders are in use fixes a GPU hang in Batman: Arkham 1812 * City when playing with DXVK (https://bugs.freedesktop.org/107280). 
1813 * Disabling the vertex cache with tessellation shaders should only 1814 * have a minor performance impact as the tessellation shaders are 1815 * likely generating and processing far more geometry than the vertex 1816 * stage. 1817 */ 1818 vs.VertexCacheDisable = true; 1819 } 1820 1821 vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length; 1822 vs.VertexURBEntryReadOffset = 0; 1823 vs.DispatchGRFStartRegisterForURBData = 1824 vs_prog_data->base.base.dispatch_grf_start_reg; 1825 1826#if GFX_VER >= 8 1827 vs.UserClipDistanceClipTestEnableBitmask = 1828 vs_prog_data->base.clip_distance_mask; 1829 vs.UserClipDistanceCullTestEnableBitmask = 1830 vs_prog_data->base.cull_distance_mask; 1831#endif 1832 1833#if GFX_VERx10 >= 125 1834 vs.ScratchSpaceBuffer = 1835 get_scratch_surf(&pipeline->base, MESA_SHADER_VERTEX, vs_bin); 1836#else 1837 vs.PerThreadScratchSpace = get_scratch_space(vs_bin); 1838 vs.ScratchSpaceBasePointer = 1839 get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin); 1840#endif 1841 } 1842} 1843 1844static void 1845emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline, 1846 const VkPipelineTessellationStateCreateInfo *tess_info) 1847{ 1848 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { 1849 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs); 1850 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te); 1851 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds); 1852 return; 1853 } 1854 1855 const struct intel_device_info *devinfo = &pipeline->base.device->info; 1856 const struct anv_shader_bin *tcs_bin = 1857 pipeline->shaders[MESA_SHADER_TESS_CTRL]; 1858 const struct anv_shader_bin *tes_bin = 1859 pipeline->shaders[MESA_SHADER_TESS_EVAL]; 1860 1861 const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline); 1862 const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); 1863 1864 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs) { 1865 hs.Enable = true; 1866 hs.StatisticsEnable = true; 1867 hs.KernelStartPointer = tcs_bin->kernel.offset; 1868 /* Wa_1606682166 */ 1869 hs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tcs_bin); 1870 hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count; 1871 1872#if GFX_VER >= 12 1873 /* Wa_1604578095: 1874 * 1875 * Hang occurs when the number of max threads is less than 2 times 1876 * the number of instance count. The number of max threads must be 1877 * more than 2 times the number of instance count. 
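 *
 * Illustrative numbers (assumed, not from a specific part): with
 * max_tcs_threads = 224 the assert below requires
 * tcs_prog_data->instances < 112, which typical TCS configurations with
 * only a few instances satisfy easily.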
1878 */ 1879 assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances); 1880#endif 1881 1882 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; 1883 hs.IncludeVertexHandles = true; 1884 hs.InstanceCount = tcs_prog_data->instances - 1; 1885 1886 hs.VertexURBEntryReadLength = 0; 1887 hs.VertexURBEntryReadOffset = 0; 1888 hs.DispatchGRFStartRegisterForURBData = 1889 tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f; 1890#if GFX_VER >= 12 1891 hs.DispatchGRFStartRegisterForURBData5 = 1892 tcs_prog_data->base.base.dispatch_grf_start_reg >> 5; 1893#endif 1894 1895#if GFX_VERx10 >= 125 1896 hs.ScratchSpaceBuffer = 1897 get_scratch_surf(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin); 1898#else 1899 hs.PerThreadScratchSpace = get_scratch_space(tcs_bin); 1900 hs.ScratchSpaceBasePointer = 1901 get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin); 1902#endif 1903 1904#if GFX_VER == 12 1905 /* Patch Count threshold specifies the maximum number of patches that 1906 * will be accumulated before a thread dispatch is forced. 1907 */ 1908 hs.PatchCountThreshold = tcs_prog_data->patch_count_threshold; 1909#endif 1910 1911#if GFX_VER >= 9 1912 hs.DispatchMode = tcs_prog_data->base.dispatch_mode; 1913 hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; 1914#endif 1915 } 1916 1917 const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = 1918 tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO) : NULL; 1919 1920 VkTessellationDomainOrigin uv_origin = 1921 domain_origin_state ? domain_origin_state->domainOrigin : 1922 VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; 1923 1924 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te) { 1925 te.Partitioning = tes_prog_data->partitioning; 1926 1927 if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { 1928 te.OutputTopology = tes_prog_data->output_topology; 1929 } else { 1930 /* When the origin is upper-left, we have to flip the winding order */ 1931 if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) { 1932 te.OutputTopology = OUTPUT_TRI_CW; 1933 } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) { 1934 te.OutputTopology = OUTPUT_TRI_CCW; 1935 } else { 1936 te.OutputTopology = tes_prog_data->output_topology; 1937 } 1938 } 1939 1940 te.TEDomain = tes_prog_data->domain; 1941 te.TEEnable = true; 1942 te.MaximumTessellationFactorOdd = 63.0; 1943 te.MaximumTessellationFactorNotOdd = 64.0; 1944 } 1945 1946 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) { 1947 ds.Enable = true; 1948 ds.StatisticsEnable = true; 1949 ds.KernelStartPointer = tes_bin->kernel.offset; 1950 /* Wa_1606682166 */ 1951 ds.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tes_bin); 1952 ds.BindingTableEntryCount = tes_bin->bind_map.surface_count; 1953 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; 1954 1955 ds.ComputeWCoordinateEnable = 1956 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; 1957 1958 ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length; 1959 ds.PatchURBEntryReadOffset = 0; 1960 ds.DispatchGRFStartRegisterForURBData = 1961 tes_prog_data->base.base.dispatch_grf_start_reg; 1962 1963#if GFX_VER >= 8 1964#if GFX_VER < 11 1965 ds.DispatchMode = 1966 tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ? 
1967 DISPATCH_MODE_SIMD8_SINGLE_PATCH : 1968 DISPATCH_MODE_SIMD4X2; 1969#else 1970 assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8); 1971 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; 1972#endif 1973 1974 ds.UserClipDistanceClipTestEnableBitmask = 1975 tes_prog_data->base.clip_distance_mask; 1976 ds.UserClipDistanceCullTestEnableBitmask = 1977 tes_prog_data->base.cull_distance_mask; 1978#endif 1979 1980#if GFX_VERx10 >= 125 1981 ds.ScratchSpaceBuffer = 1982 get_scratch_surf(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin); 1983#else 1984 ds.PerThreadScratchSpace = get_scratch_space(tes_bin); 1985 ds.ScratchSpaceBasePointer = 1986 get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin); 1987#endif 1988 } 1989} 1990 1991static void 1992emit_3dstate_gs(struct anv_graphics_pipeline *pipeline) 1993{ 1994 const struct intel_device_info *devinfo = &pipeline->base.device->info; 1995 const struct anv_shader_bin *gs_bin = 1996 pipeline->shaders[MESA_SHADER_GEOMETRY]; 1997 1998 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 1999 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs); 2000 return; 2001 } 2002 2003 const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); 2004 2005 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) { 2006 gs.Enable = true; 2007 gs.StatisticsEnable = true; 2008 gs.KernelStartPointer = gs_bin->kernel.offset; 2009 gs.DispatchMode = gs_prog_data->base.dispatch_mode; 2010 2011 gs.SingleProgramFlow = false; 2012 gs.VectorMaskEnable = false; 2013 /* Wa_1606682166 */ 2014 gs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(gs_bin); 2015 gs.BindingTableEntryCount = gs_bin->bind_map.surface_count; 2016 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles; 2017 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 2018 2019 if (GFX_VER == 8) { 2020 /* Broadwell is weird. It needs us to divide by 2. */ 2021 gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1; 2022 } else { 2023 gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1; 2024 } 2025 2026 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 2027 gs.OutputTopology = gs_prog_data->output_topology; 2028 gs.ControlDataFormat = gs_prog_data->control_data_format; 2029 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; 2030 gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1; 2031 gs.ReorderMode = TRAILING; 2032 2033#if GFX_VER >= 8 2034 gs.ExpectedVertexCount = gs_prog_data->vertices_in; 2035 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0; 2036 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ? 
2037 gs_prog_data->static_vertex_count : 0; 2038#endif 2039 2040 gs.VertexURBEntryReadOffset = 0; 2041 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length; 2042 gs.DispatchGRFStartRegisterForURBData = 2043 gs_prog_data->base.base.dispatch_grf_start_reg; 2044 2045#if GFX_VER >= 8 2046 gs.UserClipDistanceClipTestEnableBitmask = 2047 gs_prog_data->base.clip_distance_mask; 2048 gs.UserClipDistanceCullTestEnableBitmask = 2049 gs_prog_data->base.cull_distance_mask; 2050#endif 2051 2052#if GFX_VERx10 >= 125 2053 gs.ScratchSpaceBuffer = 2054 get_scratch_surf(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin); 2055#else 2056 gs.PerThreadScratchSpace = get_scratch_space(gs_bin); 2057 gs.ScratchSpaceBasePointer = 2058 get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin); 2059#endif 2060 } 2061} 2062 2063static bool 2064has_color_buffer_write_enabled(const struct anv_graphics_pipeline *pipeline, 2065 const VkPipelineColorBlendStateCreateInfo *blend) 2066{ 2067 const struct anv_shader_bin *shader_bin = 2068 pipeline->shaders[MESA_SHADER_FRAGMENT]; 2069 if (!shader_bin) 2070 return false; 2071 2072 if (!pipeline->dynamic_state.color_writes) 2073 return false; 2074 2075 const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map; 2076 for (int i = 0; i < bind_map->surface_count; i++) { 2077 struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i]; 2078 2079 if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) 2080 continue; 2081 2082 if (binding->index == UINT32_MAX) 2083 continue; 2084 2085 if (blend && blend->pAttachments[binding->index].colorWriteMask != 0) 2086 return true; 2087 } 2088 2089 return false; 2090} 2091 2092static void 2093emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subpass, 2094 const VkPipelineInputAssemblyStateCreateInfo *ia, 2095 const VkPipelineRasterizationStateCreateInfo *raster, 2096 const VkPipelineColorBlendStateCreateInfo *blend, 2097 const VkPipelineMultisampleStateCreateInfo *multisample, 2098 const VkPipelineRasterizationLineStateCreateInfoEXT *line, 2099 const uint32_t dynamic_states) 2100{ 2101 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 2102 2103 struct GENX(3DSTATE_WM) wm = { 2104 GENX(3DSTATE_WM_header), 2105 }; 2106 wm.StatisticsEnable = true; 2107 wm.LineEndCapAntialiasingRegionWidth = _05pixels; 2108 wm.LineAntialiasingRegionWidth = _10pixels; 2109 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 2110 2111 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 2112 if (wm_prog_data->early_fragment_tests) { 2113 wm.EarlyDepthStencilControl = EDSC_PREPS; 2114 } else if (wm_prog_data->has_side_effects) { 2115 wm.EarlyDepthStencilControl = EDSC_PSEXEC; 2116 } else { 2117 wm.EarlyDepthStencilControl = EDSC_NORMAL; 2118 } 2119 2120#if GFX_VER >= 8 2121 /* Gen8 hardware tries to compute ThreadDispatchEnable for us but 2122 * doesn't take into account KillPixels when no depth or stencil 2123 * writes are enabled. In order for occlusion queries to work 2124 * correctly with no attachments, we need to force-enable PS thread 2125 * dispatch. 2126 * 2127 * The BDW docs are pretty clear that that this bit isn't validated 2128 * and probably shouldn't be used in production: 2129 * 2130 * "This must always be set to Normal. This field should not be 2131 * tested for functional validation." 2132 * 2133 * Unfortunately, however, the other mechanism we have for doing this 2134 * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW. 
2135 * Given two bad options, we choose the one which works. 2136 */ 2137 pipeline->force_fragment_thread_dispatch = 2138 wm_prog_data->has_side_effects || 2139 wm_prog_data->uses_kill; 2140 2141 if (pipeline->force_fragment_thread_dispatch || 2142 !has_color_buffer_write_enabled(pipeline, blend)) { 2143 /* Only set this value in non dynamic mode. */ 2144 wm.ForceThreadDispatchEnable = 2145 !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) ? ForceON : 0; 2146 } 2147#endif 2148 2149 wm.BarycentricInterpolationMode = 2150 wm_prog_data->barycentric_interp_modes; 2151 2152#if GFX_VER < 8 2153 wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 2154 wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 2155 wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 2156 wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 2157 2158 /* If the subpass has a depth or stencil self-dependency, then we 2159 * need to force the hardware to do the depth/stencil write *after* 2160 * fragment shader execution. Otherwise, the writes may hit memory 2161 * before we get around to fetching from the input attachment and we 2162 * may get the depth or stencil value from the current draw rather 2163 * than the previous one. 2164 */ 2165 wm.PixelShaderKillsPixel = subpass->has_ds_self_dep || 2166 wm_prog_data->uses_kill; 2167 2168 pipeline->force_fragment_thread_dispatch = 2169 wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF || 2170 wm_prog_data->has_side_effects || 2171 wm.PixelShaderKillsPixel; 2172 2173 if (pipeline->force_fragment_thread_dispatch || 2174 has_color_buffer_write_enabled(pipeline, blend)) { 2175 /* Only set this value in non dynamic mode. */ 2176 wm.ThreadDispatchEnable = !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE); 2177 } 2178 2179 if (multisample && multisample->rasterizationSamples > 1) { 2180 if (wm_prog_data->persample_dispatch) { 2181 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 2182 } else { 2183 wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; 2184 } 2185 } else { 2186 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 2187 } 2188 2189 VkPolygonMode raster_mode = 2190 genX(raster_polygon_mode)(pipeline, ia->topology); 2191 2192 wm.MultisampleRasterizationMode = 2193 dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY ? 0 : 2194 genX(ms_rasterization_mode)(pipeline, raster_mode); 2195#endif 2196 2197 wm.LineStippleEnable = line && line->stippledLineEnable; 2198 } 2199 2200 uint32_t dynamic_wm_states = ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; 2201 2202#if GFX_VER < 8 2203 dynamic_wm_states |= ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; 2204#endif 2205 2206 if (dynamic_states & dynamic_wm_states) { 2207 const struct intel_device_info *devinfo = &pipeline->base.device->info; 2208 uint32_t *dws = devinfo->ver >= 8 ? 
pipeline->gfx8.wm : pipeline->gfx7.wm; 2209 GENX(3DSTATE_WM_pack)(NULL, dws, &wm); 2210 } else { 2211 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), _wm) 2212 _wm = wm; 2213 } 2214} 2215 2216static void 2217emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, 2218 const VkPipelineColorBlendStateCreateInfo *blend, 2219 const VkPipelineMultisampleStateCreateInfo *multisample) 2220{ 2221 UNUSED const struct intel_device_info *devinfo = 2222 &pipeline->base.device->info; 2223 const struct anv_shader_bin *fs_bin = 2224 pipeline->shaders[MESA_SHADER_FRAGMENT]; 2225 2226 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 2227 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { 2228#if GFX_VER == 7 2229 /* Even if no fragments are ever dispatched, gfx7 hardware hangs if 2230 * we don't at least set the maximum number of threads. 2231 */ 2232 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 2233#endif 2234 } 2235 return; 2236 } 2237 2238 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 2239 2240#if GFX_VER < 8 2241 /* The hardware wedges if you have this bit set but don't turn on any dual 2242 * source blend factors. 2243 */ 2244 bool dual_src_blend = false; 2245 if (wm_prog_data->dual_src_blend && blend) { 2246 for (uint32_t i = 0; i < blend->attachmentCount; i++) { 2247 const VkPipelineColorBlendAttachmentState *bstate = 2248 &blend->pAttachments[i]; 2249 2250 if (bstate->blendEnable && 2251 (is_dual_src_blend_factor(bstate->srcColorBlendFactor) || 2252 is_dual_src_blend_factor(bstate->dstColorBlendFactor) || 2253 is_dual_src_blend_factor(bstate->srcAlphaBlendFactor) || 2254 is_dual_src_blend_factor(bstate->dstAlphaBlendFactor))) { 2255 dual_src_blend = true; 2256 break; 2257 } 2258 } 2259 } 2260#endif 2261 2262 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { 2263 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; 2264 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; 2265 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; 2266 2267 /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: 2268 * 2269 * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 2270 * Dispatch must not be enabled for PER_PIXEL dispatch mode." 2271 * 2272 * Since 16x MSAA is first introduced on SKL, we don't need to apply 2273 * the workaround on any older hardware. 2274 */ 2275 if (GFX_VER >= 9 && !wm_prog_data->persample_dispatch && 2276 multisample && multisample->rasterizationSamples == 16) { 2277 assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); 2278 ps._32PixelDispatchEnable = false; 2279 } 2280 2281 ps.KernelStartPointer0 = fs_bin->kernel.offset + 2282 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); 2283 ps.KernelStartPointer1 = fs_bin->kernel.offset + 2284 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); 2285 ps.KernelStartPointer2 = fs_bin->kernel.offset + 2286 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); 2287 2288 ps.SingleProgramFlow = false; 2289 ps.VectorMaskEnable = GFX_VER >= 8; 2290 /* Wa_1606682166 */ 2291 ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(fs_bin); 2292 ps.BindingTableEntryCount = fs_bin->bind_map.surface_count; 2293 ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || 2294 wm_prog_data->base.ubo_ranges[0].length; 2295 ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
2296 POSOFFSET_SAMPLE: POSOFFSET_NONE; 2297#if GFX_VER < 8 2298 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; 2299 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 2300 ps.DualSourceBlendEnable = dual_src_blend; 2301#endif 2302 2303#if GFX_VERx10 == 75 2304 /* Haswell requires the sample mask to be set in this packet as well 2305 * as in 3DSTATE_SAMPLE_MASK; the values should match. 2306 */ 2307 ps.SampleMask = 0xff; 2308#endif 2309 2310#if GFX_VER >= 9 2311 ps.MaximumNumberofThreadsPerPSD = 64 - 1; 2312#elif GFX_VER >= 8 2313 ps.MaximumNumberofThreadsPerPSD = 64 - 2; 2314#else 2315 ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; 2316#endif 2317 2318 ps.DispatchGRFStartRegisterForConstantSetupData0 = 2319 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 2320 ps.DispatchGRFStartRegisterForConstantSetupData1 = 2321 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); 2322 ps.DispatchGRFStartRegisterForConstantSetupData2 = 2323 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); 2324 2325#if GFX_VERx10 >= 125 2326 ps.ScratchSpaceBuffer = 2327 get_scratch_surf(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin); 2328#else 2329 ps.PerThreadScratchSpace = get_scratch_space(fs_bin); 2330 ps.ScratchSpaceBasePointer = 2331 get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin); 2332#endif 2333 } 2334} 2335 2336#if GFX_VER >= 8 2337static void 2338emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline, 2339 struct anv_subpass *subpass, 2340 const VkPipelineRasterizationStateCreateInfo *rs_info) 2341{ 2342 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 2343 2344 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 2345 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps); 2346 return; 2347 } 2348 2349 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) { 2350 ps.PixelShaderValid = true; 2351 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; 2352 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 2353 ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; 2354 ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 2355 ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 2356 ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 2357 2358 /* If the subpass has a depth or stencil self-dependency, then we need 2359 * to force the hardware to do the depth/stencil write *after* fragment 2360 * shader execution. Otherwise, the writes may hit memory before we get 2361 * around to fetching from the input attachment and we may get the depth 2362 * or stencil value from the current draw rather than the previous one. 
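 *
 * (Illustrative case: a subpass that also binds its depth/stencil
 * attachment as an input attachment is exactly the situation
 * has_ds_self_dep flags.)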
2363 */ 2364 ps.PixelShaderKillsPixel = subpass->has_ds_self_dep || 2365 wm_prog_data->uses_kill; 2366 2367#if GFX_VER >= 9 2368 ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil; 2369 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary; 2370 2371 ps.InputCoverageMaskState = ICMS_NONE; 2372 assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */ 2373 if (!wm_prog_data->uses_sample_mask) 2374 ps.InputCoverageMaskState = ICMS_NONE; 2375 else if (wm_prog_data->per_coarse_pixel_dispatch) 2376 ps.InputCoverageMaskState = ICMS_NORMAL; 2377 else if (wm_prog_data->post_depth_coverage) 2378 ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; 2379 else 2380 ps.InputCoverageMaskState = ICMS_NORMAL; 2381#else 2382 ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 2383#endif 2384 2385#if GFX_VER >= 11 2386 ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients = 2387 wm_prog_data->uses_depth_w_coefficients; 2388 ps.PixelShaderIsPerCoarsePixel = wm_prog_data->per_coarse_pixel_dispatch; 2389#endif 2390 } 2391} 2392 2393static void 2394emit_3dstate_vf_topology(struct anv_graphics_pipeline *pipeline) 2395{ 2396 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { 2397 vft.PrimitiveTopologyType = pipeline->topology; 2398 } 2399} 2400#endif 2401 2402static void 2403emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline) 2404{ 2405 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) { 2406 vfs.StatisticsEnable = true; 2407 } 2408} 2409 2410static void 2411compute_kill_pixel(struct anv_graphics_pipeline *pipeline, 2412 const VkPipelineMultisampleStateCreateInfo *ms_info, 2413 const struct anv_subpass *subpass) 2414{ 2415 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { 2416 pipeline->kill_pixel = false; 2417 return; 2418 } 2419 2420 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 2421 2422 /* This computes the KillPixel portion of the computation for whether or 2423 * not we want to enable the PMA fix on gfx8 or gfx9. It's given by this 2424 * chunk of the giant formula: 2425 * 2426 * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 2427 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 2428 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 2429 * 3DSTATE_PS_BLEND::AlphaTestEnable || 2430 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 2431 * 2432 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is 2433 * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept 2434 * of an alpha test. 
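 *
 * With those two terms dropped, the remaining formula maps onto the
 * expression below: PixelShaderKillsPixel corresponds to
 * (has_ds_self_dep || uses_kill), "oMask Present to RenderTarget" to
 * uses_omask, and AlphaToCoverageEnable to ms_info->alphaToCoverageEnable.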
2435 */ 2436 pipeline->kill_pixel = 2437 subpass->has_ds_self_dep || wm_prog_data->uses_kill || 2438 wm_prog_data->uses_omask || 2439 (ms_info && ms_info->alphaToCoverageEnable); 2440} 2441 2442#if GFX_VER == 12 2443static void 2444emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline) 2445{ 2446 if (!pipeline->use_primitive_replication) { 2447 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr); 2448 return; 2449 } 2450 2451 uint32_t view_mask = pipeline->subpass->view_mask; 2452 int view_count = util_bitcount(view_mask); 2453 assert(view_count > 1 && view_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION); 2454 2455 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) { 2456 pr.ReplicaMask = (1 << view_count) - 1; 2457 pr.ReplicationCount = view_count - 1; 2458 2459 int i = 0; 2460 u_foreach_bit(view_index, view_mask) { 2461 pr.RTAIOffset[i] = view_index; 2462 i++; 2463 } 2464 } 2465} 2466#endif 2467 2468static VkResult 2469genX(graphics_pipeline_create)( 2470 VkDevice _device, 2471 struct anv_pipeline_cache * cache, 2472 const VkGraphicsPipelineCreateInfo* pCreateInfo, 2473 const VkAllocationCallbacks* pAllocator, 2474 VkPipeline* pPipeline) 2475{ 2476 ANV_FROM_HANDLE(anv_device, device, _device); 2477 ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); 2478 struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; 2479 struct anv_graphics_pipeline *pipeline; 2480 VkResult result; 2481 2482 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); 2483 2484 /* Use the default pipeline cache if none is specified */ 2485 if (cache == NULL && device->physical->instance->pipeline_cache_enabled) 2486 cache = &device->default_pipeline_cache; 2487 2488 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 2489 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2490 if (pipeline == NULL) 2491 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2492 2493 result = anv_graphics_pipeline_init(pipeline, device, cache, 2494 pCreateInfo, pAllocator); 2495 if (result != VK_SUCCESS) { 2496 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2497 if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) 2498 *pPipeline = VK_NULL_HANDLE; 2499 return result; 2500 } 2501 2502 /* Information on which states are considered dynamic. */ 2503 const VkPipelineDynamicStateCreateInfo *dyn_info = 2504 pCreateInfo->pDynamicState; 2505 uint32_t dynamic_states = 0; 2506 if (dyn_info) { 2507 for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++) 2508 dynamic_states |= 2509 anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]); 2510 } 2511 2512 2513 /* If rasterization is not enabled, various CreateInfo structs must be 2514 * ignored. 2515 */ 2516 const bool raster_enabled = 2517 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable || 2518 (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE); 2519 2520 const VkPipelineViewportStateCreateInfo *vp_info = 2521 raster_enabled ? pCreateInfo->pViewportState : NULL; 2522 2523 const VkPipelineMultisampleStateCreateInfo *ms_info = 2524 raster_enabled ? pCreateInfo->pMultisampleState : NULL; 2525 2526 const VkPipelineDepthStencilStateCreateInfo *ds_info = 2527 raster_enabled ? pCreateInfo->pDepthStencilState : NULL; 2528 2529 const VkPipelineColorBlendStateCreateInfo *cb_info = 2530 raster_enabled ? 
pCreateInfo->pColorBlendState : NULL; 2531 2532 const VkPipelineRasterizationLineStateCreateInfoEXT *line_info = 2533 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, 2534 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 2535 2536 enum intel_urb_deref_block_size urb_deref_block_size; 2537 emit_urb_setup(pipeline, &urb_deref_block_size); 2538 2539 assert(pCreateInfo->pRasterizationState); 2540 emit_rs_state(pipeline, pCreateInfo->pInputAssemblyState, 2541 pCreateInfo->pRasterizationState, 2542 ms_info, line_info, dynamic_states, pass, subpass, 2543 urb_deref_block_size); 2544 emit_ms_state(pipeline, ms_info, dynamic_states); 2545 emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass); 2546 emit_cb_state(pipeline, cb_info, ms_info, dynamic_states); 2547 compute_kill_pixel(pipeline, ms_info, subpass); 2548 2549 emit_3dstate_clip(pipeline, 2550 pCreateInfo->pInputAssemblyState, 2551 vp_info, 2552 pCreateInfo->pRasterizationState, 2553 dynamic_states); 2554 2555#if GFX_VER == 12 2556 emit_3dstate_primitive_replication(pipeline); 2557#endif 2558 2559#if 0 2560 /* From gfx7_vs_state.c */ 2561 2562 /** 2563 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > 2564 * Geometry > Geometry Shader > State: 2565 * 2566 * "Note: Because of corruption in IVB:GT2, software needs to flush the 2567 * whole fixed function pipeline when the GS enable changes value in 2568 * the 3DSTATE_GS." 2569 * 2570 * The hardware architects have clarified that in this context "flush the 2571 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS 2572 * Stall" bit set. 2573 */ 2574 if (!device->info.is_haswell && !device->info.is_baytrail) 2575 gfx7_emit_vs_workaround_flush(brw); 2576#endif 2577 2578 if (anv_pipeline_is_primitive(pipeline)) { 2579 assert(pCreateInfo->pVertexInputState); 2580 emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); 2581 2582 emit_3dstate_vs(pipeline); 2583 emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState); 2584 emit_3dstate_gs(pipeline); 2585 2586#if GFX_VER >= 8 2587 if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) 2588 emit_3dstate_vf_topology(pipeline); 2589#endif 2590 2591 emit_3dstate_vf_statistics(pipeline); 2592 2593 emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState, 2594 dynamic_states); 2595 } 2596 2597 emit_3dstate_sbe(pipeline); 2598 emit_3dstate_wm(pipeline, subpass, 2599 pCreateInfo->pInputAssemblyState, 2600 pCreateInfo->pRasterizationState, 2601 cb_info, ms_info, line_info, dynamic_states); 2602 emit_3dstate_ps(pipeline, cb_info, ms_info); 2603#if GFX_VER >= 8 2604 emit_3dstate_ps_extra(pipeline, subpass, 2605 pCreateInfo->pRasterizationState); 2606#endif 2607 2608 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2609 2610 return pipeline->base.batch.status; 2611} 2612 2613#if GFX_VERx10 >= 125 2614 2615static void 2616emit_compute_state(struct anv_compute_pipeline *pipeline, 2617 const struct anv_device *device) 2618{ 2619 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); 2620 anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); 2621 2622 const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs; 2623 const struct intel_device_info *devinfo = &device->info; 2624 2625 anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) { 2626 cfe.MaximumNumberofThreads = 2627 devinfo->max_cs_threads * devinfo->subslice_total - 1; 2628 cfe.ScratchSpaceBuffer = 2629 get_scratch_surf(&pipeline->base, 
MESA_SHADER_COMPUTE, cs_bin); 2630 } 2631} 2632 2633#else /* #if GFX_VERx10 >= 125 */ 2634 2635static void 2636emit_compute_state(struct anv_compute_pipeline *pipeline, 2637 const struct anv_device *device) 2638{ 2639 const struct intel_device_info *devinfo = &device->info; 2640 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); 2641 2642 anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); 2643 2644 const struct brw_cs_dispatch_info dispatch = 2645 brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL); 2646 const uint32_t vfe_curbe_allocation = 2647 ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + 2648 cs_prog_data->push.cross_thread.regs, 2); 2649 2650 const struct anv_shader_bin *cs_bin = pipeline->cs; 2651 2652 anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) { 2653#if GFX_VER > 7 2654 vfe.StackSize = 0; 2655#else 2656 vfe.GPGPUMode = true; 2657#endif 2658 vfe.MaximumNumberofThreads = 2659 devinfo->max_cs_threads * devinfo->subslice_total - 1; 2660 vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2; 2661#if GFX_VER < 11 2662 vfe.ResetGatewayTimer = true; 2663#endif 2664#if GFX_VER <= 8 2665 vfe.BypassGatewayControl = true; 2666#endif 2667 vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2; 2668 vfe.CURBEAllocationSize = vfe_curbe_allocation; 2669 2670 if (cs_bin->prog_data->total_scratch) { 2671 if (GFX_VER >= 8) { 2672 /* Broadwell's Per Thread Scratch Space is in the range [0, 11] 2673 * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. 2674 */ 2675 vfe.PerThreadScratchSpace = 2676 ffs(cs_bin->prog_data->total_scratch) - 11; 2677 } else if (GFX_VERx10 == 75) { 2678 /* Haswell's Per Thread Scratch Space is in the range [0, 10] 2679 * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. 2680 */ 2681 vfe.PerThreadScratchSpace = 2682 ffs(cs_bin->prog_data->total_scratch) - 12; 2683 } else { 2684 /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB] 2685 * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. 2686 */ 2687 vfe.PerThreadScratchSpace = 2688 cs_bin->prog_data->total_scratch / 1024 - 1; 2689 } 2690 vfe.ScratchSpaceBasePointer = 2691 get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin); 2692 } 2693 } 2694 2695 struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { 2696 .KernelStartPointer = 2697 cs_bin->kernel.offset + 2698 brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size), 2699 2700 /* Wa_1606682166 */ 2701 .SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin), 2702 /* We add 1 because the CS indirect parameters buffer isn't accounted 2703 * for in bind_map.surface_count. 2704 */ 2705 .BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30), 2706 .BarrierEnable = cs_prog_data->uses_barrier, 2707 .SharedLocalMemorySize = 2708 encode_slm_size(GFX_VER, cs_prog_data->base.total_shared), 2709 2710#if GFX_VERx10 != 75 2711 .ConstantURBEntryReadOffset = 0, 2712#endif 2713 .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs, 2714#if GFX_VERx10 >= 75 2715 .CrossThreadConstantDataReadLength = 2716 cs_prog_data->push.cross_thread.regs, 2717#endif 2718#if GFX_VER >= 12 2719 /* TODO: Check if we are missing workarounds and enable mid-thread 2720 * preemption. 2721 * 2722 * We still have issues with mid-thread preemption (it was already 2723 * disabled by the kernel on gfx11, due to missing workarounds). 
It's 2724 * possible that we are just missing some workarounds, and could enable 2725 * it later, but for now let's disable it to fix a GPU hang in compute in Car 2726 * Chase (and possibly more). 2727 */ 2728 .ThreadPreemptionDisable = true, 2729#endif 2730 2731 .NumberofThreadsinGPGPUThreadGroup = dispatch.threads, 2732 }; 2733 GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, 2734 pipeline->interface_descriptor_data, 2735 &desc); 2736} 2737 2738#endif /* #if GFX_VERx10 >= 125 */ 2739 2740static VkResult 2741compute_pipeline_create( 2742 VkDevice _device, 2743 struct anv_pipeline_cache * cache, 2744 const VkComputePipelineCreateInfo* pCreateInfo, 2745 const VkAllocationCallbacks* pAllocator, 2746 VkPipeline* pPipeline) 2747{ 2748 ANV_FROM_HANDLE(anv_device, device, _device); 2749 struct anv_compute_pipeline *pipeline; 2750 VkResult result; 2751 2752 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); 2753 2754 /* Use the default pipeline cache if none is specified */ 2755 if (cache == NULL && device->physical->instance->pipeline_cache_enabled) 2756 cache = &device->default_pipeline_cache; 2757 2758 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 2759 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 2760 if (pipeline == NULL) 2761 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2762 2763 result = anv_pipeline_init(&pipeline->base, device, 2764 ANV_PIPELINE_COMPUTE, pCreateInfo->flags, 2765 pAllocator); 2766 if (result != VK_SUCCESS) { 2767 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2768 return result; 2769 } 2770 2771 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS, 2772 pipeline->batch_data, sizeof(pipeline->batch_data)); 2773 2774 assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); 2775 VK_FROM_HANDLE(vk_shader_module, module, pCreateInfo->stage.module); 2776 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, 2777 pCreateInfo->stage.pName, 2778 pCreateInfo->stage.pSpecializationInfo); 2779 if (result != VK_SUCCESS) { 2780 anv_pipeline_finish(&pipeline->base, device, pAllocator); 2781 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2782 if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) 2783 *pPipeline = VK_NULL_HANDLE; 2784 return result; 2785 } 2786 2787 emit_compute_state(pipeline, device); 2788 2789 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 2790 2791 return pipeline->base.batch.status; 2792} 2793 2794VkResult genX(CreateGraphicsPipelines)( 2795 VkDevice _device, 2796 VkPipelineCache pipelineCache, 2797 uint32_t count, 2798 const VkGraphicsPipelineCreateInfo* pCreateInfos, 2799 const VkAllocationCallbacks* pAllocator, 2800 VkPipeline* pPipelines) 2801{ 2802 ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); 2803 2804 VkResult result = VK_SUCCESS; 2805 2806 unsigned i; 2807 for (i = 0; i < count; i++) { 2808 VkResult res = genX(graphics_pipeline_create)(_device, 2809 pipeline_cache, 2810 &pCreateInfos[i], 2811 pAllocator, &pPipelines[i]); 2812 2813 if (res == VK_SUCCESS) 2814 continue; 2815 2816 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EXT as it 2817 * is not obvious what error should be reported upon 2 different failures.
2818 * */ 2819 result = res; 2820 if (res != VK_PIPELINE_COMPILE_REQUIRED_EXT) 2821 break; 2822 2823 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 2824 break; 2825 } 2826 2827 for (; i < count; i++) 2828 pPipelines[i] = VK_NULL_HANDLE; 2829 2830 return result; 2831} 2832 2833VkResult genX(CreateComputePipelines)( 2834 VkDevice _device, 2835 VkPipelineCache pipelineCache, 2836 uint32_t count, 2837 const VkComputePipelineCreateInfo* pCreateInfos, 2838 const VkAllocationCallbacks* pAllocator, 2839 VkPipeline* pPipelines) 2840{ 2841 ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); 2842 2843 VkResult result = VK_SUCCESS; 2844 2845 unsigned i; 2846 for (i = 0; i < count; i++) { 2847 VkResult res = compute_pipeline_create(_device, pipeline_cache, 2848 &pCreateInfos[i], 2849 pAllocator, &pPipelines[i]); 2850 2851 if (res == VK_SUCCESS) 2852 continue; 2853 2854 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EX as it 2855 * is not obvious what error should be report upon 2 different failures. 2856 * */ 2857 result = res; 2858 if (res != VK_PIPELINE_COMPILE_REQUIRED_EXT) 2859 break; 2860 2861 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 2862 break; 2863 } 2864 2865 for (; i < count; i++) 2866 pPipelines[i] = VK_NULL_HANDLE; 2867 2868 return result; 2869} 2870 2871#if GFX_VERx10 >= 125 2872 2873static void 2874assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2875 uint32_t stage_idx, 2876 VkShaderStageFlags valid_stages) 2877{ 2878 if (stage_idx == VK_SHADER_UNUSED_KHR) 2879 return; 2880 2881 assert(stage_idx <= pCreateInfo->stageCount); 2882 assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1); 2883 assert(pCreateInfo->pStages[stage_idx].stage & valid_stages); 2884} 2885 2886static VkResult 2887ray_tracing_pipeline_create( 2888 VkDevice _device, 2889 struct anv_pipeline_cache * cache, 2890 const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, 2891 const VkAllocationCallbacks* pAllocator, 2892 VkPipeline* pPipeline) 2893{ 2894 ANV_FROM_HANDLE(anv_device, device, _device); 2895 VkResult result; 2896 2897 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR); 2898 2899 /* Use the default pipeline cache if none is specified */ 2900 if (cache == NULL && device->physical->instance->pipeline_cache_enabled) 2901 cache = &device->default_pipeline_cache; 2902 2903 VK_MULTIALLOC(ma); 2904 VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1); 2905 VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, pCreateInfo->groupCount); 2906 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, 2907 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) 2908 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2909 2910 result = anv_pipeline_init(&pipeline->base, device, 2911 ANV_PIPELINE_RAY_TRACING, pCreateInfo->flags, 2912 pAllocator); 2913 if (result != VK_SUCCESS) { 2914 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2915 return result; 2916 } 2917 2918 pipeline->group_count = pCreateInfo->groupCount; 2919 pipeline->groups = groups; 2920 2921 ASSERTED const VkShaderStageFlags ray_tracing_stages = 2922 VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2923 VK_SHADER_STAGE_ANY_HIT_BIT_KHR | 2924 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | 2925 VK_SHADER_STAGE_MISS_BIT_KHR | 2926 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | 2927 VK_SHADER_STAGE_CALLABLE_BIT_KHR; 2928 2929 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) 2930 
assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0); 2931 2932 for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { 2933 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = 2934 &pCreateInfo->pGroups[i]; 2935 assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader, 2936 VK_SHADER_STAGE_RAYGEN_BIT_KHR | 2937 VK_SHADER_STAGE_MISS_BIT_KHR | 2938 VK_SHADER_STAGE_CALLABLE_BIT_KHR); 2939 assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader, 2940 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR); 2941 assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader, 2942 VK_SHADER_STAGE_ANY_HIT_BIT_KHR); 2943 assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader, 2944 VK_SHADER_STAGE_INTERSECTION_BIT_KHR); 2945 switch (ginfo->type) { 2946 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: 2947 assert(ginfo->generalShader < pCreateInfo->stageCount); 2948 assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR); 2949 assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR); 2950 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2951 break; 2952 2953 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: 2954 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2955 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR); 2956 break; 2957 2958 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: 2959 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR); 2960 break; 2961 2962 default: 2963 unreachable("Invalid ray-tracing shader group type"); 2964 } 2965 } 2966 2967 result = anv_ray_tracing_pipeline_init(pipeline, device, cache, 2968 pCreateInfo, pAllocator); 2969 if (result != VK_SUCCESS) { 2970 anv_pipeline_finish(&pipeline->base, device, pAllocator); 2971 vk_free2(&device->vk.alloc, pAllocator, pipeline); 2972 return result; 2973 } 2974 2975 for (uint32_t i = 0; i < pipeline->group_count; i++) { 2976 struct anv_rt_shader_group *group = &pipeline->groups[i]; 2977 2978 switch (group->type) { 2979 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: { 2980 struct GFX_RT_GENERAL_SBT_HANDLE sh = {}; 2981 sh.General = anv_shader_bin_get_bsr(group->general, 32); 2982 GFX_RT_GENERAL_SBT_HANDLE_pack(NULL, group->handle, &sh); 2983 break; 2984 } 2985 2986 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: { 2987 struct GFX_RT_TRIANGLES_SBT_HANDLE sh = {}; 2988 if (group->closest_hit) 2989 sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32); 2990 if (group->any_hit) 2991 sh.AnyHit = anv_shader_bin_get_bsr(group->any_hit, 24); 2992 GFX_RT_TRIANGLES_SBT_HANDLE_pack(NULL, group->handle, &sh); 2993 break; 2994 } 2995 2996 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: { 2997 struct GFX_RT_PROCEDURAL_SBT_HANDLE sh = {}; 2998 if (group->closest_hit) 2999 sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32); 3000 sh.Intersection = anv_shader_bin_get_bsr(group->intersection, 24); 3001 GFX_RT_PROCEDURAL_SBT_HANDLE_pack(NULL, group->handle, &sh); 3002 break; 3003 } 3004 3005 default: 3006 unreachable("Invalid shader group type"); 3007 } 3008 } 3009 3010 *pPipeline = anv_pipeline_to_handle(&pipeline->base); 3011 3012 return pipeline->base.batch.status; 3013} 3014 3015VkResult 3016genX(CreateRayTracingPipelinesKHR)( 3017 VkDevice _device, 3018 VkDeferredOperationKHR deferredOperation, 3019 VkPipelineCache pipelineCache, 3020 uint32_t createInfoCount, 3021 const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, 3022 const VkAllocationCallbacks* pAllocator, 3023 VkPipeline* pPipelines) 3024{ 3025 
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache); 3026 3027 VkResult result = VK_SUCCESS; 3028 3029 unsigned i; 3030 for (i = 0; i < createInfoCount; i++) { 3031 VkResult res = ray_tracing_pipeline_create(_device, pipeline_cache, 3032 &pCreateInfos[i], 3033 pAllocator, &pPipelines[i]); 3034 3035 if (res == VK_SUCCESS) 3036 continue; 3037 3038 /* Bail out on the first error as it is not obvious what error should be 3039 * reported upon 2 different failures. */ 3040 result = res; 3041 if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT) 3042 break; 3043 3044 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) 3045 break; 3046 } 3047 3048 for (; i < createInfoCount; i++) 3049 pPipelines[i] = VK_NULL_HANDLE; 3050 3051 return result; 3052} 3053#endif /* GFX_VERx10 >= 125 */ 3054