/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_guardband.h"

#if GFX_VER == 8
void
gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
   const VkViewport *viewports =
      cmd_buffer->state.gfx.dynamic.viewport.viewports;
   struct anv_state sf_clip_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);

   for (uint32_t i = 0; i < count; i++) {
      const VkViewport *vp = &viewports[i];

      /* The gfx7 state struct has just the matrix and guardband fields, the
       * gfx8 struct adds the min/max viewport fields. */
      struct GENX(SF_CLIP_VIEWPORT) sfv = {
         .ViewportMatrixElementm00 = vp->width / 2,
         .ViewportMatrixElementm11 = vp->height / 2,
         .ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth,
         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
         .ViewportMatrixElementm32 = vp->minDepth,
         .XMinClipGuardband = -1.0f,
         .XMaxClipGuardband = 1.0f,
         .YMinClipGuardband = -1.0f,
         .YMaxClipGuardband = 1.0f,
         .XMinViewPort = vp->x,
         .XMaxViewPort = vp->x + vp->width - 1,
         .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
         .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
      };
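
      /* A quick sanity check of the matrix above (illustrative, not from
       * the PRM): for a viewport of x = 0, y = 0, width = 1920,
       * height = 1080, minDepth = 0, maxDepth = 1, we get m00 = 960,
       * m11 = 540, m30 = 960, m31 = 540, so NDC x/y in [-1, 1] map onto
       * [0, 1920] x [0, 1080], while z passes through m22 = 1, m32 = 0
       * unchanged onto [0, 1].
       */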

      if (fb) {
         /* We can only calculate a "real" guardband clip if we know the
          * framebuffer at the time we emit the packet.  Otherwise, we have
          * to fall back to a worst-case guardband of [-1, 1].
          */
         intel_calculate_guardband_size(fb->width, fb->height,
                                        sfv.ViewportMatrixElementm00,
                                        sfv.ViewportMatrixElementm11,
                                        sfv.ViewportMatrixElementm30,
                                        sfv.ViewportMatrixElementm31,
                                        &sfv.XMinClipGuardband,
                                        &sfv.XMaxClipGuardband,
                                        &sfv.YMinClipGuardband,
                                        &sfv.YMaxClipGuardband);
      }

      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
   }

   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
      clip.SFClipViewportPointer = sf_clip_state.offset;
   }
}

void
gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
                                    bool depth_clamp_enable)
{
   uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
   const VkViewport *viewports =
      cmd_buffer->state.gfx.dynamic.viewport.viewports;
   struct anv_state cc_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);

   for (uint32_t i = 0; i < count; i++) {
      const VkViewport *vp = &viewports[i];

      /* From the Vulkan spec:
       *
       *    "It is valid for minDepth to be greater than or equal to
       *    maxDepth."
       */
      float min_depth = MIN2(vp->minDepth, vp->maxDepth);
      float max_depth = MAX2(vp->minDepth, vp->maxDepth);

      struct GENX(CC_VIEWPORT) cc_viewport = {
         .MinimumDepth = depth_clamp_enable ? min_depth : 0.0f,
         .MaximumDepth = depth_clamp_enable ? max_depth : 1.0f,
      };

      GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
   }

   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      cc.CCViewportPointer = cc_state.offset;
   }
}
#endif

void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   if (cmd_buffer->state.pma_fix_enabled == enable)
      return;

   cmd_buffer->state.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall.  However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthCacheFlushEnable = true;
      pc.CommandStreamerStallEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;

      /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
       */
      pc.DepthStallEnable = true;
#endif
   }
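
   /* Note on the LRIs below: the CACHE_MODE registers are masked registers,
    * so the ...Mask fields packed by anv_pack_struct() land in the upper
    * half of the DWord and select which of the low bits the write actually
    * updates; the remaining CACHE_MODE bits are left untouched.
    */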

#if GFX_VER == 9

   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_0_num);
      lri.DataDWord = cache_mode;
   }

#elif GFX_VER == 8

   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
                   .NPPMAFixEnable = enable,
                   .NPEarlyZFailsDisable = enable,
                   .NPPMAFixEnableMask = true,
                   .NPEarlyZFailsDisableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_1_num);
      lri.DataDWord = cache_mode;
   }

#endif /* GFX_VER == 8 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthStallEnable = true;
      pc.DepthCacheFlushEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;
#endif
   }
}
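
/* The two helpers below evaluate the PRM expressions quoted in their
 * comments against the currently bound pipeline and dynamic state;
 * genX(cmd_buffer_flush_dynamic_state) feeds their result into
 * genX(cmd_buffer_enable_pma_fix) whenever depth/stencil state is dirtied.
 */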

UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
{
   assert(GFX_VER == 8);

   /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
    *
    *    SW must set this bit in order to enable this fix when following
    *    expression is TRUE.
    *
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *    (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    *    (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
    *    !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
    *    (3DSTATE_PS_EXTRA::PixelShaderValid) &&
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *    (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
    *    (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *       3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *       3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *       3DSTATE_PS_BLEND::AlphaTestEnable ||
    *       3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *      3DSTATE_WM::ForceKillPix != ForceOff &&
    *      ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *        3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *       (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *        3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *        3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *     (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!cmd_buffer->state.hiz_enabled)
      return false;

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
   if (!pipeline->depth_test_enable)
      return false;

   /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *    3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *    3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *    3DSTATE_PS_BLEND::AlphaTestEnable ||
    *    3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *   3DSTATE_WM::ForceKillPix != ForceOff &&
    *   ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *     3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *    (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *     3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *     3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *  (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */
   return (pipeline->kill_pixel && (pipeline->writes_depth ||
                                    pipeline->writes_stencil)) ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
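
/* For example (our reading of the expression above): a pipeline whose
 * fragment shader uses discard (kill_pixel) with depth test and depth
 * writes enabled against a HiZ-enabled depth buffer, and which doesn't
 * force early fragment tests, passes every check and enables the fix; a
 * plain opaque pass with no kill and PSCDEPTH_OFF returns false.
 */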

UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
{
   if (GFX_VER > 9)
      return false;
   assert(GFX_VER == 9);

   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
    *
    *    Clearing this bit will force the STC cache to wait for pending
    *    retirement of pixels at the HZ-read stage and do the STC-test for
    *    Non-promoted, R-computed and Computed depth modes instead of
    *    postponing the STC-test to RCPFE.
    *
    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    *
    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    *
    *    COMP_STC_EN = STC_TEST_EN &&
    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
    *
    *    SW parses the pipeline states to generate the following logical
    *    signal indicating if PMA FIX can be enabled.
    *
    *    STC_PMA_OPT =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *       (COMP_STC_EN || STC_WRITE_EN) &&
    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *         3DSTATE_WM::ForceKillPix == ON ||
    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!cmd_buffer->state.hiz_enabled)
      return false;

   /* We can't possibly know if HiZ is enabled without the framebuffer */
   assert(cmd_buffer->state.framebuffer);

   /* HiZ is enabled so we had better have a depth buffer with HiZ */
   const struct anv_image_view *ds_iview =
      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
   assert(ds_iview && ds_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == 2) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    */
   const bool stc_test_en =
      (ds_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
      pipeline->stencil_test_enable;

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    */
   const bool stc_write_en =
      (ds_iview->image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
      (cmd_buffer->state.gfx.dynamic.stencil_write_mask.front ||
       cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) &&
      pipeline->writes_stencil;

   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;

   /* COMP_STC_EN || STC_WRITE_EN */
   if (!(comp_stc_en || stc_write_en))
      return false;

   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_WM::ForceKillPix == ON ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
    */
   return pipeline->kill_pixel ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
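
/* Each block in genX(cmd_buffer_flush_dynamic_state) below re-emits its
 * packet only when one of the ANV_CMD_DIRTY_* bits it tests is set.  For
 * instance, vkCmdSetLineWidth() only sets
 * ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH, so only the 3DSTATE_SF block re-packs
 * and re-emits.
 */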

void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      uint32_t topology;
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         topology = pipeline->topology;
      else
         topology = genX(vk_to_intel_primitive_type)[d->primitive_topology];

      cmd_buffer->state.gfx.primitive_topology = topology;

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
         vft.PrimitiveTopologyType = topology;
      }
   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
      struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
      };
#if GFX_VER == 8
      if (cmd_buffer->device->info.is_cherryview) {
         sf.CHVLineWidth = d->line_width;
      } else {
         sf.LineWidth = d->line_width;
      }
#else
      sf.LineWidth = d->line_width;
#endif
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |
                                      ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
                                      ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
      /* Take dynamic primitive topology into account with
       *    3DSTATE_RASTER::APIMode
       *    3DSTATE_RASTER::DXMultisampleRasterizationEnable
       *    3DSTATE_RASTER::AntialiasingEnable
       */
      uint32_t api_mode = 0;
      bool msaa_raster_enable = false;
      bool aa_enable = false;

      if (cmd_buffer->state.gfx.pipeline->dynamic_states &
          ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
         VkPrimitiveTopology primitive_topology =
            cmd_buffer->state.gfx.dynamic.primitive_topology;

         VkPolygonMode dynamic_raster_mode =
            genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                      primitive_topology);

         genX(rasterization_mode)(
            dynamic_raster_mode, pipeline->line_mode, d->line_width,
            &api_mode, &msaa_raster_enable);

         aa_enable =
            anv_rasterization_aa_mode(dynamic_raster_mode,
                                      pipeline->line_mode);
      }

      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
      struct GENX(3DSTATE_RASTER) raster = {
         GENX(3DSTATE_RASTER_header),
         .APIMode = api_mode,
         .DXMultisampleRasterizationEnable = msaa_raster_enable,
         .AntialiasingEnable = aa_enable,
         .GlobalDepthOffsetConstant = d->depth_bias.bias,
         .GlobalDepthOffsetScale = d->depth_bias.slope,
         .GlobalDepthOffsetClamp = d->depth_bias.clamp,
         .CullMode = genX(vk_to_intel_cullmode)[d->cull_mode],
         .FrontWinding = genX(vk_to_intel_front_face)[d->front_face],
         .GlobalDepthOffsetEnableSolid = d->depth_bias_enable,
         .GlobalDepthOffsetEnableWireframe = d->depth_bias_enable,
         .GlobalDepthOffsetEnablePoint = d->depth_bias_enable,
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gfx8.raster);
   }
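
   /* anv_batch_emit_merge() ORs the DWords packed here with the partial
    * packet the pipeline baked at creation time (pipeline->gfx8.raster
    * above), which is how static pipeline state and dynamic state end up
    * sharing a single hardware packet.
    */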

   /* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gfx9.  That means the dirty bits get split
    * across different state packets for gfx8 and gfx9.  We handle that by
    * using a big old #if switch here.
    */
#if GFX_VER == 8
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = d->blend_constants[0],
         .BlendConstantColorGreen = d->blend_constants[1],
         .BlendConstantColorBlue = d->blend_constants[2],
         .BlendConstantColorAlpha = d->blend_constants[3],
         .StencilReferenceValue = d->stencil_reference.front & 0xff,
         .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_RENDER_TARGETS |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) {
      uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];

      struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
         GENX(3DSTATE_WM_DEPTH_STENCIL_header),

         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
         .StencilWriteMask = d->stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,

         .StencilBufferWriteEnable =
            (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
            d->stencil_test_enable,

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
         .DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
         .StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op],
         .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op],
         .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op],
         .StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op],
         .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op],
         .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op],
         .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op],
         .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op],
      };
      GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
                                          &wm_depth_stencil);

      anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
                           pipeline->gfx8.wm_depth_stencil);

      genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
                                      want_depth_pma_fix(cmd_buffer));
   }
#else
   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = d->blend_constants[0],
         .BlendConstantColorGreen = d->blend_constants[1],
         .BlendConstantColorBlue = d->blend_constants[2],
         .BlendConstantColorAlpha = d->blend_constants[3],
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_RENDER_TARGETS |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) {
      uint32_t dwords[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
      struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
         GENX(3DSTATE_WM_DEPTH_STENCIL_header),

         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
         .StencilWriteMask = d->stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,

         .StencilReferenceValue = d->stencil_reference.front & 0xff,
         .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,

         .StencilBufferWriteEnable =
            (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
            d->stencil_test_enable,

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
         .DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
         .StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op],
         .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op],
         .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op],
         .StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op],
         .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op],
         .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op],
         .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op],
         .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op],
      };
      GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);

      anv_batch_emit_merge(&cmd_buffer->batch, dwords,
                           pipeline->gfx9.wm_depth_stencil);

      genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
                                      want_stencil_pma_fix(cmd_buffer));
   }
#endif

#if GFX_VER >= 12
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
         db.DepthBoundsTestValueModifyDisable = false;
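
   /* We leave both ModifyDisable fields false above so the packet rewrites
    * the enable bit and the min/max values every time; judging by the field
    * names, setting them would tell the hardware to keep its current values
    * for those fields instead.
    */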
         db.DepthBoundsTestEnableModifyDisable = false;
         db.DepthBoundsTestEnable = d->depth_bounds_test_enable;
         db.DepthBoundsTestMinValue = d->depth_bounds.min;
         db.DepthBoundsTestMaxValue = d->depth_bounds.max;
      }
   }
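#endif

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         ls.LineStipplePattern = d->line_stipple.pattern;
         ls.LineStippleInverseRepeatCount =
            1.0f / MAX2(1, d->line_stipple.factor);
         ls.LineStippleRepeatCount = d->line_stipple.factor;
      }
   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_INDEX_BUFFER |
                                      ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
         vf.IndexedDrawCutIndexEnable = d->primitive_restart_enable;
         vf.CutIndex = cmd_buffer->state.restart_index;
      }
   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
      genX(emit_sample_pattern)(&cmd_buffer->batch,
                                cmd_buffer->state.gfx.dynamic.sample_locations.samples,
                                cmd_buffer->state.gfx.dynamic.sample_locations.locations);
   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE ||
       cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP) {
      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
      /* Re-emit 3DSTATE_WM in the hope that we can avoid spawning fragment
       * shader threads.
       */
      bool dirty_color_blend =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;

      if (dirty_color_blend) {
         uint32_t dwords[MAX2(GENX(3DSTATE_WM_length),
                              GENX(3DSTATE_PS_BLEND_length))];
         struct GENX(3DSTATE_WM) wm = {
            GENX(3DSTATE_WM_header),

            .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
                                          !color_writes) ? ForceON : 0,
         };
         GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);

         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm);

         /* Re-emit 3DSTATE_PS_BLEND to be consistent with the rest of the
          * BLEND_STATE_ENTRY.
          */
         struct GENX(3DSTATE_PS_BLEND) ps_blend = {
            GENX(3DSTATE_PS_BLEND_header),
            .HasWriteableRT = color_writes != 0,
         };
         GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend);
         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend);
      }

      /* Blend states of each RT */
      uint32_t surface_count = 0;
      struct anv_pipeline_bind_map *map;
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
         surface_count = map->surface_count;
      }

      uint32_t blend_dws[GENX(BLEND_STATE_length) +
                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
      uint32_t *dws = blend_dws;
      memset(blend_dws, 0, sizeof(blend_dws));

      /* Skip over the BLEND_STATE header; only the per-RT entries are
       * packed here.
       */
      dws += GENX(BLEND_STATE_length);

      bool dirty_logic_op =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;

      for (uint32_t i = 0; i < surface_count; i++) {
         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
         bool write_disabled =
            dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
         struct GENX(BLEND_STATE_ENTRY) entry = {
            .WriteDisableAlpha = write_disabled,
            .WriteDisableRed = write_disabled,
            .WriteDisableGreen = write_disabled,
            .WriteDisableBlue = write_disabled,
            .LogicOpFunction =
               dirty_logic_op ? genX(vk_to_intel_logic_op)[d->logic_op] : 0,
         };
         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
         dws += GENX(BLEND_STATE_ENTRY_length);
      }

      uint32_t num_dwords = GENX(BLEND_STATE_length) +
                            GENX(BLEND_STATE_ENTRY_length) * surface_count;

      struct anv_state blend_states =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
                                      pipeline->gfx8.blend_state, num_dwords, 64);
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer = blend_states.offset;
         bsp.BlendStatePointerValid = true;
      }
   }

   /* blend_dws above holds a full BLEND_STATE: the header DWords followed
    * by one BLEND_STATE_ENTRY per bound render target.  Only the entries
    * are packed here; anv_cmd_buffer_merge_dynamic() ORs the array with the
    * pipeline's pre-packed gfx8.blend_state before uploading it.
    */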

#if GFX_VER >= 11
   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {
      struct anv_state cps_states = ANV_STATE_NULL;

#if GFX_VER >= 12
      uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
      cps_states =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(CPS_STATE_length) * 4 * count,
                                            32);
#endif /* GFX_VER >= 12 */

      genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states,
                              &cmd_buffer->state.gfx.dynamic);
   }
#endif /* GFX_VER >= 11 */

   cmd_buffer->state.gfx.dirty = 0;
}

static uint32_t
vk_to_intel_index_type(VkIndexType type)
{
   switch (type) {
   case VK_INDEX_TYPE_UINT8_EXT:
      return INDEX_BYTE;
   case VK_INDEX_TYPE_UINT16:
      return INDEX_WORD;
   case VK_INDEX_TYPE_UINT32:
      return INDEX_DWORD;
   default:
      unreachable("invalid index type");
   }
}

static uint32_t
restart_index_for_type(VkIndexType type)
{
   /* The primitive restart sentinel is the all-ones value for the bound
    * index width, e.g. 0xffff for VK_INDEX_TYPE_UINT16.
    */
   switch (type) {
   case VK_INDEX_TYPE_UINT8_EXT:
      return UINT8_MAX;
   case VK_INDEX_TYPE_UINT16:
      return UINT16_MAX;
   case VK_INDEX_TYPE_UINT32:
      return UINT32_MAX;
   default:
      unreachable("invalid index type");
   }
}

void genX(CmdBindIndexBuffer)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   cmd_buffer->state.restart_index = restart_index_for_type(indexType);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
      ib.IndexFormat = vk_to_intel_index_type(indexType);
      ib.MOCS = anv_mocs(cmd_buffer->device,
                         buffer->address.bo,
                         ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
      ib.L3BypassDisable = true;
#endif
      ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
      /* Bytes remaining in the buffer from the bind offset to its end */
      ib.BufferSize = buffer->size - offset;
   }

   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
}