/* gfx7_cmd_buffer.c revision 7ec681f3 */
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26#include <string.h> 27#include <unistd.h> 28#include <fcntl.h> 29 30#include "anv_private.h" 31#include "vk_format.h" 32 33#include "genxml/gen_macros.h" 34#include "genxml/genX_pack.h" 35 36#if GFX_VERx10 == 70 37static int64_t 38clamp_int64(int64_t x, int64_t min, int64_t max) 39{ 40 if (x < min) 41 return min; 42 else if (x < max) 43 return x; 44 else 45 return max; 46} 47 48void 49gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) 50{ 51 struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; 52 uint32_t count = cmd_buffer->state.gfx.dynamic.scissor.count; 53 const VkRect2D *scissors = cmd_buffer->state.gfx.dynamic.scissor.scissors; 54 55 /* Wa_1409725701: 56 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is 57 * stored as an array of up to 16 elements. The location of first 58 * element of the array, as specified by Pointer to SCISSOR_RECT, should 59 * be aligned to a 64-byte boundary. 60 */ 61 uint32_t alignment = 64; 62 struct anv_state scissor_state = 63 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment); 64 65 for (uint32_t i = 0; i < count; i++) { 66 const VkRect2D *s = &scissors[i]; 67 68 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or 69 * ymax < ymin for empty clips. In case clip x, y, width height are all 70 * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't 71 * what we want. Just special case empty clips and produce a canonical 72 * empty clip. 
*/ 73 static const struct GFX7_SCISSOR_RECT empty_scissor = { 74 .ScissorRectangleYMin = 1, 75 .ScissorRectangleXMin = 1, 76 .ScissorRectangleYMax = 0, 77 .ScissorRectangleXMax = 0 78 }; 79 80 const int max = 0xffff; 81 82 uint32_t y_min = s->offset.y; 83 uint32_t x_min = s->offset.x; 84 uint32_t y_max = s->offset.y + s->extent.height - 1; 85 uint32_t x_max = s->offset.x + s->extent.width - 1; 86 87 /* Do this math using int64_t so overflow gets clamped correctly. */ 88 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 89 y_min = clamp_int64((uint64_t) y_min, 90 cmd_buffer->state.render_area.offset.y, max); 91 x_min = clamp_int64((uint64_t) x_min, 92 cmd_buffer->state.render_area.offset.x, max); 93 y_max = clamp_int64((uint64_t) y_max, 0, 94 cmd_buffer->state.render_area.offset.y + 95 cmd_buffer->state.render_area.extent.height - 1); 96 x_max = clamp_int64((uint64_t) x_max, 0, 97 cmd_buffer->state.render_area.offset.x + 98 cmd_buffer->state.render_area.extent.width - 1); 99 } else if (fb) { 100 y_min = clamp_int64((uint64_t) y_min, 0, max); 101 x_min = clamp_int64((uint64_t) x_min, 0, max); 102 y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1); 103 x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1); 104 } 105 106 struct GFX7_SCISSOR_RECT scissor = { 107 .ScissorRectangleYMin = y_min, 108 .ScissorRectangleXMin = x_min, 109 .ScissorRectangleYMax = y_max, 110 .ScissorRectangleXMax = x_max 111 }; 112 113 if (s->extent.width <= 0 || s->extent.height <= 0) { 114 GFX7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, 115 &empty_scissor); 116 } else { 117 GFX7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); 118 } 119 } 120 121 anv_batch_emit(&cmd_buffer->batch, 122 GFX7_3DSTATE_SCISSOR_STATE_POINTERS, ssp) { 123 ssp.ScissorRectPointer = scissor_state.offset; 124 } 125} 126#endif 127 128static uint32_t vk_to_intel_index_type(VkIndexType type) 129{ 130 switch (type) { 131 case VK_INDEX_TYPE_UINT8_EXT: 132 return INDEX_BYTE; 133 case 
VK_INDEX_TYPE_UINT16: 134 return INDEX_WORD; 135 case VK_INDEX_TYPE_UINT32: 136 return INDEX_DWORD; 137 default: 138 unreachable("invalid index type"); 139 } 140} 141 142static uint32_t restart_index_for_type(VkIndexType type) 143{ 144 switch (type) { 145 case VK_INDEX_TYPE_UINT8_EXT: 146 return UINT8_MAX; 147 case VK_INDEX_TYPE_UINT16: 148 return UINT16_MAX; 149 case VK_INDEX_TYPE_UINT32: 150 return UINT32_MAX; 151 default: 152 unreachable("invalid index type"); 153 } 154} 155 156void genX(CmdBindIndexBuffer)( 157 VkCommandBuffer commandBuffer, 158 VkBuffer _buffer, 159 VkDeviceSize offset, 160 VkIndexType indexType) 161{ 162 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); 163 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); 164 165 cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; 166 if (GFX_VERx10 == 75) 167 cmd_buffer->state.restart_index = restart_index_for_type(indexType); 168 cmd_buffer->state.gfx.gfx7.index_buffer = buffer; 169 cmd_buffer->state.gfx.gfx7.index_type = vk_to_intel_index_type(indexType); 170 cmd_buffer->state.gfx.gfx7.index_offset = offset; 171} 172 173static uint32_t 174get_depth_format(struct anv_cmd_buffer *cmd_buffer) 175{ 176 const struct anv_render_pass *pass = cmd_buffer->state.pass; 177 const struct anv_subpass *subpass = cmd_buffer->state.subpass; 178 179 if (!subpass->depth_stencil_attachment) 180 return D16_UNORM; 181 182 struct anv_render_pass_attachment *att = 183 &pass->attachments[subpass->depth_stencil_attachment->attachment]; 184 185 switch (att->format) { 186 case VK_FORMAT_D16_UNORM: 187 case VK_FORMAT_D16_UNORM_S8_UINT: 188 return D16_UNORM; 189 190 case VK_FORMAT_X8_D24_UNORM_PACK32: 191 case VK_FORMAT_D24_UNORM_S8_UINT: 192 return D24_UNORM_X8_UINT; 193 194 case VK_FORMAT_D32_SFLOAT: 195 case VK_FORMAT_D32_SFLOAT_S8_UINT: 196 return D32_FLOAT; 197 198 default: 199 return D16_UNORM; 200 } 201} 202 203void 204genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) 205{ 206 struct 
anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 207 struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic; 208 209 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) { 210 uint32_t topology; 211 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) 212 topology = pipeline->topology; 213 else 214 topology = genX(vk_to_intel_primitive_type)[d->primitive_topology]; 215 216 cmd_buffer->state.gfx.primitive_topology = topology; 217 } 218 219 if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | 220 ANV_CMD_DIRTY_RENDER_TARGETS | 221 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | 222 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | 223 ANV_CMD_DIRTY_DYNAMIC_CULL_MODE | 224 ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE | 225 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE | 226 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) { 227 /* Take dynamic primitive topology in to account with 228 * 3DSTATE_SF::MultisampleRasterizationMode 229 */ 230 uint32_t ms_rast_mode = 0; 231 232 if (cmd_buffer->state.gfx.pipeline->dynamic_states & 233 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) { 234 VkPrimitiveTopology primitive_topology = 235 cmd_buffer->state.gfx.dynamic.primitive_topology; 236 237 VkPolygonMode dynamic_raster_mode = 238 genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline, 239 primitive_topology); 240 241 ms_rast_mode = 242 genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode); 243 } 244 245 uint32_t sf_dw[GENX(3DSTATE_SF_length)]; 246 struct GENX(3DSTATE_SF) sf = { 247 GENX(3DSTATE_SF_header), 248 .DepthBufferSurfaceFormat = get_depth_format(cmd_buffer), 249 .LineWidth = d->line_width, 250 .GlobalDepthOffsetConstant = d->depth_bias.bias, 251 .GlobalDepthOffsetScale = d->depth_bias.slope, 252 .GlobalDepthOffsetClamp = d->depth_bias.clamp, 253 .FrontWinding = genX(vk_to_intel_front_face)[d->front_face], 254 .CullMode = genX(vk_to_intel_cullmode)[d->cull_mode], 255 .GlobalDepthOffsetEnableSolid = d->depth_bias_enable, 256 .GlobalDepthOffsetEnableWireframe = 
d->depth_bias_enable, 257 .GlobalDepthOffsetEnablePoint = d->depth_bias_enable, 258 .MultisampleRasterizationMode = ms_rast_mode, 259 }; 260 GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); 261 262 anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx7.sf); 263 } 264 265 if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | 266 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { 267 struct anv_state cc_state = 268 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 269 GENX(COLOR_CALC_STATE_length) * 4, 270 64); 271 struct GENX(COLOR_CALC_STATE) cc = { 272 .BlendConstantColorRed = d->blend_constants[0], 273 .BlendConstantColorGreen = d->blend_constants[1], 274 .BlendConstantColorBlue = d->blend_constants[2], 275 .BlendConstantColorAlpha = d->blend_constants[3], 276 .StencilReferenceValue = d->stencil_reference.front & 0xff, 277 .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, 278 }; 279 GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); 280 281 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { 282 ccp.ColorCalcStatePointer = cc_state.offset; 283 } 284 } 285 286 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) { 287 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) { 288 ls.LineStipplePattern = d->line_stipple.pattern; 289 ls.LineStippleInverseRepeatCount = 290 1.0f / MAX2(1, d->line_stipple.factor); 291 ls.LineStippleRepeatCount = d->line_stipple.factor; 292 } 293 } 294 295 if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | 296 ANV_CMD_DIRTY_RENDER_TARGETS | 297 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | 298 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | 299 ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | 300 ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | 301 ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | 302 ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | 303 ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) { 304 uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; 305 306 struct 
GENX(DEPTH_STENCIL_STATE) depth_stencil = { 307 .StencilTestMask = d->stencil_compare_mask.front & 0xff, 308 .StencilWriteMask = d->stencil_write_mask.front & 0xff, 309 310 .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, 311 .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, 312 313 .StencilBufferWriteEnable = 314 (d->stencil_write_mask.front || d->stencil_write_mask.back) && 315 d->stencil_test_enable, 316 317 .DepthTestEnable = d->depth_test_enable, 318 .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable, 319 .DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op], 320 .StencilTestEnable = d->stencil_test_enable, 321 .StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op], 322 .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op], 323 .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op], 324 .StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op], 325 .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op], 326 .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op], 327 .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op], 328 .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op], 329 }; 330 GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); 331 332 struct anv_state ds_state = 333 anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, 334 pipeline->gfx7.depth_stencil_state, 335 GENX(DEPTH_STENCIL_STATE_length), 64); 336 337 anv_batch_emit(&cmd_buffer->batch, 338 GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) { 339 dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset; 340 } 341 } 342 343 if (cmd_buffer->state.gfx.gfx7.index_buffer && 344 cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | 345 ANV_CMD_DIRTY_INDEX_BUFFER | 
346 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)) { 347 struct anv_buffer *buffer = cmd_buffer->state.gfx.gfx7.index_buffer; 348 uint32_t offset = cmd_buffer->state.gfx.gfx7.index_offset; 349 350#if GFX_VERx10 == 75 351 anv_batch_emit(&cmd_buffer->batch, GFX75_3DSTATE_VF, vf) { 352 vf.IndexedDrawCutIndexEnable = d->primitive_restart_enable; 353 vf.CutIndex = cmd_buffer->state.restart_index; 354 } 355#endif 356 357 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { 358#if GFX_VERx10 != 75 359 ib.CutIndexEnable = d->primitive_restart_enable; 360#endif 361 ib.IndexFormat = cmd_buffer->state.gfx.gfx7.index_type; 362 ib.MOCS = anv_mocs(cmd_buffer->device, 363 buffer->address.bo, 364 ISL_SURF_USAGE_INDEX_BUFFER_BIT); 365 366 ib.BufferStartingAddress = anv_address_add(buffer->address, offset); 367 ib.BufferEndingAddress = anv_address_add(buffer->address, 368 buffer->size); 369 } 370 } 371 372 /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders 373 * threads or if we have dirty dynamic primitive topology state and 374 * need to toggle 3DSTATE_WM::MultisampleRasterizationMode dynamically. 
375 */ 376 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE || 377 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) { 378 const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; 379 380 bool dirty_color_blend = 381 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; 382 383 bool dirty_primitive_topology = 384 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; 385 386 VkPolygonMode dynamic_raster_mode; 387 VkPrimitiveTopology primitive_topology = 388 cmd_buffer->state.gfx.dynamic.primitive_topology; 389 dynamic_raster_mode = 390 genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline, 391 primitive_topology); 392 393 if (dirty_color_blend || dirty_primitive_topology) { 394 uint32_t dwords[GENX(3DSTATE_WM_length)]; 395 struct GENX(3DSTATE_WM) wm = { 396 GENX(3DSTATE_WM_header), 397 398 .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch || 399 color_writes, 400 .MultisampleRasterizationMode = 401 genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode), 402 }; 403 GENX(3DSTATE_WM_pack)(NULL, dwords, &wm); 404 405 anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm); 406 } 407 408 } 409 410 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) { 411 genX(emit_multisample)(&cmd_buffer->batch, 412 cmd_buffer->state.gfx.dynamic.sample_locations.samples, 413 cmd_buffer->state.gfx.dynamic.sample_locations.locations); 414 } 415 416 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE || 417 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP) { 418 const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; 419 bool dirty_color_blend = 420 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; 421 422 /* Blend states of each RT */ 423 uint32_t surface_count = 0; 424 struct anv_pipeline_bind_map *map; 425 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) 
{ 426 map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; 427 surface_count = map->surface_count; 428 } 429 430 uint32_t blend_dws[GENX(BLEND_STATE_length) + 431 MAX_RTS * GENX(BLEND_STATE_ENTRY_length)]; 432 uint32_t *dws = blend_dws; 433 memset(blend_dws, 0, sizeof(blend_dws)); 434 435 /* Skip this part */ 436 dws += GENX(BLEND_STATE_length); 437 438 bool dirty_logic_op = 439 cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP; 440 441 for (uint32_t i = 0; i < surface_count; i++) { 442 struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; 443 bool write_disabled = 444 dirty_color_blend && (color_writes & (1u << binding->index)) == 0; 445 struct GENX(BLEND_STATE_ENTRY) entry = { 446 .WriteDisableAlpha = write_disabled, 447 .WriteDisableRed = write_disabled, 448 .WriteDisableGreen = write_disabled, 449 .WriteDisableBlue = write_disabled, 450 .LogicOpFunction = 451 dirty_logic_op ? genX(vk_to_intel_logic_op)[d->logic_op] : 0, 452 }; 453 GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry); 454 dws += GENX(BLEND_STATE_ENTRY_length); 455 } 456 457 uint32_t num_dwords = GENX(BLEND_STATE_length) + 458 GENX(BLEND_STATE_ENTRY_length) * surface_count; 459 460 struct anv_state blend_states = 461 anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws, 462 pipeline->gfx7.blend_state, num_dwords, 64); 463 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { 464 bsp.BlendStatePointer = blend_states.offset; 465 } 466 } 467 468 cmd_buffer->state.gfx.dirty = 0; 469} 470 471void 472genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, 473 bool enable) 474{ 475 /* The NP PMA fix doesn't exist on gfx7 */ 476} 477