/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"

/* The three color decompression operations this file implements. They all run
 * through radv_process_color_image() and differ only in which meta pipeline is
 * bound, whether predication may be used, and whether CB flushes are needed.
 */
enum radv_color_op {
   FAST_CLEAR_ELIMINATE,
   FMASK_DECOMPRESS,
   DCC_DECOMPRESS,
};

/* Build the compute shader used for the DCC decompress fallback: it copies
 * each texel from a DCC-compressed view (binding 0) of the image to an
 * uncompressed view (binding 1) of the same image, which expands the DCC
 * surface in place. Dispatched via radv_unaligned_dispatch(), so out-of-range
 * invocations are handled by the dispatch helper.
 */
static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");

   /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
   b.shader->info.workgroup_size[0] = 16;
   b.shader->info.workgroup_size[1] = 16;
   b.shader->info.workgroup_size[2] = 1;

   /* Binding 0: compressed (load) view of the image. */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   /* Binding 1: uncompressed (store) view of the same image. */
   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   /* One invocation per texel; z/w of the coordinate are unused for 2D. */
   nir_ssa_def *global_id = get_global_ids(&b, 2);
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
                                     nir_channel(&b, global_id, 1),
                                     nir_ssa_undef(&b, 1, 32),
                                     nir_ssa_undef(&b, 1, 32));

   nir_ssa_def *data = nir_image_deref_load(
      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);

   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
    * creating a vmcnt(0) because it expects the L1 cache to keep memory
    * operations in-order for the same workgroup. The vmcnt(0) seems
    * necessary however.
    */
   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}

/* Create the descriptor-set layout, pipeline layout and compute pipeline for
 * the DCC decompress compute path. On failure, partially-created objects are
 * left in meta_state and are cleaned up later by
 * radv_device_finish_meta_fast_clear_flush_state().
 */
static VkResult
create_dcc_compress_compute(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;
   nir_shader *cs = build_dcc_decompress_compute_shader(device);

   /* Two storage-image bindings (load view / store view), pushed per dispatch
    * via push descriptors.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(
      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   result = radv_CreatePipelineLayout(
      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;
   /* NOTE(review): this goto is redundant — control falls through to the
    * cleanup label either way. Kept for symmetry with the other error paths.
    */

cleanup:
   /* The NIR shader is owned by this function; the pipeline keeps its own
    * copy, so it is freed on both the success and failure paths.
    */
   ralloc_free(cs);
   return result;
}

/* Create the single-subpass render pass shared by the three graphics
 * decompression pipelines. The attachment format is VK_FORMAT_UNDEFINED
 * because the actual format comes from the framebuffer's image view; load/
 * store ops preserve the existing contents since the draw only triggers the
 * CB's decompression logic.
 */
static VkResult
create_pass(struct radv_device *device)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
   VkAttachmentDescription2 attachment;

   attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
   attachment.pNext = NULL;
   attachment.format = VK_FORMAT_UNDEFINED;
   attachment.samples = 1;
   attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
   attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

   result = radv_CreateRenderPass2(
      device_h,
      &(VkRenderPassCreateInfo2){
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
         .attachmentCount = 1,
         .pAttachments = &attachment,
         .subpassCount = 1,
         .pSubpasses =
            &(VkSubpassDescription2){
               .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
               .inputAttachmentCount = 0,
               .colorAttachmentCount = 1,
               .pColorAttachments =
                  (VkAttachmentReference2[]){
                     {
                        .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
                        .attachment = 0,
                        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                     },
                  },
               .pResolveAttachments = NULL,
               .pDepthStencilAttachment =
                  &(VkAttachmentReference2){
                     .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
                     .attachment = VK_ATTACHMENT_UNUSED,
                  },
               .preserveAttachmentCount = 0,
               .pPreserveAttachments = NULL,
            },
         /* Trivial TOP->BOTTOM dependencies with no access masks: real
          * synchronization around these meta draws is handled explicitly via
          * cmd_buffer->state.flush_bits, not by the render pass.
          */
         .dependencyCount = 2,
         .pDependencies =
            (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
                                      .srcSubpass = VK_SUBPASS_EXTERNAL,
                                      .dstSubpass = 0,
                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                      .srcAccessMask = 0,
                                      .dstAccessMask = 0,
                                      .dependencyFlags = 0},
                                     {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
                                      .srcSubpass = 0,
                                      .dstSubpass = VK_SUBPASS_EXTERNAL,
                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                      .srcAccessMask = 0,
                                      .dstAccessMask = 0,
                                      .dependencyFlags = 0}},
      },
      alloc, &device->meta_state.fast_clear_flush.pass);

   return result;
}

/* Create the empty pipeline layout (no descriptors, no push constants) shared
 * by the three graphics decompression pipelines.
 */
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pSetLayouts = NULL,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                    &device->meta_state.alloc, layout);
}

/* Create the three graphics decompression pipelines (FCE, FMASK decompress,
 * DCC decompress). All three share the same shaders and fixed-function state;
 * they differ only in custom_blend_mode, which programs the CB hardware to
 * perform the corresponding decompression during the full-screen draw. The
 * fragment shader is a no-op: the CB does all the work.
 */
static VkResult
create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);

   nir_shader *fs_module = radv_meta_build_nir_fs_noop();

   if (!fs_module) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   const VkPipelineShaderStageCreateInfo stages[2] = {
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_VERTEX_BIT,
         .module = vs_module_h,
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
         .module = vk_shader_module_handle_from_nir(fs_module),
         .pName = "main",
      },
   };

   /* Vertices are generated in the VS; no vertex buffers. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   const VkPipelineInputAssemblyStateCreateInfo ia_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
      .primitiveRestartEnable = false,
   };

   const VkPipelineColorBlendStateCreateInfo blend_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = 1,
      .pAttachments = (VkPipelineColorBlendAttachmentState[]){
         {
            .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                              VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
         },
      }};
   const VkPipelineRasterizationStateCreateInfo rs_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
      .depthClampEnable = false,
      .rasterizerDiscardEnable = false,
      .polygonMode = VK_POLYGON_MODE_FILL,
      .cullMode = VK_CULL_MODE_NONE,
      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
   };

   /* Pipeline 1: fast-clear eliminate (CMASK FCE). */
   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &(VkGraphicsPipelineCreateInfo){
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .stageCount = 2,
         .pStages = stages,

         .pVertexInputState = &vi_state,
         .pInputAssemblyState = &ia_state,

         .pViewportState =
            &(VkPipelineViewportStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
               .viewportCount = 1,
               .scissorCount = 1,
            },
         .pRasterizationState = &rs_state,
         .pMultisampleState =
            &(VkPipelineMultisampleStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
               .rasterizationSamples = 1,
               .sampleShadingEnable = false,
               .pSampleMask = NULL,
               .alphaToCoverageEnable = false,
               .alphaToOneEnable = false,
            },
         .pColorBlendState = &blend_state,
         .pDynamicState =
            &(VkPipelineDynamicStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
               .dynamicStateCount = 2,
               .pDynamicStates =
                  (VkDynamicState[]){
                     VK_DYNAMIC_STATE_VIEWPORT,
                     VK_DYNAMIC_STATE_SCISSOR,
                  },
            },
         .layout = layout,
         .renderPass = device->meta_state.fast_clear_flush.pass,
         .subpass = 0,
      },
      &(struct radv_graphics_pipeline_create_info){
         .use_rectlist = true,
         .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
      },
      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* Pipeline 2: FMASK decompress. */
   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &(VkGraphicsPipelineCreateInfo){
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .stageCount = 2,
         .pStages = stages,

         .pVertexInputState = &vi_state,
         .pInputAssemblyState = &ia_state,

         .pViewportState =
            &(VkPipelineViewportStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
               .viewportCount = 1,
               .scissorCount = 1,
            },
         .pRasterizationState = &rs_state,
         .pMultisampleState =
            &(VkPipelineMultisampleStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
               .rasterizationSamples = 1,
               .sampleShadingEnable = false,
               .pSampleMask = NULL,
               .alphaToCoverageEnable = false,
               .alphaToOneEnable = false,
            },
         .pColorBlendState = &blend_state,
         .pDynamicState =
            &(VkPipelineDynamicStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
               .dynamicStateCount = 2,
               .pDynamicStates =
                  (VkDynamicState[]){
                     VK_DYNAMIC_STATE_VIEWPORT,
                     VK_DYNAMIC_STATE_SCISSOR,
                  },
            },
         .layout = layout,
         .renderPass = device->meta_state.fast_clear_flush.pass,
         .subpass = 0,
      },
      &(struct radv_graphics_pipeline_create_info){
         .use_rectlist = true,
         .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
      },
      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* Pipeline 3: DCC decompress (graphics path). */
   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &(VkGraphicsPipelineCreateInfo){
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .stageCount = 2,
         .pStages = stages,

         .pVertexInputState = &vi_state,
         .pInputAssemblyState = &ia_state,

         .pViewportState =
            &(VkPipelineViewportStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
               .viewportCount = 1,
               .scissorCount = 1,
            },
         .pRasterizationState = &rs_state,
         .pMultisampleState =
            &(VkPipelineMultisampleStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
               .rasterizationSamples = 1,
               .sampleShadingEnable = false,
               .pSampleMask = NULL,
               .alphaToCoverageEnable = false,
               .alphaToOneEnable = false,
            },
         .pColorBlendState = &blend_state,
         .pDynamicState =
            &(VkPipelineDynamicStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
               .dynamicStateCount = 2,
               .pDynamicStates =
                  (VkDynamicState[]){
                     VK_DYNAMIC_STATE_VIEWPORT,
                     VK_DYNAMIC_STATE_SCISSOR,
                  },
            },
         .layout = layout,
         .renderPass = device->meta_state.fast_clear_flush.pass,
         .subpass = 0,
      },
      &(struct radv_graphics_pipeline_create_info){
         .use_rectlist = true,
         .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
      },
      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* NOTE(review): redundant jump — execution falls into cleanup anyway. */
   goto cleanup;

cleanup:
   /* The FS NIR is owned here; the pipelines keep their own copies. */
   ralloc_free(fs_module);
   return result;
}

/* Destroy all fast-clear-flush meta objects. Safe to call on a partially
 * initialized state: the radv_Destroy* entry points tolerate
 * VK_NULL_HANDLE.
 */
void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipeline(radv_device_to_handle(device),
                        state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device),
                        state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device),
                        state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
   radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,
                          &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
                              &state->alloc);

   radv_DestroyPipeline(radv_device_to_handle(device),
                        state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->fast_clear_flush.dcc_decompress_compute_p_layout,
                              &state->alloc);
   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                   state->fast_clear_flush.dcc_decompress_compute_ds_layout,
                                   &state->alloc);
}

/* Create all fast-clear-flush meta objects under the meta-state mutex. Used
 * both for eager init at device creation and for lazy (on-demand) init; the
 * cmask_eliminate_pipeline handle doubles as the "already initialized" flag.
 * On failure everything created so far is torn down before returning.
 */
static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
{
   VkResult res = VK_SUCCESS;

   mtx_lock(&device->meta_state.mtx);
   if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
      /* Another thread already initialized the state. */
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
   if (!vs_module) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   res = create_pass(device);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
   if (res != VK_SUCCESS)
      goto fail;

   VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
   res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_dcc_compress_compute(device);
   if (res != VK_SUCCESS)
      goto fail;

   goto cleanup;

fail:
   radv_device_finish_meta_fast_clear_flush_state(device);

cleanup:
   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);

   return res;
}

/* Public entry point for device init. When on_demand is set, creation is
 * deferred until the first decompression actually needs the pipelines.
 */
VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
{
   if (on_demand)
      return VK_SUCCESS;

   return radv_device_init_meta_fast_clear_flush_state_internal(device);
}

/* Enable or disable GPU predication based on a 64-bit boolean stored in the
 * image's metadata at pred_offset. With value == false the predication VA is
 * 0, which si_emit_set_predication_state() treats as "disable predication".
 */
static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
                                           struct radv_image *image, uint64_t pred_offset,
                                           bool value)
{
   uint64_t va = 0;

   if (value) {
      va = radv_buffer_get_va(image->bo) + image->offset;
      va += pred_offset;
   }

   si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
}

/* Run the currently-bound decompression pipeline over one mip level / array
 * layer of the image: create a 1-layer view and framebuffer, begin the meta
 * render pass and draw a full-screen rectangle (3 vertices, rectlist).
 * flush_cb inserts CB flushes around the draw as required by
 * FMASK_DECOMPRESS / DCC_DECOMPRESS.
 */
static void
radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                               const VkImageSubresourceRange *range, int level, int layer,
                               bool flush_cb)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view iview;
   uint32_t width, height;

   width = radv_minify(image->info.width, range->baseMipLevel + level);
   height = radv_minify(image->info.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = image->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                 .baseMipLevel = range->baseMipLevel + level,
                                 .levelCount = 1,
                                 .baseArrayLayer = range->baseArrayLayer + layer,
                                 .layerCount = 1,
                              },
                        },
                        NULL);

   VkFramebuffer fb_h;
   radv_CreateFramebuffer(
      radv_device_to_handle(device),
      &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
                                 .attachmentCount = 1,
                                 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
                                 .width = width,
                                 .height = height,
                                 .layers = 1},
      &cmd_buffer->pool->alloc, &fb_h);

   radv_cmd_buffer_begin_render_pass(cmd_buffer,
                                     &(VkRenderPassBeginInfo){
                                        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
                                        .renderPass = device->meta_state.fast_clear_flush.pass,
                                        .framebuffer = fb_h,
                                        .renderArea = {.offset =
                                                          {
                                                             0,
                                                             0,
                                                          },
                                                       .extent =
                                                          {
                                                             width,
                                                             height,
                                                          }},
                                        .clearValueCount = 0,
                                        .pClearValues = NULL,
                                     },
                                     NULL);

   radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);

   /* CB must be flushed before the decompression draw when requested. */
   if (flush_cb)
      cmd_buffer->state.flush_bits |=
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);

   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);

   /* ... and again after it. */
   if (flush_cb)
      cmd_buffer->state.flush_bits |=
         radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);

   radv_cmd_buffer_end_render_pass(cmd_buffer);

   radv_image_view_finish(&iview);
   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}

/* Common driver for the three graphics decompression ops: pick the pipeline
 * and predication metadata for `op`, lazily initialize the meta state, set up
 * predication (so the draws are skipped when the image was never
 * fast-cleared), then iterate every requested mip level and layer. Finally
 * restore predication/meta state and update the image's FCE/DCC metadata.
 */
static void
radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_saved_state saved_state;
   bool old_predicating = false;
   bool flush_cb = false;
   uint64_t pred_offset;
   VkPipeline *pipeline;

   switch (op) {
   case FAST_CLEAR_ELIMINATE:
      pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
      pred_offset = image->fce_pred_offset;
      break;
   case FMASK_DECOMPRESS:
      pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
      pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */

      /* Flushing CB is required before and after FMASK_DECOMPRESS. */
      flush_cb = true;
      break;
   case DCC_DECOMPRESS:
      pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
      pred_offset = image->dcc_pred_offset;

      /* Flushing CB is required before and after DCC_DECOMPRESS. */
      flush_cb = true;
      break;
   default:
      unreachable("Invalid color op");
   }

   if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
       (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
        subresourceRange->baseArrayLayer != 0)) {
      /* Only use predication if the image has DCC with mipmaps or
       * if the range of layers covers the whole image because the
       * predication is based on mip level.
       */
      pred_offset = 0;
   }

   if (!*pipeline) {
      /* Lazily create the meta pipelines on first use. */
      VkResult ret;

      ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);

   if (pred_offset) {
      /* Predication metadata is stored per mip level, 8 bytes each. */
      pred_offset += 8 * subresourceRange->baseMipLevel;

      old_predicating = cmd_buffer->state.predicating;

      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
      cmd_buffer->state.predicating = true;
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        *pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
      uint32_t width, height;

      /* Do not decompress levels without DCC. */
      if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                          &(VkViewport){.x = 0,
                                        .y = 0,
                                        .width = width,
                                        .height = height,
                                        .minDepth = 0.0f,
                                        .maxDepth = 1.0f});

      radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                         &(VkRect2D){
                            .offset = {0, 0},
                            .extent = {width, height},
                         });

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
      }
   }

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;

   if (pred_offset) {
      pred_offset += 8 * subresourceRange->baseMipLevel;

      cmd_buffer->state.predicating = old_predicating;

      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);

      if (cmd_buffer->state.predication_type != -1) {
         /* Restore previous conditional rendering user state. */
         si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
                                       cmd_buffer->state.predication_op,
                                       cmd_buffer->state.predication_va);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
    * also perform a fast-clear eliminate.
    */
   radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);

   /* Mark the image as being decompressed. */
   if (op == DCC_DECOMPRESS)
      radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
}

/* Record a fast-clear eliminate pass (with perf-counter bookkeeping). */
static void
radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                          const VkImageSubresourceRange *subresourceRange)
{
   struct radv_barrier_data barrier = {0};

   barrier.layout_transitions.fast_clear_eliminate = 1;
   radv_describe_layout_transition(cmd_buffer, &barrier);

   radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
}

/* Record an FMASK decompress pass (with perf-counter bookkeeping). */
static void
radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                      const VkImageSubresourceRange *subresourceRange)
{
   struct radv_barrier_data barrier = {0};

   barrier.layout_transitions.fmask_decompress = 1;
   radv_describe_layout_transition(cmd_buffer, &barrier);

   radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
}

/* Flush compressed fast-clear state so the image is readable in place: FMASK
 * decompress for MSAA images without TC-compatible CMASK (preceded by an FCE
 * when DCC fast clears may exist), otherwise a plain fast-clear eliminate.
 */
void
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                    const VkImageSubresourceRange *subresourceRange)
{
   if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
      if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
         /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
          * FMASK_DECOMPRESS can't eliminate DCC fast clears.
          */
         radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
      }

      radv_fmask_decompress(cmd_buffer, image, subresourceRange);
   } else {
      /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
      if (image->support_comp_to_single)
         return;

      radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
   }
}

/* DCC decompress on the compute queue: for each mip/layer in range, copy the
 * image from a compression-enabled view to a compression-disabled view of
 * itself with the compute pipeline, then reset the DCC metadata to the
 * "fully expanded" value (0xffffffff).
 */
static void
radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                            const VkImageSubresourceRange *subresourceRange)
{
   struct radv_meta_saved_state saved_state;
   struct radv_image_view load_iview = {0};
   struct radv_image_view store_iview = {0};
   struct radv_device *device = cmd_buffer->device;

   /* Make prior writes to the image visible to the shader. */
   cmd_buffer->state.flush_bits |=
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   /* cmask_eliminate_pipeline is the "meta state initialized" flag. */
   if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
      VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
      uint32_t width, height;

      /* Do not decompress levels without DCC. */
      if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         /* Load view reads through DCC; store view writes uncompressed. */
         radv_image_view_init(
            &load_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.enable_compression = true});
         radv_image_view_init(
            &store_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.disable_compression = true});

         radv_meta_push_descriptor_set(
            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
            device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
            2, /* descriptorWriteCount */
            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 0,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo =
                                         (VkDescriptorImageInfo[]){
                                            {
                                               .sampler = VK_NULL_HANDLE,
                                               .imageView = radv_image_view_to_handle(&load_iview),
                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                            },
                                         }},
                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 1,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo = (VkDescriptorImageInfo[]){
                                         {
                                            .sampler = VK_NULL_HANDLE,
                                            .imageView = radv_image_view_to_handle(&store_iview),
                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                         },
                                      }}});

         radv_unaligned_dispatch(cmd_buffer, width, height, 1);

         radv_image_view_finish(&load_iview);
         radv_image_view_finish(&store_iview);
      }
   }

   /* Mark this image as actually being decompressed. */
   radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);

   radv_meta_restore(&saved_state, cmd_buffer);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   /* Initialize the DCC metadata as "fully expanded". */
   cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
}

/* Public DCC decompress entry point: use the graphics (CB-based) path on the
 * general queue, and the compute copy path on queues without graphics.
 */
void
radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                    const VkImageSubresourceRange *subresourceRange)
{
   struct radv_barrier_data barrier = {0};

   barrier.layout_transitions.dcc_decompress = 1;
   radv_describe_layout_transition(cmd_buffer, &barrier);

   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
      radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
   else
      radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
}