radv_meta_decompress.c revision 7ec681f3
1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26 27#include "radv_meta.h" 28#include "radv_private.h" 29#include "sid.h" 30 31enum radv_depth_op { 32 DEPTH_DECOMPRESS, 33 DEPTH_RESUMMARIZE, 34}; 35 36static nir_shader * 37build_expand_depth_stencil_compute_shader(struct radv_device *dev) 38{ 39 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT); 40 41 nir_builder b = 42 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "expand_depth_stencil_compute"); 43 44 /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */ 45 b.shader->info.workgroup_size[0] = 8; 46 b.shader->info.workgroup_size[1] = 8; 47 b.shader->info.workgroup_size[2] = 1; 48 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img"); 49 input_img->data.descriptor_set = 0; 50 input_img->data.binding = 0; 51 52 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 53 output_img->data.descriptor_set = 0; 54 output_img->data.binding = 1; 55 56 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); 57 nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32); 58 nir_ssa_def *block_size = 59 nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1], 60 b.shader->info.workgroup_size[2], 0); 61 62 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); 63 64 nir_ssa_def *data = nir_image_deref_load( 65 &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), 66 nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); 67 68 /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid 69 * creating a vmcnt(0) because it expects the L1 cache to keep memory 70 * operations in-order for the same workgroup. The vmcnt(0) seems 71 * necessary however. */ 72 nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE, 73 .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo); 74 75 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, 76 nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0), 77 .image_dim = GLSL_SAMPLER_DIM_2D); 78 return b.shader; 79} 80 81static VkResult 82create_expand_depth_stencil_compute(struct radv_device *device) 83{ 84 VkResult result = VK_SUCCESS; 85 nir_shader *cs = build_expand_depth_stencil_compute_shader(device); 86 87 VkDescriptorSetLayoutCreateInfo ds_create_info = { 88 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 89 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 90 .bindingCount = 2, 91 .pBindings = (VkDescriptorSetLayoutBinding[]){ 92 {.binding = 0, 93 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 94 .descriptorCount = 1, 95 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 96 .pImmutableSamplers = NULL}, 97 {.binding = 1, 98 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 99 .descriptorCount = 1, 100 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 101 .pImmutableSamplers = NULL}, 102 }}; 103 104 result = radv_CreateDescriptorSetLayout( 105 radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc, 106 &device->meta_state.expand_depth_stencil_compute_ds_layout); 107 if (result != VK_SUCCESS) 108 goto cleanup; 109 110 VkPipelineLayoutCreateInfo pl_create_info = { 111 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 112 .setLayoutCount = 1, 113 .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout, 114 .pushConstantRangeCount = 0, 115 .pPushConstantRanges = NULL, 116 }; 117 118 result = radv_CreatePipelineLayout( 119 radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, 120 &device->meta_state.expand_depth_stencil_compute_p_layout); 121 if (result != VK_SUCCESS) 122 goto cleanup; 123 124 /* compute shader */ 125 126 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 128 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 129 .module = vk_shader_module_handle_from_nir(cs), 130 .pName = "main", 131 .pSpecializationInfo = NULL, 132 }; 133 134 VkComputePipelineCreateInfo vk_pipeline_info = { 135 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 136 .stage = pipeline_shader_stage, 137 .flags = 0, 138 .layout = device->meta_state.expand_depth_stencil_compute_p_layout, 139 }; 140 141 result = radv_CreateComputePipelines( 142 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 143 &vk_pipeline_info, NULL, 144 &device->meta_state.expand_depth_stencil_compute_pipeline); 145 if (result != VK_SUCCESS) 146 goto cleanup; 147 148cleanup: 149 ralloc_free(cs); 150 return result; 151} 152 153static VkResult 154create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass) 155{ 156 VkResult result; 157 VkDevice device_h = radv_device_to_handle(device); 158 const VkAllocationCallbacks *alloc = &device->meta_state.alloc; 159 VkAttachmentDescription2 attachment; 160 161 attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 162 attachment.pNext = NULL; 163 attachment.flags = 0; 164 attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT; 165 attachment.samples = samples; 166 attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 167 attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 168 attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 169 attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; 170 attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; 171 attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; 172 173 result = radv_CreateRenderPass2( 174 device_h, 175 &(VkRenderPassCreateInfo2){ 176 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 177 .attachmentCount = 1, 178 .pAttachments = &attachment, 179 .subpassCount = 1, 180 .pSubpasses = 181 &(VkSubpassDescription2){ 182 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 183 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 184 .inputAttachmentCount = 0, 185 .colorAttachmentCount = 0, 186 .pColorAttachments = NULL, 187 .pResolveAttachments = NULL, 188 .pDepthStencilAttachment = 189 &(VkAttachmentReference2){ 190 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 191 .attachment = 0, 192 .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 193 }, 194 .preserveAttachmentCount = 0, 195 .pPreserveAttachments = NULL, 196 }, 197 .dependencyCount = 2, 198 .pDependencies = 199 (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 200 .srcSubpass = VK_SUBPASS_EXTERNAL, 201 .dstSubpass = 0, 202 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 203 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 204 .srcAccessMask = 0, 205 .dstAccessMask = 0, 206 .dependencyFlags = 0}, 207 {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 208 .srcSubpass = 0, 209 .dstSubpass = VK_SUBPASS_EXTERNAL, 210 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 211 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 212 .srcAccessMask = 0, 213 .dstAccessMask = 0, 214 .dependencyFlags = 0}}, 215 }, 216 alloc, pass); 217 218 return result; 219} 220 221static VkResult 222create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout) 223{ 224 VkPipelineLayoutCreateInfo pl_create_info = { 225 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 226 .setLayoutCount = 0, 227 .pSetLayouts = NULL, 228 .pushConstantRangeCount = 0, 229 .pPushConstantRanges = NULL, 230 }; 231 232 return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 233 &device->meta_state.alloc, layout); 234} 235 236static VkResult 237create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass, 238 VkPipelineLayout layout, enum radv_depth_op op, VkPipeline *pipeline) 239{ 240 VkResult result; 241 VkDevice device_h = radv_device_to_handle(device); 242 243 mtx_lock(&device->meta_state.mtx); 244 if (*pipeline) { 245 mtx_unlock(&device->meta_state.mtx); 246 return VK_SUCCESS; 247 } 248 249 nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(); 250 nir_shader *fs_module = radv_meta_build_nir_fs_noop(); 251 252 if (!vs_module || !fs_module) { 253 /* XXX: Need more accurate error */ 254 result = VK_ERROR_OUT_OF_HOST_MEMORY; 255 goto cleanup; 256 } 257 258 const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = { 259 .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT, 260 .sampleLocationsEnable = false, 261 }; 262 263 const VkGraphicsPipelineCreateInfo pipeline_create_info = { 264 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 265 .stageCount = 2, 266 .pStages = 267 (VkPipelineShaderStageCreateInfo[]){ 268 { 269 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 270 .stage = VK_SHADER_STAGE_VERTEX_BIT, 271 .module = vk_shader_module_handle_from_nir(vs_module), 272 .pName = "main", 273 }, 274 { 275 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 276 .stage = VK_SHADER_STAGE_FRAGMENT_BIT, 277 .module = vk_shader_module_handle_from_nir(fs_module), 278 .pName = "main", 279 }, 280 }, 281 .pVertexInputState = 282 &(VkPipelineVertexInputStateCreateInfo){ 283 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 284 .vertexBindingDescriptionCount = 0, 285 .vertexAttributeDescriptionCount = 0, 286 }, 287 .pInputAssemblyState = 288 &(VkPipelineInputAssemblyStateCreateInfo){ 289 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 290 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 291 .primitiveRestartEnable = false, 292 }, 293 .pViewportState = 294 &(VkPipelineViewportStateCreateInfo){ 295 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 296 .viewportCount = 1, 297 .scissorCount = 1, 298 }, 299 .pRasterizationState = 300 &(VkPipelineRasterizationStateCreateInfo){ 301 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 302 .depthClampEnable = false, 303 .rasterizerDiscardEnable = false, 304 .polygonMode = VK_POLYGON_MODE_FILL, 305 .cullMode = VK_CULL_MODE_NONE, 306 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, 307 }, 308 .pMultisampleState = 309 &(VkPipelineMultisampleStateCreateInfo){ 310 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 311 .pNext = &sample_locs_create_info, 312 .rasterizationSamples = samples, 313 .sampleShadingEnable = false, 314 .pSampleMask = NULL, 315 .alphaToCoverageEnable = false, 316 .alphaToOneEnable = false, 317 }, 318 .pColorBlendState = 319 &(VkPipelineColorBlendStateCreateInfo){ 320 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 321 .logicOpEnable = false, 322 .attachmentCount = 0, 323 .pAttachments = NULL, 324 }, 325 .pDepthStencilState = 326 &(VkPipelineDepthStencilStateCreateInfo){ 327 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 328 .depthTestEnable = false, 329 .depthWriteEnable = false, 330 .depthBoundsTestEnable = false, 331 .stencilTestEnable = false, 332 }, 333 .pDynamicState = 334 &(VkPipelineDynamicStateCreateInfo){ 335 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 336 .dynamicStateCount = 3, 337 .pDynamicStates = 338 (VkDynamicState[]){ 339 VK_DYNAMIC_STATE_VIEWPORT, 340 VK_DYNAMIC_STATE_SCISSOR, 341 VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT, 342 }, 343 }, 344 .layout = layout, 345 .renderPass = pass, 346 .subpass = 0, 347 }; 348 349 struct radv_graphics_pipeline_create_info extra = { 350 .use_rectlist = true, 351 .depth_compress_disable = true, 352 .stencil_compress_disable = true, 353 .resummarize_enable = op == DEPTH_RESUMMARIZE, 354 }; 355 356 result = radv_graphics_pipeline_create( 357 device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info, 358 &extra, &device->meta_state.alloc, pipeline); 359 360cleanup: 361 ralloc_free(fs_module); 362 ralloc_free(vs_module); 363 mtx_unlock(&device->meta_state.mtx); 364 return result; 365} 366 367void 368radv_device_finish_meta_depth_decomp_state(struct radv_device *device) 369{ 370 struct radv_meta_state *state = &device->meta_state; 371 372 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { 373 radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass, 374 &state->alloc); 375 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout, 376 &state->alloc); 377 378 radv_DestroyPipeline(radv_device_to_handle(device), 379 state->depth_decomp[i].decompress_pipeline, &state->alloc); 380 radv_DestroyPipeline(radv_device_to_handle(device), 381 state->depth_decomp[i].resummarize_pipeline, &state->alloc); 382 } 383 384 radv_DestroyPipeline(radv_device_to_handle(device), 385 state->expand_depth_stencil_compute_pipeline, &state->alloc); 386 radv_DestroyPipelineLayout(radv_device_to_handle(device), 387 state->expand_depth_stencil_compute_p_layout, &state->alloc); 388 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), 389 state->expand_depth_stencil_compute_ds_layout, &state->alloc); 390} 391 392VkResult 393radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand) 394{ 395 struct radv_meta_state *state = &device->meta_state; 396 VkResult res = VK_SUCCESS; 397 398 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { 399 uint32_t samples = 1 << i; 400 401 res = create_pass(device, samples, &state->depth_decomp[i].pass); 402 if (res != VK_SUCCESS) 403 goto fail; 404 405 res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout); 406 if (res != VK_SUCCESS) 407 goto fail; 408 409 if (on_demand) 410 continue; 411 412 res = create_pipeline(device, samples, state->depth_decomp[i].pass, 413 state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS, 414 &state->depth_decomp[i].decompress_pipeline); 415 if (res != VK_SUCCESS) 416 goto fail; 417 418 res = create_pipeline(device, samples, state->depth_decomp[i].pass, 419 state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE, 420 &state->depth_decomp[i].resummarize_pipeline); 421 if (res != VK_SUCCESS) 422 goto fail; 423 } 424 425 res = create_expand_depth_stencil_compute(device); 426 if (res != VK_SUCCESS) 427 goto fail; 428 429 return VK_SUCCESS; 430 431fail: 432 radv_device_finish_meta_depth_decomp_state(device); 433 return res; 434} 435 436static VkPipeline * 437radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 438 const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op) 439{ 440 struct radv_meta_state *state = &cmd_buffer->device->meta_state; 441 uint32_t samples = image->info.samples; 442 uint32_t samples_log2 = ffs(samples) - 1; 443 VkPipeline *pipeline; 444 445 if (!state->depth_decomp[samples_log2].decompress_pipeline) { 446 VkResult ret; 447 448 ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass, 449 state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS, 450 &state->depth_decomp[samples_log2].decompress_pipeline); 451 if (ret != VK_SUCCESS) { 452 cmd_buffer->record_result = ret; 453 return NULL; 454 } 455 456 ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass, 457 state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE, 458 &state->depth_decomp[samples_log2].resummarize_pipeline); 459 if (ret != VK_SUCCESS) { 460 cmd_buffer->record_result = ret; 461 return NULL; 462 } 463 } 464 465 switch (op) { 466 case DEPTH_DECOMPRESS: 467 pipeline = &state->depth_decomp[samples_log2].decompress_pipeline; 468 break; 469 case DEPTH_RESUMMARIZE: 470 pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline; 471 break; 472 default: 473 unreachable("unknown operation"); 474 } 475 476 return pipeline; 477} 478 479static void 480radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 481 const VkImageSubresourceRange *range, int level, int layer) 482{ 483 struct radv_device *device = cmd_buffer->device; 484 struct radv_meta_state *state = &device->meta_state; 485 uint32_t samples_log2 = ffs(image->info.samples) - 1; 486 struct radv_image_view iview; 487 uint32_t width, height; 488 489 width = radv_minify(image->info.width, range->baseMipLevel + level); 490 height = radv_minify(image->info.height, range->baseMipLevel + level); 491 492 radv_image_view_init(&iview, device, 493 &(VkImageViewCreateInfo){ 494 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 495 .image = radv_image_to_handle(image), 496 .viewType = radv_meta_get_view_type(image), 497 .format = image->vk_format, 498 .subresourceRange = 499 { 500 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, 501 .baseMipLevel = range->baseMipLevel + level, 502 .levelCount = 1, 503 .baseArrayLayer = range->baseArrayLayer + layer, 504 .layerCount = 1, 505 }, 506 }, 507 NULL); 508 509 VkFramebuffer fb_h; 510 radv_CreateFramebuffer( 511 radv_device_to_handle(device), 512 &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 513 .attachmentCount = 1, 514 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)}, 515 .width = width, 516 .height = height, 517 .layers = 1}, 518 &cmd_buffer->pool->alloc, &fb_h); 519 520 radv_cmd_buffer_begin_render_pass(cmd_buffer, 521 &(VkRenderPassBeginInfo){ 522 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 523 .renderPass = state->depth_decomp[samples_log2].pass, 524 .framebuffer = fb_h, 525 .renderArea = {.offset = 526 { 527 0, 528 0, 529 }, 530 .extent = 531 { 532 width, 533 height, 534 }}, 535 .clearValueCount = 0, 536 .pClearValues = NULL, 537 }, 538 NULL); 539 radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]); 540 541 radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); 542 radv_cmd_buffer_end_render_pass(cmd_buffer); 543 544 radv_image_view_finish(&iview); 545 radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc); 546} 547 548static void 549radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 550 const VkImageSubresourceRange *subresourceRange, 551 struct radv_sample_locations_state *sample_locs, enum radv_depth_op op) 552{ 553 struct radv_meta_saved_state saved_state; 554 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); 555 VkPipeline *pipeline; 556 557 radv_meta_save( 558 &saved_state, cmd_buffer, 559 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS); 560 561 pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op); 562 563 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, 564 *pipeline); 565 566 if (sample_locs) { 567 assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); 568 569 /* Set the sample locations specified during explicit or 570 * automatic layout transitions, otherwise the depth decompress 571 * pass uses the default HW locations. 572 */ 573 radv_CmdSetSampleLocationsEXT(cmd_buffer_h, 574 &(VkSampleLocationsInfoEXT){ 575 .sampleLocationsPerPixel = sample_locs->per_pixel, 576 .sampleLocationGridSize = sample_locs->grid_size, 577 .sampleLocationsCount = sample_locs->count, 578 .pSampleLocations = sample_locs->locations, 579 }); 580 } 581 582 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) { 583 584 /* Do not decompress levels without HTILE. */ 585 if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l)) 586 continue; 587 588 uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l); 589 uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l); 590 591 radv_CmdSetViewport(cmd_buffer_h, 0, 1, 592 &(VkViewport){.x = 0, 593 .y = 0, 594 .width = width, 595 .height = height, 596 .minDepth = 0.0f, 597 .maxDepth = 1.0f}); 598 599 radv_CmdSetScissor(cmd_buffer_h, 0, 1, 600 &(VkRect2D){ 601 .offset = {0, 0}, 602 .extent = {width, height}, 603 }); 604 605 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) { 606 radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s); 607 } 608 } 609 610 radv_meta_restore(&saved_state, cmd_buffer); 611} 612 613static void 614radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 615 const VkImageSubresourceRange *subresourceRange) 616{ 617 struct radv_meta_saved_state saved_state; 618 struct radv_image_view load_iview = {0}; 619 struct radv_image_view store_iview = {0}; 620 struct radv_device *device = cmd_buffer->device; 621 622 assert(radv_image_is_tc_compat_htile(image)); 623 624 cmd_buffer->state.flush_bits |= 625 radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 626 627 radv_meta_save(&saved_state, cmd_buffer, 628 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE); 629 630 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 631 device->meta_state.expand_depth_stencil_compute_pipeline); 632 633 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) { 634 uint32_t width, height; 635 636 /* Do not decompress levels without HTILE. */ 637 if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l)) 638 continue; 639 640 width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l); 641 height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l); 642 643 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) { 644 radv_image_view_init( 645 &load_iview, cmd_buffer->device, 646 &(VkImageViewCreateInfo){ 647 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 648 .image = radv_image_to_handle(image), 649 .viewType = VK_IMAGE_VIEW_TYPE_2D, 650 .format = image->vk_format, 651 .subresourceRange = {.aspectMask = subresourceRange->aspectMask, 652 .baseMipLevel = subresourceRange->baseMipLevel + l, 653 .levelCount = 1, 654 .baseArrayLayer = subresourceRange->baseArrayLayer + s, 655 .layerCount = 1}, 656 }, 657 &(struct radv_image_view_extra_create_info){.enable_compression = true}); 658 radv_image_view_init( 659 &store_iview, cmd_buffer->device, 660 &(VkImageViewCreateInfo){ 661 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 662 .image = radv_image_to_handle(image), 663 .viewType = VK_IMAGE_VIEW_TYPE_2D, 664 .format = image->vk_format, 665 .subresourceRange = {.aspectMask = subresourceRange->aspectMask, 666 .baseMipLevel = subresourceRange->baseMipLevel + l, 667 .levelCount = 1, 668 .baseArrayLayer = subresourceRange->baseArrayLayer + s, 669 .layerCount = 1}, 670 }, 671 &(struct radv_image_view_extra_create_info){.disable_compression = true}); 672 673 radv_meta_push_descriptor_set( 674 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 675 device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */ 676 2, /* descriptorWriteCount */ 677 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 678 .dstBinding = 0, 679 .dstArrayElement = 0, 680 .descriptorCount = 1, 681 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 682 .pImageInfo = 683 (VkDescriptorImageInfo[]){ 684 { 685 .sampler = VK_NULL_HANDLE, 686 .imageView = radv_image_view_to_handle(&load_iview), 687 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 688 }, 689 }}, 690 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 691 .dstBinding = 1, 692 .dstArrayElement = 0, 693 .descriptorCount = 1, 694 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 695 .pImageInfo = (VkDescriptorImageInfo[]){ 696 { 697 .sampler = VK_NULL_HANDLE, 698 .imageView = radv_image_view_to_handle(&store_iview), 699 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 700 }, 701 }}}); 702 703 radv_unaligned_dispatch(cmd_buffer, width, height, 1); 704 705 radv_image_view_finish(&load_iview); 706 radv_image_view_finish(&store_iview); 707 } 708 } 709 710 radv_meta_restore(&saved_state, cmd_buffer); 711 712 cmd_buffer->state.flush_bits |= 713 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | 714 radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 715 716 /* Initialize the HTILE metadata as "fully expanded". */ 717 uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image); 718 719 cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value); 720} 721 722void 723radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 724 const VkImageSubresourceRange *subresourceRange, 725 struct radv_sample_locations_state *sample_locs) 726{ 727 struct radv_barrier_data barrier = {0}; 728 729 barrier.layout_transitions.depth_stencil_expand = 1; 730 radv_describe_layout_transition(cmd_buffer, &barrier); 731 732 if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { 733 radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS); 734 } else { 735 radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange); 736 } 737} 738 739void 740radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 741 const VkImageSubresourceRange *subresourceRange, 742 struct radv_sample_locations_state *sample_locs) 743{ 744 struct radv_barrier_data barrier = {0}; 745 746 barrier.layout_transitions.depth_stencil_resummarize = 1; 747 radv_describe_layout_transition(cmd_buffer, &barrier); 748 749 assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); 750 radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE); 751} 752