radv_meta_clear.c revision 7ec681f3
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir/nir_builder.h" 25#include "radv_debug.h" 26#include "radv_meta.h" 27#include "radv_private.h" 28 29#include "util/format_rgb9e5.h" 30#include "vk_format.h" 31 32enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST }; 33 34static void 35build_color_shaders(struct nir_shader **out_vs, struct nir_shader **out_fs, uint32_t frag_output) 36{ 37 nir_builder vs_b = 38 nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs"); 39 nir_builder fs_b = 40 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs"); 41 42 const struct glsl_type *position_type = glsl_vec4_type(); 43 const struct glsl_type *color_type = glsl_vec4_type(); 44 45 nir_variable *vs_out_pos = 46 nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position"); 47 vs_out_pos->data.location = VARYING_SLOT_POS; 48 49 nir_ssa_def *in_color_load = 50 nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16); 51 52 nir_variable *fs_out_color = 53 nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); 54 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; 55 56 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf); 57 58 nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b); 59 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); 60 61 const struct glsl_type *layer_type = glsl_int_type(); 62 nir_variable *vs_out_layer = 63 nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); 64 vs_out_layer->data.location = VARYING_SLOT_LAYER; 65 vs_out_layer->data.interpolation = INTERP_MODE_FLAT; 66 nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); 67 nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); 68 69 nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); 70 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); 71 72 *out_vs = vs_b.shader; 73 *out_fs = fs_b.shader; 74} 75 76static VkResult 77create_pipeline(struct radv_device *device, struct radv_render_pass *render_pass, uint32_t samples, 78 struct nir_shader *vs_nir, struct nir_shader *fs_nir, 79 const VkPipelineVertexInputStateCreateInfo *vi_state, 80 const VkPipelineDepthStencilStateCreateInfo *ds_state, 81 const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout, 82 const struct radv_graphics_pipeline_create_info *extra, 83 const VkAllocationCallbacks *alloc, VkPipeline *pipeline) 84{ 85 VkDevice device_h = radv_device_to_handle(device); 86 VkResult result; 87 88 result = radv_graphics_pipeline_create( 89 device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), 90 &(VkGraphicsPipelineCreateInfo){ 91 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 92 .stageCount = fs_nir ? 2 : 1, 93 .pStages = 94 (VkPipelineShaderStageCreateInfo[]){ 95 { 96 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 97 .stage = VK_SHADER_STAGE_VERTEX_BIT, 98 .module = vk_shader_module_handle_from_nir(vs_nir), 99 .pName = "main", 100 }, 101 { 102 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 103 .stage = VK_SHADER_STAGE_FRAGMENT_BIT, 104 .module = vk_shader_module_handle_from_nir(fs_nir), 105 .pName = "main", 106 }, 107 }, 108 .pVertexInputState = vi_state, 109 .pInputAssemblyState = 110 &(VkPipelineInputAssemblyStateCreateInfo){ 111 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 112 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 113 .primitiveRestartEnable = false, 114 }, 115 .pViewportState = 116 &(VkPipelineViewportStateCreateInfo){ 117 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 118 .viewportCount = 1, 119 .scissorCount = 1, 120 }, 121 .pRasterizationState = 122 &(VkPipelineRasterizationStateCreateInfo){ 123 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 124 .rasterizerDiscardEnable = false, 125 .polygonMode = VK_POLYGON_MODE_FILL, 126 .cullMode = VK_CULL_MODE_NONE, 127 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, 128 .depthBiasEnable = false, 129 }, 130 .pMultisampleState = 131 &(VkPipelineMultisampleStateCreateInfo){ 132 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 133 .rasterizationSamples = samples, 134 .sampleShadingEnable = false, 135 .pSampleMask = NULL, 136 .alphaToCoverageEnable = false, 137 .alphaToOneEnable = false, 138 }, 139 .pDepthStencilState = ds_state, 140 .pColorBlendState = cb_state, 141 .pDynamicState = 142 &(VkPipelineDynamicStateCreateInfo){ 143 /* The meta clear pipeline declares all state as dynamic. 144 * As a consequence, vkCmdBindPipeline writes no dynamic state 145 * to the cmd buffer. Therefore, at the end of the meta clear, 146 * we need only restore dynamic state was vkCmdSet. 147 */ 148 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 149 .dynamicStateCount = 8, 150 .pDynamicStates = 151 (VkDynamicState[]){ 152 /* Everything except stencil write mask */ 153 VK_DYNAMIC_STATE_VIEWPORT, 154 VK_DYNAMIC_STATE_SCISSOR, 155 VK_DYNAMIC_STATE_LINE_WIDTH, 156 VK_DYNAMIC_STATE_DEPTH_BIAS, 157 VK_DYNAMIC_STATE_BLEND_CONSTANTS, 158 VK_DYNAMIC_STATE_DEPTH_BOUNDS, 159 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 160 VK_DYNAMIC_STATE_STENCIL_REFERENCE, 161 }, 162 }, 163 .layout = layout, 164 .flags = 0, 165 .renderPass = radv_render_pass_to_handle(render_pass), 166 .subpass = 0, 167 }, 168 extra, alloc, pipeline); 169 170 ralloc_free(vs_nir); 171 ralloc_free(fs_nir); 172 173 return result; 174} 175 176static VkResult 177create_color_renderpass(struct radv_device *device, VkFormat vk_format, uint32_t samples, 178 VkRenderPass *pass) 179{ 180 mtx_lock(&device->meta_state.mtx); 181 if (*pass) { 182 mtx_unlock(&device->meta_state.mtx); 183 return VK_SUCCESS; 184 } 185 186 VkResult result = radv_CreateRenderPass2( 187 radv_device_to_handle(device), 188 &(VkRenderPassCreateInfo2){ 189 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 190 .attachmentCount = 1, 191 .pAttachments = 192 &(VkAttachmentDescription2){ 193 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2, 194 .format = vk_format, 195 .samples = samples, 196 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, 197 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 198 .initialLayout = VK_IMAGE_LAYOUT_GENERAL, 199 .finalLayout = VK_IMAGE_LAYOUT_GENERAL, 200 }, 201 .subpassCount = 1, 202 .pSubpasses = 203 &(VkSubpassDescription2){ 204 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 205 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 206 .inputAttachmentCount = 0, 207 .colorAttachmentCount = 1, 208 .pColorAttachments = 209 &(VkAttachmentReference2){ 210 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 211 .attachment = 0, 212 .layout = VK_IMAGE_LAYOUT_GENERAL, 213 }, 214 .pResolveAttachments = NULL, 215 .pDepthStencilAttachment = 216 &(VkAttachmentReference2){ 217 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 218 .attachment = VK_ATTACHMENT_UNUSED, 219 .layout = VK_IMAGE_LAYOUT_GENERAL, 220 }, 221 .preserveAttachmentCount = 0, 222 .pPreserveAttachments = NULL, 223 }, 224 .dependencyCount = 2, 225 .pDependencies = 226 (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 227 .srcSubpass = VK_SUBPASS_EXTERNAL, 228 .dstSubpass = 0, 229 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 230 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 231 .srcAccessMask = 0, 232 .dstAccessMask = 0, 233 .dependencyFlags = 0}, 234 {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 235 .srcSubpass = 0, 236 .dstSubpass = VK_SUBPASS_EXTERNAL, 237 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 238 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 239 .srcAccessMask = 0, 240 .dstAccessMask = 0, 241 .dependencyFlags = 0}}, 242 }, 243 &device->meta_state.alloc, pass); 244 mtx_unlock(&device->meta_state.mtx); 245 return result; 246} 247 248static VkResult 249create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, 250 VkPipeline *pipeline, VkRenderPass pass) 251{ 252 struct nir_shader *vs_nir; 253 struct nir_shader *fs_nir; 254 VkResult result; 255 256 mtx_lock(&device->meta_state.mtx); 257 if (*pipeline) { 258 mtx_unlock(&device->meta_state.mtx); 259 return VK_SUCCESS; 260 } 261 262 build_color_shaders(&vs_nir, &fs_nir, frag_output); 263 264 const VkPipelineVertexInputStateCreateInfo vi_state = { 265 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 266 .vertexBindingDescriptionCount = 0, 267 .vertexAttributeDescriptionCount = 0, 268 }; 269 270 const VkPipelineDepthStencilStateCreateInfo ds_state = { 271 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 272 .depthTestEnable = false, 273 .depthWriteEnable = false, 274 .depthBoundsTestEnable = false, 275 .stencilTestEnable = false, 276 }; 277 278 VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0}; 279 blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){ 280 .blendEnable = false, 281 .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | 282 VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT, 283 }; 284 285 const VkPipelineColorBlendStateCreateInfo cb_state = { 286 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 287 .logicOpEnable = false, 288 .attachmentCount = MAX_RTS, 289 .pAttachments = blend_attachment_state}; 290 291 struct radv_graphics_pipeline_create_info extra = { 292 .use_rectlist = true, 293 }; 294 result = 295 create_pipeline(device, radv_render_pass_from_handle(pass), samples, vs_nir, fs_nir, 296 &vi_state, &ds_state, &cb_state, device->meta_state.clear_color_p_layout, 297 &extra, &device->meta_state.alloc, pipeline); 298 299 mtx_unlock(&device->meta_state.mtx); 300 return result; 301} 302 303static void 304finish_meta_clear_htile_mask_state(struct radv_device *device) 305{ 306 struct radv_meta_state *state = &device->meta_state; 307 308 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, 309 &state->alloc); 310 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, 311 &state->alloc); 312 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->clear_htile_mask_ds_layout, 313 &state->alloc); 314} 315 316static void 317finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device) 318{ 319 struct radv_meta_state *state = &device->meta_state; 320 321 for (uint32_t i = 0; i < 2; i++) { 322 radv_DestroyPipeline(radv_device_to_handle(device), 323 state->clear_dcc_comp_to_single_pipeline[i], &state->alloc); 324 } 325 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, 326 &state->alloc); 327 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_ds_layout, 328 &state->alloc); 329} 330 331void 332radv_device_finish_meta_clear_state(struct radv_device *device) 333{ 334 struct radv_meta_state *state = &device->meta_state; 335 336 for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { 337 for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { 338 radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].color_pipelines[j], 339 &state->alloc); 340 radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].render_pass[j], 341 &state->alloc); 342 } 343 344 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) { 345 radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].depth_only_pipeline[j], 346 &state->alloc); 347 radv_DestroyPipeline(radv_device_to_handle(device), 348 state->clear[i].stencil_only_pipeline[j], &state->alloc); 349 radv_DestroyPipeline(radv_device_to_handle(device), 350 state->clear[i].depthstencil_pipeline[j], &state->alloc); 351 352 radv_DestroyPipeline(radv_device_to_handle(device), 353 state->clear[i].depth_only_unrestricted_pipeline[j], &state->alloc); 354 radv_DestroyPipeline(radv_device_to_handle(device), 355 state->clear[i].stencil_only_unrestricted_pipeline[j], &state->alloc); 356 radv_DestroyPipeline(radv_device_to_handle(device), 357 state->clear[i].depthstencil_unrestricted_pipeline[j], &state->alloc); 358 } 359 radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].depthstencil_rp, 360 &state->alloc); 361 } 362 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, 363 &state->alloc); 364 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, 365 &state->alloc); 366 radv_DestroyPipelineLayout(radv_device_to_handle(device), 367 state->clear_depth_unrestricted_p_layout, &state->alloc); 368 369 finish_meta_clear_htile_mask_state(device); 370 finish_meta_clear_dcc_comp_to_single_state(device); 371} 372 373static void 374emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, 375 const VkClearRect *clear_rect, uint32_t view_mask) 376{ 377 struct radv_device *device = cmd_buffer->device; 378 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 379 const uint32_t subpass_att = clear_att->colorAttachment; 380 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; 381 const struct radv_image_view *iview = 382 cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL; 383 uint32_t samples, samples_log2; 384 VkFormat format; 385 unsigned fs_key; 386 VkClearColorValue clear_value = clear_att->clearValue.color; 387 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); 388 VkPipeline pipeline; 389 390 /* When a framebuffer is bound to the current command buffer, get the 391 * number of samples from it. Otherwise, get the number of samples from 392 * the render pass because it's likely a secondary command buffer. 393 */ 394 if (iview) { 395 samples = iview->image->info.samples; 396 format = iview->vk_format; 397 } else { 398 samples = cmd_buffer->state.pass->attachments[pass_att].samples; 399 format = cmd_buffer->state.pass->attachments[pass_att].format; 400 } 401 402 samples_log2 = ffs(samples) - 1; 403 fs_key = radv_format_meta_fs_key(device, format); 404 assert(fs_key != -1); 405 406 if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) { 407 VkResult ret = 408 create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], samples, 409 &device->meta_state.clear[samples_log2].render_pass[fs_key]); 410 if (ret != VK_SUCCESS) { 411 cmd_buffer->record_result = ret; 412 return; 413 } 414 } 415 416 if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) { 417 VkResult ret = create_color_pipeline( 418 device, samples, 0, &device->meta_state.clear[samples_log2].color_pipelines[fs_key], 419 device->meta_state.clear[samples_log2].render_pass[fs_key]); 420 if (ret != VK_SUCCESS) { 421 cmd_buffer->record_result = ret; 422 return; 423 } 424 } 425 426 pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key]; 427 428 assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); 429 assert(pipeline); 430 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 431 assert(clear_att->colorAttachment < subpass->color_count); 432 433 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 434 device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 435 16, &clear_value); 436 437 struct radv_subpass clear_subpass = { 438 .color_count = 1, 439 .color_attachments = 440 (struct radv_subpass_attachment[]){subpass->color_attachments[clear_att->colorAttachment]}, 441 .depth_stencil_attachment = NULL, 442 }; 443 444 radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass); 445 446 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 447 448 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 449 &(VkViewport){.x = clear_rect->rect.offset.x, 450 .y = clear_rect->rect.offset.y, 451 .width = clear_rect->rect.extent.width, 452 .height = clear_rect->rect.extent.height, 453 .minDepth = 0.0f, 454 .maxDepth = 1.0f}); 455 456 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); 457 458 if (view_mask) { 459 u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); 460 } else { 461 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); 462 } 463 464 radv_cmd_buffer_restore_subpass(cmd_buffer, subpass); 465} 466 467static void 468build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs, bool unrestricted) 469{ 470 nir_builder vs_b = nir_builder_init_simple_shader( 471 MESA_SHADER_VERTEX, NULL, 472 unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); 473 nir_builder fs_b = nir_builder_init_simple_shader( 474 MESA_SHADER_FRAGMENT, NULL, 475 unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); 476 477 const struct glsl_type *position_out_type = glsl_vec4_type(); 478 479 nir_variable *vs_out_pos = 480 nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position"); 481 vs_out_pos->data.location = VARYING_SLOT_POS; 482 483 nir_ssa_def *z; 484 if (unrestricted) { 485 nir_ssa_def *in_color_load = 486 nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4); 487 488 nir_variable *fs_out_depth = 489 nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth"); 490 fs_out_depth->data.location = FRAG_RESULT_DEPTH; 491 nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1); 492 493 z = nir_imm_float(&vs_b, 0.0); 494 } else { 495 z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4); 496 } 497 498 nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z); 499 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); 500 501 const struct glsl_type *layer_type = glsl_int_type(); 502 nir_variable *vs_out_layer = 503 nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); 504 vs_out_layer->data.location = VARYING_SLOT_LAYER; 505 vs_out_layer->data.interpolation = INTERP_MODE_FLAT; 506 nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); 507 nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); 508 509 nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); 510 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); 511 512 *out_vs = vs_b.shader; 513 *out_fs = fs_b.shader; 514} 515 516static VkResult 517create_depthstencil_renderpass(struct radv_device *device, uint32_t samples, 518 VkRenderPass *render_pass) 519{ 520 mtx_lock(&device->meta_state.mtx); 521 if (*render_pass) { 522 mtx_unlock(&device->meta_state.mtx); 523 return VK_SUCCESS; 524 } 525 526 VkResult result = radv_CreateRenderPass2( 527 radv_device_to_handle(device), 528 &(VkRenderPassCreateInfo2){ 529 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 530 .attachmentCount = 1, 531 .pAttachments = 532 &(VkAttachmentDescription2){ 533 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2, 534 .format = VK_FORMAT_D32_SFLOAT_S8_UINT, 535 .samples = samples, 536 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, 537 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 538 .initialLayout = VK_IMAGE_LAYOUT_GENERAL, 539 .finalLayout = VK_IMAGE_LAYOUT_GENERAL, 540 }, 541 .subpassCount = 1, 542 .pSubpasses = 543 &(VkSubpassDescription2){ 544 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 545 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 546 .inputAttachmentCount = 0, 547 .colorAttachmentCount = 0, 548 .pColorAttachments = NULL, 549 .pResolveAttachments = NULL, 550 .pDepthStencilAttachment = 551 &(VkAttachmentReference2){ 552 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 553 .attachment = 0, 554 .layout = VK_IMAGE_LAYOUT_GENERAL, 555 }, 556 .preserveAttachmentCount = 0, 557 .pPreserveAttachments = NULL, 558 }, 559 .dependencyCount = 2, 560 .pDependencies = 561 (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 562 .srcSubpass = VK_SUBPASS_EXTERNAL, 563 .dstSubpass = 0, 564 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 565 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 566 .srcAccessMask = 0, 567 .dstAccessMask = 0, 568 .dependencyFlags = 0}, 569 {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 570 .srcSubpass = 0, 571 .dstSubpass = VK_SUBPASS_EXTERNAL, 572 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 573 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 574 .srcAccessMask = 0, 575 .dstAccessMask = 0, 576 .dependencyFlags = 0}}}, 577 &device->meta_state.alloc, render_pass); 578 mtx_unlock(&device->meta_state.mtx); 579 return result; 580} 581 582static VkResult 583create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, 584 uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline, 585 VkRenderPass render_pass) 586{ 587 struct nir_shader *vs_nir, *fs_nir; 588 VkResult result; 589 590 mtx_lock(&device->meta_state.mtx); 591 if (*pipeline) { 592 mtx_unlock(&device->meta_state.mtx); 593 return VK_SUCCESS; 594 } 595 596 build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted); 597 598 const VkPipelineVertexInputStateCreateInfo vi_state = { 599 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 600 .vertexBindingDescriptionCount = 0, 601 .vertexAttributeDescriptionCount = 0, 602 }; 603 604 const VkPipelineDepthStencilStateCreateInfo ds_state = { 605 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 606 .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT), 607 .depthCompareOp = VK_COMPARE_OP_ALWAYS, 608 .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT), 609 .depthBoundsTestEnable = false, 610 .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT), 611 .front = 612 { 613 .passOp = VK_STENCIL_OP_REPLACE, 614 .compareOp = VK_COMPARE_OP_ALWAYS, 615 .writeMask = UINT32_MAX, 616 .reference = 0, /* dynamic */ 617 }, 618 .back = {0 /* dont care */}, 619 }; 620 621 const VkPipelineColorBlendStateCreateInfo cb_state = { 622 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 623 .logicOpEnable = false, 624 .attachmentCount = 0, 625 .pAttachments = NULL, 626 }; 627 628 struct radv_graphics_pipeline_create_info extra = { 629 .use_rectlist = true, 630 }; 631 632 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { 633 extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true; 634 } 635 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 636 extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true; 637 } 638 result = 639 create_pipeline(device, radv_render_pass_from_handle(render_pass), samples, vs_nir, fs_nir, 640 &vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout, 641 &extra, &device->meta_state.alloc, pipeline); 642 643 mtx_unlock(&device->meta_state.mtx); 644 return result; 645} 646 647static bool 648depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 649 VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop, 650 const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value) 651{ 652 if (!iview) 653 return false; 654 655 uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, 656 cmd_buffer->queue_family_index); 657 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || 658 clear_rect->rect.extent.width != iview->extent.width || 659 clear_rect->rect.extent.height != iview->extent.height) 660 return false; 661 if (radv_image_is_tc_compat_htile(iview->image) && 662 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 && 663 clear_value.depth != 1.0) || 664 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0))) 665 return false; 666 if (radv_htile_enabled(iview->image, iview->base_mip) && iview->base_mip == 0 && 667 iview->base_layer == 0 && iview->layer_count == iview->image->info.array_size && 668 radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop, 669 queue_mask) && 670 radv_image_extent_compare(iview->image, &iview->extent)) 671 return true; 672 return false; 673} 674 675static VkPipeline 676pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state, 677 const struct radv_image_view *iview, int samples_log2, 678 VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop, 679 const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value) 680{ 681 bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop, 682 clear_rect, clear_value); 683 bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted; 684 int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW; 685 VkPipeline *pipeline; 686 687 switch (aspects) { 688 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: 689 pipeline = unrestricted 690 ? &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index] 691 : &meta_state->clear[samples_log2].depthstencil_pipeline[index]; 692 break; 693 case VK_IMAGE_ASPECT_DEPTH_BIT: 694 pipeline = unrestricted 695 ? &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index] 696 : &meta_state->clear[samples_log2].depth_only_pipeline[index]; 697 break; 698 case VK_IMAGE_ASPECT_STENCIL_BIT: 699 pipeline = unrestricted 700 ? &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index] 701 : &meta_state->clear[samples_log2].stencil_only_pipeline[index]; 702 break; 703 default: 704 unreachable("expected depth or stencil aspect"); 705 } 706 707 if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) { 708 VkResult ret = create_depthstencil_renderpass( 709 cmd_buffer->device, 1u << samples_log2, 710 &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); 711 if (ret != VK_SUCCESS) { 712 cmd_buffer->record_result = ret; 713 return VK_NULL_HANDLE; 714 } 715 } 716 717 if (*pipeline == VK_NULL_HANDLE) { 718 VkResult ret = create_depthstencil_pipeline( 719 cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline, 720 cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp); 721 if (ret != VK_SUCCESS) { 722 cmd_buffer->record_result = ret; 723 return VK_NULL_HANDLE; 724 } 725 } 726 return *pipeline; 727} 728 729static void 730emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, 731 const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att, 732 uint32_t view_mask) 733{ 734 struct radv_device *device = cmd_buffer->device; 735 struct radv_meta_state *meta_state = &device->meta_state; 736 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 737 const uint32_t pass_att = ds_att->attachment; 738 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; 739 VkImageAspectFlags aspects = clear_att->aspectMask; 740 const struct radv_image_view *iview = 741 cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL; 742 uint32_t samples, samples_log2; 743 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); 744 745 /* When a framebuffer is bound to the current command buffer, get the 746 * number of samples from it. Otherwise, get the number of samples from 747 * the render pass because it's likely a secondary command buffer. 748 */ 749 if (iview) { 750 samples = iview->image->info.samples; 751 } else { 752 samples = cmd_buffer->state.pass->attachments[pass_att].samples; 753 } 754 755 samples_log2 = ffs(samples) - 1; 756 757 assert(pass_att != VK_ATTACHMENT_UNUSED); 758 759 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) 760 clear_value.depth = 1.0f; 761 762 if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) { 763 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 764 device->meta_state.clear_depth_unrestricted_p_layout, 765 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth); 766 } else { 767 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 768 device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 769 4, &clear_value.depth); 770 } 771 772 uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front; 773 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 774 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil); 775 } 776 777 VkPipeline pipeline = 778 pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects, 779 ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value); 780 if (!pipeline) 781 return; 782 783 struct radv_subpass clear_subpass = { 784 .color_count = 0, 785 .color_attachments = NULL, 786 .depth_stencil_attachment = ds_att, 787 }; 788 789 radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass); 790 791 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 792 793 if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop, 794 clear_rect, clear_value)) 795 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects); 796 797 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 798 &(VkViewport){.x = clear_rect->rect.offset.x, 799 .y = clear_rect->rect.offset.y, 800 .width = clear_rect->rect.extent.width, 801 .height = clear_rect->rect.extent.height, 802 .minDepth = 0.0f, 803 .maxDepth = 1.0f}); 804 805 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); 806 807 if (view_mask) { 808 u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); 809 } else { 810 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); 811 } 812 813 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 814 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference); 815 } 816 817 radv_cmd_buffer_restore_subpass(cmd_buffer, subpass); 818} 819 820static uint32_t 821clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, 822 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value, 823 uint32_t htile_mask) 824{ 825 struct radv_device *device = cmd_buffer->device; 826 struct radv_meta_state *state = &device->meta_state; 827 uint64_t block_count = round_up_u64(size, 1024); 828 struct radv_meta_saved_state saved_state; 829 struct radv_buffer dst_buffer; 830 831 radv_meta_save( 832 &saved_state, cmd_buffer, 833 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); 834 835 radv_buffer_init(&dst_buffer, device, bo, size, offset); 836 837 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 838 state->clear_htile_mask_pipeline); 839 840 radv_meta_push_descriptor_set( 841 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */ 842 1, /* descriptorWriteCount */ 843 (VkWriteDescriptorSet[]){ 844 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 845 .dstBinding = 0, 846 .dstArrayElement = 0, 847 .descriptorCount = 1, 848 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 849 .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), 850 .offset = 0, 851 .range = size}}}); 852 853 const unsigned constants[2] = { 854 htile_value & htile_mask, 855 ~htile_mask, 856 }; 857 858 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout, 859 VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants); 860 861 radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); 862 863 radv_buffer_finish(&dst_buffer); 864 865 radv_meta_restore(&saved_state, cmd_buffer); 866 867 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 868 radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 869} 870 871static uint32_t 872radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image, 873 VkClearDepthStencilValue value) 874{ 875 uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */ 876 uint32_t zmask = 0, smem = 0; 877 uint32_t htile_value; 878 uint32_t zmin, zmax; 879 880 /* Convert the depth value to 14-bit zmin/zmax values. */ 881 zmin = lroundf(value.depth * max_zval); 882 zmax = zmin; 883 884 if (radv_image_tile_stencil_disabled(device, image)) { 885 /* Z only (no stencil): 886 * 887 * |31 18|17 4|3 0| 888 * +---------+---------+-------+ 889 * | Max Z | Min Z | ZMask | 890 */ 891 htile_value = (((zmax & 0x3fff) << 18) | 892 ((zmin & 0x3fff) << 4) | 893 ((zmask & 0xf) << 0)); 894 } else { 895 896 /* Z and stencil: 897 * 898 * |31 12|11 10|9 8|7 6|5 4|3 0| 899 * +-----------+-----+------+-----+-----+-------+ 900 * | Z Range | | SMem | SR1 | SR0 | ZMask | 901 * 902 * Z, stencil, 4 bit VRS encoding: 903 * |31 12| 11 10 |9 8|7 6 |5 4|3 0| 904 * +-----------+------------+------+------------+-----+-------+ 905 * | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask | 906 */ 907 uint32_t delta = 0; 908 uint32_t zrange = ((zmax << 6) | delta); 909 uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */ 910 911 if (radv_image_has_vrs_htile(device, image)) 912 sresults = 0x3; 913 914 htile_value = (((zrange & 0xfffff) << 12) | 915 ((smem & 0x3) << 8) | 916 ((sresults & 0xf) << 4) | 917 ((zmask & 0xf) << 0)); 918 } 919 920 return htile_value; 921} 922 923static uint32_t 924radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, 925 VkImageAspectFlags aspects) 926{ 927 uint32_t mask = 0; 928 929 if (radv_image_tile_stencil_disabled(device, image)) { 930 /* All the HTILE buffer is used when there is no stencil. */ 931 mask = UINT32_MAX; 932 } else { 933 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 934 mask |= 0xfffffc0f; 935 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) 936 mask |= 0x000003f0; 937 } 938 939 return mask; 940} 941 942static bool 943radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value) 944{ 945 return value.depth == 1.0f || value.depth == 0.0f; 946} 947 948static bool 949radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value) 950{ 951 return value.stencil == 0; 952} 953 954static bool 955radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 956 VkImageLayout image_layout, bool in_render_loop, 957 VkImageAspectFlags aspects, const VkClearRect *clear_rect, 958 const VkClearDepthStencilValue clear_value, uint32_t view_mask) 959{ 960 if (!iview || !iview->support_fast_clear) 961 return false; 962 963 if (!radv_layout_is_htile_compressed( 964 cmd_buffer->device, iview->image, image_layout, in_render_loop, 965 radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, 966 cmd_buffer->queue_family_index))) 967 return false; 968 969 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || 970 clear_rect->rect.extent.width != iview->image->info.width || 971 clear_rect->rect.extent.height != iview->image->info.height) 972 return false; 973 974 if (view_mask && (iview->image->info.array_size >= 32 || 975 (1u << iview->image->info.array_size) - 1u != view_mask)) 976 return false; 977 if (!view_mask && clear_rect->baseArrayLayer != 0) 978 return false; 979 if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) 980 return false; 981 982 if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted && 983 (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 984 (clear_value.depth < 0.0 || clear_value.depth > 1.0)) 985 return false; 986 987 if (radv_image_is_tc_compat_htile(iview->image) && 988 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) || 989 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 990 !radv_is_fast_clear_stencil_allowed(clear_value)))) 991 return false; 992 993 if (iview->image->info.levels > 1) { 994 uint32_t last_level = iview->base_mip + iview->level_count - 1; 995 if (last_level >= iview->image->planes[0].surface.num_meta_levels) { 996 /* Do not fast clears if one level can't be fast cleared. */ 997 return false; 998 } 999 } 1000 1001 return true; 1002} 1003 1004static void 1005radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 1006 const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush, 1007 enum radv_cmd_flush_bits *post_flush) 1008{ 1009 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; 1010 VkImageAspectFlags aspects = clear_att->aspectMask; 1011 uint32_t clear_word, flush_bits; 1012 1013 clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value); 1014 1015 if (pre_flush) { 1016 enum radv_cmd_flush_bits bits = 1017 radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 1018 iview->image) | 1019 radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT | 1020 VK_ACCESS_SHADER_READ_BIT, iview->image); 1021 cmd_buffer->state.flush_bits |= bits & ~*pre_flush; 1022 *pre_flush |= cmd_buffer->state.flush_bits; 1023 } 1024 1025 VkImageSubresourceRange range = { 1026 .aspectMask = aspects, 1027 .baseMipLevel = iview->base_mip, 1028 .levelCount = iview->level_count, 1029 .baseArrayLayer = iview->base_layer, 1030 .layerCount = iview->layer_count, 1031 }; 1032 1033 flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word); 1034 1035 if (iview->image->planes[0].surface.has_stencil && 1036 !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { 1037 /* Synchronize after performing a depth-only or a stencil-only 1038 * fast clear because the driver uses an optimized path which 1039 * performs a read-modify-write operation, and the two separate 1040 * aspects might use the same HTILE memory. 1041 */ 1042 cmd_buffer->state.flush_bits |= flush_bits; 1043 } 1044 1045 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects); 1046 if (post_flush) { 1047 *post_flush |= flush_bits; 1048 } 1049} 1050 1051static nir_shader * 1052build_clear_htile_mask_shader() 1053{ 1054 nir_builder b = 1055 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask"); 1056 b.shader->info.workgroup_size[0] = 64; 1057 b.shader->info.workgroup_size[1] = 1; 1058 b.shader->info.workgroup_size[2] = 1; 1059 1060 nir_ssa_def *global_id = get_global_ids(&b, 1); 1061 1062 nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); 1063 offset = nir_channel(&b, offset, 0); 1064 1065 nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0); 1066 1067 nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 1068 1069 nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16); 1070 1071 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */ 1072 nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1)); 1073 data = nir_ior(&b, data, nir_channel(&b, constants, 0)); 1074 1075 nir_store_ssbo(&b, data, buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE, 1076 .align_mul = 16); 1077 1078 return b.shader; 1079} 1080 1081static VkResult 1082init_meta_clear_htile_mask_state(struct radv_device *device) 1083{ 1084 struct radv_meta_state *state = &device->meta_state; 1085 VkResult result; 1086 nir_shader *cs = build_clear_htile_mask_shader(); 1087 1088 VkDescriptorSetLayoutCreateInfo ds_layout_info = { 1089 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1090 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 1091 .bindingCount = 1, 1092 .pBindings = (VkDescriptorSetLayoutBinding[]){ 1093 {.binding = 0, 1094 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1095 .descriptorCount = 1, 1096 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 1097 .pImmutableSamplers = NULL}, 1098 }}; 1099 1100 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, 1101 &state->alloc, &state->clear_htile_mask_ds_layout); 1102 if (result != VK_SUCCESS) 1103 goto fail; 1104 1105 VkPipelineLayoutCreateInfo p_layout_info = { 1106 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1107 .setLayoutCount = 1, 1108 .pSetLayouts = &state->clear_htile_mask_ds_layout, 1109 .pushConstantRangeCount = 1, 1110 .pPushConstantRanges = 1111 &(VkPushConstantRange){ 1112 VK_SHADER_STAGE_COMPUTE_BIT, 1113 0, 1114 8, 1115 }, 1116 }; 1117 1118 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc, 1119 &state->clear_htile_mask_p_layout); 1120 if (result != VK_SUCCESS) 1121 goto fail; 1122 1123 VkPipelineShaderStageCreateInfo shader_stage = { 1124 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1125 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1126 .module = vk_shader_module_handle_from_nir(cs), 1127 .pName = "main", 1128 .pSpecializationInfo = NULL, 1129 }; 1130 1131 VkComputePipelineCreateInfo pipeline_info = { 1132 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1133 .stage = shader_stage, 1134 .flags = 0, 1135 .layout = state->clear_htile_mask_p_layout, 1136 }; 1137 1138 result = radv_CreateComputePipelines(radv_device_to_handle(device), 1139 radv_pipeline_cache_to_handle(&state->cache), 1, 1140 &pipeline_info, NULL, &state->clear_htile_mask_pipeline); 1141 1142 ralloc_free(cs); 1143 return result; 1144fail: 1145 ralloc_free(cs); 1146 return result; 1147} 1148 1149/* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block. 1150 * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared. 1151 */ 1152static nir_shader * 1153build_clear_dcc_comp_to_single_shader(bool is_msaa) 1154{ 1155 enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; 1156 const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT); 1157 1158 nir_builder b = 1159 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_dcc_comp_to_single-%s", 1160 is_msaa ? "multisampled" : "singlesampled"); 1161 b.shader->info.workgroup_size[0] = 8; 1162 b.shader->info.workgroup_size[1] = 8; 1163 b.shader->info.workgroup_size[2] = 1; 1164 1165 nir_ssa_def *global_id = get_global_ids(&b, 3); 1166 1167 /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */ 1168 nir_ssa_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 1169 1170 /* Compute the coordinates. */ 1171 nir_ssa_def *coord = nir_channels(&b, global_id, 0x3); 1172 coord = nir_imul(&b, coord, dcc_block_size); 1173 coord = nir_vec4(&b, nir_channel(&b, coord, 0), 1174 nir_channel(&b, coord, 1), 1175 nir_channel(&b, global_id, 2), 1176 nir_ssa_undef(&b, 1, 32)); 1177 1178 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img"); 1179 output_img->data.descriptor_set = 0; 1180 output_img->data.binding = 0; 1181 1182 /* Load the clear color values. */ 1183 nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8); 1184 1185 nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), 1186 nir_channel(&b, clear_values, 1), 1187 nir_channel(&b, clear_values, 1), 1188 nir_channel(&b, clear_values, 1)); 1189 1190 /* Store the clear color values. */ 1191 nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32); 1192 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 1193 sample_id, data, nir_imm_int(&b, 0), 1194 .image_dim = dim, .image_array = true); 1195 1196 return b.shader; 1197} 1198 1199static VkResult 1200create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline) 1201{ 1202 struct radv_meta_state *state = &device->meta_state; 1203 VkResult result; 1204 nir_shader *cs = build_clear_dcc_comp_to_single_shader(is_msaa); 1205 1206 VkPipelineShaderStageCreateInfo shader_stage = { 1207 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 1208 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 1209 .module = vk_shader_module_handle_from_nir(cs), 1210 .pName = "main", 1211 .pSpecializationInfo = NULL, 1212 }; 1213 1214 VkComputePipelineCreateInfo pipeline_info = { 1215 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 1216 .stage = shader_stage, 1217 .flags = 0, 1218 .layout = state->clear_dcc_comp_to_single_p_layout, 1219 }; 1220 1221 result = radv_CreateComputePipelines(radv_device_to_handle(device), 1222 radv_pipeline_cache_to_handle(&state->cache), 1, 1223 &pipeline_info, NULL, pipeline); 1224 1225 ralloc_free(cs); 1226 return result; 1227} 1228 1229static VkResult 1230init_meta_clear_dcc_comp_to_single_state(struct radv_device *device) 1231{ 1232 struct radv_meta_state *state = &device->meta_state; 1233 VkResult result; 1234 1235 VkDescriptorSetLayoutCreateInfo ds_layout_info = { 1236 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1237 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 1238 .bindingCount = 1, 1239 .pBindings = (VkDescriptorSetLayoutBinding[]){ 1240 {.binding = 0, 1241 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1242 .descriptorCount = 1, 1243 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 1244 .pImmutableSamplers = NULL}, 1245 }}; 1246 1247 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, 1248 &state->alloc, &state->clear_dcc_comp_to_single_ds_layout); 1249 if (result != VK_SUCCESS) 1250 goto fail; 1251 1252 VkPipelineLayoutCreateInfo p_layout_info = { 1253 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1254 .setLayoutCount = 1, 1255 .pSetLayouts = &state->clear_dcc_comp_to_single_ds_layout, 1256 .pushConstantRangeCount = 1, 1257 .pPushConstantRanges = 1258 &(VkPushConstantRange){ 1259 VK_SHADER_STAGE_COMPUTE_BIT, 1260 0, 1261 16, 1262 }, 1263 }; 1264 1265 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc, 1266 &state->clear_dcc_comp_to_single_p_layout); 1267 if (result != VK_SUCCESS) 1268 goto fail; 1269 1270 for (uint32_t i = 0; i < 2; i++) { 1271 result = create_dcc_comp_to_single_pipeline(device, !!i, 1272 &state->clear_dcc_comp_to_single_pipeline[i]); 1273 if (result != VK_SUCCESS) 1274 goto fail; 1275 } 1276 1277fail: 1278 return result; 1279} 1280 1281VkResult 1282radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) 1283{ 1284 VkResult res; 1285 struct radv_meta_state *state = &device->meta_state; 1286 1287 VkPipelineLayoutCreateInfo pl_color_create_info = { 1288 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1289 .setLayoutCount = 0, 1290 .pushConstantRangeCount = 1, 1291 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16}, 1292 }; 1293 1294 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info, 1295 &device->meta_state.alloc, 1296 &device->meta_state.clear_color_p_layout); 1297 if (res != VK_SUCCESS) 1298 goto fail; 1299 1300 VkPipelineLayoutCreateInfo pl_depth_create_info = { 1301 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1302 .setLayoutCount = 0, 1303 .pushConstantRangeCount = 1, 1304 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4}, 1305 }; 1306 1307 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info, 1308 &device->meta_state.alloc, 1309 &device->meta_state.clear_depth_p_layout); 1310 if (res != VK_SUCCESS) 1311 goto fail; 1312 1313 VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = { 1314 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 1315 .setLayoutCount = 0, 1316 .pushConstantRangeCount = 1, 1317 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4}, 1318 }; 1319 1320 res = radv_CreatePipelineLayout(radv_device_to_handle(device), 1321 &pl_depth_unrestricted_create_info, &device->meta_state.alloc, 1322 &device->meta_state.clear_depth_unrestricted_p_layout); 1323 if (res != VK_SUCCESS) 1324 goto fail; 1325 1326 res = init_meta_clear_htile_mask_state(device); 1327 if (res != VK_SUCCESS) 1328 goto fail; 1329 1330 res = init_meta_clear_dcc_comp_to_single_state(device); 1331 if (res != VK_SUCCESS) 1332 goto fail; 1333 1334 if (on_demand) 1335 return VK_SUCCESS; 1336 1337 for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { 1338 uint32_t samples = 1 << i; 1339 for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) { 1340 VkFormat format = radv_fs_key_format_exemplars[j]; 1341 unsigned fs_key = radv_format_meta_fs_key(device, format); 1342 assert(!state->clear[i].color_pipelines[fs_key]); 1343 1344 res = 1345 create_color_renderpass(device, format, samples, &state->clear[i].render_pass[fs_key]); 1346 if (res != VK_SUCCESS) 1347 goto fail; 1348 1349 res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key], 1350 state->clear[i].render_pass[fs_key]); 1351 if (res != VK_SUCCESS) 1352 goto fail; 1353 } 1354 1355 res = create_depthstencil_renderpass(device, samples, &state->clear[i].depthstencil_rp); 1356 if (res != VK_SUCCESS) 1357 goto fail; 1358 1359 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) { 1360 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false, 1361 &state->clear[i].depth_only_pipeline[j], 1362 state->clear[i].depthstencil_rp); 1363 if (res != VK_SUCCESS) 1364 goto fail; 1365 1366 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false, 1367 &state->clear[i].stencil_only_pipeline[j], 1368 state->clear[i].depthstencil_rp); 1369 if (res != VK_SUCCESS) 1370 goto fail; 1371 1372 res = create_depthstencil_pipeline( 1373 device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false, 1374 &state->clear[i].depthstencil_pipeline[j], state->clear[i].depthstencil_rp); 1375 if (res != VK_SUCCESS) 1376 goto fail; 1377 1378 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true, 1379 &state->clear[i].depth_only_unrestricted_pipeline[j], 1380 state->clear[i].depthstencil_rp); 1381 if (res != VK_SUCCESS) 1382 goto fail; 1383 1384 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true, 1385 &state->clear[i].stencil_only_unrestricted_pipeline[j], 1386 state->clear[i].depthstencil_rp); 1387 if (res != VK_SUCCESS) 1388 goto fail; 1389 1390 res = create_depthstencil_pipeline( 1391 device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true, 1392 &state->clear[i].depthstencil_unrestricted_pipeline[j], 1393 state->clear[i].depthstencil_rp); 1394 if (res != VK_SUCCESS) 1395 goto fail; 1396 } 1397 } 1398 return VK_SUCCESS; 1399 1400fail: 1401 radv_device_finish_meta_clear_state(device); 1402 return res; 1403} 1404 1405static uint32_t 1406radv_get_cmask_fast_clear_value(const struct radv_image *image) 1407{ 1408 uint32_t value = 0; /* Default value when no DCC. */ 1409 1410 /* The fast-clear value is different for images that have both DCC and 1411 * CMASK metadata. 1412 */ 1413 if (radv_image_has_dcc(image)) { 1414 /* DCC fast clear with MSAA should clear CMASK to 0xC. */ 1415 return image->info.samples > 1 ? 0xcccccccc : 0xffffffff; 1416 } 1417 1418 return value; 1419} 1420 1421uint32_t 1422radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 1423 const VkImageSubresourceRange *range, uint32_t value) 1424{ 1425 uint64_t offset = image->offset + image->planes[0].surface.cmask_offset; 1426 uint64_t size; 1427 1428 if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { 1429 /* TODO: clear layers. */ 1430 size = image->planes[0].surface.cmask_size; 1431 } else { 1432 unsigned slice_size = image->planes[0].surface.cmask_slice_size; 1433 1434 offset += slice_size * range->baseArrayLayer; 1435 size = slice_size * radv_get_layerCount(image, range); 1436 } 1437 1438 return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value); 1439} 1440 1441uint32_t 1442radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 1443 const VkImageSubresourceRange *range, uint32_t value) 1444{ 1445 uint64_t offset = image->offset + image->planes[0].surface.fmask_offset; 1446 unsigned slice_size = image->planes[0].surface.fmask_slice_size; 1447 uint64_t size; 1448 1449 /* MSAA images do not support mipmap levels. */ 1450 assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1); 1451 1452 offset += slice_size * range->baseArrayLayer; 1453 size = slice_size * radv_get_layerCount(image, range); 1454 1455 return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value); 1456} 1457 1458uint32_t 1459radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 1460 const VkImageSubresourceRange *range, uint32_t value) 1461{ 1462 uint32_t level_count = radv_get_levelCount(image, range); 1463 uint32_t layer_count = radv_get_layerCount(image, range); 1464 uint32_t flush_bits = 0; 1465 1466 /* Mark the image as being compressed. */ 1467 radv_update_dcc_metadata(cmd_buffer, image, range, true); 1468 1469 for (uint32_t l = 0; l < level_count; l++) { 1470 uint64_t offset = image->offset + image->planes[0].surface.meta_offset; 1471 uint32_t level = range->baseMipLevel + l; 1472 uint64_t size; 1473 1474 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { 1475 /* DCC for mipmaps+layers is currently disabled. */ 1476 offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer + 1477 image->planes[0].surface.u.gfx9.meta_levels[level].offset; 1478 size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count; 1479 } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { 1480 /* Mipmap levels and layers aren't implemented. */ 1481 assert(level == 0); 1482 size = image->planes[0].surface.meta_size; 1483 } else { 1484 const struct legacy_surf_dcc_level *dcc_level = 1485 &image->planes[0].surface.u.legacy.color.dcc_level[level]; 1486 1487 /* If dcc_fast_clear_size is 0 (which might happens for 1488 * mipmaps) the fill buffer operation below is a no-op. 1489 * This can only happen during initialization as the 1490 * fast clear path fallbacks to slow clears if one 1491 * level can't be fast cleared. 1492 */ 1493 offset += 1494 dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer; 1495 size = dcc_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range); 1496 } 1497 1498 /* Do not clear this level if it can't be compressed. */ 1499 if (!size) 1500 continue; 1501 1502 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value); 1503 } 1504 1505 return flush_bits; 1506} 1507 1508static uint32_t 1509radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, 1510 struct radv_image *image, 1511 const VkImageSubresourceRange *range, 1512 uint32_t color_values[2]) 1513{ 1514 struct radv_device *device = cmd_buffer->device; 1515 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format); 1516 unsigned layer_count = radv_get_layerCount(image, range); 1517 struct radv_meta_saved_state saved_state; 1518 bool is_msaa = image->info.samples > 1; 1519 struct radv_image_view iview; 1520 VkFormat format; 1521 1522 switch (bytes_per_pixel) { 1523 case 1: 1524 format = VK_FORMAT_R8_UINT; 1525 break; 1526 case 2: 1527 format = VK_FORMAT_R16_UINT; 1528 break; 1529 case 4: 1530 format = VK_FORMAT_R32_UINT; 1531 break; 1532 case 8: 1533 format = VK_FORMAT_R32G32_UINT; 1534 break; 1535 case 16: 1536 format = VK_FORMAT_R32G32B32A32_UINT; 1537 break; 1538 default: 1539 unreachable("Unsupported number of bytes per pixel"); 1540 } 1541 1542 radv_meta_save( 1543 &saved_state, cmd_buffer, 1544 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); 1545 1546 VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa]; 1547 1548 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 1549 pipeline); 1550 1551 for (uint32_t l = 0; l < radv_get_levelCount(image, range); l++) { 1552 uint32_t width, height; 1553 1554 /* Do not write the clear color value for levels without DCC. */ 1555 if (!radv_dcc_enabled(image, range->baseMipLevel + l)) 1556 continue; 1557 1558 width = radv_minify(image->info.width, range->baseMipLevel + l); 1559 height = radv_minify(image->info.height, range->baseMipLevel + l); 1560 1561 radv_image_view_init( 1562 &iview, cmd_buffer->device, 1563 &(VkImageViewCreateInfo){ 1564 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 1565 .image = radv_image_to_handle(image), 1566 .viewType = VK_IMAGE_VIEW_TYPE_2D, 1567 .format = format, 1568 .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 1569 .baseMipLevel = range->baseMipLevel + l, 1570 .levelCount = 1, 1571 .baseArrayLayer = range->baseArrayLayer, 1572 .layerCount = layer_count}, 1573 }, 1574 &(struct radv_image_view_extra_create_info){.disable_compression = true}); 1575 1576 radv_meta_push_descriptor_set( 1577 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 1578 device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1579 1, 1580 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1581 .dstBinding = 0, 1582 .dstArrayElement = 0, 1583 .descriptorCount = 1, 1584 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1585 .pImageInfo = 1586 (VkDescriptorImageInfo[]){ 1587 { 1588 .sampler = VK_NULL_HANDLE, 1589 .imageView = radv_image_view_to_handle(&iview), 1590 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 1591 }, 1592 }}}); 1593 1594 unsigned dcc_width = 1595 DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); 1596 unsigned dcc_height = 1597 DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); 1598 1599 const unsigned constants[4] = { 1600 image->planes[0].surface.u.gfx9.color.dcc_block_width, 1601 image->planes[0].surface.u.gfx9.color.dcc_block_height, 1602 color_values[0], 1603 color_values[1], 1604 }; 1605 1606 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1607 device->meta_state.clear_dcc_comp_to_single_p_layout, 1608 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants); 1609 1610 radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count); 1611 1612 radv_image_view_finish(&iview); 1613 } 1614 1615 radv_meta_restore(&saved_state, cmd_buffer); 1616 1617 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 1618 radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image); 1619} 1620 1621uint32_t 1622radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, 1623 const VkImageSubresourceRange *range, uint32_t value) 1624{ 1625 uint32_t level_count = radv_get_levelCount(image, range); 1626 uint32_t flush_bits = 0; 1627 uint32_t htile_mask; 1628 1629 htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask); 1630 1631 if (level_count != image->info.levels) { 1632 assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); 1633 1634 /* Clear individuals levels separately. */ 1635 for (uint32_t l = 0; l < level_count; l++) { 1636 uint32_t level = range->baseMipLevel + l; 1637 uint64_t offset = image->offset + image->planes[0].surface.meta_offset + 1638 image->planes[0].surface.u.gfx9.meta_levels[level].offset; 1639 uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size; 1640 1641 /* Do not clear this level if it can be compressed. */ 1642 if (!size) 1643 continue; 1644 1645 if (htile_mask == UINT_MAX) { 1646 /* Clear the whole HTILE buffer. */ 1647 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value); 1648 } else { 1649 /* Only clear depth or stencil bytes in the HTILE buffer. */ 1650 flush_bits |= 1651 clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask); 1652 } 1653 } 1654 } else { 1655 unsigned layer_count = radv_get_layerCount(image, range); 1656 uint64_t size = image->planes[0].surface.meta_slice_size * layer_count; 1657 uint64_t offset = image->offset + image->planes[0].surface.meta_offset + 1658 image->planes[0].surface.meta_slice_size * range->baseArrayLayer; 1659 1660 if (htile_mask == UINT_MAX) { 1661 /* Clear the whole HTILE buffer. */ 1662 flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value); 1663 } else { 1664 /* Only clear depth or stencil bytes in the HTILE buffer. */ 1665 flush_bits = 1666 clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask); 1667 } 1668 } 1669 1670 return flush_bits; 1671} 1672 1673enum { 1674 RADV_DCC_CLEAR_0000 = 0x00000000U, 1675 RADV_DCC_CLEAR_0001 = 0x40404040U, 1676 RADV_DCC_CLEAR_1110 = 0x80808080U, 1677 RADV_DCC_CLEAR_1111 = 0xC0C0C0C0U, 1678 RADV_DCC_CLEAR_REG = 0x20202020U, 1679 RADV_DCC_CLEAR_SINGLE = 0x10101010U, 1680}; 1681 1682static void 1683vi_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview, 1684 const VkClearColorValue *clear_value, 1685 uint32_t *reset_value, bool *can_avoid_fast_clear_elim) 1686{ 1687 bool values[4] = {0}; 1688 int extra_channel; 1689 bool main_value = false; 1690 bool extra_value = false; 1691 bool has_color = false; 1692 bool has_alpha = false; 1693 1694 /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */ 1695 if (iview->image->support_comp_to_single) { 1696 *reset_value = RADV_DCC_CLEAR_SINGLE; 1697 *can_avoid_fast_clear_elim = true; 1698 } else { 1699 *reset_value = RADV_DCC_CLEAR_REG; 1700 *can_avoid_fast_clear_elim = false; 1701 } 1702 1703 const struct util_format_description *desc = vk_format_description(iview->vk_format); 1704 if (iview->vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || 1705 iview->vk_format == VK_FORMAT_R5G6B5_UNORM_PACK16 || iview->vk_format == VK_FORMAT_B5G6R5_UNORM_PACK16) 1706 extra_channel = -1; 1707 else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { 1708 if (vi_alpha_is_on_msb(device, iview->vk_format)) 1709 extra_channel = desc->nr_channels - 1; 1710 else 1711 extra_channel = 0; 1712 } else 1713 return; 1714 1715 for (int i = 0; i < 4; i++) { 1716 int index = desc->swizzle[i] - PIPE_SWIZZLE_X; 1717 if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W) 1718 continue; 1719 1720 if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1721 /* Use the maximum value for clamping the clear color. */ 1722 int max = u_bit_consecutive(0, desc->channel[i].size - 1); 1723 1724 values[i] = clear_value->int32[i] != 0; 1725 if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max) 1726 return; 1727 } else if (desc->channel[i].pure_integer && 1728 desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1729 /* Use the maximum value for clamping the clear color. */ 1730 unsigned max = u_bit_consecutive(0, desc->channel[i].size); 1731 1732 values[i] = clear_value->uint32[i] != 0U; 1733 if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max) 1734 return; 1735 } else { 1736 values[i] = clear_value->float32[i] != 0.0F; 1737 if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F) 1738 return; 1739 } 1740 1741 if (index == extra_channel) { 1742 extra_value = values[i]; 1743 has_alpha = true; 1744 } else { 1745 main_value = values[i]; 1746 has_color = true; 1747 } 1748 } 1749 1750 /* If alpha isn't present, make it the same as color, and vice versa. */ 1751 if (!has_alpha) 1752 extra_value = main_value; 1753 else if (!has_color) 1754 main_value = extra_value; 1755 1756 for (int i = 0; i < 4; ++i) 1757 if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel && 1758 desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W) 1759 return; 1760 1761 /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */ 1762 if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret) 1763 return; 1764 1765 *can_avoid_fast_clear_elim = true; 1766 1767 if (main_value) { 1768 if (extra_value) 1769 *reset_value = RADV_DCC_CLEAR_1111; 1770 else 1771 *reset_value = RADV_DCC_CLEAR_1110; 1772 } else { 1773 if (extra_value) 1774 *reset_value = RADV_DCC_CLEAR_0001; 1775 else 1776 *reset_value = RADV_DCC_CLEAR_0000; 1777 } 1778} 1779 1780static bool 1781radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 1782 VkImageLayout image_layout, bool in_render_loop, 1783 const VkClearRect *clear_rect, VkClearColorValue clear_value, 1784 uint32_t view_mask) 1785{ 1786 uint32_t clear_color[2]; 1787 1788 if (!iview || !iview->support_fast_clear) 1789 return false; 1790 1791 if (!radv_layout_can_fast_clear( 1792 cmd_buffer->device, iview->image, iview->base_mip, image_layout, in_render_loop, 1793 radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, 1794 cmd_buffer->queue_family_index))) 1795 return false; 1796 1797 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || 1798 clear_rect->rect.extent.width != iview->image->info.width || 1799 clear_rect->rect.extent.height != iview->image->info.height) 1800 return false; 1801 1802 if (view_mask && (iview->image->info.array_size >= 32 || 1803 (1u << iview->image->info.array_size) - 1u != view_mask)) 1804 return false; 1805 if (!view_mask && clear_rect->baseArrayLayer != 0) 1806 return false; 1807 if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) 1808 return false; 1809 1810 /* DCC */ 1811 if (!radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value)) 1812 return false; 1813 1814 if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0)) 1815 return false; 1816 1817 if (radv_dcc_enabled(iview->image, iview->base_mip)) { 1818 bool can_avoid_fast_clear_elim; 1819 uint32_t reset_value; 1820 1821 vi_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value, 1822 &can_avoid_fast_clear_elim); 1823 1824 if (iview->image->info.levels > 1) { 1825 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { 1826 uint32_t last_level = iview->base_mip + iview->level_count - 1; 1827 if (last_level >= iview->image->planes[0].surface.num_meta_levels) { 1828 /* Do not fast clears if one level can't be fast cleard. */ 1829 return false; 1830 } 1831 } else { 1832 for (uint32_t l = 0; l < iview->level_count; l++) { 1833 uint32_t level = iview->base_mip + l; 1834 struct legacy_surf_dcc_level *dcc_level = 1835 &iview->image->planes[0].surface.u.legacy.color.dcc_level[level]; 1836 1837 /* Do not fast clears if one level can't be 1838 * fast cleared. 1839 */ 1840 if (!dcc_level->dcc_fast_clear_size) 1841 return false; 1842 } 1843 } 1844 } 1845 } 1846 1847 return true; 1848} 1849 1850static void 1851radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, 1852 const VkClearAttachment *clear_att, uint32_t subpass_att, 1853 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush) 1854{ 1855 VkClearColorValue clear_value = clear_att->clearValue.color; 1856 uint32_t clear_color[2], flush_bits = 0; 1857 uint32_t cmask_clear_value; 1858 VkImageSubresourceRange range = { 1859 .aspectMask = iview->aspect_mask, 1860 .baseMipLevel = iview->base_mip, 1861 .levelCount = iview->level_count, 1862 .baseArrayLayer = iview->base_layer, 1863 .layerCount = iview->layer_count, 1864 }; 1865 1866 if (pre_flush) { 1867 enum radv_cmd_flush_bits bits = 1868 radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) | 1869 radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image); 1870 cmd_buffer->state.flush_bits |= bits & ~*pre_flush; 1871 *pre_flush |= cmd_buffer->state.flush_bits; 1872 } 1873 1874 /* DCC */ 1875 radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value); 1876 1877 cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image); 1878 1879 /* clear cmask buffer */ 1880 bool need_decompress_pass = false; 1881 if (radv_dcc_enabled(iview->image, iview->base_mip)) { 1882 uint32_t reset_value; 1883 bool can_avoid_fast_clear_elim; 1884 1885 vi_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value, 1886 &can_avoid_fast_clear_elim); 1887 1888 if (radv_image_has_cmask(iview->image)) { 1889 flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value); 1890 } 1891 1892 if (!can_avoid_fast_clear_elim) 1893 need_decompress_pass = true; 1894 1895 flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value); 1896 1897 if (reset_value == RADV_DCC_CLEAR_SINGLE) { 1898 /* Write the clear color to the first byte of each 256B block when the image supports DCC 1899 * fast clears with comp-to-single. 1900 */ 1901 flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color); 1902 } 1903 } else { 1904 flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value); 1905 1906 /* Fast clearing with CMASK should always be eliminated. */ 1907 need_decompress_pass = true; 1908 } 1909 1910 if (post_flush) { 1911 *post_flush |= flush_bits; 1912 } 1913 1914 /* Update the FCE predicate to perform a fast-clear eliminate. */ 1915 radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass); 1916 1917 radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color); 1918} 1919 1920/** 1921 * The parameters mean that same as those in vkCmdClearAttachments. 1922 */ 1923static void 1924emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, 1925 const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush, 1926 enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear) 1927{ 1928 const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; 1929 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1930 VkImageAspectFlags aspects = clear_att->aspectMask; 1931 1932 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { 1933 const uint32_t subpass_att = clear_att->colorAttachment; 1934 assert(subpass_att < subpass->color_count); 1935 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; 1936 if (pass_att == VK_ATTACHMENT_UNUSED) 1937 return; 1938 1939 VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout; 1940 bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop; 1941 const struct radv_image_view *iview = 1942 fb ? cmd_buffer->state.attachments[pass_att].iview : NULL; 1943 VkClearColorValue clear_value = clear_att->clearValue.color; 1944 1945 if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect, 1946 clear_value, view_mask)) { 1947 radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush); 1948 } else { 1949 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); 1950 } 1951 } else { 1952 struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment; 1953 1954 if (ds_resolve_clear) 1955 ds_att = subpass->ds_resolve_attachment; 1956 1957 if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED) 1958 return; 1959 1960 VkImageLayout image_layout = ds_att->layout; 1961 bool in_render_loop = ds_att->in_render_loop; 1962 const struct radv_image_view *iview = 1963 fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL; 1964 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; 1965 1966 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); 1967 1968 if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects, 1969 clear_rect, clear_value, view_mask)) { 1970 radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush); 1971 } else { 1972 emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask); 1973 } 1974 } 1975} 1976 1977static inline bool 1978radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a) 1979{ 1980 uint32_t view_mask = cmd_state->subpass->view_mask; 1981 return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects && 1982 (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views))); 1983} 1984 1985static bool 1986radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer) 1987{ 1988 struct radv_cmd_state *cmd_state = &cmd_buffer->state; 1989 uint32_t a; 1990 1991 if (!cmd_state->subpass) 1992 return false; 1993 1994 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { 1995 a = cmd_state->subpass->color_attachments[i].attachment; 1996 if (radv_attachment_needs_clear(cmd_state, a)) 1997 return true; 1998 } 1999 2000 if (cmd_state->subpass->depth_stencil_attachment) { 2001 a = cmd_state->subpass->depth_stencil_attachment->attachment; 2002 if (radv_attachment_needs_clear(cmd_state, a)) 2003 return true; 2004 } 2005 2006 if (!cmd_state->subpass->ds_resolve_attachment) 2007 return false; 2008 2009 a = cmd_state->subpass->ds_resolve_attachment->attachment; 2010 return radv_attachment_needs_clear(cmd_state, a); 2011} 2012 2013static void 2014radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, 2015 struct radv_attachment_state *attachment, 2016 const VkClearAttachment *clear_att, 2017 enum radv_cmd_flush_bits *pre_flush, 2018 enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear) 2019{ 2020 struct radv_cmd_state *cmd_state = &cmd_buffer->state; 2021 uint32_t view_mask = cmd_state->subpass->view_mask; 2022 2023 VkClearRect clear_rect = { 2024 .rect = cmd_state->render_area, 2025 .baseArrayLayer = 0, 2026 .layerCount = cmd_state->framebuffer->layers, 2027 }; 2028 2029 radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask); 2030 2031 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush, 2032 view_mask & ~attachment->cleared_views, ds_resolve_clear); 2033 if (view_mask) 2034 attachment->cleared_views |= view_mask; 2035 else 2036 attachment->pending_clear_aspects = 0; 2037 2038 radv_describe_end_render_pass_clear(cmd_buffer); 2039} 2040 2041/** 2042 * Emit any pending attachment clears for the current subpass. 2043 * 2044 * @see radv_attachment_state::pending_clear_aspects 2045 */ 2046void 2047radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer) 2048{ 2049 struct radv_cmd_state *cmd_state = &cmd_buffer->state; 2050 struct radv_meta_saved_state saved_state; 2051 enum radv_cmd_flush_bits pre_flush = 0; 2052 enum radv_cmd_flush_bits post_flush = 0; 2053 2054 if (!radv_subpass_needs_clear(cmd_buffer)) 2055 return; 2056 2057 radv_meta_save(&saved_state, cmd_buffer, 2058 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); 2059 2060 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { 2061 uint32_t a = cmd_state->subpass->color_attachments[i].attachment; 2062 2063 if (!radv_attachment_needs_clear(cmd_state, a)) 2064 continue; 2065 2066 assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); 2067 2068 VkClearAttachment clear_att = { 2069 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2070 .colorAttachment = i, /* Use attachment index relative to subpass */ 2071 .clearValue = cmd_state->attachments[a].clear_value, 2072 }; 2073 2074 radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush, 2075 &post_flush, false); 2076 } 2077 2078 if (cmd_state->subpass->depth_stencil_attachment) { 2079 uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment; 2080 if (radv_attachment_needs_clear(cmd_state, ds)) { 2081 VkClearAttachment clear_att = { 2082 .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, 2083 .clearValue = cmd_state->attachments[ds].clear_value, 2084 }; 2085 2086 radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att, 2087 &pre_flush, &post_flush, false); 2088 } 2089 } 2090 2091 if (cmd_state->subpass->ds_resolve_attachment) { 2092 uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment; 2093 if (radv_attachment_needs_clear(cmd_state, ds_resolve)) { 2094 VkClearAttachment clear_att = { 2095 .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects, 2096 .clearValue = cmd_state->attachments[ds_resolve].clear_value, 2097 }; 2098 2099 radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att, 2100 &pre_flush, &post_flush, true); 2101 } 2102 } 2103 2104 radv_meta_restore(&saved_state, cmd_buffer); 2105 cmd_buffer->state.flush_bits |= post_flush; 2106} 2107 2108static void 2109radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2110 VkImageLayout image_layout, const VkImageSubresourceRange *range, 2111 VkFormat format, int level, unsigned layer_count, 2112 const VkClearValue *clear_val) 2113{ 2114 VkDevice device_h = radv_device_to_handle(cmd_buffer->device); 2115 struct radv_image_view iview; 2116 uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level); 2117 uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level); 2118 2119 radv_image_view_init(&iview, cmd_buffer->device, 2120 &(VkImageViewCreateInfo){ 2121 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2122 .image = radv_image_to_handle(image), 2123 .viewType = radv_meta_get_view_type(image), 2124 .format = format, 2125 .subresourceRange = {.aspectMask = range->aspectMask, 2126 .baseMipLevel = range->baseMipLevel + level, 2127 .levelCount = 1, 2128 .baseArrayLayer = range->baseArrayLayer, 2129 .layerCount = layer_count}, 2130 }, 2131 NULL); 2132 2133 VkFramebuffer fb; 2134 radv_CreateFramebuffer( 2135 device_h, 2136 &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 2137 .attachmentCount = 1, 2138 .pAttachments = 2139 (VkImageView[]){ 2140 radv_image_view_to_handle(&iview), 2141 }, 2142 .width = width, 2143 .height = height, 2144 .layers = layer_count}, 2145 &cmd_buffer->pool->alloc, &fb); 2146 2147 VkAttachmentDescription2 att_desc = { 2148 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2, 2149 .format = iview.vk_format, 2150 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, 2151 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2152 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, 2153 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, 2154 .initialLayout = image_layout, 2155 .finalLayout = image_layout, 2156 }; 2157 2158 VkSubpassDescription2 subpass_desc = { 2159 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 2160 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 2161 .inputAttachmentCount = 0, 2162 .colorAttachmentCount = 0, 2163 .pColorAttachments = NULL, 2164 .pResolveAttachments = NULL, 2165 .pDepthStencilAttachment = NULL, 2166 .preserveAttachmentCount = 0, 2167 .pPreserveAttachments = NULL, 2168 }; 2169 2170 const VkAttachmentReference2 att_ref = { 2171 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 2172 .attachment = 0, 2173 .layout = image_layout, 2174 }; 2175 2176 if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { 2177 subpass_desc.colorAttachmentCount = 1; 2178 subpass_desc.pColorAttachments = &att_ref; 2179 } else { 2180 subpass_desc.pDepthStencilAttachment = &att_ref; 2181 } 2182 2183 VkRenderPass pass; 2184 radv_CreateRenderPass2( 2185 device_h, 2186 &(VkRenderPassCreateInfo2){ 2187 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 2188 .attachmentCount = 1, 2189 .pAttachments = &att_desc, 2190 .subpassCount = 1, 2191 .pSubpasses = &subpass_desc, 2192 .dependencyCount = 2, 2193 .pDependencies = 2194 (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 2195 .srcSubpass = VK_SUBPASS_EXTERNAL, 2196 .dstSubpass = 0, 2197 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 2198 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 2199 .srcAccessMask = 0, 2200 .dstAccessMask = 0, 2201 .dependencyFlags = 0}, 2202 {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, 2203 .srcSubpass = 0, 2204 .dstSubpass = VK_SUBPASS_EXTERNAL, 2205 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 2206 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 2207 .srcAccessMask = 0, 2208 .dstAccessMask = 0, 2209 .dependencyFlags = 0}}}, 2210 &cmd_buffer->pool->alloc, &pass); 2211 2212 radv_cmd_buffer_begin_render_pass(cmd_buffer, 2213 &(VkRenderPassBeginInfo){ 2214 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 2215 .renderArea = 2216 { 2217 .offset = 2218 { 2219 0, 2220 0, 2221 }, 2222 .extent = 2223 { 2224 .width = width, 2225 .height = height, 2226 }, 2227 }, 2228 .renderPass = pass, 2229 .framebuffer = fb, 2230 .clearValueCount = 0, 2231 .pClearValues = NULL, 2232 }, 2233 NULL); 2234 2235 radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]); 2236 2237 VkClearAttachment clear_att = { 2238 .aspectMask = range->aspectMask, 2239 .colorAttachment = 0, 2240 .clearValue = *clear_val, 2241 }; 2242 2243 VkClearRect clear_rect = { 2244 .rect = 2245 { 2246 .offset = {0, 0}, 2247 .extent = {width, height}, 2248 }, 2249 .baseArrayLayer = 0, 2250 .layerCount = layer_count, 2251 }; 2252 2253 emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false); 2254 2255 radv_image_view_finish(&iview); 2256 radv_cmd_buffer_end_render_pass(cmd_buffer); 2257 radv_DestroyRenderPass(device_h, pass, &cmd_buffer->pool->alloc); 2258 radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc); 2259} 2260 2261/** 2262 * Return TRUE if a fast color or depth clear has been performed. 2263 */ 2264static bool 2265radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format, 2266 VkImageLayout image_layout, bool in_render_loop, 2267 const VkImageSubresourceRange *range, const VkClearValue *clear_val) 2268{ 2269 struct radv_image_view iview; 2270 bool fast_cleared = false; 2271 2272 radv_image_view_init(&iview, cmd_buffer->device, 2273 &(VkImageViewCreateInfo){ 2274 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2275 .image = radv_image_to_handle(image), 2276 .viewType = radv_meta_get_view_type(image), 2277 .format = image->vk_format, 2278 .subresourceRange = 2279 { 2280 .aspectMask = range->aspectMask, 2281 .baseMipLevel = range->baseMipLevel, 2282 .levelCount = range->levelCount, 2283 .baseArrayLayer = range->baseArrayLayer, 2284 .layerCount = range->layerCount, 2285 }, 2286 }, 2287 NULL); 2288 2289 VkClearRect clear_rect = { 2290 .rect = 2291 { 2292 .offset = {0, 0}, 2293 .extent = 2294 { 2295 radv_minify(image->info.width, range->baseMipLevel), 2296 radv_minify(image->info.height, range->baseMipLevel), 2297 }, 2298 }, 2299 .baseArrayLayer = range->baseArrayLayer, 2300 .layerCount = range->layerCount, 2301 }; 2302 2303 VkClearAttachment clear_att = { 2304 .aspectMask = range->aspectMask, 2305 .colorAttachment = 0, 2306 .clearValue = *clear_val, 2307 }; 2308 2309 if (vk_format_is_color(format)) { 2310 if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect, 2311 clear_att.clearValue.color, 0)) { 2312 radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL, 2313 NULL); 2314 fast_cleared = true; 2315 } 2316 } else { 2317 if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop, 2318 range->aspectMask, &clear_rect, 2319 clear_att.clearValue.depthStencil, 0)) { 2320 radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL); 2321 fast_cleared = true; 2322 } 2323 } 2324 2325 radv_image_view_finish(&iview); 2326 return fast_cleared; 2327} 2328 2329static void 2330radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2331 VkImageLayout image_layout, const VkClearValue *clear_value, 2332 uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs) 2333{ 2334 VkFormat format = image->vk_format; 2335 VkClearValue internal_clear_value; 2336 2337 if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) 2338 internal_clear_value.color = clear_value->color; 2339 else 2340 internal_clear_value.depthStencil = clear_value->depthStencil; 2341 2342 bool disable_compression = false; 2343 2344 if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { 2345 bool blendable; 2346 if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format) 2347 : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, 2348 &blendable)) { 2349 format = VK_FORMAT_R32_UINT; 2350 internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32); 2351 2352 uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index, 2353 cmd_buffer->queue_family_index); 2354 2355 for (uint32_t r = 0; r < range_count; r++) { 2356 const VkImageSubresourceRange *range = &ranges[r]; 2357 2358 /* Don't use compressed image stores because they will use an incompatible format. */ 2359 if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, 2360 image_layout, false, queue_mask)) { 2361 disable_compression = cs; 2362 break; 2363 } 2364 } 2365 } 2366 } 2367 2368 if (format == VK_FORMAT_R4G4_UNORM_PACK8) { 2369 uint8_t r, g; 2370 format = VK_FORMAT_R8_UINT; 2371 r = float_to_ubyte(clear_value->color.float32[0]) >> 4; 2372 g = float_to_ubyte(clear_value->color.float32[1]) >> 4; 2373 internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf); 2374 } 2375 2376 for (uint32_t r = 0; r < range_count; r++) { 2377 const VkImageSubresourceRange *range = &ranges[r]; 2378 2379 /* Try to perform a fast clear first, otherwise fallback to 2380 * the legacy path. 2381 */ 2382 if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range, 2383 &internal_clear_value)) { 2384 continue; 2385 } 2386 2387 for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) { 2388 const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D 2389 ? radv_minify(image->info.depth, range->baseMipLevel + l) 2390 : radv_get_layerCount(image, range); 2391 2392 if (cs) { 2393 for (uint32_t s = 0; s < layer_count; ++s) { 2394 struct radv_meta_blit2d_surf surf; 2395 surf.format = format; 2396 surf.image = image; 2397 surf.level = range->baseMipLevel + l; 2398 surf.layer = range->baseArrayLayer + s; 2399 surf.aspect_mask = range->aspectMask; 2400 surf.disable_compression = disable_compression; 2401 radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color); 2402 } 2403 } else { 2404 assert(!disable_compression); 2405 radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count, 2406 &internal_clear_value); 2407 } 2408 } 2409 } 2410 2411 if (disable_compression) { 2412 enum radv_cmd_flush_bits flush_bits = 0; 2413 for (unsigned i = 0; i < range_count; i++) { 2414 if (radv_dcc_enabled(image, ranges[i].baseMipLevel)) 2415 flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu); 2416 } 2417 cmd_buffer->state.flush_bits |= flush_bits; 2418 } 2419} 2420 2421void 2422radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, 2423 const VkClearColorValue *pColor, uint32_t rangeCount, 2424 const VkImageSubresourceRange *pRanges) 2425{ 2426 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2427 RADV_FROM_HANDLE(radv_image, image, image_h); 2428 struct radv_meta_saved_state saved_state; 2429 bool cs; 2430 2431 cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE || 2432 !radv_image_is_renderable(cmd_buffer->device, image); 2433 2434 if (cs) { 2435 radv_meta_save( 2436 &saved_state, cmd_buffer, 2437 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); 2438 } else { 2439 radv_meta_save(&saved_state, cmd_buffer, 2440 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); 2441 } 2442 2443 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, 2444 pRanges, cs); 2445 2446 radv_meta_restore(&saved_state, cmd_buffer); 2447} 2448 2449void 2450radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, 2451 VkImageLayout imageLayout, 2452 const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, 2453 const VkImageSubresourceRange *pRanges) 2454{ 2455 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2456 RADV_FROM_HANDLE(radv_image, image, image_h); 2457 struct radv_meta_saved_state saved_state; 2458 2459 radv_meta_save(&saved_state, cmd_buffer, 2460 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); 2461 2462 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, 2463 rangeCount, pRanges, false); 2464 2465 radv_meta_restore(&saved_state, cmd_buffer); 2466} 2467 2468void 2469radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, 2470 const VkClearAttachment *pAttachments, uint32_t rectCount, 2471 const VkClearRect *pRects) 2472{ 2473 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 2474 struct radv_meta_saved_state saved_state; 2475 enum radv_cmd_flush_bits pre_flush = 0; 2476 enum radv_cmd_flush_bits post_flush = 0; 2477 2478 if (!cmd_buffer->state.subpass) 2479 return; 2480 2481 radv_meta_save(&saved_state, cmd_buffer, 2482 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS); 2483 2484 /* FINISHME: We can do better than this dumb loop. It thrashes too much 2485 * state. 2486 */ 2487 for (uint32_t a = 0; a < attachmentCount; ++a) { 2488 for (uint32_t r = 0; r < rectCount; ++r) { 2489 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush, 2490 cmd_buffer->state.subpass->view_mask, false); 2491 } 2492 } 2493 2494 radv_meta_restore(&saved_state, cmd_buffer); 2495 cmd_buffer->state.flush_bits |= post_flush; 2496} 2497