radv_meta_decompress.c revision 7ec681f3
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31enum radv_depth_op {
32   DEPTH_DECOMPRESS,
33   DEPTH_RESUMMARIZE,
34};
35
36static nir_shader *
37build_expand_depth_stencil_compute_shader(struct radv_device *dev)
38{
39   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
40
41   nir_builder b =
42      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "expand_depth_stencil_compute");
43
44   /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
45   b.shader->info.workgroup_size[0] = 8;
46   b.shader->info.workgroup_size[1] = 8;
47   b.shader->info.workgroup_size[2] = 1;
48   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
49   input_img->data.descriptor_set = 0;
50   input_img->data.binding = 0;
51
52   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
53   output_img->data.descriptor_set = 0;
54   output_img->data.binding = 1;
55
56   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
57   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
58   nir_ssa_def *block_size =
59      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
60                    b.shader->info.workgroup_size[2], 0);
61
62   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
63
64   nir_ssa_def *data = nir_image_deref_load(
65      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32),
66      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
67
68   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
69    * creating a vmcnt(0) because it expects the L1 cache to keep memory
70    * operations in-order for the same workgroup. The vmcnt(0) seems
71    * necessary however. */
72   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
73                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
74
75   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
76                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
77                         .image_dim = GLSL_SAMPLER_DIM_2D);
78   return b.shader;
79}
80
81static VkResult
82create_expand_depth_stencil_compute(struct radv_device *device)
83{
84   VkResult result = VK_SUCCESS;
85   nir_shader *cs = build_expand_depth_stencil_compute_shader(device);
86
87   VkDescriptorSetLayoutCreateInfo ds_create_info = {
88      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
89      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
90      .bindingCount = 2,
91      .pBindings = (VkDescriptorSetLayoutBinding[]){
92         {.binding = 0,
93          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
94          .descriptorCount = 1,
95          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
96          .pImmutableSamplers = NULL},
97         {.binding = 1,
98          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
99          .descriptorCount = 1,
100          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
101          .pImmutableSamplers = NULL},
102      }};
103
104   result = radv_CreateDescriptorSetLayout(
105      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
106      &device->meta_state.expand_depth_stencil_compute_ds_layout);
107   if (result != VK_SUCCESS)
108      goto cleanup;
109
110   VkPipelineLayoutCreateInfo pl_create_info = {
111      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
112      .setLayoutCount = 1,
113      .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout,
114      .pushConstantRangeCount = 0,
115      .pPushConstantRanges = NULL,
116   };
117
118   result = radv_CreatePipelineLayout(
119      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
120      &device->meta_state.expand_depth_stencil_compute_p_layout);
121   if (result != VK_SUCCESS)
122      goto cleanup;
123
124   /* compute shader */
125
126   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
127      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
129      .module = vk_shader_module_handle_from_nir(cs),
130      .pName = "main",
131      .pSpecializationInfo = NULL,
132   };
133
134   VkComputePipelineCreateInfo vk_pipeline_info = {
135      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
136      .stage = pipeline_shader_stage,
137      .flags = 0,
138      .layout = device->meta_state.expand_depth_stencil_compute_p_layout,
139   };
140
141   result = radv_CreateComputePipelines(
142      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
143      &vk_pipeline_info, NULL,
144      &device->meta_state.expand_depth_stencil_compute_pipeline);
145   if (result != VK_SUCCESS)
146      goto cleanup;
147
148cleanup:
149   ralloc_free(cs);
150   return result;
151}
152
153static VkResult
154create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)
155{
156   VkResult result;
157   VkDevice device_h = radv_device_to_handle(device);
158   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
159   VkAttachmentDescription2 attachment;
160
161   attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
162   attachment.pNext = NULL;
163   attachment.flags = 0;
164   attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
165   attachment.samples = samples;
166   attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
167   attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
168   attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
169   attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
170   attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
171   attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
172
173   result = radv_CreateRenderPass2(
174      device_h,
175      &(VkRenderPassCreateInfo2){
176         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
177         .attachmentCount = 1,
178         .pAttachments = &attachment,
179         .subpassCount = 1,
180         .pSubpasses =
181            &(VkSubpassDescription2){
182               .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
183               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
184               .inputAttachmentCount = 0,
185               .colorAttachmentCount = 0,
186               .pColorAttachments = NULL,
187               .pResolveAttachments = NULL,
188               .pDepthStencilAttachment =
189                  &(VkAttachmentReference2){
190                     .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
191                     .attachment = 0,
192                     .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
193                  },
194               .preserveAttachmentCount = 0,
195               .pPreserveAttachments = NULL,
196            },
197         .dependencyCount = 2,
198         .pDependencies =
199            (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
200                                      .srcSubpass = VK_SUBPASS_EXTERNAL,
201                                      .dstSubpass = 0,
202                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
203                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
204                                      .srcAccessMask = 0,
205                                      .dstAccessMask = 0,
206                                      .dependencyFlags = 0},
207                                     {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
208                                      .srcSubpass = 0,
209                                      .dstSubpass = VK_SUBPASS_EXTERNAL,
210                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
211                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
212                                      .srcAccessMask = 0,
213                                      .dstAccessMask = 0,
214                                      .dependencyFlags = 0}},
215      },
216      alloc, pass);
217
218   return result;
219}
220
221static VkResult
222create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
223{
224   VkPipelineLayoutCreateInfo pl_create_info = {
225      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
226      .setLayoutCount = 0,
227      .pSetLayouts = NULL,
228      .pushConstantRangeCount = 0,
229      .pPushConstantRanges = NULL,
230   };
231
232   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
233                                    &device->meta_state.alloc, layout);
234}
235
236static VkResult
237create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,
238                VkPipelineLayout layout, enum radv_depth_op op, VkPipeline *pipeline)
239{
240   VkResult result;
241   VkDevice device_h = radv_device_to_handle(device);
242
243   mtx_lock(&device->meta_state.mtx);
244   if (*pipeline) {
245      mtx_unlock(&device->meta_state.mtx);
246      return VK_SUCCESS;
247   }
248
249   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
250   nir_shader *fs_module = radv_meta_build_nir_fs_noop();
251
252   if (!vs_module || !fs_module) {
253      /* XXX: Need more accurate error */
254      result = VK_ERROR_OUT_OF_HOST_MEMORY;
255      goto cleanup;
256   }
257
258   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
259      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
260      .sampleLocationsEnable = false,
261   };
262
263   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
264      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
265      .stageCount = 2,
266      .pStages =
267         (VkPipelineShaderStageCreateInfo[]){
268            {
269               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
270               .stage = VK_SHADER_STAGE_VERTEX_BIT,
271               .module = vk_shader_module_handle_from_nir(vs_module),
272               .pName = "main",
273            },
274            {
275               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
276               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
277               .module = vk_shader_module_handle_from_nir(fs_module),
278               .pName = "main",
279            },
280         },
281      .pVertexInputState =
282         &(VkPipelineVertexInputStateCreateInfo){
283            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
284            .vertexBindingDescriptionCount = 0,
285            .vertexAttributeDescriptionCount = 0,
286         },
287      .pInputAssemblyState =
288         &(VkPipelineInputAssemblyStateCreateInfo){
289            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
290            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
291            .primitiveRestartEnable = false,
292         },
293      .pViewportState =
294         &(VkPipelineViewportStateCreateInfo){
295            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
296            .viewportCount = 1,
297            .scissorCount = 1,
298         },
299      .pRasterizationState =
300         &(VkPipelineRasterizationStateCreateInfo){
301            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
302            .depthClampEnable = false,
303            .rasterizerDiscardEnable = false,
304            .polygonMode = VK_POLYGON_MODE_FILL,
305            .cullMode = VK_CULL_MODE_NONE,
306            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
307         },
308      .pMultisampleState =
309         &(VkPipelineMultisampleStateCreateInfo){
310            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
311            .pNext = &sample_locs_create_info,
312            .rasterizationSamples = samples,
313            .sampleShadingEnable = false,
314            .pSampleMask = NULL,
315            .alphaToCoverageEnable = false,
316            .alphaToOneEnable = false,
317         },
318      .pColorBlendState =
319         &(VkPipelineColorBlendStateCreateInfo){
320            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
321            .logicOpEnable = false,
322            .attachmentCount = 0,
323            .pAttachments = NULL,
324         },
325      .pDepthStencilState =
326         &(VkPipelineDepthStencilStateCreateInfo){
327            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
328            .depthTestEnable = false,
329            .depthWriteEnable = false,
330            .depthBoundsTestEnable = false,
331            .stencilTestEnable = false,
332         },
333      .pDynamicState =
334         &(VkPipelineDynamicStateCreateInfo){
335            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
336            .dynamicStateCount = 3,
337            .pDynamicStates =
338               (VkDynamicState[]){
339                  VK_DYNAMIC_STATE_VIEWPORT,
340                  VK_DYNAMIC_STATE_SCISSOR,
341                  VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
342               },
343         },
344      .layout = layout,
345      .renderPass = pass,
346      .subpass = 0,
347   };
348
349   struct radv_graphics_pipeline_create_info extra = {
350      .use_rectlist = true,
351      .depth_compress_disable = true,
352      .stencil_compress_disable = true,
353      .resummarize_enable = op == DEPTH_RESUMMARIZE,
354   };
355
356   result = radv_graphics_pipeline_create(
357      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
358      &extra, &device->meta_state.alloc, pipeline);
359
360cleanup:
361   ralloc_free(fs_module);
362   ralloc_free(vs_module);
363   mtx_unlock(&device->meta_state.mtx);
364   return result;
365}
366
367void
368radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
369{
370   struct radv_meta_state *state = &device->meta_state;
371
372   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
373      radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,
374                             &state->alloc);
375      radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
376                                 &state->alloc);
377
378      radv_DestroyPipeline(radv_device_to_handle(device),
379                           state->depth_decomp[i].decompress_pipeline, &state->alloc);
380      radv_DestroyPipeline(radv_device_to_handle(device),
381                           state->depth_decomp[i].resummarize_pipeline, &state->alloc);
382   }
383
384   radv_DestroyPipeline(radv_device_to_handle(device),
385                        state->expand_depth_stencil_compute_pipeline, &state->alloc);
386   radv_DestroyPipelineLayout(radv_device_to_handle(device),
387                              state->expand_depth_stencil_compute_p_layout, &state->alloc);
388   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
389                                   state->expand_depth_stencil_compute_ds_layout, &state->alloc);
390}
391
392VkResult
393radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
394{
395   struct radv_meta_state *state = &device->meta_state;
396   VkResult res = VK_SUCCESS;
397
398   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
399      uint32_t samples = 1 << i;
400
401      res = create_pass(device, samples, &state->depth_decomp[i].pass);
402      if (res != VK_SUCCESS)
403         goto fail;
404
405      res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
406      if (res != VK_SUCCESS)
407         goto fail;
408
409      if (on_demand)
410         continue;
411
412      res = create_pipeline(device, samples, state->depth_decomp[i].pass,
413                            state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,
414                            &state->depth_decomp[i].decompress_pipeline);
415      if (res != VK_SUCCESS)
416         goto fail;
417
418      res = create_pipeline(device, samples, state->depth_decomp[i].pass,
419                            state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,
420                            &state->depth_decomp[i].resummarize_pipeline);
421      if (res != VK_SUCCESS)
422         goto fail;
423   }
424
425   res = create_expand_depth_stencil_compute(device);
426   if (res != VK_SUCCESS)
427      goto fail;
428
429   return VK_SUCCESS;
430
431fail:
432   radv_device_finish_meta_depth_decomp_state(device);
433   return res;
434}
435
436static VkPipeline *
437radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
438                        const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
439{
440   struct radv_meta_state *state = &cmd_buffer->device->meta_state;
441   uint32_t samples = image->info.samples;
442   uint32_t samples_log2 = ffs(samples) - 1;
443   VkPipeline *pipeline;
444
445   if (!state->depth_decomp[samples_log2].decompress_pipeline) {
446      VkResult ret;
447
448      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
449                            state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS,
450                             &state->depth_decomp[samples_log2].decompress_pipeline);
451      if (ret != VK_SUCCESS) {
452         cmd_buffer->record_result = ret;
453         return NULL;
454      }
455
456      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
457                            state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
458                            &state->depth_decomp[samples_log2].resummarize_pipeline);
459      if (ret != VK_SUCCESS) {
460         cmd_buffer->record_result = ret;
461         return NULL;
462      }
463   }
464
465   switch (op) {
466   case DEPTH_DECOMPRESS:
467      pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
468      break;
469   case DEPTH_RESUMMARIZE:
470      pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
471      break;
472   default:
473      unreachable("unknown operation");
474   }
475
476   return pipeline;
477}
478
479static void
480radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
481                               const VkImageSubresourceRange *range, int level, int layer)
482{
483   struct radv_device *device = cmd_buffer->device;
484   struct radv_meta_state *state = &device->meta_state;
485   uint32_t samples_log2 = ffs(image->info.samples) - 1;
486   struct radv_image_view iview;
487   uint32_t width, height;
488
489   width = radv_minify(image->info.width, range->baseMipLevel + level);
490   height = radv_minify(image->info.height, range->baseMipLevel + level);
491
492   radv_image_view_init(&iview, device,
493                        &(VkImageViewCreateInfo){
494                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
495                           .image = radv_image_to_handle(image),
496                           .viewType = radv_meta_get_view_type(image),
497                           .format = image->vk_format,
498                           .subresourceRange =
499                              {
500                                 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
501                                 .baseMipLevel = range->baseMipLevel + level,
502                                 .levelCount = 1,
503                                 .baseArrayLayer = range->baseArrayLayer + layer,
504                                 .layerCount = 1,
505                              },
506                        },
507                        NULL);
508
509   VkFramebuffer fb_h;
510   radv_CreateFramebuffer(
511      radv_device_to_handle(device),
512      &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
513                                 .attachmentCount = 1,
514                                 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
515                                 .width = width,
516                                 .height = height,
517                                 .layers = 1},
518      &cmd_buffer->pool->alloc, &fb_h);
519
520   radv_cmd_buffer_begin_render_pass(cmd_buffer,
521                                     &(VkRenderPassBeginInfo){
522                                        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
523                                        .renderPass = state->depth_decomp[samples_log2].pass,
524                                        .framebuffer = fb_h,
525                                        .renderArea = {.offset =
526                                                          {
527                                                             0,
528                                                             0,
529                                                          },
530                                                       .extent =
531                                                          {
532                                                             width,
533                                                             height,
534                                                          }},
535                                        .clearValueCount = 0,
536                                        .pClearValues = NULL,
537                                     },
538                                     NULL);
539   radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
540
541   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
542   radv_cmd_buffer_end_render_pass(cmd_buffer);
543
544   radv_image_view_finish(&iview);
545   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
546}
547
548static void
549radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
550                           const VkImageSubresourceRange *subresourceRange,
551                           struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
552{
553   struct radv_meta_saved_state saved_state;
554   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
555   VkPipeline *pipeline;
556
557   radv_meta_save(
558      &saved_state, cmd_buffer,
559      RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
560
561   pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
562
563   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
564                        *pipeline);
565
566   if (sample_locs) {
567      assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
568
569      /* Set the sample locations specified during explicit or
570       * automatic layout transitions, otherwise the depth decompress
571       * pass uses the default HW locations.
572       */
573      radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
574                                    &(VkSampleLocationsInfoEXT){
575                                       .sampleLocationsPerPixel = sample_locs->per_pixel,
576                                       .sampleLocationGridSize = sample_locs->grid_size,
577                                       .sampleLocationsCount = sample_locs->count,
578                                       .pSampleLocations = sample_locs->locations,
579                                    });
580   }
581
582   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
583
584      /* Do not decompress levels without HTILE. */
585      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
586         continue;
587
588      uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
589      uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
590
591      radv_CmdSetViewport(cmd_buffer_h, 0, 1,
592                          &(VkViewport){.x = 0,
593                                        .y = 0,
594                                        .width = width,
595                                        .height = height,
596                                        .minDepth = 0.0f,
597                                        .maxDepth = 1.0f});
598
599      radv_CmdSetScissor(cmd_buffer_h, 0, 1,
600                         &(VkRect2D){
601                            .offset = {0, 0},
602                            .extent = {width, height},
603                         });
604
605      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
606         radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
607      }
608   }
609
610   radv_meta_restore(&saved_state, cmd_buffer);
611}
612
613static void
614radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
615                                  const VkImageSubresourceRange *subresourceRange)
616{
617   struct radv_meta_saved_state saved_state;
618   struct radv_image_view load_iview = {0};
619   struct radv_image_view store_iview = {0};
620   struct radv_device *device = cmd_buffer->device;
621
622   assert(radv_image_is_tc_compat_htile(image));
623
624   cmd_buffer->state.flush_bits |=
625      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
626
627   radv_meta_save(&saved_state, cmd_buffer,
628                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
629
630   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
631                        device->meta_state.expand_depth_stencil_compute_pipeline);
632
633   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
634      uint32_t width, height;
635
636      /* Do not decompress levels without HTILE. */
637      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
638         continue;
639
640      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
641      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
642
643      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
644         radv_image_view_init(
645            &load_iview, cmd_buffer->device,
646            &(VkImageViewCreateInfo){
647               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
648               .image = radv_image_to_handle(image),
649               .viewType = VK_IMAGE_VIEW_TYPE_2D,
650               .format = image->vk_format,
651               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
652                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
653                                    .levelCount = 1,
654                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
655                                    .layerCount = 1},
656            },
657            &(struct radv_image_view_extra_create_info){.enable_compression = true});
658         radv_image_view_init(
659            &store_iview, cmd_buffer->device,
660            &(VkImageViewCreateInfo){
661               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
662               .image = radv_image_to_handle(image),
663               .viewType = VK_IMAGE_VIEW_TYPE_2D,
664               .format = image->vk_format,
665               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
666                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
667                                    .levelCount = 1,
668                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
669                                    .layerCount = 1},
670            },
671            &(struct radv_image_view_extra_create_info){.disable_compression = true});
672
673         radv_meta_push_descriptor_set(
674            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
675            device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */
676            2, /* descriptorWriteCount */
677            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
678                                      .dstBinding = 0,
679                                      .dstArrayElement = 0,
680                                      .descriptorCount = 1,
681                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
682                                      .pImageInfo =
683                                         (VkDescriptorImageInfo[]){
684                                            {
685                                               .sampler = VK_NULL_HANDLE,
686                                               .imageView = radv_image_view_to_handle(&load_iview),
687                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
688                                            },
689                                         }},
690                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
691                                      .dstBinding = 1,
692                                      .dstArrayElement = 0,
693                                      .descriptorCount = 1,
694                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
695                                      .pImageInfo = (VkDescriptorImageInfo[]){
696                                         {
697                                            .sampler = VK_NULL_HANDLE,
698                                            .imageView = radv_image_view_to_handle(&store_iview),
699                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
700                                         },
701                                      }}});
702
703         radv_unaligned_dispatch(cmd_buffer, width, height, 1);
704
705         radv_image_view_finish(&load_iview);
706         radv_image_view_finish(&store_iview);
707      }
708   }
709
710   radv_meta_restore(&saved_state, cmd_buffer);
711
712   cmd_buffer->state.flush_bits |=
713      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
714      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
715
716   /* Initialize the HTILE metadata as "fully expanded". */
717   uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
718
719   cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
720}
721
722void
723radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
724                          const VkImageSubresourceRange *subresourceRange,
725                          struct radv_sample_locations_state *sample_locs)
726{
727   struct radv_barrier_data barrier = {0};
728
729   barrier.layout_transitions.depth_stencil_expand = 1;
730   radv_describe_layout_transition(cmd_buffer, &barrier);
731
732   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
733      radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
734   } else {
735      radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
736   }
737}
738
739void
740radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
741                               const VkImageSubresourceRange *subresourceRange,
742                               struct radv_sample_locations_state *sample_locs)
743{
744   struct radv_barrier_data barrier = {0};
745
746   barrier.layout_transitions.depth_stencil_resummarize = 1;
747   radv_describe_layout_transition(cmd_buffer, &barrier);
748
749   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
750   radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
751}
752