1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31enum radv_color_op {
32   FAST_CLEAR_ELIMINATE,
33   FMASK_DECOMPRESS,
34   DCC_DECOMPRESS,
35};
36
37static nir_shader *
38build_dcc_decompress_compute_shader(struct radv_device *dev)
39{
40   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
41
42   nir_builder b =
43      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
44
45   /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
46   b.shader->info.workgroup_size[0] = 16;
47   b.shader->info.workgroup_size[1] = 16;
48   b.shader->info.workgroup_size[2] = 1;
49   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
50   input_img->data.descriptor_set = 0;
51   input_img->data.binding = 0;
52
53   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
54   output_img->data.descriptor_set = 0;
55   output_img->data.binding = 1;
56
57   nir_ssa_def *global_id = get_global_ids(&b, 2);
58   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
59                                         nir_channel(&b, global_id, 1),
60                                         nir_ssa_undef(&b, 1, 32),
61                                         nir_ssa_undef(&b, 1, 32));
62
63   nir_ssa_def *data = nir_image_deref_load(
64      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
65      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
66
67   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
68    * creating a vmcnt(0) because it expects the L1 cache to keep memory
69    * operations in-order for the same workgroup. The vmcnt(0) seems
70    * necessary however. */
71   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
72                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
73
74   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
75                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
76                         .image_dim = GLSL_SAMPLER_DIM_2D);
77   return b.shader;
78}
79
80static VkResult
81create_dcc_compress_compute(struct radv_device *device)
82{
83   VkResult result = VK_SUCCESS;
84   nir_shader *cs = build_dcc_decompress_compute_shader(device);
85
86   VkDescriptorSetLayoutCreateInfo ds_create_info = {
87      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
88      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
89      .bindingCount = 2,
90      .pBindings = (VkDescriptorSetLayoutBinding[]){
91         {.binding = 0,
92          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
93          .descriptorCount = 1,
94          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
95          .pImmutableSamplers = NULL},
96         {.binding = 1,
97          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
98          .descriptorCount = 1,
99          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
100          .pImmutableSamplers = NULL},
101      }};
102
103   result = radv_CreateDescriptorSetLayout(
104      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
105      &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
106   if (result != VK_SUCCESS)
107      goto cleanup;
108
109   VkPipelineLayoutCreateInfo pl_create_info = {
110      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
111      .setLayoutCount = 1,
112      .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
113      .pushConstantRangeCount = 0,
114      .pPushConstantRanges = NULL,
115   };
116
117   result = radv_CreatePipelineLayout(
118      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
119      &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
120   if (result != VK_SUCCESS)
121      goto cleanup;
122
123   /* compute shader */
124
125   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
126      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
127      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
128      .module = vk_shader_module_handle_from_nir(cs),
129      .pName = "main",
130      .pSpecializationInfo = NULL,
131   };
132
133   VkComputePipelineCreateInfo vk_pipeline_info = {
134      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
135      .stage = pipeline_shader_stage,
136      .flags = 0,
137      .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
138   };
139
140   result = radv_CreateComputePipelines(
141      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
142      &vk_pipeline_info, NULL,
143      &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
144   if (result != VK_SUCCESS)
145      goto cleanup;
146
147cleanup:
148   ralloc_free(cs);
149   return result;
150}
151
152static VkResult
153create_pass(struct radv_device *device)
154{
155   VkResult result;
156   VkDevice device_h = radv_device_to_handle(device);
157   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
158   VkAttachmentDescription2 attachment;
159
160   attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
161   attachment.pNext = NULL;
162   attachment.format = VK_FORMAT_UNDEFINED;
163   attachment.samples = 1;
164   attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
165   attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
166   attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
167   attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
168
169   result = radv_CreateRenderPass2(
170      device_h,
171      &(VkRenderPassCreateInfo2){
172         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
173         .attachmentCount = 1,
174         .pAttachments = &attachment,
175         .subpassCount = 1,
176         .pSubpasses =
177            &(VkSubpassDescription2){
178               .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
179               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
180               .inputAttachmentCount = 0,
181               .colorAttachmentCount = 1,
182               .pColorAttachments =
183                  (VkAttachmentReference2[]){
184                     {
185                        .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
186                        .attachment = 0,
187                        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
188                     },
189                  },
190               .pResolveAttachments = NULL,
191               .pDepthStencilAttachment =
192                  &(VkAttachmentReference2){
193                     .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
194                     .attachment = VK_ATTACHMENT_UNUSED,
195                  },
196               .preserveAttachmentCount = 0,
197               .pPreserveAttachments = NULL,
198            },
199         .dependencyCount = 2,
200         .pDependencies =
201            (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
202                                      .srcSubpass = VK_SUBPASS_EXTERNAL,
203                                      .dstSubpass = 0,
204                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
205                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
206                                      .srcAccessMask = 0,
207                                      .dstAccessMask = 0,
208                                      .dependencyFlags = 0},
209                                     {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
210                                      .srcSubpass = 0,
211                                      .dstSubpass = VK_SUBPASS_EXTERNAL,
212                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
213                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
214                                      .srcAccessMask = 0,
215                                      .dstAccessMask = 0,
216                                      .dependencyFlags = 0}},
217      },
218      alloc, &device->meta_state.fast_clear_flush.pass);
219
220   return result;
221}
222
223static VkResult
224create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
225{
226   VkPipelineLayoutCreateInfo pl_create_info = {
227      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
228      .setLayoutCount = 0,
229      .pSetLayouts = NULL,
230      .pushConstantRangeCount = 0,
231      .pPushConstantRanges = NULL,
232   };
233
234   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
235                                    &device->meta_state.alloc, layout);
236}
237
238static VkResult
239create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
240{
241   VkResult result;
242   VkDevice device_h = radv_device_to_handle(device);
243
244   nir_shader *fs_module = radv_meta_build_nir_fs_noop();
245
246   if (!fs_module) {
247      /* XXX: Need more accurate error */
248      result = VK_ERROR_OUT_OF_HOST_MEMORY;
249      goto cleanup;
250   }
251
252   const VkPipelineShaderStageCreateInfo stages[2] = {
253      {
254         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
255         .stage = VK_SHADER_STAGE_VERTEX_BIT,
256         .module = vs_module_h,
257         .pName = "main",
258      },
259      {
260         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
261         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
262         .module = vk_shader_module_handle_from_nir(fs_module),
263         .pName = "main",
264      },
265   };
266
267   const VkPipelineVertexInputStateCreateInfo vi_state = {
268      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
269      .vertexBindingDescriptionCount = 0,
270      .vertexAttributeDescriptionCount = 0,
271   };
272
273   const VkPipelineInputAssemblyStateCreateInfo ia_state = {
274      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
275      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
276      .primitiveRestartEnable = false,
277   };
278
279   const VkPipelineColorBlendStateCreateInfo blend_state = {
280      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
281      .logicOpEnable = false,
282      .attachmentCount = 1,
283      .pAttachments = (VkPipelineColorBlendAttachmentState[]){
284         {
285            .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
286                              VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
287         },
288      }};
289   const VkPipelineRasterizationStateCreateInfo rs_state = {
290      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
291      .depthClampEnable = false,
292      .rasterizerDiscardEnable = false,
293      .polygonMode = VK_POLYGON_MODE_FILL,
294      .cullMode = VK_CULL_MODE_NONE,
295      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
296   };
297
298   result = radv_graphics_pipeline_create(
299      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
300      &(VkGraphicsPipelineCreateInfo){
301         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
302         .stageCount = 2,
303         .pStages = stages,
304
305         .pVertexInputState = &vi_state,
306         .pInputAssemblyState = &ia_state,
307
308         .pViewportState =
309            &(VkPipelineViewportStateCreateInfo){
310               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
311               .viewportCount = 1,
312               .scissorCount = 1,
313            },
314         .pRasterizationState = &rs_state,
315         .pMultisampleState =
316            &(VkPipelineMultisampleStateCreateInfo){
317               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
318               .rasterizationSamples = 1,
319               .sampleShadingEnable = false,
320               .pSampleMask = NULL,
321               .alphaToCoverageEnable = false,
322               .alphaToOneEnable = false,
323            },
324         .pColorBlendState = &blend_state,
325         .pDynamicState =
326            &(VkPipelineDynamicStateCreateInfo){
327               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
328               .dynamicStateCount = 2,
329               .pDynamicStates =
330                  (VkDynamicState[]){
331                     VK_DYNAMIC_STATE_VIEWPORT,
332                     VK_DYNAMIC_STATE_SCISSOR,
333                  },
334            },
335         .layout = layout,
336         .renderPass = device->meta_state.fast_clear_flush.pass,
337         .subpass = 0,
338      },
339      &(struct radv_graphics_pipeline_create_info){
340         .use_rectlist = true,
341         .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
342      },
343      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
344   if (result != VK_SUCCESS)
345      goto cleanup;
346
347   result = radv_graphics_pipeline_create(
348      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
349      &(VkGraphicsPipelineCreateInfo){
350         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
351         .stageCount = 2,
352         .pStages = stages,
353
354         .pVertexInputState = &vi_state,
355         .pInputAssemblyState = &ia_state,
356
357         .pViewportState =
358            &(VkPipelineViewportStateCreateInfo){
359               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
360               .viewportCount = 1,
361               .scissorCount = 1,
362            },
363         .pRasterizationState = &rs_state,
364         .pMultisampleState =
365            &(VkPipelineMultisampleStateCreateInfo){
366               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
367               .rasterizationSamples = 1,
368               .sampleShadingEnable = false,
369               .pSampleMask = NULL,
370               .alphaToCoverageEnable = false,
371               .alphaToOneEnable = false,
372            },
373         .pColorBlendState = &blend_state,
374         .pDynamicState =
375            &(VkPipelineDynamicStateCreateInfo){
376               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
377               .dynamicStateCount = 2,
378               .pDynamicStates =
379                  (VkDynamicState[]){
380                     VK_DYNAMIC_STATE_VIEWPORT,
381                     VK_DYNAMIC_STATE_SCISSOR,
382                  },
383            },
384         .layout = layout,
385         .renderPass = device->meta_state.fast_clear_flush.pass,
386         .subpass = 0,
387      },
388      &(struct radv_graphics_pipeline_create_info){
389         .use_rectlist = true,
390         .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
391      },
392      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
393   if (result != VK_SUCCESS)
394      goto cleanup;
395
396   result = radv_graphics_pipeline_create(
397      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
398      &(VkGraphicsPipelineCreateInfo){
399         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
400         .stageCount = 2,
401         .pStages = stages,
402
403         .pVertexInputState = &vi_state,
404         .pInputAssemblyState = &ia_state,
405
406         .pViewportState =
407            &(VkPipelineViewportStateCreateInfo){
408               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
409               .viewportCount = 1,
410               .scissorCount = 1,
411            },
412         .pRasterizationState = &rs_state,
413         .pMultisampleState =
414            &(VkPipelineMultisampleStateCreateInfo){
415               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
416               .rasterizationSamples = 1,
417               .sampleShadingEnable = false,
418               .pSampleMask = NULL,
419               .alphaToCoverageEnable = false,
420               .alphaToOneEnable = false,
421            },
422         .pColorBlendState = &blend_state,
423         .pDynamicState =
424            &(VkPipelineDynamicStateCreateInfo){
425               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
426               .dynamicStateCount = 2,
427               .pDynamicStates =
428                  (VkDynamicState[]){
429                     VK_DYNAMIC_STATE_VIEWPORT,
430                     VK_DYNAMIC_STATE_SCISSOR,
431                  },
432            },
433         .layout = layout,
434         .renderPass = device->meta_state.fast_clear_flush.pass,
435         .subpass = 0,
436      },
437      &(struct radv_graphics_pipeline_create_info){
438         .use_rectlist = true,
439         .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
440      },
441      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
442   if (result != VK_SUCCESS)
443      goto cleanup;
444
445   goto cleanup;
446
447cleanup:
448   ralloc_free(fs_module);
449   return result;
450}
451
452void
453radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
454{
455   struct radv_meta_state *state = &device->meta_state;
456
457   radv_DestroyPipeline(radv_device_to_handle(device),
458                        state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
459   radv_DestroyPipeline(radv_device_to_handle(device),
460                        state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
461   radv_DestroyPipeline(radv_device_to_handle(device),
462                        state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
463   radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,
464                          &state->alloc);
465   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
466                              &state->alloc);
467
468   radv_DestroyPipeline(radv_device_to_handle(device),
469                        state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
470   radv_DestroyPipelineLayout(radv_device_to_handle(device),
471                              state->fast_clear_flush.dcc_decompress_compute_p_layout,
472                              &state->alloc);
473   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
474                                   state->fast_clear_flush.dcc_decompress_compute_ds_layout,
475                                   &state->alloc);
476}
477
478static VkResult
479radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
480{
481   VkResult res = VK_SUCCESS;
482
483   mtx_lock(&device->meta_state.mtx);
484   if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
485      mtx_unlock(&device->meta_state.mtx);
486      return VK_SUCCESS;
487   }
488
489   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
490   if (!vs_module) {
491      /* XXX: Need more accurate error */
492      res = VK_ERROR_OUT_OF_HOST_MEMORY;
493      goto fail;
494   }
495
496   res = create_pass(device);
497   if (res != VK_SUCCESS)
498      goto fail;
499
500   res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
501   if (res != VK_SUCCESS)
502      goto fail;
503
504   VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
505   res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
506   if (res != VK_SUCCESS)
507      goto fail;
508
509   res = create_dcc_compress_compute(device);
510   if (res != VK_SUCCESS)
511      goto fail;
512
513   goto cleanup;
514
515fail:
516   radv_device_finish_meta_fast_clear_flush_state(device);
517
518cleanup:
519   ralloc_free(vs_module);
520   mtx_unlock(&device->meta_state.mtx);
521
522   return res;
523}
524
525VkResult
526radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
527{
528   if (on_demand)
529      return VK_SUCCESS;
530
531   return radv_device_init_meta_fast_clear_flush_state_internal(device);
532}
533
534static void
535radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
536                                           struct radv_image *image, uint64_t pred_offset,
537                                           bool value)
538{
539   uint64_t va = 0;
540
541   if (value) {
542      va = radv_buffer_get_va(image->bo) + image->offset;
543      va += pred_offset;
544   }
545
546   si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
547}
548
549static void
550radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
551                               const VkImageSubresourceRange *range, int level, int layer,
552                               bool flush_cb)
553{
554   struct radv_device *device = cmd_buffer->device;
555   struct radv_image_view iview;
556   uint32_t width, height;
557
558   width = radv_minify(image->info.width, range->baseMipLevel + level);
559   height = radv_minify(image->info.height, range->baseMipLevel + level);
560
561   radv_image_view_init(&iview, device,
562                        &(VkImageViewCreateInfo){
563                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
564                           .image = radv_image_to_handle(image),
565                           .viewType = radv_meta_get_view_type(image),
566                           .format = image->vk_format,
567                           .subresourceRange =
568                              {
569                                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
570                                 .baseMipLevel = range->baseMipLevel + level,
571                                 .levelCount = 1,
572                                 .baseArrayLayer = range->baseArrayLayer + layer,
573                                 .layerCount = 1,
574                              },
575                        },
576                        NULL);
577
578   VkFramebuffer fb_h;
579   radv_CreateFramebuffer(
580      radv_device_to_handle(device),
581      &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
582                                 .attachmentCount = 1,
583                                 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
584                                 .width = width,
585                                 .height = height,
586                                 .layers = 1},
587      &cmd_buffer->pool->alloc, &fb_h);
588
589   radv_cmd_buffer_begin_render_pass(cmd_buffer,
590                                     &(VkRenderPassBeginInfo){
591                                        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
592                                        .renderPass = device->meta_state.fast_clear_flush.pass,
593                                        .framebuffer = fb_h,
594                                        .renderArea = {.offset =
595                                                          {
596                                                             0,
597                                                             0,
598                                                          },
599                                                       .extent =
600                                                          {
601                                                             width,
602                                                             height,
603                                                          }},
604                                        .clearValueCount = 0,
605                                        .pClearValues = NULL,
606                                     },
607                                     NULL);
608
609   radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
610
611   if (flush_cb)
612      cmd_buffer->state.flush_bits |=
613         radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
614
615   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
616
617   if (flush_cb)
618      cmd_buffer->state.flush_bits |=
619         radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
620
621   radv_cmd_buffer_end_render_pass(cmd_buffer);
622
623   radv_image_view_finish(&iview);
624   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
625}
626
627static void
628radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
629                         const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
630{
631   struct radv_device *device = cmd_buffer->device;
632   struct radv_meta_saved_state saved_state;
633   bool old_predicating = false;
634   bool flush_cb = false;
635   uint64_t pred_offset;
636   VkPipeline *pipeline;
637
638   switch (op) {
639   case FAST_CLEAR_ELIMINATE:
640      pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
641      pred_offset = image->fce_pred_offset;
642      break;
643   case FMASK_DECOMPRESS:
644      pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
645      pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */
646
647      /* Flushing CB is required before and after FMASK_DECOMPRESS. */
648      flush_cb = true;
649      break;
650   case DCC_DECOMPRESS:
651      pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
652      pred_offset = image->dcc_pred_offset;
653
654      /* Flushing CB is required before and after DCC_DECOMPRESS. */
655      flush_cb = true;
656      break;
657   default:
658      unreachable("Invalid color op");
659   }
660
661   if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
662       (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
663        subresourceRange->baseArrayLayer != 0)) {
664      /* Only use predication if the image has DCC with mipmaps or
665       * if the range of layers covers the whole image because the
666       * predication is based on mip level.
667       */
668      pred_offset = 0;
669   }
670
671   if (!*pipeline) {
672      VkResult ret;
673
674      ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
675      if (ret != VK_SUCCESS) {
676         cmd_buffer->record_result = ret;
677         return;
678      }
679   }
680
681   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
682
683   if (pred_offset) {
684      pred_offset += 8 * subresourceRange->baseMipLevel;
685
686      old_predicating = cmd_buffer->state.predicating;
687
688      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
689      cmd_buffer->state.predicating = true;
690   }
691
692   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
693                        *pipeline);
694
695   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
696      uint32_t width, height;
697
698      /* Do not decompress levels without DCC. */
699      if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
700         continue;
701
702      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
703      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
704
705      radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
706                          &(VkViewport){.x = 0,
707                                        .y = 0,
708                                        .width = width,
709                                        .height = height,
710                                        .minDepth = 0.0f,
711                                        .maxDepth = 1.0f});
712
713      radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
714                         &(VkRect2D){
715                            .offset = {0, 0},
716                            .extent = {width, height},
717                         });
718
719      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
720         radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
721      }
722   }
723
724   cmd_buffer->state.flush_bits |=
725      RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
726
727   if (pred_offset) {
728      pred_offset += 8 * subresourceRange->baseMipLevel;
729
730      cmd_buffer->state.predicating = old_predicating;
731
732      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
733
734      if (cmd_buffer->state.predication_type != -1) {
735         /* Restore previous conditional rendering user state. */
736         si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
737                                       cmd_buffer->state.predication_op,
738                                       cmd_buffer->state.predication_va);
739      }
740   }
741
742   radv_meta_restore(&saved_state, cmd_buffer);
743
744   /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
745    * also perform a fast-clear eliminate.
746    */
747   radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
748
749   /* Mark the image as being decompressed. */
750   if (op == DCC_DECOMPRESS)
751      radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
752}
753
754static void
755radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
756                          const VkImageSubresourceRange *subresourceRange)
757{
758   struct radv_barrier_data barrier = {0};
759
760   barrier.layout_transitions.fast_clear_eliminate = 1;
761   radv_describe_layout_transition(cmd_buffer, &barrier);
762
763   radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
764}
765
766static void
767radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
768                      const VkImageSubresourceRange *subresourceRange)
769{
770   struct radv_barrier_data barrier = {0};
771
772   barrier.layout_transitions.fmask_decompress = 1;
773   radv_describe_layout_transition(cmd_buffer, &barrier);
774
775   radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
776}
777
778void
779radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
780                                    const VkImageSubresourceRange *subresourceRange)
781{
782   if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
783      if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
784         /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
785          * FMASK_DECOMPRESS can't eliminate DCC fast clears.
786          */
787         radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
788      }
789
790      radv_fmask_decompress(cmd_buffer, image, subresourceRange);
791   } else {
792      /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
793      if (image->support_comp_to_single)
794         return;
795
796      radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
797   }
798}
799
800static void
801radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
802                            const VkImageSubresourceRange *subresourceRange)
803{
804   struct radv_meta_saved_state saved_state;
805   struct radv_image_view load_iview = {0};
806   struct radv_image_view store_iview = {0};
807   struct radv_device *device = cmd_buffer->device;
808
809   cmd_buffer->state.flush_bits |=
810      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
811
812   if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
813      VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
814      if (ret != VK_SUCCESS) {
815         cmd_buffer->record_result = ret;
816         return;
817      }
818   }
819
820   radv_meta_save(&saved_state, cmd_buffer,
821                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
822
823   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
824                        device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
825
826   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
827      uint32_t width, height;
828
829      /* Do not decompress levels without DCC. */
830      if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
831         continue;
832
833      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
834      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
835
836      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
837         radv_image_view_init(
838            &load_iview, cmd_buffer->device,
839            &(VkImageViewCreateInfo){
840               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
841               .image = radv_image_to_handle(image),
842               .viewType = VK_IMAGE_VIEW_TYPE_2D,
843               .format = image->vk_format,
844               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
845                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
846                                    .levelCount = 1,
847                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
848                                    .layerCount = 1},
849            },
850            &(struct radv_image_view_extra_create_info){.enable_compression = true});
851         radv_image_view_init(
852            &store_iview, cmd_buffer->device,
853            &(VkImageViewCreateInfo){
854               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
855               .image = radv_image_to_handle(image),
856               .viewType = VK_IMAGE_VIEW_TYPE_2D,
857               .format = image->vk_format,
858               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
859                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
860                                    .levelCount = 1,
861                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
862                                    .layerCount = 1},
863            },
864            &(struct radv_image_view_extra_create_info){.disable_compression = true});
865
866         radv_meta_push_descriptor_set(
867            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
868            device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
869            2, /* descriptorWriteCount */
870            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
871                                      .dstBinding = 0,
872                                      .dstArrayElement = 0,
873                                      .descriptorCount = 1,
874                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
875                                      .pImageInfo =
876                                         (VkDescriptorImageInfo[]){
877                                            {
878                                               .sampler = VK_NULL_HANDLE,
879                                               .imageView = radv_image_view_to_handle(&load_iview),
880                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
881                                            },
882                                         }},
883                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
884                                      .dstBinding = 1,
885                                      .dstArrayElement = 0,
886                                      .descriptorCount = 1,
887                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
888                                      .pImageInfo = (VkDescriptorImageInfo[]){
889                                         {
890                                            .sampler = VK_NULL_HANDLE,
891                                            .imageView = radv_image_view_to_handle(&store_iview),
892                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
893                                         },
894                                      }}});
895
896         radv_unaligned_dispatch(cmd_buffer, width, height, 1);
897
898         radv_image_view_finish(&load_iview);
899         radv_image_view_finish(&store_iview);
900      }
901   }
902
903   /* Mark this image as actually being decompressed. */
904   radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
905
906   radv_meta_restore(&saved_state, cmd_buffer);
907
908   cmd_buffer->state.flush_bits |=
909      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
910      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
911
912   /* Initialize the DCC metadata as "fully expanded". */
913   cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
914}
915
916void
917radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
918                    const VkImageSubresourceRange *subresourceRange)
919{
920   struct radv_barrier_data barrier = {0};
921
922   barrier.layout_transitions.dcc_decompress = 1;
923   radv_describe_layout_transition(cmd_buffer, &barrier);
924
925   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
926      radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
927   else
928      radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
929}
930