1/*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "nir/nir_builder.h"
28#include "radv_meta.h"
29#include "radv_private.h"
30#include "sid.h"
31#include "vk_format.h"
32
33static nir_ssa_def *
34radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
35{
36   unsigned i;
37
38   nir_ssa_def *cmp[3];
39   for (i = 0; i < 3; i++)
40      cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));
41
42   nir_ssa_def *ltvals[3];
43   for (i = 0; i < 3; i++)
44      ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
45
46   nir_ssa_def *gtvals[3];
47
48   for (i = 0; i < 3; i++) {
49      gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
50      gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
51      gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
52   }
53
54   nir_ssa_def *comp[4];
55   for (i = 0; i < 3; i++)
56      comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
57   comp[3] = nir_channels(b, input, 1 << 3);
58   return nir_vec(b, comp, 4);
59}
60
/* Build a compute shader that resolves a 2D multisampled color image into a
 * single-sampled 2D storage image.
 *
 * Descriptor layout: set 0, binding 0 = sampled MS source; binding 1 =
 * storage destination.  Push constants: 2x ivec2 (src offset at byte 0,
 * dst offset at byte 8).  One invocation resolves one texel; the workgroup
 * is 8x8x1.
 */
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,
                                     is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   /* One invocation per destination texel; only x/y are needed. */
   nir_ssa_def *global_id = get_global_ids(&b, 2);

   /* Push constants laid out as two ivec2s: src offset then dst offset. */
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   /* Shared helper emits the per-sample fetch/average (or sample-0 copy for
    * integer formats) and writes the result to `color`.
    */
   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   /* image stores take a vec4 coordinate; z/w are unused for 2D. */
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
                                         nir_channel(&b, dst_coord, 1),
                                         nir_ssa_undef(&b, 1, 32),
                                         nir_ssa_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}
108
/* Selects which aspect a depth/stencil resolve shader/pipeline operates on. */
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};
113
114static const char *
115get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
116{
117   switch (resolve_mode) {
118   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
119      return "zero";
120   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
121      return "average";
122   case VK_RESOLVE_MODE_MIN_BIT_KHR:
123      return "min";
124   case VK_RESOLVE_MODE_MAX_BIT_KHR:
125      return "max";
126   default:
127      unreachable("invalid resolve mode");
128   }
129}
130
131static nir_shader *
132build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
133                                           VkResolveModeFlagBits resolve_mode)
134{
135   const struct glsl_type *sampler_type =
136      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
137   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
138
139   nir_builder b = nir_builder_init_simple_shader(
140      MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",
141      index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
142   b.shader->info.workgroup_size[0] = 8;
143   b.shader->info.workgroup_size[1] = 8;
144   b.shader->info.workgroup_size[2] = 1;
145
146   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
147   input_img->data.descriptor_set = 0;
148   input_img->data.binding = 0;
149
150   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
151   output_img->data.descriptor_set = 0;
152   output_img->data.binding = 1;
153
154   nir_ssa_def *img_coord = get_global_ids(&b, 3);
155
156   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
157
158   nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
159
160   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
161   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
162   tex->op = nir_texop_txf_ms;
163   tex->src[0].src_type = nir_tex_src_coord;
164   tex->src[0].src = nir_src_for_ssa(img_coord);
165   tex->src[1].src_type = nir_tex_src_ms_index;
166   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
167   tex->src[2].src_type = nir_tex_src_texture_deref;
168   tex->src[2].src = nir_src_for_ssa(input_img_deref);
169   tex->dest_type = type;
170   tex->is_array = true;
171   tex->coord_components = 3;
172
173   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
174   nir_builder_instr_insert(&b, &tex->instr);
175
176   nir_ssa_def *outval = &tex->dest.ssa;
177
178   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
179      for (int i = 1; i < samples; i++) {
180         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
181         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
182         tex_add->op = nir_texop_txf_ms;
183         tex_add->src[0].src_type = nir_tex_src_coord;
184         tex_add->src[0].src = nir_src_for_ssa(img_coord);
185         tex_add->src[1].src_type = nir_tex_src_ms_index;
186         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
187         tex_add->src[2].src_type = nir_tex_src_texture_deref;
188         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
189         tex_add->dest_type = type;
190         tex_add->is_array = true;
191         tex_add->coord_components = 3;
192
193         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
194         nir_builder_instr_insert(&b, &tex_add->instr);
195
196         switch (resolve_mode) {
197         case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
198            assert(index == DEPTH_RESOLVE);
199            outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
200            break;
201         case VK_RESOLVE_MODE_MIN_BIT_KHR:
202            if (index == DEPTH_RESOLVE)
203               outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
204            else
205               outval = nir_umin(&b, outval, &tex_add->dest.ssa);
206            break;
207         case VK_RESOLVE_MODE_MAX_BIT_KHR:
208            if (index == DEPTH_RESOLVE)
209               outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
210            else
211               outval = nir_umax(&b, outval, &tex_add->dest.ssa);
212            break;
213         default:
214            unreachable("invalid resolve mode");
215         }
216      }
217
218      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
219         outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
220   }
221
222   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
223                                 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
224   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
225                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
226                         .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
227   return b.shader;
228}
229
230static VkResult
231create_layout(struct radv_device *device)
232{
233   VkResult result;
234   /*
235    * two descriptors one for the image being sampled
236    * one for the buffer being written.
237    */
238   VkDescriptorSetLayoutCreateInfo ds_create_info = {
239      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
240      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
241      .bindingCount = 2,
242      .pBindings = (VkDescriptorSetLayoutBinding[]){
243         {.binding = 0,
244          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
245          .descriptorCount = 1,
246          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
247          .pImmutableSamplers = NULL},
248         {.binding = 1,
249          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
250          .descriptorCount = 1,
251          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
252          .pImmutableSamplers = NULL},
253      }};
254
255   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
256                                           &device->meta_state.alloc,
257                                           &device->meta_state.resolve_compute.ds_layout);
258   if (result != VK_SUCCESS)
259      goto fail;
260
261   VkPipelineLayoutCreateInfo pl_create_info = {
262      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
263      .setLayoutCount = 1,
264      .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
265      .pushConstantRangeCount = 1,
266      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
267   };
268
269   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
270                                      &device->meta_state.alloc,
271                                      &device->meta_state.resolve_compute.p_layout);
272   if (result != VK_SUCCESS)
273      goto fail;
274   return VK_SUCCESS;
275fail:
276   return result;
277}
278
/* Create (on demand) the color resolve compute pipeline for the given sample
 * count and format class, storing it in *pipeline.  Thread-safe: serialized
 * on the meta-state mutex, and a no-op if another thread already built it.
 */
static VkResult
create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
                        VkPipeline *pipeline)
{
   mtx_lock(&device->meta_state.mtx);

   /* Another thread may have created the pipeline while we waited. */
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

   const VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   const VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   VkResult result =
      radv_CreateComputePipelines(radv_device_to_handle(device),
                                  radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                  &vk_pipeline_info, NULL, pipeline);

   /* The NIR is only needed during pipeline creation; free it on every path. */
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}
324
325static VkResult
326create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
327                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
328{
329   VkResult result;
330
331   mtx_lock(&device->meta_state.mtx);
332   if (*pipeline) {
333      mtx_unlock(&device->meta_state.mtx);
334      return VK_SUCCESS;
335   }
336
337   nir_shader *cs =
338      build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
339
340   /* compute shader */
341   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
342      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
343      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
344      .module = vk_shader_module_handle_from_nir(cs),
345      .pName = "main",
346      .pSpecializationInfo = NULL,
347   };
348
349   VkComputePipelineCreateInfo vk_pipeline_info = {
350      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
351      .stage = pipeline_shader_stage,
352      .flags = 0,
353      .layout = device->meta_state.resolve_compute.p_layout,
354   };
355
356   result = radv_CreateComputePipelines(radv_device_to_handle(device),
357                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
358                                        &vk_pipeline_info, NULL, pipeline);
359   if (result != VK_SUCCESS)
360      goto fail;
361
362   ralloc_free(cs);
363   mtx_unlock(&device->meta_state.mtx);
364   return VK_SUCCESS;
365fail:
366   ralloc_free(cs);
367   mtx_unlock(&device->meta_state.mtx);
368   return result;
369}
370
371VkResult
372radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
373{
374   struct radv_meta_state *state = &device->meta_state;
375   VkResult res;
376
377   res = create_layout(device);
378   if (res != VK_SUCCESS)
379      goto fail;
380
381   if (on_demand)
382      return VK_SUCCESS;
383
384   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
385      uint32_t samples = 1 << i;
386
387      res = create_resolve_pipeline(device, samples, false, false,
388                                    &state->resolve_compute.rc[i].pipeline);
389      if (res != VK_SUCCESS)
390         goto fail;
391
392      res = create_resolve_pipeline(device, samples, true, false,
393                                    &state->resolve_compute.rc[i].i_pipeline);
394      if (res != VK_SUCCESS)
395         goto fail;
396
397      res = create_resolve_pipeline(device, samples, false, true,
398                                    &state->resolve_compute.rc[i].srgb_pipeline);
399      if (res != VK_SUCCESS)
400         goto fail;
401
402      res = create_depth_stencil_resolve_pipeline(
403         device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
404         &state->resolve_compute.depth[i].average_pipeline);
405      if (res != VK_SUCCESS)
406         goto fail;
407
408      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
409                                                  VK_RESOLVE_MODE_MAX_BIT_KHR,
410                                                  &state->resolve_compute.depth[i].max_pipeline);
411      if (res != VK_SUCCESS)
412         goto fail;
413
414      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
415                                                  VK_RESOLVE_MODE_MIN_BIT_KHR,
416                                                  &state->resolve_compute.depth[i].min_pipeline);
417      if (res != VK_SUCCESS)
418         goto fail;
419
420      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
421                                                  VK_RESOLVE_MODE_MAX_BIT_KHR,
422                                                  &state->resolve_compute.stencil[i].max_pipeline);
423      if (res != VK_SUCCESS)
424         goto fail;
425
426      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
427                                                  VK_RESOLVE_MODE_MIN_BIT_KHR,
428                                                  &state->resolve_compute.stencil[i].min_pipeline);
429      if (res != VK_SUCCESS)
430         goto fail;
431   }
432
433   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
434                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
435                                               &state->resolve_compute.depth_zero_pipeline);
436   if (res != VK_SUCCESS)
437      goto fail;
438
439   res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
440                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
441                                               &state->resolve_compute.stencil_zero_pipeline);
442   if (res != VK_SUCCESS)
443      goto fail;
444
445   return VK_SUCCESS;
446fail:
447   radv_device_finish_meta_resolve_compute_state(device);
448   return res;
449}
450
451void
452radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
453{
454   struct radv_meta_state *state = &device->meta_state;
455   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
456      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
457                           &state->alloc);
458
459      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
460                           &state->alloc);
461
462      radv_DestroyPipeline(radv_device_to_handle(device),
463                           state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
464
465      radv_DestroyPipeline(radv_device_to_handle(device),
466                           state->resolve_compute.depth[i].average_pipeline, &state->alloc);
467
468      radv_DestroyPipeline(radv_device_to_handle(device),
469                           state->resolve_compute.depth[i].max_pipeline, &state->alloc);
470
471      radv_DestroyPipeline(radv_device_to_handle(device),
472                           state->resolve_compute.depth[i].min_pipeline, &state->alloc);
473
474      radv_DestroyPipeline(radv_device_to_handle(device),
475                           state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
476
477      radv_DestroyPipeline(radv_device_to_handle(device),
478                           state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
479   }
480
481   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
482                        &state->alloc);
483
484   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
485                        &state->alloc);
486
487   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
488                                   &state->alloc);
489   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
490                              &state->alloc);
491}
492
493static VkPipeline *
494radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
495{
496   struct radv_device *device = cmd_buffer->device;
497   struct radv_meta_state *state = &device->meta_state;
498   uint32_t samples = src_iview->image->info.samples;
499   uint32_t samples_log2 = ffs(samples) - 1;
500   VkPipeline *pipeline;
501
502   if (vk_format_is_int(src_iview->vk_format))
503      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
504   else if (vk_format_is_srgb(src_iview->vk_format))
505      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
506   else
507      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
508
509   if (!*pipeline) {
510      VkResult ret;
511
512      ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),
513                                    vk_format_is_srgb(src_iview->vk_format), pipeline);
514      if (ret != VK_SUCCESS) {
515         cmd_buffer->record_result = ret;
516         return NULL;
517      }
518   }
519
520   return pipeline;
521}
522
523static void
524emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
525             struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
526             const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
527{
528   struct radv_device *device = cmd_buffer->device;
529   VkPipeline *pipeline;
530
531   radv_meta_push_descriptor_set(
532      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
533      0, /* set */
534      2, /* descriptorWriteCount */
535      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
536                                .dstBinding = 0,
537                                .dstArrayElement = 0,
538                                .descriptorCount = 1,
539                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
540                                .pImageInfo =
541                                   (VkDescriptorImageInfo[]){
542                                      {.sampler = VK_NULL_HANDLE,
543                                       .imageView = radv_image_view_to_handle(src_iview),
544                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
545                                   }},
546                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
547                                .dstBinding = 1,
548                                .dstArrayElement = 0,
549                                .descriptorCount = 1,
550                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
551                                .pImageInfo = (VkDescriptorImageInfo[]){
552                                   {
553                                      .sampler = VK_NULL_HANDLE,
554                                      .imageView = radv_image_view_to_handle(dest_iview),
555                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
556                                   },
557                                }}});
558
559   pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
560
561   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
562                        *pipeline);
563
564   unsigned push_constants[4] = {
565      src_offset->x,
566      src_offset->y,
567      dest_offset->x,
568      dest_offset->y,
569   };
570   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
571                         device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
572                         0, 16, push_constants);
573   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
574}
575
/* Record a compute dispatch that resolves a multisampled depth or stencil
 * image view into a single-sampled one using `resolve_mode`.  `aspects`
 * selects depth vs. stencil; the matching pipeline is created on demand if
 * it does not exist yet (failure is recorded in cmd_buffer->record_result).
 */
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   /* Binding 0: sampled source, binding 1: storage destination. */
   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   /* Pick the pipeline slot matching aspect + resolve mode (SAMPLE_ZERO is
    * sample-count independent; the others are per-sample-count).
    */
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
      /* Averaging is only defined for the depth aspect. */
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   /* Build the pipeline on demand if the device skipped precompilation. */
   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}
658
659void
660radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
661                                VkFormat src_format, VkImageLayout src_image_layout,
662                                struct radv_image *dest_image, VkFormat dest_format,
663                                VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
664{
665   struct radv_meta_saved_state saved_state;
666
667   radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
668
669   /* For partial resolves, DCC should be decompressed before resolving
670    * because the metadata is re-initialized to the uncompressed after.
671    */
672   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
673                                                      cmd_buffer->queue_family_index);
674
675   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
676       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
677                                  dest_image_layout, false, queue_mask) &&
678       (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
679        region->extent.width != dest_image->info.width ||
680        region->extent.height != dest_image->info.height ||
681        region->extent.depth != dest_image->info.depth)) {
682      radv_decompress_dcc(cmd_buffer, dest_image,
683                          &(VkImageSubresourceRange){
684                             .aspectMask = region->dstSubresource.aspectMask,
685                             .baseMipLevel = region->dstSubresource.mipLevel,
686                             .levelCount = 1,
687                             .baseArrayLayer = region->dstSubresource.baseArrayLayer,
688                             .layerCount = region->dstSubresource.layerCount,
689                          });
690   }
691
692   radv_meta_save(
693      &saved_state, cmd_buffer,
694      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
695
696   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
697   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
698   assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
699
700   const uint32_t src_base_layer =
701      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
702
703   const uint32_t dest_base_layer =
704      radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
705
706   const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
707   const struct VkOffset3D srcOffset =
708      radv_sanitize_image_offset(src_image->type, region->srcOffset);
709   const struct VkOffset3D dstOffset =
710      radv_sanitize_image_offset(dest_image->type, region->dstOffset);
711
712   for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
713
714      struct radv_image_view src_iview;
715      radv_image_view_init(&src_iview, cmd_buffer->device,
716                           &(VkImageViewCreateInfo){
717                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
718                              .image = radv_image_to_handle(src_image),
719                              .viewType = radv_meta_get_view_type(src_image),
720                              .format = src_format,
721                              .subresourceRange =
722                                 {
723                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
724                                    .baseMipLevel = region->srcSubresource.mipLevel,
725                                    .levelCount = 1,
726                                    .baseArrayLayer = src_base_layer + layer,
727                                    .layerCount = 1,
728                                 },
729                           },
730                           NULL);
731
732      struct radv_image_view dest_iview;
733      radv_image_view_init(&dest_iview, cmd_buffer->device,
734                           &(VkImageViewCreateInfo){
735                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
736                              .image = radv_image_to_handle(dest_image),
737                              .viewType = radv_meta_get_view_type(dest_image),
738                              .format = vk_to_non_srgb_format(dest_format),
739                              .subresourceRange =
740                                 {
741                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
742                                    .baseMipLevel = region->dstSubresource.mipLevel,
743                                    .levelCount = 1,
744                                    .baseArrayLayer = dest_base_layer + layer,
745                                    .layerCount = 1,
746                                 },
747                           },
748                           NULL);
749
750      emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
751                   &(VkOffset2D){dstOffset.x, dstOffset.y},
752                   &(VkExtent2D){extent.width, extent.height});
753
754      radv_image_view_finish(&src_iview);
755      radv_image_view_finish(&dest_iview);
756   }
757
758   radv_meta_restore(&saved_state, cmd_buffer);
759
760   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
761       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
762                                  dest_image_layout, false, queue_mask)) {
763
764      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
765
766      VkImageSubresourceRange range = {
767         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
768         .baseMipLevel = region->dstSubresource.mipLevel,
769         .levelCount = 1,
770         .baseArrayLayer = dest_base_layer,
771         .layerCount = region->dstSubresource.layerCount,
772      };
773
774      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
775   }
776}
777
778/**
779 * Emit any needed resolves for the current subpass.
780 */
781void
782radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
783{
784   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
785   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
786   struct radv_subpass_barrier barrier;
787   uint32_t layer_count = fb->layers;
788
789   if (subpass->view_mask)
790      layer_count = util_last_bit(subpass->view_mask);
791
792   /* Resolves happen before the end-of-subpass barriers get executed, so
793    * we have to make the attachment shader-readable.
794    */
795   barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
796   barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
797   barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
798   radv_emit_subpass_barrier(cmd_buffer, &barrier);
799
800   for (uint32_t i = 0; i < subpass->color_count; ++i) {
801      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
802      struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
803
804      if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
805         continue;
806
807      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
808      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
809
810      VkImageResolve2KHR region = {
811         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
812         .extent = (VkExtent3D){fb->width, fb->height, 1},
813         .srcSubresource =
814            (VkImageSubresourceLayers){
815               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
816               .mipLevel = src_iview->base_mip,
817               .baseArrayLayer = src_iview->base_layer,
818               .layerCount = layer_count,
819            },
820         .dstSubresource =
821            (VkImageSubresourceLayers){
822               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
823               .mipLevel = dst_iview->base_mip,
824               .baseArrayLayer = dst_iview->base_layer,
825               .layerCount = layer_count,
826            },
827         .srcOffset = (VkOffset3D){0, 0, 0},
828         .dstOffset = (VkOffset3D){0, 0, 0},
829      };
830
831      radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,
832                                      src_att.layout, dst_iview->image, dst_iview->vk_format,
833                                      dst_att.layout, &region);
834   }
835
836   cmd_buffer->state.flush_bits |=
837      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
838      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
839}
840
/**
 * Resolve the current subpass' depth/stencil attachment into its
 * ds_resolve_attachment with the compute path, for the given aspects
 * and resolve mode.
 */
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlags aspects,
                                      VkResolveModeFlagBits resolve_mode)
{
   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;
   uint32_t layer_count = fb->layers;

   /* With multiview, resolve one layer per active view. */
   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
   struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
   struct radv_image *src_image = src_iview->image;

   /* Only the src subresource fields are filled in: the decompress pass
    * below just needs to know which source layers to decompress.
    * NOTE(review): mipLevel is hard-coded to 0 while the temporary view
    * below uses src_iview->base_mip — presumably MSAA depth sources are
    * always single-mip, but confirm.
    */
   VkImageResolve2KHR region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->base_layer;
   region.srcSubresource.layerCount = layer_count;

   /* Make sure the source is decompressed before sampling it. */
   radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);

   /* Save compute state clobbered by the resolve shader. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
   struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
   struct radv_image *dst_image = dst_iview->image;

   /* Temporary view of the source restricted to the resolved aspects/layers. */
   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = radv_meta_get_view_type(src_image),
                           .format = src_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = src_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   /* Matching temporary view of the destination. */
   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   /* All layers are resolved in one dispatch (depth = layer_count). */
   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
                              &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
                              resolve_mode);

   /* Flush the compute writes before anything consumes the destination. */
   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
                                                      cmd_buffer->queue_family_index);

   /* The shader wrote around HTILE; if the destination layout keeps HTILE
    * compressed, re-initialize the resolved range so metadata stays valid.
    */
   if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->base_mip;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->base_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_image_view_finish(&tsrc_iview);
   radv_image_view_finish(&tdst_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}
948