101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2016 Red Hat.
301e04c3fSmrg * Copyright © 2016 Bas Nieuwenhuizen
401e04c3fSmrg *
501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
601e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
701e04c3fSmrg * to deal in the Software without restriction, including without limitation
801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1101e04c3fSmrg *
1201e04c3fSmrg * The above copyright notice and this permission notice (including the next
1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1401e04c3fSmrg * Software.
1501e04c3fSmrg *
1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2201e04c3fSmrg * IN THE SOFTWARE.
2301e04c3fSmrg */
2401e04c3fSmrg#include "nir/nir_builder.h"
257ec681f3Smrg#include "radv_meta.h"
2601e04c3fSmrg
2701e04c3fSmrg/*
2801e04c3fSmrg * GFX queue: Compute shader implementation of image->buffer copy
2901e04c3fSmrg * Compute queue: implementation also of buffer->image, image->image, and image clear.
3001e04c3fSmrg */
3101e04c3fSmrg
3201e04c3fSmrg/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
3301e04c3fSmrg * for that.
3401e04c3fSmrg */
3501e04c3fSmrgstatic nir_shader *
3601e04c3fSmrgbuild_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
3701e04c3fSmrg{
387ec681f3Smrg   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
397ec681f3Smrg   const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
407ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
417ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
427ec681f3Smrg                                                  is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
437ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
447ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
457ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
467ec681f3Smrg   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
477ec681f3Smrg   input_img->data.descriptor_set = 0;
487ec681f3Smrg   input_img->data.binding = 0;
497ec681f3Smrg
507ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
517ec681f3Smrg   output_img->data.descriptor_set = 0;
527ec681f3Smrg   output_img->data.binding = 1;
537ec681f3Smrg
547ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
557ec681f3Smrg
567ec681f3Smrg   nir_ssa_def *offset =
577ec681f3Smrg      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
587ec681f3Smrg   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
597ec681f3Smrg
607ec681f3Smrg   nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
617ec681f3Smrg   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
627ec681f3Smrg
637ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
647ec681f3Smrg   tex->sampler_dim = dim;
657ec681f3Smrg   tex->op = nir_texop_txf;
667ec681f3Smrg   tex->src[0].src_type = nir_tex_src_coord;
677ec681f3Smrg   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
687ec681f3Smrg   tex->src[1].src_type = nir_tex_src_lod;
697ec681f3Smrg   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
707ec681f3Smrg   tex->src[2].src_type = nir_tex_src_texture_deref;
717ec681f3Smrg   tex->src[2].src = nir_src_for_ssa(input_img_deref);
727ec681f3Smrg   tex->dest_type = nir_type_float32;
737ec681f3Smrg   tex->is_array = false;
747ec681f3Smrg   tex->coord_components = is_3d ? 3 : 2;
757ec681f3Smrg
767ec681f3Smrg   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
777ec681f3Smrg   nir_builder_instr_insert(&b, &tex->instr);
787ec681f3Smrg
797ec681f3Smrg   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
807ec681f3Smrg   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
817ec681f3Smrg
827ec681f3Smrg   nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
837ec681f3Smrg   tmp = nir_iadd(&b, tmp, pos_x);
847ec681f3Smrg
857ec681f3Smrg   nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
867ec681f3Smrg
877ec681f3Smrg   nir_ssa_def *outval = &tex->dest.ssa;
887ec681f3Smrg   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
897ec681f3Smrg                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
907ec681f3Smrg                         .image_dim = GLSL_SAMPLER_DIM_BUF);
917ec681f3Smrg
927ec681f3Smrg   return b.shader;
9301e04c3fSmrg}
9401e04c3fSmrg
9501e04c3fSmrg/* Image to buffer - don't write use image accessors */
9601e04c3fSmrgstatic VkResult
9701e04c3fSmrgradv_device_init_meta_itob_state(struct radv_device *device)
9801e04c3fSmrg{
997ec681f3Smrg   VkResult result;
1007ec681f3Smrg   nir_shader *cs = build_nir_itob_compute_shader(device, false);
1017ec681f3Smrg   nir_shader *cs_3d = NULL;
1027ec681f3Smrg
1037ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9)
1047ec681f3Smrg      cs_3d = build_nir_itob_compute_shader(device, true);
1057ec681f3Smrg
1067ec681f3Smrg   /*
1077ec681f3Smrg    * two descriptors one for the image being sampled
1087ec681f3Smrg    * one for the buffer being written.
1097ec681f3Smrg    */
1107ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
1117ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1127ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1137ec681f3Smrg      .bindingCount = 2,
1147ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
1157ec681f3Smrg         {.binding = 0,
1167ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1177ec681f3Smrg          .descriptorCount = 1,
1187ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1197ec681f3Smrg          .pImmutableSamplers = NULL},
1207ec681f3Smrg         {.binding = 1,
1217ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1227ec681f3Smrg          .descriptorCount = 1,
1237ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1247ec681f3Smrg          .pImmutableSamplers = NULL},
1257ec681f3Smrg      }};
1267ec681f3Smrg
1277ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
1287ec681f3Smrg                                           &device->meta_state.alloc,
1297ec681f3Smrg                                           &device->meta_state.itob.img_ds_layout);
1307ec681f3Smrg   if (result != VK_SUCCESS)
1317ec681f3Smrg      goto fail;
1327ec681f3Smrg
1337ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
1347ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1357ec681f3Smrg      .setLayoutCount = 1,
1367ec681f3Smrg      .pSetLayouts = &device->meta_state.itob.img_ds_layout,
1377ec681f3Smrg      .pushConstantRangeCount = 1,
1387ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1397ec681f3Smrg   };
1407ec681f3Smrg
1417ec681f3Smrg   result =
1427ec681f3Smrg      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
1437ec681f3Smrg                                &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
1447ec681f3Smrg   if (result != VK_SUCCESS)
1457ec681f3Smrg      goto fail;
1467ec681f3Smrg
1477ec681f3Smrg   /* compute shader */
1487ec681f3Smrg
1497ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1507ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1517ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1527ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
1537ec681f3Smrg      .pName = "main",
1547ec681f3Smrg      .pSpecializationInfo = NULL,
1557ec681f3Smrg   };
1567ec681f3Smrg
1577ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
1587ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1597ec681f3Smrg      .stage = pipeline_shader_stage,
1607ec681f3Smrg      .flags = 0,
1617ec681f3Smrg      .layout = device->meta_state.itob.img_p_layout,
1627ec681f3Smrg   };
1637ec681f3Smrg
1647ec681f3Smrg   result = radv_CreateComputePipelines(radv_device_to_handle(device),
1657ec681f3Smrg                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1667ec681f3Smrg                                        &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
1677ec681f3Smrg   if (result != VK_SUCCESS)
1687ec681f3Smrg      goto fail;
1697ec681f3Smrg
1707ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9) {
1717ec681f3Smrg      VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1727ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1737ec681f3Smrg         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1747ec681f3Smrg         .module = vk_shader_module_handle_from_nir(cs_3d),
1757ec681f3Smrg         .pName = "main",
1767ec681f3Smrg         .pSpecializationInfo = NULL,
1777ec681f3Smrg      };
1787ec681f3Smrg
1797ec681f3Smrg      VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1807ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1817ec681f3Smrg         .stage = pipeline_shader_stage_3d,
1827ec681f3Smrg         .flags = 0,
1837ec681f3Smrg         .layout = device->meta_state.itob.img_p_layout,
1847ec681f3Smrg      };
1857ec681f3Smrg
1867ec681f3Smrg      result = radv_CreateComputePipelines(
1877ec681f3Smrg         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1887ec681f3Smrg         &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d);
1897ec681f3Smrg      if (result != VK_SUCCESS)
1907ec681f3Smrg         goto fail;
1917ec681f3Smrg      ralloc_free(cs_3d);
1927ec681f3Smrg   }
1937ec681f3Smrg   ralloc_free(cs);
1947ec681f3Smrg
1957ec681f3Smrg   return VK_SUCCESS;
19601e04c3fSmrgfail:
1977ec681f3Smrg   ralloc_free(cs);
1987ec681f3Smrg   ralloc_free(cs_3d);
1997ec681f3Smrg   return result;
20001e04c3fSmrg}
20101e04c3fSmrg
20201e04c3fSmrgstatic void
20301e04c3fSmrgradv_device_finish_meta_itob_state(struct radv_device *device)
20401e04c3fSmrg{
2057ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
2067ec681f3Smrg
2077ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
2087ec681f3Smrg                              &state->alloc);
2097ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
2107ec681f3Smrg                                   &state->alloc);
2117ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
2127ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9)
2137ec681f3Smrg      radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
21401e04c3fSmrg}
21501e04c3fSmrg
21601e04c3fSmrgstatic nir_shader *
21701e04c3fSmrgbuild_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
21801e04c3fSmrg{
2197ec681f3Smrg   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
2207ec681f3Smrg   const struct glsl_type *buf_type =
2217ec681f3Smrg      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
2227ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
2237ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
2247ec681f3Smrg                                                  is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
2257ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
2267ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
2277ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
2287ec681f3Smrg   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
2297ec681f3Smrg   input_img->data.descriptor_set = 0;
2307ec681f3Smrg   input_img->data.binding = 0;
2317ec681f3Smrg
2327ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
2337ec681f3Smrg   output_img->data.descriptor_set = 0;
2347ec681f3Smrg   output_img->data.binding = 1;
2357ec681f3Smrg
2367ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
2377ec681f3Smrg
2387ec681f3Smrg   nir_ssa_def *offset =
2397ec681f3Smrg      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
2407ec681f3Smrg   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
2417ec681f3Smrg
2427ec681f3Smrg   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
2437ec681f3Smrg   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
2447ec681f3Smrg
2457ec681f3Smrg   nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
2467ec681f3Smrg   buf_coord = nir_iadd(&b, buf_coord, pos_x);
2477ec681f3Smrg
2487ec681f3Smrg   nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
2497ec681f3Smrg   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
2507ec681f3Smrg
2517ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
2527ec681f3Smrg   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
2537ec681f3Smrg   tex->op = nir_texop_txf;
2547ec681f3Smrg   tex->src[0].src_type = nir_tex_src_coord;
2557ec681f3Smrg   tex->src[0].src = nir_src_for_ssa(buf_coord);
2567ec681f3Smrg   tex->src[1].src_type = nir_tex_src_lod;
2577ec681f3Smrg   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
2587ec681f3Smrg   tex->src[2].src_type = nir_tex_src_texture_deref;
2597ec681f3Smrg   tex->src[2].src = nir_src_for_ssa(input_img_deref);
2607ec681f3Smrg   tex->dest_type = nir_type_float32;
2617ec681f3Smrg   tex->is_array = false;
2627ec681f3Smrg   tex->coord_components = 1;
2637ec681f3Smrg
2647ec681f3Smrg   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
2657ec681f3Smrg   nir_builder_instr_insert(&b, &tex->instr);
2667ec681f3Smrg
2677ec681f3Smrg   nir_ssa_def *outval = &tex->dest.ssa;
2687ec681f3Smrg
2697ec681f3Smrg   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0),
2707ec681f3Smrg                                         nir_channel(&b, coord, 1),
2717ec681f3Smrg                                         is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32),
2727ec681f3Smrg                                         nir_ssa_undef(&b, 1, 32));
2737ec681f3Smrg
2747ec681f3Smrg   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
2757ec681f3Smrg                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim);
2767ec681f3Smrg
2777ec681f3Smrg   return b.shader;
27801e04c3fSmrg}
27901e04c3fSmrg
28001e04c3fSmrg/* Buffer to image - don't write use image accessors */
28101e04c3fSmrgstatic VkResult
28201e04c3fSmrgradv_device_init_meta_btoi_state(struct radv_device *device)
28301e04c3fSmrg{
2847ec681f3Smrg   VkResult result;
2857ec681f3Smrg   nir_shader *cs = build_nir_btoi_compute_shader(device, false);
2867ec681f3Smrg   nir_shader *cs_3d = NULL;
2877ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9)
2887ec681f3Smrg      cs_3d = build_nir_btoi_compute_shader(device, true);
2897ec681f3Smrg   /*
2907ec681f3Smrg    * two descriptors one for the image being sampled
2917ec681f3Smrg    * one for the buffer being written.
2927ec681f3Smrg    */
2937ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
2947ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
2957ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
2967ec681f3Smrg      .bindingCount = 2,
2977ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
2987ec681f3Smrg         {.binding = 0,
2997ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
3007ec681f3Smrg          .descriptorCount = 1,
3017ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
3027ec681f3Smrg          .pImmutableSamplers = NULL},
3037ec681f3Smrg         {.binding = 1,
3047ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
3057ec681f3Smrg          .descriptorCount = 1,
3067ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
3077ec681f3Smrg          .pImmutableSamplers = NULL},
3087ec681f3Smrg      }};
3097ec681f3Smrg
3107ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
3117ec681f3Smrg                                           &device->meta_state.alloc,
3127ec681f3Smrg                                           &device->meta_state.btoi.img_ds_layout);
3137ec681f3Smrg   if (result != VK_SUCCESS)
3147ec681f3Smrg      goto fail;
3157ec681f3Smrg
3167ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
3177ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
3187ec681f3Smrg      .setLayoutCount = 1,
3197ec681f3Smrg      .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
3207ec681f3Smrg      .pushConstantRangeCount = 1,
3217ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
3227ec681f3Smrg   };
3237ec681f3Smrg
3247ec681f3Smrg   result =
3257ec681f3Smrg      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
3267ec681f3Smrg                                &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
3277ec681f3Smrg   if (result != VK_SUCCESS)
3287ec681f3Smrg      goto fail;
3297ec681f3Smrg
3307ec681f3Smrg   /* compute shader */
3317ec681f3Smrg
3327ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
3337ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3347ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
3357ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
3367ec681f3Smrg      .pName = "main",
3377ec681f3Smrg      .pSpecializationInfo = NULL,
3387ec681f3Smrg   };
3397ec681f3Smrg
3407ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
3417ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
3427ec681f3Smrg      .stage = pipeline_shader_stage,
3437ec681f3Smrg      .flags = 0,
3447ec681f3Smrg      .layout = device->meta_state.btoi.img_p_layout,
3457ec681f3Smrg   };
3467ec681f3Smrg
3477ec681f3Smrg   result = radv_CreateComputePipelines(radv_device_to_handle(device),
3487ec681f3Smrg                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
3497ec681f3Smrg                                        &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
3507ec681f3Smrg   if (result != VK_SUCCESS)
3517ec681f3Smrg      goto fail;
3527ec681f3Smrg
3537ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9) {
3547ec681f3Smrg      VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
3557ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3567ec681f3Smrg         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
3577ec681f3Smrg         .module = vk_shader_module_handle_from_nir(cs_3d),
3587ec681f3Smrg         .pName = "main",
3597ec681f3Smrg         .pSpecializationInfo = NULL,
3607ec681f3Smrg      };
3617ec681f3Smrg
3627ec681f3Smrg      VkComputePipelineCreateInfo vk_pipeline_info_3d = {
3637ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
3647ec681f3Smrg         .stage = pipeline_shader_stage_3d,
3657ec681f3Smrg         .flags = 0,
3667ec681f3Smrg         .layout = device->meta_state.btoi.img_p_layout,
3677ec681f3Smrg      };
3687ec681f3Smrg
3697ec681f3Smrg      result = radv_CreateComputePipelines(
3707ec681f3Smrg         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
3717ec681f3Smrg         &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d);
3727ec681f3Smrg      ralloc_free(cs_3d);
3737ec681f3Smrg   }
3747ec681f3Smrg   ralloc_free(cs);
3757ec681f3Smrg
3767ec681f3Smrg   return VK_SUCCESS;
37701e04c3fSmrgfail:
3787ec681f3Smrg   ralloc_free(cs_3d);
3797ec681f3Smrg   ralloc_free(cs);
3807ec681f3Smrg   return result;
38101e04c3fSmrg}
38201e04c3fSmrg
38301e04c3fSmrgstatic void
38401e04c3fSmrgradv_device_finish_meta_btoi_state(struct radv_device *device)
38501e04c3fSmrg{
3867ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
3877ec681f3Smrg
3887ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
3897ec681f3Smrg                              &state->alloc);
3907ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
3917ec681f3Smrg                                   &state->alloc);
3927ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
3937ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
39401e04c3fSmrg}
39501e04c3fSmrg
39601e04c3fSmrg/* Buffer to image - special path for R32G32B32 */
39701e04c3fSmrgstatic nir_shader *
39801e04c3fSmrgbuild_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
39901e04c3fSmrg{
4007ec681f3Smrg   const struct glsl_type *buf_type =
4017ec681f3Smrg      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
4027ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
4037ec681f3Smrg   nir_builder b =
4047ec681f3Smrg      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
4057ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
4067ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
4077ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
4087ec681f3Smrg   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
4097ec681f3Smrg   input_img->data.descriptor_set = 0;
4107ec681f3Smrg   input_img->data.binding = 0;
4117ec681f3Smrg
4127ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
4137ec681f3Smrg   output_img->data.descriptor_set = 0;
4147ec681f3Smrg   output_img->data.binding = 1;
4157ec681f3Smrg
4167ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, 2);
4177ec681f3Smrg
4187ec681f3Smrg   nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
4197ec681f3Smrg   nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
4207ec681f3Smrg   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
4217ec681f3Smrg
4227ec681f3Smrg   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
4237ec681f3Smrg   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
4247ec681f3Smrg
4257ec681f3Smrg   nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
4267ec681f3Smrg   buf_coord = nir_iadd(&b, buf_coord, pos_x);
4277ec681f3Smrg
4287ec681f3Smrg   nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
4297ec681f3Smrg
4307ec681f3Smrg   nir_ssa_def *global_pos =
4317ec681f3Smrg      nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
4327ec681f3Smrg               nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
4337ec681f3Smrg
4347ec681f3Smrg   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
4357ec681f3Smrg
4367ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
4377ec681f3Smrg   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
4387ec681f3Smrg   tex->op = nir_texop_txf;
4397ec681f3Smrg   tex->src[0].src_type = nir_tex_src_coord;
4407ec681f3Smrg   tex->src[0].src = nir_src_for_ssa(buf_coord);
4417ec681f3Smrg   tex->src[1].src_type = nir_tex_src_lod;
4427ec681f3Smrg   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
4437ec681f3Smrg   tex->src[2].src_type = nir_tex_src_texture_deref;
4447ec681f3Smrg   tex->src[2].src = nir_src_for_ssa(input_img_deref);
4457ec681f3Smrg   tex->dest_type = nir_type_float32;
4467ec681f3Smrg   tex->is_array = false;
4477ec681f3Smrg   tex->coord_components = 1;
4487ec681f3Smrg   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
4497ec681f3Smrg   nir_builder_instr_insert(&b, &tex->instr);
4507ec681f3Smrg
4517ec681f3Smrg   nir_ssa_def *outval = &tex->dest.ssa;
4527ec681f3Smrg
4537ec681f3Smrg   for (int chan = 0; chan < 3; chan++) {
4547ec681f3Smrg      nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
4557ec681f3Smrg
4567ec681f3Smrg      nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
4577ec681f3Smrg
4587ec681f3Smrg      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
4597ec681f3Smrg                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
4607ec681f3Smrg                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
4617ec681f3Smrg   }
4627ec681f3Smrg
4637ec681f3Smrg   return b.shader;
46401e04c3fSmrg}
46501e04c3fSmrg
46601e04c3fSmrgstatic VkResult
46701e04c3fSmrgradv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
46801e04c3fSmrg{
4697ec681f3Smrg   VkResult result;
4707ec681f3Smrg   nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
4717ec681f3Smrg
4727ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
4737ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
4747ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
4757ec681f3Smrg      .bindingCount = 2,
4767ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
4777ec681f3Smrg         {.binding = 0,
4787ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
4797ec681f3Smrg          .descriptorCount = 1,
4807ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
4817ec681f3Smrg          .pImmutableSamplers = NULL},
4827ec681f3Smrg         {.binding = 1,
4837ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
4847ec681f3Smrg          .descriptorCount = 1,
4857ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
4867ec681f3Smrg          .pImmutableSamplers = NULL},
4877ec681f3Smrg      }};
4887ec681f3Smrg
4897ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
4907ec681f3Smrg                                           &device->meta_state.alloc,
4917ec681f3Smrg                                           &device->meta_state.btoi_r32g32b32.img_ds_layout);
4927ec681f3Smrg   if (result != VK_SUCCESS)
4937ec681f3Smrg      goto fail;
4947ec681f3Smrg
4957ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
4967ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
4977ec681f3Smrg      .setLayoutCount = 1,
4987ec681f3Smrg      .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
4997ec681f3Smrg      .pushConstantRangeCount = 1,
5007ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
5017ec681f3Smrg   };
5027ec681f3Smrg
5037ec681f3Smrg   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
5047ec681f3Smrg                                      &device->meta_state.alloc,
5057ec681f3Smrg                                      &device->meta_state.btoi_r32g32b32.img_p_layout);
5067ec681f3Smrg   if (result != VK_SUCCESS)
5077ec681f3Smrg      goto fail;
5087ec681f3Smrg
5097ec681f3Smrg   /* compute shader */
5107ec681f3Smrg
5117ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
5127ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
5137ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
5147ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
5157ec681f3Smrg      .pName = "main",
5167ec681f3Smrg      .pSpecializationInfo = NULL,
5177ec681f3Smrg   };
5187ec681f3Smrg
5197ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
5207ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
5217ec681f3Smrg      .stage = pipeline_shader_stage,
5227ec681f3Smrg      .flags = 0,
5237ec681f3Smrg      .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
5247ec681f3Smrg   };
5257ec681f3Smrg
5267ec681f3Smrg   result = radv_CreateComputePipelines(
5277ec681f3Smrg      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
5287ec681f3Smrg      &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline);
52901e04c3fSmrg
53001e04c3fSmrgfail:
5317ec681f3Smrg   ralloc_free(cs);
5327ec681f3Smrg   return result;
53301e04c3fSmrg}
53401e04c3fSmrg
53501e04c3fSmrgstatic void
53601e04c3fSmrgradv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
53701e04c3fSmrg{
5387ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
5397ec681f3Smrg
5407ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
5417ec681f3Smrg                              &state->alloc);
5427ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
5437ec681f3Smrg                                   state->btoi_r32g32b32.img_ds_layout, &state->alloc);
5447ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
5457ec681f3Smrg                        &state->alloc);
54601e04c3fSmrg}
54701e04c3fSmrg
54801e04c3fSmrgstatic nir_shader *
5497ec681f3Smrgbuild_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
5507ec681f3Smrg{
5517ec681f3Smrg   bool is_multisampled = samples > 1;
5527ec681f3Smrg   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
5537ec681f3Smrg                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
5547ec681f3Smrg                                                 : GLSL_SAMPLER_DIM_2D;
5557ec681f3Smrg   const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
5567ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
5577ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(
5587ec681f3Smrg      MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
5597ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
5607ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
5617ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
5627ec681f3Smrg   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
5637ec681f3Smrg   input_img->data.descriptor_set = 0;
5647ec681f3Smrg   input_img->data.binding = 0;
5657ec681f3Smrg
5667ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
5677ec681f3Smrg   output_img->data.descriptor_set = 0;
5687ec681f3Smrg   output_img->data.binding = 1;
5697ec681f3Smrg
5707ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
5717ec681f3Smrg
5727ec681f3Smrg   nir_ssa_def *src_offset =
5737ec681f3Smrg      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24);
5747ec681f3Smrg   nir_ssa_def *dst_offset =
5757ec681f3Smrg      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24);
5767ec681f3Smrg
5777ec681f3Smrg   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
5787ec681f3Smrg   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
5797ec681f3Smrg
5807ec681f3Smrg   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
5817ec681f3Smrg
5827ec681f3Smrg   nir_tex_instr *tex_instr[8];
5837ec681f3Smrg   for (uint32_t i = 0; i < samples; i++) {
5847ec681f3Smrg      tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
5857ec681f3Smrg
5867ec681f3Smrg      nir_tex_instr *tex = tex_instr[i];
5877ec681f3Smrg      tex->sampler_dim = dim;
5887ec681f3Smrg      tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
5897ec681f3Smrg      tex->src[0].src_type = nir_tex_src_coord;
5907ec681f3Smrg      tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
5917ec681f3Smrg      tex->src[1].src_type = nir_tex_src_lod;
5927ec681f3Smrg      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
5937ec681f3Smrg      tex->src[2].src_type = nir_tex_src_texture_deref;
5947ec681f3Smrg      tex->src[2].src = nir_src_for_ssa(input_img_deref);
5957ec681f3Smrg      if (is_multisampled) {
5967ec681f3Smrg         tex->src[3].src_type = nir_tex_src_ms_index;
5977ec681f3Smrg         tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
5987ec681f3Smrg      }
5997ec681f3Smrg      tex->dest_type = nir_type_float32;
6007ec681f3Smrg      tex->is_array = false;
6017ec681f3Smrg      tex->coord_components = is_3d ? 3 : 2;
6027ec681f3Smrg
6037ec681f3Smrg      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
6047ec681f3Smrg      nir_builder_instr_insert(&b, &tex->instr);
6057ec681f3Smrg   }
6067ec681f3Smrg
6077ec681f3Smrg   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
6087ec681f3Smrg                                         nir_channel(&b, dst_coord, 1),
6097ec681f3Smrg                                         is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32),
6107ec681f3Smrg                                         nir_ssa_undef(&b, 1, 32));
6117ec681f3Smrg
6127ec681f3Smrg   for (uint32_t i = 0; i < samples; i++) {
6137ec681f3Smrg      nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
6147ec681f3Smrg      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
6157ec681f3Smrg                            nir_imm_int(&b, i), outval, nir_imm_int(&b, 0), .image_dim = dim);
6167ec681f3Smrg   }
6177ec681f3Smrg
6187ec681f3Smrg   return b.shader;
6197ec681f3Smrg}
6207ec681f3Smrg
6217ec681f3Smrgstatic VkResult
6227ec681f3Smrgcreate_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
62301e04c3fSmrg{
6247ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
6257ec681f3Smrg   nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
6267ec681f3Smrg   VkResult result;
6277ec681f3Smrg
6287ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
6297ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
6307ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
6317ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
6327ec681f3Smrg      .pName = "main",
6337ec681f3Smrg      .pSpecializationInfo = NULL,
6347ec681f3Smrg   };
6357ec681f3Smrg
6367ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
6377ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
6387ec681f3Smrg      .stage = pipeline_shader_stage,
6397ec681f3Smrg      .flags = 0,
6407ec681f3Smrg      .layout = state->itoi.img_p_layout,
6417ec681f3Smrg   };
6427ec681f3Smrg
6437ec681f3Smrg   result = radv_CreateComputePipelines(radv_device_to_handle(device),
6447ec681f3Smrg                                        radv_pipeline_cache_to_handle(&state->cache), 1,
6457ec681f3Smrg                                        &vk_pipeline_info, NULL, pipeline);
6467ec681f3Smrg   ralloc_free(cs);
6477ec681f3Smrg   return result;
64801e04c3fSmrg}
64901e04c3fSmrg
65001e04c3fSmrg/* image to image - don't write use image accessors */
65101e04c3fSmrgstatic VkResult
65201e04c3fSmrgradv_device_init_meta_itoi_state(struct radv_device *device)
65301e04c3fSmrg{
6547ec681f3Smrg   VkResult result;
6557ec681f3Smrg
6567ec681f3Smrg   /*
6577ec681f3Smrg    * two descriptors one for the image being sampled
6587ec681f3Smrg    * one for the buffer being written.
6597ec681f3Smrg    */
6607ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
6617ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
6627ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
6637ec681f3Smrg      .bindingCount = 2,
6647ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
6657ec681f3Smrg         {.binding = 0,
6667ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
6677ec681f3Smrg          .descriptorCount = 1,
6687ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
6697ec681f3Smrg          .pImmutableSamplers = NULL},
6707ec681f3Smrg         {.binding = 1,
6717ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
6727ec681f3Smrg          .descriptorCount = 1,
6737ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
6747ec681f3Smrg          .pImmutableSamplers = NULL},
6757ec681f3Smrg      }};
6767ec681f3Smrg
6777ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
6787ec681f3Smrg                                           &device->meta_state.alloc,
6797ec681f3Smrg                                           &device->meta_state.itoi.img_ds_layout);
6807ec681f3Smrg   if (result != VK_SUCCESS)
6817ec681f3Smrg      goto fail;
6827ec681f3Smrg
6837ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
6847ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
6857ec681f3Smrg      .setLayoutCount = 1,
6867ec681f3Smrg      .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
6877ec681f3Smrg      .pushConstantRangeCount = 1,
6887ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
6897ec681f3Smrg   };
6907ec681f3Smrg
6917ec681f3Smrg   result =
6927ec681f3Smrg      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
6937ec681f3Smrg                                &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
6947ec681f3Smrg   if (result != VK_SUCCESS)
6957ec681f3Smrg      goto fail;
6967ec681f3Smrg
6977ec681f3Smrg   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
6987ec681f3Smrg      uint32_t samples = 1 << i;
6997ec681f3Smrg      result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
7007ec681f3Smrg      if (result != VK_SUCCESS)
7017ec681f3Smrg         goto fail;
7027ec681f3Smrg   }
7037ec681f3Smrg
7047ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9) {
7057ec681f3Smrg      nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
7067ec681f3Smrg
7077ec681f3Smrg      VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
7087ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
7097ec681f3Smrg         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
7107ec681f3Smrg         .module = vk_shader_module_handle_from_nir(cs_3d),
7117ec681f3Smrg         .pName = "main",
7127ec681f3Smrg         .pSpecializationInfo = NULL,
7137ec681f3Smrg      };
7147ec681f3Smrg
7157ec681f3Smrg      VkComputePipelineCreateInfo vk_pipeline_info_3d = {
7167ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
7177ec681f3Smrg         .stage = pipeline_shader_stage_3d,
7187ec681f3Smrg         .flags = 0,
7197ec681f3Smrg         .layout = device->meta_state.itoi.img_p_layout,
7207ec681f3Smrg      };
7217ec681f3Smrg
7227ec681f3Smrg      result = radv_CreateComputePipelines(
7237ec681f3Smrg         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
7247ec681f3Smrg         &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d);
7257ec681f3Smrg      ralloc_free(cs_3d);
7267ec681f3Smrg   }
7277ec681f3Smrg
7287ec681f3Smrg   return VK_SUCCESS;
72901e04c3fSmrgfail:
7307ec681f3Smrg   return result;
73101e04c3fSmrg}
73201e04c3fSmrg
73301e04c3fSmrgstatic void
73401e04c3fSmrgradv_device_finish_meta_itoi_state(struct radv_device *device)
73501e04c3fSmrg{
7367ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
7377ec681f3Smrg
7387ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
7397ec681f3Smrg                              &state->alloc);
7407ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
7417ec681f3Smrg                                   &state->alloc);
7427ec681f3Smrg
7437ec681f3Smrg   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
7447ec681f3Smrg      radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
7457ec681f3Smrg   }
7467ec681f3Smrg
7477ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9)
7487ec681f3Smrg      radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
74901e04c3fSmrg}
75001e04c3fSmrg
75101e04c3fSmrgstatic nir_shader *
75201e04c3fSmrgbuild_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
75301e04c3fSmrg{
7547ec681f3Smrg   const struct glsl_type *type =
7557ec681f3Smrg      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
7567ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
7577ec681f3Smrg   nir_builder b =
7587ec681f3Smrg      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
7597ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
7607ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
7617ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
7627ec681f3Smrg   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
7637ec681f3Smrg   input_img->data.descriptor_set = 0;
7647ec681f3Smrg   input_img->data.binding = 0;
7657ec681f3Smrg
7667ec681f3Smrg   nir_variable *output_img =
7677ec681f3Smrg      nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
7687ec681f3Smrg   output_img->data.descriptor_set = 0;
7697ec681f3Smrg   output_img->data.binding = 1;
7707ec681f3Smrg
7717ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, 2);
7727ec681f3Smrg
7737ec681f3Smrg   nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
7747ec681f3Smrg   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
7757ec681f3Smrg
7767ec681f3Smrg   nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
7777ec681f3Smrg   nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
7787ec681f3Smrg
7797ec681f3Smrg   nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
7807ec681f3Smrg   nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
7817ec681f3Smrg
7827ec681f3Smrg   nir_ssa_def *src_global_pos =
7837ec681f3Smrg      nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
7847ec681f3Smrg               nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
7857ec681f3Smrg
7867ec681f3Smrg   nir_ssa_def *dst_global_pos =
7877ec681f3Smrg      nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
7887ec681f3Smrg               nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
7897ec681f3Smrg
7907ec681f3Smrg   for (int chan = 0; chan < 3; chan++) {
7917ec681f3Smrg      /* src */
7927ec681f3Smrg      nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
7937ec681f3Smrg      nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
7947ec681f3Smrg
7957ec681f3Smrg      nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
7967ec681f3Smrg      tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
7977ec681f3Smrg      tex->op = nir_texop_txf;
7987ec681f3Smrg      tex->src[0].src_type = nir_tex_src_coord;
7997ec681f3Smrg      tex->src[0].src = nir_src_for_ssa(src_local_pos);
8007ec681f3Smrg      tex->src[1].src_type = nir_tex_src_lod;
8017ec681f3Smrg      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
8027ec681f3Smrg      tex->src[2].src_type = nir_tex_src_texture_deref;
8037ec681f3Smrg      tex->src[2].src = nir_src_for_ssa(input_img_deref);
8047ec681f3Smrg      tex->dest_type = nir_type_float32;
8057ec681f3Smrg      tex->is_array = false;
8067ec681f3Smrg      tex->coord_components = 1;
8077ec681f3Smrg      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
8087ec681f3Smrg      nir_builder_instr_insert(&b, &tex->instr);
8097ec681f3Smrg
8107ec681f3Smrg      nir_ssa_def *outval = &tex->dest.ssa;
8117ec681f3Smrg
8127ec681f3Smrg      /* dst */
8137ec681f3Smrg      nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
8147ec681f3Smrg
8157ec681f3Smrg      nir_ssa_def *dst_coord =
8167ec681f3Smrg         nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);
8177ec681f3Smrg
8187ec681f3Smrg      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
8197ec681f3Smrg                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
8207ec681f3Smrg                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
8217ec681f3Smrg   }
8227ec681f3Smrg
8237ec681f3Smrg   return b.shader;
82401e04c3fSmrg}
82501e04c3fSmrg
82601e04c3fSmrg/* Image to image - special path for R32G32B32 */
82701e04c3fSmrgstatic VkResult
82801e04c3fSmrgradv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
82901e04c3fSmrg{
8307ec681f3Smrg   VkResult result;
8317ec681f3Smrg   nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
8327ec681f3Smrg
8337ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
8347ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
8357ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
8367ec681f3Smrg      .bindingCount = 2,
8377ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
8387ec681f3Smrg         {.binding = 0,
8397ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
8407ec681f3Smrg          .descriptorCount = 1,
8417ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
8427ec681f3Smrg          .pImmutableSamplers = NULL},
8437ec681f3Smrg         {.binding = 1,
8447ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
8457ec681f3Smrg          .descriptorCount = 1,
8467ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
8477ec681f3Smrg          .pImmutableSamplers = NULL},
8487ec681f3Smrg      }};
8497ec681f3Smrg
8507ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
8517ec681f3Smrg                                           &device->meta_state.alloc,
8527ec681f3Smrg                                           &device->meta_state.itoi_r32g32b32.img_ds_layout);
8537ec681f3Smrg   if (result != VK_SUCCESS)
8547ec681f3Smrg      goto fail;
8557ec681f3Smrg
8567ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
8577ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
8587ec681f3Smrg      .setLayoutCount = 1,
8597ec681f3Smrg      .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
8607ec681f3Smrg      .pushConstantRangeCount = 1,
8617ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
8627ec681f3Smrg   };
8637ec681f3Smrg
8647ec681f3Smrg   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
8657ec681f3Smrg                                      &device->meta_state.alloc,
8667ec681f3Smrg                                      &device->meta_state.itoi_r32g32b32.img_p_layout);
8677ec681f3Smrg   if (result != VK_SUCCESS)
8687ec681f3Smrg      goto fail;
8697ec681f3Smrg
8707ec681f3Smrg   /* compute shader */
8717ec681f3Smrg
8727ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
8737ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
8747ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
8757ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
8767ec681f3Smrg      .pName = "main",
8777ec681f3Smrg      .pSpecializationInfo = NULL,
8787ec681f3Smrg   };
8797ec681f3Smrg
8807ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
8817ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
8827ec681f3Smrg      .stage = pipeline_shader_stage,
8837ec681f3Smrg      .flags = 0,
8847ec681f3Smrg      .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
8857ec681f3Smrg   };
8867ec681f3Smrg
8877ec681f3Smrg   result = radv_CreateComputePipelines(
8887ec681f3Smrg      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
8897ec681f3Smrg      &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);
89001e04c3fSmrg
89101e04c3fSmrgfail:
8927ec681f3Smrg   ralloc_free(cs);
8937ec681f3Smrg   return result;
89401e04c3fSmrg}
89501e04c3fSmrg
89601e04c3fSmrgstatic void
89701e04c3fSmrgradv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
89801e04c3fSmrg{
8997ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
9007ec681f3Smrg
9017ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
9027ec681f3Smrg                              &state->alloc);
9037ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
9047ec681f3Smrg                                   state->itoi_r32g32b32.img_ds_layout, &state->alloc);
9057ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
9067ec681f3Smrg                        &state->alloc);
90701e04c3fSmrg}
90801e04c3fSmrg
90901e04c3fSmrgstatic nir_shader *
9107ec681f3Smrgbuild_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
9117ec681f3Smrg{
9127ec681f3Smrg   bool is_multisampled = samples > 1;
9137ec681f3Smrg   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
9147ec681f3Smrg                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
9157ec681f3Smrg                                                 : GLSL_SAMPLER_DIM_2D;
9167ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
9177ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(
9187ec681f3Smrg      MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
9197ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
9207ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
9217ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
9227ec681f3Smrg
9237ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
9247ec681f3Smrg   output_img->data.descriptor_set = 0;
9257ec681f3Smrg   output_img->data.binding = 0;
9267ec681f3Smrg
9277ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, 2);
9287ec681f3Smrg
9297ec681f3Smrg   nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20);
9307ec681f3Smrg   nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
9317ec681f3Smrg
9327ec681f3Smrg   nir_ssa_def *comps[4];
9337ec681f3Smrg   comps[0] = nir_channel(&b, global_id, 0);
9347ec681f3Smrg   comps[1] = nir_channel(&b, global_id, 1);
9357ec681f3Smrg   comps[2] = layer;
9367ec681f3Smrg   comps[3] = nir_ssa_undef(&b, 1, 32);
9377ec681f3Smrg   global_id = nir_vec(&b, comps, 4);
9387ec681f3Smrg
9397ec681f3Smrg   for (uint32_t i = 0; i < samples; i++) {
9407ec681f3Smrg      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
9417ec681f3Smrg                            nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim);
9427ec681f3Smrg   }
9437ec681f3Smrg
9447ec681f3Smrg   return b.shader;
9457ec681f3Smrg}
9467ec681f3Smrg
9477ec681f3Smrgstatic VkResult
9487ec681f3Smrgcreate_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
94901e04c3fSmrg{
9507ec681f3Smrg   nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
9517ec681f3Smrg   VkResult result;
9527ec681f3Smrg
9537ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
9547ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
9557ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
9567ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
9577ec681f3Smrg      .pName = "main",
9587ec681f3Smrg      .pSpecializationInfo = NULL,
9597ec681f3Smrg   };
9607ec681f3Smrg
9617ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
9627ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
9637ec681f3Smrg      .stage = pipeline_shader_stage,
9647ec681f3Smrg      .flags = 0,
9657ec681f3Smrg      .layout = device->meta_state.cleari.img_p_layout,
9667ec681f3Smrg   };
9677ec681f3Smrg
9687ec681f3Smrg   result = radv_CreateComputePipelines(radv_device_to_handle(device),
9697ec681f3Smrg                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
9707ec681f3Smrg                                        &vk_pipeline_info, NULL, pipeline);
9717ec681f3Smrg   ralloc_free(cs);
9727ec681f3Smrg   return result;
97301e04c3fSmrg}
97401e04c3fSmrg
97501e04c3fSmrgstatic VkResult
97601e04c3fSmrgradv_device_init_meta_cleari_state(struct radv_device *device)
97701e04c3fSmrg{
9787ec681f3Smrg   VkResult result;
9797ec681f3Smrg
9807ec681f3Smrg   /*
9817ec681f3Smrg    * two descriptors one for the image being sampled
9827ec681f3Smrg    * one for the buffer being written.
9837ec681f3Smrg    */
9847ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
9857ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
9867ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
9877ec681f3Smrg      .bindingCount = 1,
9887ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
9897ec681f3Smrg         {.binding = 0,
9907ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
9917ec681f3Smrg          .descriptorCount = 1,
9927ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
9937ec681f3Smrg          .pImmutableSamplers = NULL},
9947ec681f3Smrg      }};
9957ec681f3Smrg
9967ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
9977ec681f3Smrg                                           &device->meta_state.alloc,
9987ec681f3Smrg                                           &device->meta_state.cleari.img_ds_layout);
9997ec681f3Smrg   if (result != VK_SUCCESS)
10007ec681f3Smrg      goto fail;
10017ec681f3Smrg
10027ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
10037ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
10047ec681f3Smrg      .setLayoutCount = 1,
10057ec681f3Smrg      .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
10067ec681f3Smrg      .pushConstantRangeCount = 1,
10077ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
10087ec681f3Smrg   };
10097ec681f3Smrg
10107ec681f3Smrg   result =
10117ec681f3Smrg      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
10127ec681f3Smrg                                &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
10137ec681f3Smrg   if (result != VK_SUCCESS)
10147ec681f3Smrg      goto fail;
10157ec681f3Smrg
10167ec681f3Smrg   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
10177ec681f3Smrg      uint32_t samples = 1 << i;
10187ec681f3Smrg      result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
10197ec681f3Smrg      if (result != VK_SUCCESS)
10207ec681f3Smrg         goto fail;
10217ec681f3Smrg   }
10227ec681f3Smrg
10237ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9) {
10247ec681f3Smrg      nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
10257ec681f3Smrg
10267ec681f3Smrg      /* compute shader */
10277ec681f3Smrg      VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
10287ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
10297ec681f3Smrg         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
10307ec681f3Smrg         .module = vk_shader_module_handle_from_nir(cs_3d),
10317ec681f3Smrg         .pName = "main",
10327ec681f3Smrg         .pSpecializationInfo = NULL,
10337ec681f3Smrg      };
10347ec681f3Smrg
10357ec681f3Smrg      VkComputePipelineCreateInfo vk_pipeline_info_3d = {
10367ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
10377ec681f3Smrg         .stage = pipeline_shader_stage_3d,
10387ec681f3Smrg         .flags = 0,
10397ec681f3Smrg         .layout = device->meta_state.cleari.img_p_layout,
10407ec681f3Smrg      };
10417ec681f3Smrg
10427ec681f3Smrg      result = radv_CreateComputePipelines(
10437ec681f3Smrg         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
10447ec681f3Smrg         &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d);
10457ec681f3Smrg      ralloc_free(cs_3d);
10467ec681f3Smrg   }
10477ec681f3Smrg
10487ec681f3Smrg   return VK_SUCCESS;
104901e04c3fSmrgfail:
10507ec681f3Smrg   return result;
105101e04c3fSmrg}
105201e04c3fSmrg
105301e04c3fSmrgstatic void
105401e04c3fSmrgradv_device_finish_meta_cleari_state(struct radv_device *device)
105501e04c3fSmrg{
10567ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
10577ec681f3Smrg
10587ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
10597ec681f3Smrg                              &state->alloc);
10607ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
10617ec681f3Smrg                                   &state->alloc);
10627ec681f3Smrg
10637ec681f3Smrg   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
10647ec681f3Smrg      radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
10657ec681f3Smrg   }
10667ec681f3Smrg
10677ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
106801e04c3fSmrg}
106901e04c3fSmrg
107001e04c3fSmrg/* Special path for clearing R32G32B32 images using a compute shader. */
107101e04c3fSmrgstatic nir_shader *
107201e04c3fSmrgbuild_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
107301e04c3fSmrg{
10747ec681f3Smrg   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
10757ec681f3Smrg   nir_builder b =
10767ec681f3Smrg      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
10777ec681f3Smrg   b.shader->info.workgroup_size[0] = 8;
10787ec681f3Smrg   b.shader->info.workgroup_size[1] = 8;
10797ec681f3Smrg   b.shader->info.workgroup_size[2] = 1;
10807ec681f3Smrg
10817ec681f3Smrg   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
10827ec681f3Smrg   output_img->data.descriptor_set = 0;
10837ec681f3Smrg   output_img->data.binding = 0;
10847ec681f3Smrg
10857ec681f3Smrg   nir_ssa_def *global_id = get_global_ids(&b, 2);
10867ec681f3Smrg
10877ec681f3Smrg   nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
10887ec681f3Smrg   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
10897ec681f3Smrg
10907ec681f3Smrg   nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
10917ec681f3Smrg   nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
10927ec681f3Smrg
10937ec681f3Smrg   nir_ssa_def *global_pos =
10947ec681f3Smrg      nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));
10957ec681f3Smrg
10967ec681f3Smrg   for (unsigned chan = 0; chan < 3; chan++) {
10977ec681f3Smrg      nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
10987ec681f3Smrg
10997ec681f3Smrg      nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
11007ec681f3Smrg
11017ec681f3Smrg      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
11027ec681f3Smrg                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
11037ec681f3Smrg                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
11047ec681f3Smrg   }
11057ec681f3Smrg
11067ec681f3Smrg   return b.shader;
110701e04c3fSmrg}
110801e04c3fSmrg
110901e04c3fSmrgstatic VkResult
111001e04c3fSmrgradv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
111101e04c3fSmrg{
11127ec681f3Smrg   VkResult result;
11137ec681f3Smrg   nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
11147ec681f3Smrg
11157ec681f3Smrg   VkDescriptorSetLayoutCreateInfo ds_create_info = {
11167ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
11177ec681f3Smrg      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
11187ec681f3Smrg      .bindingCount = 1,
11197ec681f3Smrg      .pBindings = (VkDescriptorSetLayoutBinding[]){
11207ec681f3Smrg         {.binding = 0,
11217ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
11227ec681f3Smrg          .descriptorCount = 1,
11237ec681f3Smrg          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
11247ec681f3Smrg          .pImmutableSamplers = NULL},
11257ec681f3Smrg      }};
11267ec681f3Smrg
11277ec681f3Smrg   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
11287ec681f3Smrg                                           &device->meta_state.alloc,
11297ec681f3Smrg                                           &device->meta_state.cleari_r32g32b32.img_ds_layout);
11307ec681f3Smrg   if (result != VK_SUCCESS)
11317ec681f3Smrg      goto fail;
11327ec681f3Smrg
11337ec681f3Smrg   VkPipelineLayoutCreateInfo pl_create_info = {
11347ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
11357ec681f3Smrg      .setLayoutCount = 1,
11367ec681f3Smrg      .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
11377ec681f3Smrg      .pushConstantRangeCount = 1,
11387ec681f3Smrg      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
11397ec681f3Smrg   };
11407ec681f3Smrg
11417ec681f3Smrg   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
11427ec681f3Smrg                                      &device->meta_state.alloc,
11437ec681f3Smrg                                      &device->meta_state.cleari_r32g32b32.img_p_layout);
11447ec681f3Smrg   if (result != VK_SUCCESS)
11457ec681f3Smrg      goto fail;
11467ec681f3Smrg
11477ec681f3Smrg   /* compute shader */
11487ec681f3Smrg   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
11497ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
11507ec681f3Smrg      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
11517ec681f3Smrg      .module = vk_shader_module_handle_from_nir(cs),
11527ec681f3Smrg      .pName = "main",
11537ec681f3Smrg      .pSpecializationInfo = NULL,
11547ec681f3Smrg   };
11557ec681f3Smrg
11567ec681f3Smrg   VkComputePipelineCreateInfo vk_pipeline_info = {
11577ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
11587ec681f3Smrg      .stage = pipeline_shader_stage,
11597ec681f3Smrg      .flags = 0,
11607ec681f3Smrg      .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
11617ec681f3Smrg   };
11627ec681f3Smrg
11637ec681f3Smrg   result = radv_CreateComputePipelines(
11647ec681f3Smrg      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
11657ec681f3Smrg      &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);
116601e04c3fSmrg
116701e04c3fSmrgfail:
11687ec681f3Smrg   ralloc_free(cs);
11697ec681f3Smrg   return result;
117001e04c3fSmrg}
117101e04c3fSmrg
117201e04c3fSmrgstatic void
117301e04c3fSmrgradv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
117401e04c3fSmrg{
11757ec681f3Smrg   struct radv_meta_state *state = &device->meta_state;
11767ec681f3Smrg
11777ec681f3Smrg   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
11787ec681f3Smrg                              &state->alloc);
11797ec681f3Smrg   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
11807ec681f3Smrg                                   state->cleari_r32g32b32.img_ds_layout, &state->alloc);
11817ec681f3Smrg   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
11827ec681f3Smrg                        &state->alloc);
118301e04c3fSmrg}
118401e04c3fSmrg
118501e04c3fSmrgvoid
118601e04c3fSmrgradv_device_finish_meta_bufimage_state(struct radv_device *device)
118701e04c3fSmrg{
11887ec681f3Smrg   radv_device_finish_meta_itob_state(device);
11897ec681f3Smrg   radv_device_finish_meta_btoi_state(device);
11907ec681f3Smrg   radv_device_finish_meta_btoi_r32g32b32_state(device);
11917ec681f3Smrg   radv_device_finish_meta_itoi_state(device);
11927ec681f3Smrg   radv_device_finish_meta_itoi_r32g32b32_state(device);
11937ec681f3Smrg   radv_device_finish_meta_cleari_state(device);
11947ec681f3Smrg   radv_device_finish_meta_cleari_r32g32b32_state(device);
119501e04c3fSmrg}
119601e04c3fSmrg
119701e04c3fSmrgVkResult
119801e04c3fSmrgradv_device_init_meta_bufimage_state(struct radv_device *device)
119901e04c3fSmrg{
12007ec681f3Smrg   VkResult result;
120101e04c3fSmrg
12027ec681f3Smrg   result = radv_device_init_meta_itob_state(device);
12037ec681f3Smrg   if (result != VK_SUCCESS)
12047ec681f3Smrg      goto fail_itob;
120501e04c3fSmrg
12067ec681f3Smrg   result = radv_device_init_meta_btoi_state(device);
12077ec681f3Smrg   if (result != VK_SUCCESS)
12087ec681f3Smrg      goto fail_btoi;
120901e04c3fSmrg
12107ec681f3Smrg   result = radv_device_init_meta_btoi_r32g32b32_state(device);
12117ec681f3Smrg   if (result != VK_SUCCESS)
12127ec681f3Smrg      goto fail_btoi_r32g32b32;
121301e04c3fSmrg
12147ec681f3Smrg   result = radv_device_init_meta_itoi_state(device);
12157ec681f3Smrg   if (result != VK_SUCCESS)
12167ec681f3Smrg      goto fail_itoi;
121701e04c3fSmrg
12187ec681f3Smrg   result = radv_device_init_meta_itoi_r32g32b32_state(device);
12197ec681f3Smrg   if (result != VK_SUCCESS)
12207ec681f3Smrg      goto fail_itoi_r32g32b32;
122101e04c3fSmrg
12227ec681f3Smrg   result = radv_device_init_meta_cleari_state(device);
12237ec681f3Smrg   if (result != VK_SUCCESS)
12247ec681f3Smrg      goto fail_cleari;
122501e04c3fSmrg
12267ec681f3Smrg   result = radv_device_init_meta_cleari_r32g32b32_state(device);
12277ec681f3Smrg   if (result != VK_SUCCESS)
12287ec681f3Smrg      goto fail_cleari_r32g32b32;
122901e04c3fSmrg
12307ec681f3Smrg   return VK_SUCCESS;
123101e04c3fSmrgfail_cleari_r32g32b32:
12327ec681f3Smrg   radv_device_finish_meta_cleari_r32g32b32_state(device);
123301e04c3fSmrgfail_cleari:
12347ec681f3Smrg   radv_device_finish_meta_cleari_state(device);
123501e04c3fSmrgfail_itoi_r32g32b32:
12367ec681f3Smrg   radv_device_finish_meta_itoi_r32g32b32_state(device);
123701e04c3fSmrgfail_itoi:
12387ec681f3Smrg   radv_device_finish_meta_itoi_state(device);
123901e04c3fSmrgfail_btoi_r32g32b32:
12407ec681f3Smrg   radv_device_finish_meta_btoi_r32g32b32_state(device);
124101e04c3fSmrgfail_btoi:
12427ec681f3Smrg   radv_device_finish_meta_btoi_state(device);
124301e04c3fSmrgfail_itob:
12447ec681f3Smrg   radv_device_finish_meta_itob_state(device);
12457ec681f3Smrg   return result;
124601e04c3fSmrg}
124701e04c3fSmrg
124801e04c3fSmrgstatic void
12497ec681f3Smrgcreate_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
12507ec681f3Smrg             struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects)
125101e04c3fSmrg{
12527ec681f3Smrg   VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
12537ec681f3Smrg                                  ? VK_IMAGE_VIEW_TYPE_2D
12547ec681f3Smrg                                  : radv_meta_get_view_type(surf->image);
12557ec681f3Smrg
12567ec681f3Smrg   if (format == VK_FORMAT_UNDEFINED)
12577ec681f3Smrg      format = surf->format;
12587ec681f3Smrg
12597ec681f3Smrg   radv_image_view_init(iview, cmd_buffer->device,
12607ec681f3Smrg                        &(VkImageViewCreateInfo){
12617ec681f3Smrg                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
12627ec681f3Smrg                           .image = radv_image_to_handle(surf->image),
12637ec681f3Smrg                           .viewType = view_type,
12647ec681f3Smrg                           .format = format,
12657ec681f3Smrg                           .subresourceRange = {.aspectMask = aspects,
12667ec681f3Smrg                                                .baseMipLevel = surf->level,
12677ec681f3Smrg                                                .levelCount = 1,
12687ec681f3Smrg                                                .baseArrayLayer = surf->layer,
12697ec681f3Smrg                                                .layerCount = 1},
12707ec681f3Smrg                        },
12717ec681f3Smrg                        &(struct radv_image_view_extra_create_info){
12727ec681f3Smrg                           .disable_compression = surf->disable_compression,
12737ec681f3Smrg                        });
127401e04c3fSmrg}
127501e04c3fSmrg
127601e04c3fSmrgstatic void
12777ec681f3Smrgcreate_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
12787ec681f3Smrg             VkFormat format, struct radv_buffer_view *bview)
127901e04c3fSmrg{
12807ec681f3Smrg   radv_buffer_view_init(bview, cmd_buffer->device,
12817ec681f3Smrg                         &(VkBufferViewCreateInfo){
12827ec681f3Smrg                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
12837ec681f3Smrg                            .flags = 0,
12847ec681f3Smrg                            .buffer = radv_buffer_to_handle(buffer),
12857ec681f3Smrg                            .format = format,
12867ec681f3Smrg                            .offset = offset,
12877ec681f3Smrg                            .range = VK_WHOLE_SIZE,
12887ec681f3Smrg                         });
128901e04c3fSmrg}
129001e04c3fSmrg
129101e04c3fSmrgstatic void
12927ec681f3Smrgcreate_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
12937ec681f3Smrg                         VkBufferUsageFlagBits usage, VkBuffer *buffer)
129401e04c3fSmrg{
12957ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
12967ec681f3Smrg   struct radv_device_memory mem;
12977ec681f3Smrg
12987ec681f3Smrg   radv_device_memory_init(&mem, device, surf->image->bo);
12997ec681f3Smrg
13007ec681f3Smrg   radv_CreateBuffer(radv_device_to_handle(device),
13017ec681f3Smrg                     &(VkBufferCreateInfo){
13027ec681f3Smrg                        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
13037ec681f3Smrg                        .flags = 0,
13047ec681f3Smrg                        .size = surf->image->size,
13057ec681f3Smrg                        .usage = usage,
13067ec681f3Smrg                        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
13077ec681f3Smrg                     },
13087ec681f3Smrg                     NULL, buffer);
13097ec681f3Smrg
13107ec681f3Smrg   radv_BindBufferMemory2(radv_device_to_handle(device), 1,
13117ec681f3Smrg                          (VkBindBufferMemoryInfo[]){{
13127ec681f3Smrg                             .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
13137ec681f3Smrg                             .buffer = *buffer,
13147ec681f3Smrg                             .memory = radv_device_memory_to_handle(&mem),
13157ec681f3Smrg                             .memoryOffset = surf->image->offset,
13167ec681f3Smrg                          }});
13177ec681f3Smrg
13187ec681f3Smrg   radv_device_memory_finish(&mem);
131901e04c3fSmrg}
132001e04c3fSmrg
132101e04c3fSmrgstatic void
13227ec681f3Smrgcreate_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
13237ec681f3Smrg                           unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
132401e04c3fSmrg{
13257ec681f3Smrg   VkFormat format;
13267ec681f3Smrg
13277ec681f3Smrg   switch (src_format) {
13287ec681f3Smrg   case VK_FORMAT_R32G32B32_UINT:
13297ec681f3Smrg      format = VK_FORMAT_R32_UINT;
13307ec681f3Smrg      break;
13317ec681f3Smrg   case VK_FORMAT_R32G32B32_SINT:
13327ec681f3Smrg      format = VK_FORMAT_R32_SINT;
13337ec681f3Smrg      break;
13347ec681f3Smrg   case VK_FORMAT_R32G32B32_SFLOAT:
13357ec681f3Smrg      format = VK_FORMAT_R32_SFLOAT;
13367ec681f3Smrg      break;
13377ec681f3Smrg   default:
13387ec681f3Smrg      unreachable("invalid R32G32B32 format");
13397ec681f3Smrg   }
13407ec681f3Smrg
13417ec681f3Smrg   radv_buffer_view_init(bview, cmd_buffer->device,
13427ec681f3Smrg                         &(VkBufferViewCreateInfo){
13437ec681f3Smrg                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
13447ec681f3Smrg                            .flags = 0,
13457ec681f3Smrg                            .buffer = radv_buffer_to_handle(buffer),
13467ec681f3Smrg                            .format = format,
13477ec681f3Smrg                            .offset = offset,
13487ec681f3Smrg                            .range = VK_WHOLE_SIZE,
13497ec681f3Smrg                         });
135001e04c3fSmrg}
135101e04c3fSmrg
135201e04c3fSmrgstatic unsigned
135301e04c3fSmrgget_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
13547ec681f3Smrg                               struct radv_meta_blit2d_surf *surf)
135501e04c3fSmrg{
13567ec681f3Smrg   unsigned stride;
135701e04c3fSmrg
13587ec681f3Smrg   if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
13597ec681f3Smrg      stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
13607ec681f3Smrg   } else {
13617ec681f3Smrg      stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
13627ec681f3Smrg   }
136301e04c3fSmrg
13647ec681f3Smrg   return stride;
136501e04c3fSmrg}
136601e04c3fSmrg
136701e04c3fSmrgstatic void
13687ec681f3Smrgitob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
13697ec681f3Smrg                      struct radv_buffer_view *dst)
137001e04c3fSmrg{
13717ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
13727ec681f3Smrg
13737ec681f3Smrg   radv_meta_push_descriptor_set(
13747ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
13757ec681f3Smrg      2, /* descriptorWriteCount */
13767ec681f3Smrg      (VkWriteDescriptorSet[]){
13777ec681f3Smrg         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
13787ec681f3Smrg          .dstBinding = 0,
13797ec681f3Smrg          .dstArrayElement = 0,
13807ec681f3Smrg          .descriptorCount = 1,
13817ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
13827ec681f3Smrg          .pImageInfo =
13837ec681f3Smrg             (VkDescriptorImageInfo[]){
13847ec681f3Smrg                {
13857ec681f3Smrg                   .sampler = VK_NULL_HANDLE,
13867ec681f3Smrg                   .imageView = radv_image_view_to_handle(src),
13877ec681f3Smrg                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
13887ec681f3Smrg                },
13897ec681f3Smrg             }},
13907ec681f3Smrg         {
13917ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
13927ec681f3Smrg            .dstBinding = 1,
13937ec681f3Smrg            .dstArrayElement = 0,
13947ec681f3Smrg            .descriptorCount = 1,
13957ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
13967ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
13977ec681f3Smrg         }});
139801e04c3fSmrg}
139901e04c3fSmrg
140001e04c3fSmrgvoid
14017ec681f3Smrgradv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
14027ec681f3Smrg                          struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
14037ec681f3Smrg                          struct radv_meta_blit2d_rect *rects)
140401e04c3fSmrg{
14057ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
14067ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
14077ec681f3Smrg   struct radv_image_view src_view;
14087ec681f3Smrg   struct radv_buffer_view dst_view;
14097ec681f3Smrg
14107ec681f3Smrg   create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
14117ec681f3Smrg   create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
14127ec681f3Smrg   itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
14137ec681f3Smrg
14147ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
14157ec681f3Smrg      pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
14167ec681f3Smrg
14177ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
14187ec681f3Smrg                        pipeline);
14197ec681f3Smrg
14207ec681f3Smrg   for (unsigned r = 0; r < num_rects; ++r) {
14217ec681f3Smrg      unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
14227ec681f3Smrg      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
14237ec681f3Smrg                            device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
14247ec681f3Smrg                            16, push_constants);
14257ec681f3Smrg
14267ec681f3Smrg      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
14277ec681f3Smrg   }
14287ec681f3Smrg
14297ec681f3Smrg   radv_image_view_finish(&src_view);
14307ec681f3Smrg   radv_buffer_view_finish(&dst_view);
143101e04c3fSmrg}
143201e04c3fSmrg
143301e04c3fSmrgstatic void
14347ec681f3Smrgbtoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
14357ec681f3Smrg                                struct radv_buffer_view *dst)
143601e04c3fSmrg{
14377ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
14387ec681f3Smrg
14397ec681f3Smrg   radv_meta_push_descriptor_set(
14407ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
14417ec681f3Smrg      0, /* set */
14427ec681f3Smrg      2, /* descriptorWriteCount */
14437ec681f3Smrg      (VkWriteDescriptorSet[]){
14447ec681f3Smrg         {
14457ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
14467ec681f3Smrg            .dstBinding = 0,
14477ec681f3Smrg            .dstArrayElement = 0,
14487ec681f3Smrg            .descriptorCount = 1,
14497ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
14507ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
14517ec681f3Smrg         },
14527ec681f3Smrg         {
14537ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
14547ec681f3Smrg            .dstBinding = 1,
14557ec681f3Smrg            .dstArrayElement = 0,
14567ec681f3Smrg            .descriptorCount = 1,
14577ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
14587ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
14597ec681f3Smrg         }});
146001e04c3fSmrg}
146101e04c3fSmrg
146201e04c3fSmrgstatic void
146301e04c3fSmrgradv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
14647ec681f3Smrg                                       struct radv_meta_blit2d_buffer *src,
14657ec681f3Smrg                                       struct radv_meta_blit2d_surf *dst, unsigned num_rects,
14667ec681f3Smrg                                       struct radv_meta_blit2d_rect *rects)
146701e04c3fSmrg{
14687ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
14697ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
14707ec681f3Smrg   struct radv_buffer_view src_view, dst_view;
14717ec681f3Smrg   unsigned dst_offset = 0;
14727ec681f3Smrg   unsigned stride;
14737ec681f3Smrg   VkBuffer buffer;
14747ec681f3Smrg
14757ec681f3Smrg   /* This special btoi path for R32G32B32 formats will write the linear
14767ec681f3Smrg    * image as a buffer with the same underlying memory. The compute
14777ec681f3Smrg    * shader will copy all components separately using a R32 format.
14787ec681f3Smrg    */
14797ec681f3Smrg   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
14807ec681f3Smrg
14817ec681f3Smrg   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
14827ec681f3Smrg   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
14837ec681f3Smrg                              &dst_view);
14847ec681f3Smrg   btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
14857ec681f3Smrg
14867ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
14877ec681f3Smrg                        pipeline);
14887ec681f3Smrg
14897ec681f3Smrg   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
14907ec681f3Smrg
14917ec681f3Smrg   for (unsigned r = 0; r < num_rects; ++r) {
14927ec681f3Smrg      unsigned push_constants[4] = {
14937ec681f3Smrg         rects[r].dst_x,
14947ec681f3Smrg         rects[r].dst_y,
14957ec681f3Smrg         stride,
14967ec681f3Smrg         src->pitch,
14977ec681f3Smrg      };
14987ec681f3Smrg
14997ec681f3Smrg      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
15007ec681f3Smrg                            device->meta_state.btoi_r32g32b32.img_p_layout,
15017ec681f3Smrg                            VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
15027ec681f3Smrg
15037ec681f3Smrg      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
15047ec681f3Smrg   }
15057ec681f3Smrg
15067ec681f3Smrg   radv_buffer_view_finish(&src_view);
15077ec681f3Smrg   radv_buffer_view_finish(&dst_view);
15087ec681f3Smrg   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
150901e04c3fSmrg}
151001e04c3fSmrg
151101e04c3fSmrgstatic void
15127ec681f3Smrgbtoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
15137ec681f3Smrg                      struct radv_image_view *dst)
151401e04c3fSmrg{
15157ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
15167ec681f3Smrg
15177ec681f3Smrg   radv_meta_push_descriptor_set(
15187ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
15197ec681f3Smrg      2, /* descriptorWriteCount */
15207ec681f3Smrg      (VkWriteDescriptorSet[]){
15217ec681f3Smrg         {
15227ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
15237ec681f3Smrg            .dstBinding = 0,
15247ec681f3Smrg            .dstArrayElement = 0,
15257ec681f3Smrg            .descriptorCount = 1,
15267ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
15277ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
15287ec681f3Smrg         },
15297ec681f3Smrg         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
15307ec681f3Smrg          .dstBinding = 1,
15317ec681f3Smrg          .dstArrayElement = 0,
15327ec681f3Smrg          .descriptorCount = 1,
15337ec681f3Smrg          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
15347ec681f3Smrg          .pImageInfo = (VkDescriptorImageInfo[]){
15357ec681f3Smrg             {
15367ec681f3Smrg                .sampler = VK_NULL_HANDLE,
15377ec681f3Smrg                .imageView = radv_image_view_to_handle(dst),
15387ec681f3Smrg                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
15397ec681f3Smrg             },
15407ec681f3Smrg          }}});
154101e04c3fSmrg}
154201e04c3fSmrg
154301e04c3fSmrgvoid
154401e04c3fSmrgradv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
15457ec681f3Smrg                             struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
15467ec681f3Smrg                             unsigned num_rects, struct radv_meta_blit2d_rect *rects)
154701e04c3fSmrg{
15487ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
15497ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
15507ec681f3Smrg   struct radv_buffer_view src_view;
15517ec681f3Smrg   struct radv_image_view dst_view;
15527ec681f3Smrg
15537ec681f3Smrg   if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
15547ec681f3Smrg       dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
15557ec681f3Smrg       dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
15567ec681f3Smrg      radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
15577ec681f3Smrg      return;
15587ec681f3Smrg   }
15597ec681f3Smrg
15607ec681f3Smrg   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
15617ec681f3Smrg   create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
15627ec681f3Smrg   btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
15637ec681f3Smrg
15647ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
15657ec681f3Smrg      pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
15667ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
15677ec681f3Smrg                        pipeline);
15687ec681f3Smrg
15697ec681f3Smrg   for (unsigned r = 0; r < num_rects; ++r) {
15707ec681f3Smrg      unsigned push_constants[4] = {
15717ec681f3Smrg         rects[r].dst_x,
15727ec681f3Smrg         rects[r].dst_y,
15737ec681f3Smrg         dst->layer,
15747ec681f3Smrg         src->pitch,
15757ec681f3Smrg      };
15767ec681f3Smrg      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
15777ec681f3Smrg                            device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
15787ec681f3Smrg                            16, push_constants);
15797ec681f3Smrg
15807ec681f3Smrg      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
15817ec681f3Smrg   }
15827ec681f3Smrg
15837ec681f3Smrg   radv_image_view_finish(&dst_view);
15847ec681f3Smrg   radv_buffer_view_finish(&src_view);
158501e04c3fSmrg}
158601e04c3fSmrg
158701e04c3fSmrgstatic void
15887ec681f3Smrgitoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
15897ec681f3Smrg                                struct radv_buffer_view *dst)
159001e04c3fSmrg{
15917ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
15927ec681f3Smrg
15937ec681f3Smrg   radv_meta_push_descriptor_set(
15947ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
15957ec681f3Smrg      0, /* set */
15967ec681f3Smrg      2, /* descriptorWriteCount */
15977ec681f3Smrg      (VkWriteDescriptorSet[]){
15987ec681f3Smrg         {
15997ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
16007ec681f3Smrg            .dstBinding = 0,
16017ec681f3Smrg            .dstArrayElement = 0,
16027ec681f3Smrg            .descriptorCount = 1,
16037ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
16047ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
16057ec681f3Smrg         },
16067ec681f3Smrg         {
16077ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
16087ec681f3Smrg            .dstBinding = 1,
16097ec681f3Smrg            .dstArrayElement = 0,
16107ec681f3Smrg            .descriptorCount = 1,
16117ec681f3Smrg            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
16127ec681f3Smrg            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
16137ec681f3Smrg         }});
161401e04c3fSmrg}
161501e04c3fSmrg
161601e04c3fSmrgstatic void
161701e04c3fSmrgradv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
16187ec681f3Smrg                                      struct radv_meta_blit2d_surf *src,
16197ec681f3Smrg                                      struct radv_meta_blit2d_surf *dst, unsigned num_rects,
16207ec681f3Smrg                                      struct radv_meta_blit2d_rect *rects)
162101e04c3fSmrg{
16227ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
16237ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
16247ec681f3Smrg   struct radv_buffer_view src_view, dst_view;
16257ec681f3Smrg   unsigned src_offset = 0, dst_offset = 0;
16267ec681f3Smrg   unsigned src_stride, dst_stride;
16277ec681f3Smrg   VkBuffer src_buffer, dst_buffer;
16287ec681f3Smrg
16297ec681f3Smrg   /* 96-bit formats are only compatible to themselves. */
16307ec681f3Smrg   assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
16317ec681f3Smrg          dst->format == VK_FORMAT_R32G32B32_SFLOAT);
16327ec681f3Smrg
16337ec681f3Smrg   /* This special itoi path for R32G32B32 formats will write the linear
16347ec681f3Smrg    * image as a buffer with the same underlying memory. The compute
16357ec681f3Smrg    * shader will copy all components separately using a R32 format.
16367ec681f3Smrg    */
16377ec681f3Smrg   create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
16387ec681f3Smrg   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
16397ec681f3Smrg
16407ec681f3Smrg   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
16417ec681f3Smrg                              src->format, &src_view);
16427ec681f3Smrg   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
16437ec681f3Smrg                              dst->format, &dst_view);
16447ec681f3Smrg   itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
16457ec681f3Smrg
16467ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
16477ec681f3Smrg                        pipeline);
16487ec681f3Smrg
16497ec681f3Smrg   src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
16507ec681f3Smrg   dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
16517ec681f3Smrg
16527ec681f3Smrg   for (unsigned r = 0; r < num_rects; ++r) {
16537ec681f3Smrg      unsigned push_constants[6] = {
16547ec681f3Smrg         rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
16557ec681f3Smrg      };
16567ec681f3Smrg      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
16577ec681f3Smrg                            device->meta_state.itoi_r32g32b32.img_p_layout,
16587ec681f3Smrg                            VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
16597ec681f3Smrg
16607ec681f3Smrg      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
16617ec681f3Smrg   }
16627ec681f3Smrg
16637ec681f3Smrg   radv_buffer_view_finish(&src_view);
16647ec681f3Smrg   radv_buffer_view_finish(&dst_view);
16657ec681f3Smrg   radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
16667ec681f3Smrg   radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
166701e04c3fSmrg}
166801e04c3fSmrg
166901e04c3fSmrgstatic void
16707ec681f3Smrgitoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
16717ec681f3Smrg                      struct radv_image_view *dst)
167201e04c3fSmrg{
16737ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
16747ec681f3Smrg
16757ec681f3Smrg   radv_meta_push_descriptor_set(
16767ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
16777ec681f3Smrg      2, /* descriptorWriteCount */
16787ec681f3Smrg      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
16797ec681f3Smrg                                .dstBinding = 0,
16807ec681f3Smrg                                .dstArrayElement = 0,
16817ec681f3Smrg                                .descriptorCount = 1,
16827ec681f3Smrg                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
16837ec681f3Smrg                                .pImageInfo =
16847ec681f3Smrg                                   (VkDescriptorImageInfo[]){
16857ec681f3Smrg                                      {
16867ec681f3Smrg                                         .sampler = VK_NULL_HANDLE,
16877ec681f3Smrg                                         .imageView = radv_image_view_to_handle(src),
16887ec681f3Smrg                                         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
16897ec681f3Smrg                                      },
16907ec681f3Smrg                                   }},
16917ec681f3Smrg                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
16927ec681f3Smrg                                .dstBinding = 1,
16937ec681f3Smrg                                .dstArrayElement = 0,
16947ec681f3Smrg                                .descriptorCount = 1,
16957ec681f3Smrg                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
16967ec681f3Smrg                                .pImageInfo = (VkDescriptorImageInfo[]){
16977ec681f3Smrg                                   {
16987ec681f3Smrg                                      .sampler = VK_NULL_HANDLE,
16997ec681f3Smrg                                      .imageView = radv_image_view_to_handle(dst),
17007ec681f3Smrg                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
17017ec681f3Smrg                                   },
17027ec681f3Smrg                                }}});
170301e04c3fSmrg}
170401e04c3fSmrg
170501e04c3fSmrgvoid
17067ec681f3Smrgradv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
17077ec681f3Smrg                            struct radv_meta_blit2d_surf *dst, unsigned num_rects,
17087ec681f3Smrg                            struct radv_meta_blit2d_rect *rects)
170901e04c3fSmrg{
17107ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
17117ec681f3Smrg   struct radv_image_view src_view, dst_view;
17127ec681f3Smrg   uint32_t samples = src->image->info.samples;
17137ec681f3Smrg   uint32_t samples_log2 = ffs(samples) - 1;
17147ec681f3Smrg
17157ec681f3Smrg   if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
17167ec681f3Smrg       src->format == VK_FORMAT_R32G32B32_SFLOAT) {
17177ec681f3Smrg      radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
17187ec681f3Smrg      return;
17197ec681f3Smrg   }
17207ec681f3Smrg
17217ec681f3Smrg   u_foreach_bit(i, dst->aspect_mask) {
17227ec681f3Smrg      unsigned aspect_mask = 1u << i;
17237ec681f3Smrg      VkFormat depth_format = 0;
17247ec681f3Smrg      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
17257ec681f3Smrg         depth_format = vk_format_stencil_only(dst->image->vk_format);
17267ec681f3Smrg      else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
17277ec681f3Smrg         depth_format = vk_format_depth_only(dst->image->vk_format);
17287ec681f3Smrg
17297ec681f3Smrg      create_iview(cmd_buffer, src, &src_view, depth_format, aspect_mask);
17307ec681f3Smrg      create_iview(cmd_buffer, dst, &dst_view, depth_format, aspect_mask);
17317ec681f3Smrg
17327ec681f3Smrg      itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
17337ec681f3Smrg
17347ec681f3Smrg      VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
17357ec681f3Smrg      if (device->physical_device->rad_info.chip_class >= GFX9 &&
17367ec681f3Smrg          (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
17377ec681f3Smrg         pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
17387ec681f3Smrg      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
17397ec681f3Smrg                           pipeline);
17407ec681f3Smrg
17417ec681f3Smrg      for (unsigned r = 0; r < num_rects; ++r) {
17427ec681f3Smrg         unsigned push_constants[6] = {
17437ec681f3Smrg            rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
17447ec681f3Smrg         };
17457ec681f3Smrg         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
17467ec681f3Smrg                               device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
17477ec681f3Smrg                               24, push_constants);
17487ec681f3Smrg
17497ec681f3Smrg         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
17507ec681f3Smrg      }
17517ec681f3Smrg
17527ec681f3Smrg      radv_image_view_finish(&src_view);
17537ec681f3Smrg      radv_image_view_finish(&dst_view);
17547ec681f3Smrg   }
175501e04c3fSmrg}
175601e04c3fSmrg
175701e04c3fSmrgstatic void
17587ec681f3Smrgcleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
175901e04c3fSmrg{
17607ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
17617ec681f3Smrg
17627ec681f3Smrg   radv_meta_push_descriptor_set(
17637ec681f3Smrg      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
17647ec681f3Smrg      0, /* set */
17657ec681f3Smrg      1, /* descriptorWriteCount */
17667ec681f3Smrg      (VkWriteDescriptorSet[]){{
17677ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
17687ec681f3Smrg         .dstBinding = 0,
17697ec681f3Smrg         .dstArrayElement = 0,
17707ec681f3Smrg         .descriptorCount = 1,
17717ec681f3Smrg         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
17727ec681f3Smrg         .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
17737ec681f3Smrg      }});
177401e04c3fSmrg}
177501e04c3fSmrg
177601e04c3fSmrgstatic void
177701e04c3fSmrgradv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
17787ec681f3Smrg                                   struct radv_meta_blit2d_surf *dst,
17797ec681f3Smrg                                   const VkClearColorValue *clear_color)
178001e04c3fSmrg{
17817ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
17827ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
17837ec681f3Smrg   struct radv_buffer_view dst_view;
17847ec681f3Smrg   unsigned stride;
17857ec681f3Smrg   VkBuffer buffer;
17867ec681f3Smrg
17877ec681f3Smrg   /* This special clear path for R32G32B32 formats will write the linear
17887ec681f3Smrg    * image as a buffer with the same underlying memory. The compute
17897ec681f3Smrg    * shader will clear all components separately using a R32 format.
17907ec681f3Smrg    */
17917ec681f3Smrg   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
17927ec681f3Smrg
17937ec681f3Smrg   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
17947ec681f3Smrg                              &dst_view);
17957ec681f3Smrg   cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
17967ec681f3Smrg
17977ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
17987ec681f3Smrg                        pipeline);
17997ec681f3Smrg
18007ec681f3Smrg   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
18017ec681f3Smrg
18027ec681f3Smrg   unsigned push_constants[4] = {
18037ec681f3Smrg      clear_color->uint32[0],
18047ec681f3Smrg      clear_color->uint32[1],
18057ec681f3Smrg      clear_color->uint32[2],
18067ec681f3Smrg      stride,
18077ec681f3Smrg   };
18087ec681f3Smrg
18097ec681f3Smrg   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
18107ec681f3Smrg                         device->meta_state.cleari_r32g32b32.img_p_layout,
18117ec681f3Smrg                         VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
18127ec681f3Smrg
18137ec681f3Smrg   radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
18147ec681f3Smrg
18157ec681f3Smrg   radv_buffer_view_finish(&dst_view);
18167ec681f3Smrg   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
181701e04c3fSmrg}
181801e04c3fSmrg
181901e04c3fSmrgstatic void
18207ec681f3Smrgcleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
182101e04c3fSmrg{
18227ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
18237ec681f3Smrg
18247ec681f3Smrg   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
18257ec681f3Smrg                                 device->meta_state.cleari.img_p_layout, 0, /* set */
18267ec681f3Smrg                                 1, /* descriptorWriteCount */
18277ec681f3Smrg                                 (VkWriteDescriptorSet[]){
18287ec681f3Smrg                                    {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
18297ec681f3Smrg                                     .dstBinding = 0,
18307ec681f3Smrg                                     .dstArrayElement = 0,
18317ec681f3Smrg                                     .descriptorCount = 1,
18327ec681f3Smrg                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
18337ec681f3Smrg                                     .pImageInfo =
18347ec681f3Smrg                                        (VkDescriptorImageInfo[]){
18357ec681f3Smrg                                           {
18367ec681f3Smrg                                              .sampler = VK_NULL_HANDLE,
18377ec681f3Smrg                                              .imageView = radv_image_view_to_handle(dst_iview),
18387ec681f3Smrg                                              .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
18397ec681f3Smrg                                           },
18407ec681f3Smrg                                        }},
18417ec681f3Smrg                                 });
184201e04c3fSmrg}
184301e04c3fSmrg
184401e04c3fSmrgvoid
18457ec681f3Smrgradv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
18467ec681f3Smrg                         const VkClearColorValue *clear_color)
184701e04c3fSmrg{
18487ec681f3Smrg   struct radv_device *device = cmd_buffer->device;
18497ec681f3Smrg   struct radv_image_view dst_iview;
18507ec681f3Smrg   uint32_t samples = dst->image->info.samples;
18517ec681f3Smrg   uint32_t samples_log2 = ffs(samples) - 1;
18527ec681f3Smrg
18537ec681f3Smrg   if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
18547ec681f3Smrg       dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
18557ec681f3Smrg      radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
18567ec681f3Smrg      return;
18577ec681f3Smrg   }
18587ec681f3Smrg
18597ec681f3Smrg   create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
18607ec681f3Smrg   cleari_bind_descriptors(cmd_buffer, &dst_iview);
18617ec681f3Smrg
18627ec681f3Smrg   VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
18637ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
18647ec681f3Smrg      pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
18657ec681f3Smrg
18667ec681f3Smrg   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
18677ec681f3Smrg                        pipeline);
18687ec681f3Smrg
18697ec681f3Smrg   unsigned push_constants[5] = {
18707ec681f3Smrg      clear_color->uint32[0],
18717ec681f3Smrg      clear_color->uint32[1],
18727ec681f3Smrg      clear_color->uint32[2],
18737ec681f3Smrg      clear_color->uint32[3],
18747ec681f3Smrg      dst->layer,
18757ec681f3Smrg   };
18767ec681f3Smrg
18777ec681f3Smrg   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
18787ec681f3Smrg                         device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
18797ec681f3Smrg                         push_constants);
18807ec681f3Smrg
18817ec681f3Smrg   radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
18827ec681f3Smrg
18837ec681f3Smrg   radv_image_view_finish(&dst_iview);
188401e04c3fSmrg}
1885