17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2019 Raspberry Pi
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "v3dv_private.h"
257ec681f3Smrg#include "v3dv_meta_common.h"
267ec681f3Smrg
277ec681f3Smrg#include "compiler/nir/nir_builder.h"
287ec681f3Smrg#include "vk_format_info.h"
297ec681f3Smrg#include "util/u_pack_color.h"
307ec681f3Smrg#include "vulkan/util/vk_common_entrypoints.h"
317ec681f3Smrg
327ec681f3Smrgstatic uint32_t
337ec681f3Smrgmeta_blit_key_hash(const void *key)
347ec681f3Smrg{
357ec681f3Smrg   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
367ec681f3Smrg}
377ec681f3Smrg
387ec681f3Smrgstatic bool
397ec681f3Smrgmeta_blit_key_compare(const void *key1, const void *key2)
407ec681f3Smrg{
417ec681f3Smrg   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
427ec681f3Smrg}
437ec681f3Smrg
447ec681f3Smrgstatic bool
457ec681f3Smrgcreate_blit_pipeline_layout(struct v3dv_device *device,
467ec681f3Smrg                            VkDescriptorSetLayout *descriptor_set_layout,
477ec681f3Smrg                            VkPipelineLayout *pipeline_layout)
487ec681f3Smrg{
497ec681f3Smrg   VkResult result;
507ec681f3Smrg
517ec681f3Smrg   if (*descriptor_set_layout == 0) {
527ec681f3Smrg      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
537ec681f3Smrg         .binding = 0,
547ec681f3Smrg         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
557ec681f3Smrg         .descriptorCount = 1,
567ec681f3Smrg         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
577ec681f3Smrg      };
587ec681f3Smrg      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
597ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
607ec681f3Smrg         .bindingCount = 1,
617ec681f3Smrg         .pBindings = &descriptor_set_layout_binding,
627ec681f3Smrg      };
637ec681f3Smrg      result =
647ec681f3Smrg         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
657ec681f3Smrg                                        &descriptor_set_layout_info,
667ec681f3Smrg                                        &device->vk.alloc,
677ec681f3Smrg                                        descriptor_set_layout);
687ec681f3Smrg      if (result != VK_SUCCESS)
697ec681f3Smrg         return false;
707ec681f3Smrg   }
717ec681f3Smrg
727ec681f3Smrg   assert(*pipeline_layout == 0);
737ec681f3Smrg   VkPipelineLayoutCreateInfo pipeline_layout_info = {
747ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
757ec681f3Smrg      .setLayoutCount = 1,
767ec681f3Smrg      .pSetLayouts = descriptor_set_layout,
777ec681f3Smrg      .pushConstantRangeCount = 1,
787ec681f3Smrg      .pPushConstantRanges =
797ec681f3Smrg         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
807ec681f3Smrg   };
817ec681f3Smrg
827ec681f3Smrg   result =
837ec681f3Smrg      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
847ec681f3Smrg                                &pipeline_layout_info,
857ec681f3Smrg                                &device->vk.alloc,
867ec681f3Smrg                                pipeline_layout);
877ec681f3Smrg   return result == VK_SUCCESS;
887ec681f3Smrg}
897ec681f3Smrg
907ec681f3Smrgvoid
917ec681f3Smrgv3dv_meta_blit_init(struct v3dv_device *device)
927ec681f3Smrg{
937ec681f3Smrg   for (uint32_t i = 0; i < 3; i++) {
947ec681f3Smrg      device->meta.blit.cache[i] =
957ec681f3Smrg         _mesa_hash_table_create(NULL,
967ec681f3Smrg                                 meta_blit_key_hash,
977ec681f3Smrg                                 meta_blit_key_compare);
987ec681f3Smrg   }
997ec681f3Smrg
1007ec681f3Smrg   create_blit_pipeline_layout(device,
1017ec681f3Smrg                               &device->meta.blit.ds_layout,
1027ec681f3Smrg                               &device->meta.blit.p_layout);
1037ec681f3Smrg}
1047ec681f3Smrg
void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   /* Destroy every cached blit pipeline together with the two render passes
    * it owns, then the cache tables themselves.
    */
   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   /* The layout handles may be 0 if their creation failed during init, so
    * guard the destroy calls.
    */
   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}
1317ec681f3Smrg
1327ec681f3Smrgstatic uint32_t
1337ec681f3Smrgmeta_texel_buffer_copy_key_hash(const void *key)
1347ec681f3Smrg{
1357ec681f3Smrg   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1367ec681f3Smrg}
1377ec681f3Smrg
1387ec681f3Smrgstatic bool
1397ec681f3Smrgmeta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
1407ec681f3Smrg{
1417ec681f3Smrg   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
1427ec681f3Smrg}
1437ec681f3Smrg
1447ec681f3Smrgstatic bool
1457ec681f3Smrgcreate_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
1467ec681f3Smrg                                         VkDescriptorSetLayout *ds_layout,
1477ec681f3Smrg                                         VkPipelineLayout *p_layout)
1487ec681f3Smrg{
1497ec681f3Smrg   VkResult result;
1507ec681f3Smrg
1517ec681f3Smrg   if (*ds_layout == 0) {
1527ec681f3Smrg      VkDescriptorSetLayoutBinding ds_layout_binding = {
1537ec681f3Smrg         .binding = 0,
1547ec681f3Smrg         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1557ec681f3Smrg         .descriptorCount = 1,
1567ec681f3Smrg         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
1577ec681f3Smrg      };
1587ec681f3Smrg      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1597ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1607ec681f3Smrg         .bindingCount = 1,
1617ec681f3Smrg         .pBindings = &ds_layout_binding,
1627ec681f3Smrg      };
1637ec681f3Smrg      result =
1647ec681f3Smrg         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
1657ec681f3Smrg                                        &ds_layout_info,
1667ec681f3Smrg                                        &device->vk.alloc,
1677ec681f3Smrg                                        ds_layout);
1687ec681f3Smrg      if (result != VK_SUCCESS)
1697ec681f3Smrg         return false;
1707ec681f3Smrg   }
1717ec681f3Smrg
1727ec681f3Smrg   assert(*p_layout == 0);
1737ec681f3Smrg   /* FIXME: this is abusing a bit the API, since not all of our copy
1747ec681f3Smrg    * pipelines have a geometry shader. We could create 2 different pipeline
1757ec681f3Smrg    * layouts, but this works for us for now.
1767ec681f3Smrg    */
1777ec681f3Smrg#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET      0
1787ec681f3Smrg#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET  16
1797ec681f3Smrg#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET  20
1807ec681f3Smrg#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET   24
1817ec681f3Smrg   VkPushConstantRange ranges[2] = {
1827ec681f3Smrg      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
1837ec681f3Smrg      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
1847ec681f3Smrg   };
1857ec681f3Smrg
1867ec681f3Smrg   VkPipelineLayoutCreateInfo p_layout_info = {
1877ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1887ec681f3Smrg      .setLayoutCount = 1,
1897ec681f3Smrg      .pSetLayouts = ds_layout,
1907ec681f3Smrg      .pushConstantRangeCount = 2,
1917ec681f3Smrg      .pPushConstantRanges = ranges,
1927ec681f3Smrg   };
1937ec681f3Smrg
1947ec681f3Smrg   result =
1957ec681f3Smrg      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
1967ec681f3Smrg                                &p_layout_info,
1977ec681f3Smrg                                &device->vk.alloc,
1987ec681f3Smrg                                p_layout);
1997ec681f3Smrg   return result == VK_SUCCESS;
2007ec681f3Smrg}
2017ec681f3Smrg
2027ec681f3Smrgvoid
2037ec681f3Smrgv3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
2047ec681f3Smrg{
2057ec681f3Smrg   for (uint32_t i = 0; i < 3; i++) {
2067ec681f3Smrg      device->meta.texel_buffer_copy.cache[i] =
2077ec681f3Smrg         _mesa_hash_table_create(NULL,
2087ec681f3Smrg                                 meta_texel_buffer_copy_key_hash,
2097ec681f3Smrg                                 meta_texel_buffer_copy_key_compare);
2107ec681f3Smrg   }
2117ec681f3Smrg
2127ec681f3Smrg   create_texel_buffer_copy_pipeline_layout(
2137ec681f3Smrg      device,
2147ec681f3Smrg      &device->meta.texel_buffer_copy.ds_layout,
2157ec681f3Smrg      &device->meta.texel_buffer_copy.p_layout);
2167ec681f3Smrg}
2177ec681f3Smrg
void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   /* Destroy every cached texel buffer copy pipeline together with the two
    * render passes it owns, then the cache tables themselves.
    */
   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   /* The layout handles may be 0 if their creation failed during init, so
    * guard the destroy calls.
    */
   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}
2447ec681f3Smrg
/**
 * Maps a format the TLB cannot render to onto a renderable format with the
 * same bytes-per-pixel, so copies can move the raw bits unmodified.
 * Returns VK_FORMAT_UNDEFINED when no compatible substitute exists.
 */
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   /* SNORM formats map to UINT formats of the same width: the copy then
    * transfers raw bits with no normalization applied.
    */
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   /* Shared-exponent packed 32-bit format copied as raw 32-bit data. */
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
    */
   /* 128-bit-per-block compressed formats. */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   /* 64-bit-per-block compressed formats. */
   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}
3377ec681f3Smrg
3387ec681f3Smrg/**
3397ec681f3Smrg * Checks if we can implement an image copy or clear operation using the TLB
3407ec681f3Smrg * hardware.
3417ec681f3Smrg */
3427ec681f3Smrgbool
3437ec681f3Smrgv3dv_meta_can_use_tlb(struct v3dv_image *image,
3447ec681f3Smrg                      const VkOffset3D *offset,
3457ec681f3Smrg                      VkFormat *compat_format)
3467ec681f3Smrg{
3477ec681f3Smrg   if (offset->x != 0 || offset->y != 0)
3487ec681f3Smrg      return false;
3497ec681f3Smrg
3507ec681f3Smrg   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
3517ec681f3Smrg      if (compat_format)
3527ec681f3Smrg         *compat_format = image->vk.format;
3537ec681f3Smrg      return true;
3547ec681f3Smrg   }
3557ec681f3Smrg
3567ec681f3Smrg   /* If the image format is not TLB-supported, then check if we can use
3577ec681f3Smrg    * a compatible format instead.
3587ec681f3Smrg    */
3597ec681f3Smrg   if (compat_format) {
3607ec681f3Smrg      *compat_format = get_compatible_tlb_format(image->vk.format);
3617ec681f3Smrg      if (*compat_format != VK_FORMAT_UNDEFINED)
3627ec681f3Smrg         return true;
3637ec681f3Smrg   }
3647ec681f3Smrg
3657ec681f3Smrg   return false;
3667ec681f3Smrg}
3677ec681f3Smrg
/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 *
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2KHR *region)
{
   /* Bail if the TLB path cannot handle this image/offset; on success
    * fb_format is the (possibly substituted) format we render with.
    */
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* For 3D images the copied depth acts as the layer count. */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true; /* Supported, but failed to start the job (likely OOM). */

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   /* Emit the binning flush followed by the RCL that stores the image
    * tiles into the destination buffer.
    */
   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
4267ec681f3Smrg
4277ec681f3Smrgstatic bool
4287ec681f3Smrgblit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4297ec681f3Smrg            struct v3dv_image *dst,
4307ec681f3Smrg            VkFormat dst_format,
4317ec681f3Smrg            struct v3dv_image *src,
4327ec681f3Smrg            VkFormat src_format,
4337ec681f3Smrg            VkColorComponentFlags cmask,
4347ec681f3Smrg            VkComponentMapping *cswizzle,
4357ec681f3Smrg            const VkImageBlit2KHR *region,
4367ec681f3Smrg            VkFilter filter,
4377ec681f3Smrg            bool dst_is_padded_image);
4387ec681f3Smrg
4397ec681f3Smrg/**
4407ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
4417ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
4427ec681f3Smrg */
4437ec681f3Smrgstatic bool
4447ec681f3Smrgcopy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
4457ec681f3Smrg                          struct v3dv_buffer *buffer,
4467ec681f3Smrg                          struct v3dv_image *image,
4477ec681f3Smrg                          const VkBufferImageCopy2KHR *region)
4487ec681f3Smrg{
4497ec681f3Smrg   bool handled = false;
4507ec681f3Smrg
4517ec681f3Smrg   /* Generally, the bpp of the data in the buffer matches that of the
4527ec681f3Smrg    * source image. The exception is the case where we are copying
4537ec681f3Smrg    * stencil (8bpp) to a combined d24s8 image (32bpp).
4547ec681f3Smrg    */
4557ec681f3Smrg   uint32_t buffer_bpp = image->cpp;
4567ec681f3Smrg
4577ec681f3Smrg   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
4587ec681f3Smrg
4597ec681f3Smrg   /* Because we are going to implement the copy as a blit, we need to create
4607ec681f3Smrg    * a linear image from the destination buffer and we also want our blit
4617ec681f3Smrg    * source and destination formats to be the same (to avoid any format
4627ec681f3Smrg    * conversions), so we choose a canonical format that matches the
4637ec681f3Smrg    * source image bpp.
4647ec681f3Smrg    *
4657ec681f3Smrg    * The exception to the above is copying from combined depth/stencil images
4667ec681f3Smrg    * because we are copying only one aspect of the image, so we need to setup
4677ec681f3Smrg    * our formats, color write mask and source swizzle mask to match that.
4687ec681f3Smrg    */
4697ec681f3Smrg   VkFormat dst_format;
4707ec681f3Smrg   VkFormat src_format;
4717ec681f3Smrg   VkColorComponentFlags cmask = 0; /* All components */
4727ec681f3Smrg   VkComponentMapping cswizzle = {
4737ec681f3Smrg      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4747ec681f3Smrg      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4757ec681f3Smrg      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4767ec681f3Smrg      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4777ec681f3Smrg   };
4787ec681f3Smrg   switch (buffer_bpp) {
4797ec681f3Smrg   case 16:
4807ec681f3Smrg      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
4817ec681f3Smrg      dst_format = VK_FORMAT_R32G32B32A32_UINT;
4827ec681f3Smrg      src_format = dst_format;
4837ec681f3Smrg      break;
4847ec681f3Smrg   case 8:
4857ec681f3Smrg      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
4867ec681f3Smrg      dst_format = VK_FORMAT_R16G16B16A16_UINT;
4877ec681f3Smrg      src_format = dst_format;
4887ec681f3Smrg      break;
4897ec681f3Smrg   case 4:
4907ec681f3Smrg      switch (copy_aspect) {
4917ec681f3Smrg      case VK_IMAGE_ASPECT_COLOR_BIT:
4927ec681f3Smrg         src_format = VK_FORMAT_R8G8B8A8_UINT;
4937ec681f3Smrg         dst_format = VK_FORMAT_R8G8B8A8_UINT;
4947ec681f3Smrg         break;
4957ec681f3Smrg      case VK_IMAGE_ASPECT_DEPTH_BIT:
4967ec681f3Smrg         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
4977ec681f3Smrg                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
4987ec681f3Smrg                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
4997ec681f3Smrg         if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
5007ec681f3Smrg            src_format = VK_FORMAT_R32_UINT;
5017ec681f3Smrg            dst_format = VK_FORMAT_R32_UINT;
5027ec681f3Smrg         } else {
5037ec681f3Smrg            /* We want to write depth in the buffer in the first 24-bits,
5047ec681f3Smrg             * however, the hardware has depth in bits 8-31, so swizzle the
5057ec681f3Smrg             * the source components to match what we want. Also, we don't
5067ec681f3Smrg             * want to write bits 24-31 in the destination.
5077ec681f3Smrg             */
5087ec681f3Smrg            src_format = VK_FORMAT_R8G8B8A8_UINT;
5097ec681f3Smrg            dst_format = VK_FORMAT_R8G8B8A8_UINT;
5107ec681f3Smrg            cmask = VK_COLOR_COMPONENT_R_BIT |
5117ec681f3Smrg                    VK_COLOR_COMPONENT_G_BIT |
5127ec681f3Smrg                    VK_COLOR_COMPONENT_B_BIT;
5137ec681f3Smrg            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
5147ec681f3Smrg            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
5157ec681f3Smrg            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
5167ec681f3Smrg            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
5177ec681f3Smrg         }
5187ec681f3Smrg         break;
5197ec681f3Smrg      case VK_IMAGE_ASPECT_STENCIL_BIT:
5207ec681f3Smrg         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
5217ec681f3Smrg         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
5227ec681f3Smrg         /* Copying from S8D24. We want to write 8-bit stencil values only,
5237ec681f3Smrg          * so adjust the buffer bpp for that. Since the hardware stores stencil
5247ec681f3Smrg          * in the LSB, we can just do a RGBA8UI to R8UI blit.
5257ec681f3Smrg          */
5267ec681f3Smrg         src_format = VK_FORMAT_R8G8B8A8_UINT;
5277ec681f3Smrg         dst_format = VK_FORMAT_R8_UINT;
5287ec681f3Smrg         buffer_bpp = 1;
5297ec681f3Smrg         break;
5307ec681f3Smrg      default:
5317ec681f3Smrg         unreachable("unsupported aspect");
5327ec681f3Smrg         return handled;
5337ec681f3Smrg      };
5347ec681f3Smrg      break;
5357ec681f3Smrg   case 2:
5367ec681f3Smrg      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
5377ec681f3Smrg             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
5387ec681f3Smrg      dst_format = VK_FORMAT_R16_UINT;
5397ec681f3Smrg      src_format = dst_format;
5407ec681f3Smrg      break;
5417ec681f3Smrg   case 1:
5427ec681f3Smrg      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
5437ec681f3Smrg      dst_format = VK_FORMAT_R8_UINT;
5447ec681f3Smrg      src_format = dst_format;
5457ec681f3Smrg      break;
5467ec681f3Smrg   default:
5477ec681f3Smrg      unreachable("unsupported bit-size");
5487ec681f3Smrg      return handled;
5497ec681f3Smrg   };
5507ec681f3Smrg
5517ec681f3Smrg   /* The hardware doesn't support linear depth/stencil stores, so we
5527ec681f3Smrg    * implement copies of depth/stencil aspect as color copies using a
5537ec681f3Smrg    * compatible color format.
5547ec681f3Smrg    */
5557ec681f3Smrg   assert(vk_format_is_color(src_format));
5567ec681f3Smrg   assert(vk_format_is_color(dst_format));
5577ec681f3Smrg   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
5587ec681f3Smrg
5597ec681f3Smrg   /* We should be able to handle the blit if we got this far */
5607ec681f3Smrg   handled = true;
5617ec681f3Smrg
5627ec681f3Smrg   /* Obtain the 2D buffer region spec */
5637ec681f3Smrg   uint32_t buf_width, buf_height;
5647ec681f3Smrg   if (region->bufferRowLength == 0)
5657ec681f3Smrg      buf_width = region->imageExtent.width;
5667ec681f3Smrg   else
5677ec681f3Smrg      buf_width = region->bufferRowLength;
5687ec681f3Smrg
5697ec681f3Smrg   if (region->bufferImageHeight == 0)
5707ec681f3Smrg      buf_height = region->imageExtent.height;
5717ec681f3Smrg   else
5727ec681f3Smrg      buf_height = region->bufferImageHeight;
5737ec681f3Smrg
5747ec681f3Smrg   /* If the image is compressed, the bpp refers to blocks, not pixels */
5757ec681f3Smrg   uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
5767ec681f3Smrg   uint32_t block_height = vk_format_get_blockheight(image->vk.format);
5777ec681f3Smrg   buf_width = buf_width / block_width;
5787ec681f3Smrg   buf_height = buf_height / block_height;
5797ec681f3Smrg
5807ec681f3Smrg   /* Compute layers to copy */
5817ec681f3Smrg   uint32_t num_layers;
5827ec681f3Smrg   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
5837ec681f3Smrg      num_layers = region->imageSubresource.layerCount;
5847ec681f3Smrg   else
5857ec681f3Smrg      num_layers = region->imageExtent.depth;
5867ec681f3Smrg   assert(num_layers > 0);
5877ec681f3Smrg
5887ec681f3Smrg   /* Our blit interface can see the real format of the images to detect
5897ec681f3Smrg    * copies between compressed and uncompressed images and adapt the
5907ec681f3Smrg    * blit region accordingly. Here we are just doing a raw copy of
5917ec681f3Smrg    * compressed data, but we are passing an uncompressed view of the
5927ec681f3Smrg    * buffer for the blit destination image (since compressed formats are
5937ec681f3Smrg    * not renderable), so we also want to provide an uncompressed view of
5947ec681f3Smrg    * the source image.
5957ec681f3Smrg    */
5967ec681f3Smrg   VkResult result;
5977ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
5987ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
5997ec681f3Smrg   if (vk_format_is_compressed(image->vk.format)) {
6007ec681f3Smrg      VkImage uiview;
6017ec681f3Smrg      VkImageCreateInfo uiview_info = {
6027ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
6037ec681f3Smrg         .imageType = VK_IMAGE_TYPE_3D,
6047ec681f3Smrg         .format = dst_format,
6057ec681f3Smrg         .extent = { buf_width, buf_height, image->vk.extent.depth },
6067ec681f3Smrg         .mipLevels = image->vk.mip_levels,
6077ec681f3Smrg         .arrayLayers = image->vk.array_layers,
6087ec681f3Smrg         .samples = image->vk.samples,
6097ec681f3Smrg         .tiling = image->vk.tiling,
6107ec681f3Smrg         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
6117ec681f3Smrg         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
6127ec681f3Smrg         .queueFamilyIndexCount = 0,
6137ec681f3Smrg         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
6147ec681f3Smrg      };
6157ec681f3Smrg      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
6167ec681f3Smrg      if (result != VK_SUCCESS)
6177ec681f3Smrg         return handled;
6187ec681f3Smrg
6197ec681f3Smrg      v3dv_cmd_buffer_add_private_obj(
6207ec681f3Smrg         cmd_buffer, (uintptr_t)uiview,
6217ec681f3Smrg         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
6227ec681f3Smrg
6237ec681f3Smrg      result =
6247ec681f3Smrg         vk_common_BindImageMemory(_device, uiview,
6257ec681f3Smrg                                   v3dv_device_memory_to_handle(image->mem),
6267ec681f3Smrg                                   image->mem_offset);
6277ec681f3Smrg      if (result != VK_SUCCESS)
6287ec681f3Smrg         return handled;
6297ec681f3Smrg
6307ec681f3Smrg      image = v3dv_image_from_handle(uiview);
6317ec681f3Smrg   }
6327ec681f3Smrg
6337ec681f3Smrg   /* Copy requested layers */
6347ec681f3Smrg   for (uint32_t i = 0; i < num_layers; i++) {
6357ec681f3Smrg      /* Create the destination blit image from the destination buffer */
6367ec681f3Smrg      VkImageCreateInfo image_info = {
6377ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
6387ec681f3Smrg         .imageType = VK_IMAGE_TYPE_2D,
6397ec681f3Smrg         .format = dst_format,
6407ec681f3Smrg         .extent = { buf_width, buf_height, 1 },
6417ec681f3Smrg         .mipLevels = 1,
6427ec681f3Smrg         .arrayLayers = 1,
6437ec681f3Smrg         .samples = VK_SAMPLE_COUNT_1_BIT,
6447ec681f3Smrg         .tiling = VK_IMAGE_TILING_LINEAR,
6457ec681f3Smrg         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
6467ec681f3Smrg         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
6477ec681f3Smrg         .queueFamilyIndexCount = 0,
6487ec681f3Smrg         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
6497ec681f3Smrg      };
6507ec681f3Smrg
6517ec681f3Smrg      VkImage buffer_image;
6527ec681f3Smrg      result =
6537ec681f3Smrg         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
6547ec681f3Smrg      if (result != VK_SUCCESS)
6557ec681f3Smrg         return handled;
6567ec681f3Smrg
6577ec681f3Smrg      v3dv_cmd_buffer_add_private_obj(
6587ec681f3Smrg         cmd_buffer, (uintptr_t)buffer_image,
6597ec681f3Smrg         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
6607ec681f3Smrg
6617ec681f3Smrg      /* Bind the buffer memory to the image */
6627ec681f3Smrg      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
6637ec681f3Smrg         i * buf_width * buf_height * buffer_bpp;
6647ec681f3Smrg      result =
6657ec681f3Smrg         vk_common_BindImageMemory(_device, buffer_image,
6667ec681f3Smrg                                   v3dv_device_memory_to_handle(buffer->mem),
6677ec681f3Smrg                                   buffer_offset);
6687ec681f3Smrg      if (result != VK_SUCCESS)
6697ec681f3Smrg         return handled;
6707ec681f3Smrg
6717ec681f3Smrg      /* Blit-copy the requested image extent.
6727ec681f3Smrg       *
6737ec681f3Smrg       * Since we are copying, the blit must use the same format on the
6747ec681f3Smrg       * destination and source images to avoid format conversions. The
6757ec681f3Smrg       * only exception is copying stencil, which we upload to a R8UI source
6767ec681f3Smrg       * image, but that we need to blit to a S8D24 destination (the only
6777ec681f3Smrg       * stencil format we support).
6787ec681f3Smrg       */
6797ec681f3Smrg      const VkImageBlit2KHR blit_region = {
6807ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
6817ec681f3Smrg         .srcSubresource = {
6827ec681f3Smrg            .aspectMask = copy_aspect,
6837ec681f3Smrg            .mipLevel = region->imageSubresource.mipLevel,
6847ec681f3Smrg            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
6857ec681f3Smrg            .layerCount = 1,
6867ec681f3Smrg         },
6877ec681f3Smrg         .srcOffsets = {
6887ec681f3Smrg            {
6897ec681f3Smrg               DIV_ROUND_UP(region->imageOffset.x, block_width),
6907ec681f3Smrg               DIV_ROUND_UP(region->imageOffset.y, block_height),
6917ec681f3Smrg               region->imageOffset.z + i,
6927ec681f3Smrg            },
6937ec681f3Smrg            {
6947ec681f3Smrg               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
6957ec681f3Smrg                            block_width),
6967ec681f3Smrg               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
6977ec681f3Smrg                            block_height),
6987ec681f3Smrg               region->imageOffset.z + i + 1,
6997ec681f3Smrg            },
7007ec681f3Smrg         },
7017ec681f3Smrg         .dstSubresource = {
7027ec681f3Smrg            .aspectMask = copy_aspect,
7037ec681f3Smrg            .mipLevel = 0,
7047ec681f3Smrg            .baseArrayLayer = 0,
7057ec681f3Smrg            .layerCount = 1,
7067ec681f3Smrg         },
7077ec681f3Smrg         .dstOffsets = {
7087ec681f3Smrg            { 0, 0, 0 },
7097ec681f3Smrg            {
7107ec681f3Smrg               DIV_ROUND_UP(region->imageExtent.width, block_width),
7117ec681f3Smrg               DIV_ROUND_UP(region->imageExtent.height, block_height),
7127ec681f3Smrg               1
7137ec681f3Smrg            },
7147ec681f3Smrg         },
7157ec681f3Smrg      };
7167ec681f3Smrg
7177ec681f3Smrg      handled = blit_shader(cmd_buffer,
7187ec681f3Smrg                            v3dv_image_from_handle(buffer_image), dst_format,
7197ec681f3Smrg                            image, src_format,
7207ec681f3Smrg                            cmask, &cswizzle,
7217ec681f3Smrg                            &blit_region, VK_FILTER_NEAREST, false);
7227ec681f3Smrg      if (!handled) {
7237ec681f3Smrg         /* This is unexpected, we should have a supported blit spec */
7247ec681f3Smrg         unreachable("Unable to blit buffer to destination image");
7257ec681f3Smrg         return false;
7267ec681f3Smrg      }
7277ec681f3Smrg   }
7287ec681f3Smrg
7297ec681f3Smrg   assert(handled);
7307ec681f3Smrg   return true;
7317ec681f3Smrg}
7327ec681f3Smrg
7337ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
7347ec681f3Smrgv3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
7357ec681f3Smrg                              const VkCopyImageToBufferInfo2KHR *info)
7367ec681f3Smrg
7377ec681f3Smrg{
7387ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
7397ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
7407ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
7417ec681f3Smrg
7427ec681f3Smrg   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
7437ec681f3Smrg
7447ec681f3Smrg   for (uint32_t i = 0; i < info->regionCount; i++) {
7457ec681f3Smrg      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
7467ec681f3Smrg         continue;
7477ec681f3Smrg      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
7487ec681f3Smrg         continue;
7497ec681f3Smrg      unreachable("Unsupported image to buffer copy.");
7507ec681f3Smrg   }
7517ec681f3Smrg}
7527ec681f3Smrg
/**
 * Copies an image using the TFU hardware unit, emitting one TFU job per
 * layer/slice to copy.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 * Returns false when the copy does not meet the TFU path's constraints and
 * a different path must be used.
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Destination can't be raster format */
   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
       const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                             VK_IMAGE_ASPECT_STENCIL_BIT;
       if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that make such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk.format) !=
       vk_format_is_compressed(src->vk.format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   /* The copied region must cover the full destination mip level */
   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
    */
   const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
   assert(dst->vk.samples == src->vk.samples);
   if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
      /* NOTE(review): doubling each dimension assumes 4x MSAA data is laid
       * out as a 2x2 sample grid per pixel — confirm against the HW layout.
       */
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->cpp == src->cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.layerCount :
      region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   /* 3D images address slices through the offset z coordinate; array
    * images address layers through the subresource base layer.
    */
   const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
         (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
          src, src_mip_level, base_src_layer + i,
          width, height, format);
   }

   return true;
}
8617ec681f3Smrg
/**
 * Copies an image by emitting a CL job that moves the data through the TLB.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 * Returns false when either image cannot be accessed through the TLB path.
 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Both images must be TLB-compatible; fb_format is the framebuffer
    * format both copies agree on.
    */
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
      return false;
   }

   /* From the Vulkan spec, VkImageCopy valid usage:
    *
    *    "If neither the calling command’s srcImage nor the calling command’s
    *     dstImage has a multi-planar image format then the aspectMask member
    *     of srcSubresource and dstSubresource must match."
    */
   assert(region->dstSubresource.aspectMask ==
          region->srcSubresource.aspectMask);
   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->dstSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
    *
    * "The number of slices of the extent (for 3D) or layers of the
    *  srcSubresource (for non-3D) must match the number of slices of the
    *  extent (for 3D) or layers of the dstSubresource (for non-3D)."
    */
   assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->srcSubresource.layerCount : region->extent.depth) ==
          (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->dstSubresource.layerCount : region->extent.depth));
   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   /* No job: still report the operation as handled per this function's
    * contract. NOTE(review): presumably v3dv_cmd_buffer_start_job flags the
    * OOM condition on the command buffer — confirm.
    */
   if (!job)
      return true;

   /* Handle copy to compressed image using compatible format: frame
    * dimensions are expressed in destination block units.
    */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
                        src->vk.samples > VK_SAMPLE_COUNT_1_BIT);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
9337ec681f3Smrg
9347ec681f3Smrg/**
9357ec681f3Smrg * Takes the image provided as argument and creates a new image that has
9367ec681f3Smrg * the same specification and aliases the same memory storage, except that:
9377ec681f3Smrg *
9387ec681f3Smrg *   - It has the uncompressed format passed in.
9397ec681f3Smrg *   - Its original width/height are scaled by the factors passed in.
9407ec681f3Smrg *
9417ec681f3Smrg * This is useful to implement copies from compressed images using the blit
9427ec681f3Smrg * path. The idea is that we create uncompressed "image views" of both the
9437ec681f3Smrg * source and destination images using the uncompressed format and then we
9447ec681f3Smrg * define the copy blit in terms of that format.
9457ec681f3Smrg */
9467ec681f3Smrgstatic struct v3dv_image *
9477ec681f3Smrgcreate_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
9487ec681f3Smrg                   struct v3dv_image *src,
9497ec681f3Smrg                   float width_scale,
9507ec681f3Smrg                   float height_scale,
9517ec681f3Smrg                   VkFormat format)
9527ec681f3Smrg{
9537ec681f3Smrg   assert(!vk_format_is_compressed(format));
9547ec681f3Smrg
9557ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
9567ec681f3Smrg
9577ec681f3Smrg   VkImageCreateInfo info = {
9587ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
9597ec681f3Smrg      .imageType = src->vk.image_type,
9607ec681f3Smrg      .format = format,
9617ec681f3Smrg      .extent = {
9627ec681f3Smrg         .width = src->vk.extent.width * width_scale,
9637ec681f3Smrg         .height = src->vk.extent.height * height_scale,
9647ec681f3Smrg         .depth = src->vk.extent.depth,
9657ec681f3Smrg      },
9667ec681f3Smrg      .mipLevels = src->vk.mip_levels,
9677ec681f3Smrg      .arrayLayers = src->vk.array_layers,
9687ec681f3Smrg      .samples = src->vk.samples,
9697ec681f3Smrg      .tiling = src->vk.tiling,
9707ec681f3Smrg      .usage = src->vk.usage,
9717ec681f3Smrg   };
9727ec681f3Smrg
9737ec681f3Smrg    VkImage _image;
9747ec681f3Smrg    VkResult result =
9757ec681f3Smrg      v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
9767ec681f3Smrg    if (result != VK_SUCCESS) {
9777ec681f3Smrg       v3dv_flag_oom(cmd_buffer, NULL);
9787ec681f3Smrg       return NULL;
9797ec681f3Smrg    }
9807ec681f3Smrg
9817ec681f3Smrg    struct v3dv_image *image = v3dv_image_from_handle(_image);
9827ec681f3Smrg    image->mem = src->mem;
9837ec681f3Smrg    image->mem_offset = src->mem_offset;
9847ec681f3Smrg    return image;
9857ec681f3Smrg}
9867ec681f3Smrg
/**
 * Copies an image by blitting it with a shader, choosing a single compatible
 * format for both ends so the blit is a raw copy with no format conversion.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2KHR *region)
{
   /* Block dimensions of both formats; for uncompressed formats these are
    * 1x1, so the scale factors below collapse to 1.
    */
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const float block_scale_w = (float)src_block_w / (float)dst_block_w;
   const float block_scale_h = (float)src_block_h / (float)dst_block_h;

   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are not format conversions going on. Since we
    * going to blit, we need to make sure that the selected format can be
    * both rendered to and textured from.
    */
   VkFormat format;
   float src_scale_w = 1.0f;
   float src_scale_h = 1.0f;
   float dst_scale_w = block_scale_w;
   float dst_scale_h = block_scale_h;
   if (vk_format_is_compressed(src->vk.format)) {
      /* If we are copying from a compressed format we should be aware that we
       * are going to texture from the source image, and the texture setup
       * knows the actual size of the image, so we need to choose a format
       * that has a per-texel (not per-block) bpp that is compatible for that
       * image size. For example, for a source image with size Bw*WxBh*H
       * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
       * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
       * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
       * so we could specify a blit with size Bw*WxBh*H and a format with
       * a bpp of 8-bit per texel (R8_UINT).
       *
       * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
       * which is 64-bit per texel, then we would need a 4-bit format, which
       * we don't have, so instead we still choose an 8-bit format, but we
       * apply a divisor to the row dimensions of the blit, since we are
       * copying two texels per item.
       *
       * Generally, we can choose any format so long as we compute appropriate
       * divisors for the width and height depending on the source image's
       * bpp.
       */
      assert(src->cpp == dst->cpp);

      /* NOTE(review): this initial value is always overwritten below since
       * the switch's default case is unreachable; it only acts as a safety
       * fallback.
       */
      format = VK_FORMAT_R32G32_UINT;
      switch (src->cpp) {
      case 16:
         format = VK_FORMAT_R32G32B32A32_UINT;
         break;
      case 8:
         format = VK_FORMAT_R16G16B16A16_UINT;
         break;
      default:
         unreachable("Unsupported compressed format");
      }

      /* Create image views of the src/dst images that we can interpret in
       * terms of the canonical format.
       *
       * After these divisions: src_scale = 1 / src_block and
       * dst_scale = 1 / dst_block, i.e. each alias is sized in block units
       * of its own format.
       */
      src_scale_w /= src_block_w;
      src_scale_h /= src_block_h;
      dst_scale_w /= src_block_w;
      dst_scale_h /= src_block_h;

      /* NOTE(review): create_image_alias can return NULL on OOM (it flags
       * the condition on the command buffer), but the NULL is not checked
       * before the blit below — confirm this is acceptable on OOM paths.
       */
      src = create_image_alias(cmd_buffer, src,
                               src_scale_w, src_scale_h, format);

      dst = create_image_alias(cmd_buffer, dst,
                               dst_scale_w, dst_scale_h, format);
   } else {
      /* Uncompressed source: use it directly if it is renderable, otherwise
       * fall back to a TLB-compatible format; the chosen format must also be
       * supported for texturing.
       */
      format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
         src->vk.format : get_compatible_tlb_format(src->vk.format);
      if (format == VK_FORMAT_UNDEFINED)
         return false;

      const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
      if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
         return false;
   }

   /* Given an uncompressed image with size WxH, if we copy it to a compressed
    * image, it will result in an image with size W*bWxH*bH, where bW and bH
    * are the compressed format's block width and height. This means that
    * copies between compressed and uncompressed images involve different
    * image sizes, and therefore, we need to take that into account when
    * setting up the source and destination blit regions below, so they are
    * consistent from the point of view of the single compatible format
    * selected for the copy.
    *
    * We should take into account that the dimensions of the region provided
    * to the copy command are specified in terms of the source image. With that
    * in mind, below we adjust the blit destination region to be consistent with
    * the source region for the compatible format, so basically, we apply
    * the block scale factor to the destination offset provided by the copy
    * command (because it is specified in terms of the destination image, not
    * the source), and then we just add the region copy dimensions to that
    * (since the region dimensions are already specified in terms of the source
    * image).
    */
   const VkOffset3D src_start = {
      region->srcOffset.x * src_scale_w,
      region->srcOffset.y * src_scale_h,
      region->srcOffset.z,
   };
   const VkOffset3D src_end = {
      src_start.x + region->extent.width * src_scale_w,
      src_start.y + region->extent.height * src_scale_h,
      src_start.z + region->extent.depth,
   };

   const VkOffset3D dst_start = {
      region->dstOffset.x * dst_scale_w,
      region->dstOffset.y * dst_scale_h,
      region->dstOffset.z,
   };
   const VkOffset3D dst_end = {
      dst_start.x + region->extent.width * src_scale_w,
      dst_start.y + region->extent.height * src_scale_h,
      dst_start.z + region->extent.depth,
   };

   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = { src_start, src_end },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = { dst_start, dst_end },
   };
   bool handled = blit_shader(cmd_buffer,
                              dst, format,
                              src, format,
                              0, NULL,
                              &blit_region, VK_FILTER_NEAREST, true);

   /* We should have selected formats that we can blit */
   assert(handled);
   return handled;
}
11327ec681f3Smrg
11337ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
11347ec681f3Smrgv3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
11357ec681f3Smrg                      const VkCopyImageInfo2KHR *info)
11367ec681f3Smrg
11377ec681f3Smrg{
11387ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
11397ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
11407ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
11417ec681f3Smrg
11427ec681f3Smrg   assert(src->vk.samples == dst->vk.samples);
11437ec681f3Smrg
11447ec681f3Smrg   for (uint32_t i = 0; i < info->regionCount; i++) {
11457ec681f3Smrg      if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
11467ec681f3Smrg         continue;
11477ec681f3Smrg      if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
11487ec681f3Smrg         continue;
11497ec681f3Smrg      if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
11507ec681f3Smrg         continue;
11517ec681f3Smrg      unreachable("Image copy not supported");
11527ec681f3Smrg   }
11537ec681f3Smrg}
11547ec681f3Smrg
11557ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
11567ec681f3Smrgv3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
11577ec681f3Smrg                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
11587ec681f3Smrg{
11597ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
11607ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
11617ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
11627ec681f3Smrg
11637ec681f3Smrg   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
11647ec681f3Smrg      v3dv_X(cmd_buffer->device, meta_copy_buffer)
11657ec681f3Smrg         (cmd_buffer,
11667ec681f3Smrg          dst_buffer->mem->bo, dst_buffer->mem_offset,
11677ec681f3Smrg          src_buffer->mem->bo, src_buffer->mem_offset,
11687ec681f3Smrg          &pCopyBufferInfo->pRegions[i]);
11697ec681f3Smrg   }
11707ec681f3Smrg}
11717ec681f3Smrg
11727ec681f3Smrgstatic void
11737ec681f3Smrgdestroy_update_buffer_cb(VkDevice _device,
11747ec681f3Smrg                         uint64_t pobj,
11757ec681f3Smrg                         VkAllocationCallbacks *alloc)
11767ec681f3Smrg{
11777ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_device, device, _device);
11787ec681f3Smrg   struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
11797ec681f3Smrg   v3dv_bo_free(device, bo);
11807ec681f3Smrg}
11817ec681f3Smrg
11827ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
11837ec681f3Smrgv3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
11847ec681f3Smrg                     VkBuffer dstBuffer,
11857ec681f3Smrg                     VkDeviceSize dstOffset,
11867ec681f3Smrg                     VkDeviceSize dataSize,
11877ec681f3Smrg                     const void *pData)
11887ec681f3Smrg{
11897ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
11907ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
11917ec681f3Smrg
11927ec681f3Smrg   struct v3dv_bo *src_bo =
11937ec681f3Smrg      v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
11947ec681f3Smrg   if (!src_bo) {
11957ec681f3Smrg      fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
11967ec681f3Smrg      return;
11977ec681f3Smrg   }
11987ec681f3Smrg
11997ec681f3Smrg   bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
12007ec681f3Smrg   if (!ok) {
12017ec681f3Smrg      fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
12027ec681f3Smrg      return;
12037ec681f3Smrg   }
12047ec681f3Smrg
12057ec681f3Smrg   memcpy(src_bo->map, pData, dataSize);
12067ec681f3Smrg
12077ec681f3Smrg   v3dv_bo_unmap(cmd_buffer->device, src_bo);
12087ec681f3Smrg
12097ec681f3Smrg   VkBufferCopy2KHR region = {
12107ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
12117ec681f3Smrg      .srcOffset = 0,
12127ec681f3Smrg      .dstOffset = dstOffset,
12137ec681f3Smrg      .size = dataSize,
12147ec681f3Smrg   };
12157ec681f3Smrg   struct v3dv_job *copy_job =
12167ec681f3Smrg      v3dv_X(cmd_buffer->device, meta_copy_buffer)
12177ec681f3Smrg      (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
12187ec681f3Smrg       src_bo, 0, &region);
12197ec681f3Smrg
12207ec681f3Smrg   if (!copy_job)
12217ec681f3Smrg      return;
12227ec681f3Smrg
12237ec681f3Smrg   v3dv_cmd_buffer_add_private_obj(
12247ec681f3Smrg      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
12257ec681f3Smrg}
12267ec681f3Smrg
12277ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
12287ec681f3Smrgv3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
12297ec681f3Smrg                   VkBuffer dstBuffer,
12307ec681f3Smrg                   VkDeviceSize dstOffset,
12317ec681f3Smrg                   VkDeviceSize size,
12327ec681f3Smrg                   uint32_t data)
12337ec681f3Smrg{
12347ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
12357ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
12367ec681f3Smrg
12377ec681f3Smrg   struct v3dv_bo *bo = dst_buffer->mem->bo;
12387ec681f3Smrg
12397ec681f3Smrg   /* From the Vulkan spec:
12407ec681f3Smrg    *
12417ec681f3Smrg    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
12427ec681f3Smrg    *    a multiple of 4, then the nearest smaller multiple is used."
12437ec681f3Smrg    */
12447ec681f3Smrg   if (size == VK_WHOLE_SIZE) {
12457ec681f3Smrg      size = dst_buffer->size - dstOffset;
12467ec681f3Smrg      size -= size % 4;
12477ec681f3Smrg   }
12487ec681f3Smrg
12497ec681f3Smrg   v3dv_X(cmd_buffer->device, meta_fill_buffer)
12507ec681f3Smrg      (cmd_buffer, bo, dstOffset, size, data);
12517ec681f3Smrg}
12527ec681f3Smrg
/**
 * Copies a buffer into a tiled image using the TFU (texture formatting unit).
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* Destination can't be raster format */
   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can't copy D24S8 because buffer to image copies only copy one aspect
    * at a time, and the TFU copies full images. Also, V3D depth bits for
    * both D24S8 and D24X8 are stored in the 24-bit MSB of each 32-bit word,
    * but the Vulkan spec has the buffer data specified the other way around,
    * so it is not a straight copy, we would have to swizzle the channels,
    * which the TFU can't do.
    */
   if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
       image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
         return false;
   }

   /* Region must include full slice */
   const uint32_t offset_x = region->imageOffset.x;
   const uint32_t offset_y = region->imageOffset.y;
   if (offset_x != 0 || offset_y != 0)
      return false;

   /* A bufferRowLength/bufferImageHeight of 0 means the buffer is tightly
    * packed according to the image extent.
    */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* The TFU writes whole mip levels, so the copy must cover the full image */
   if (width != image->vk.extent.width || height != image->vk.extent.height)
      return false;

   /* Handle region semantics for compressed images: extents are expressed
    * in compression blocks from here on.
    */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   width = DIV_ROUND_UP(width, block_w);
   height = DIV_ROUND_UP(height, block_h);

   /* Format must be supported for texturing via the TFU. Since we are just
    * copying raw data and not converting between pixel formats, we can ignore
    * the image's format and choose a compatible TFU format for the image
    * texel size instead, which expands the list of formats we can handle here.
    */
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     image->cpp, NULL);

   const uint32_t mip_level = region->imageSubresource.mipLevel;
   const struct v3d_resource_slice *slice = &image->slices[mip_level];

   /* For 3D images the number of "layers" to copy is given by the region
    * depth; otherwise it comes from the subresource layer count.
    */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   assert(image->mem && image->mem->bo);
   const struct v3dv_bo *dst_bo = image->mem->bo;

   assert(buffer->mem && buffer->mem->bo);
   const struct v3dv_bo *src_bo = buffer->mem->bo;

   /* Emit a TFU job per layer to copy */
   const uint32_t buffer_stride = width * image->cpp;
   for (int i = 0; i < num_layers; i++) {
      uint32_t layer;
      if (image->vk.image_type != VK_IMAGE_TYPE_3D)
         layer = region->imageSubresource.baseArrayLayer + i;
      else
         layer = region->imageOffset.z + i;

      /* Only list the source BO handle if it differs from the destination;
       * the second slot is zeroed otherwise.
       */
      struct drm_v3d_submit_tfu tfu = {
         .ios = (height << 16) | width,
         .bo_handles = {
            dst_bo->handle,
            src_bo->handle != dst_bo->handle ? src_bo->handle : 0
         },
      };

      const uint32_t buffer_offset =
         buffer->mem_offset + region->bufferOffset +
         height * buffer_stride * i;

      /* Source is the raster (linear) buffer data */
      const uint32_t src_offset = src_bo->offset + buffer_offset;
      tfu.iia |= src_offset;
      tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
      tfu.iis |= width;

      const uint32_t dst_offset =
         dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
      tfu.ioa |= dst_offset;

      /* Output format follows the tiling layout of the destination slice */
      tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
                  (slice->tiling - V3D_TILING_LINEARTILE)) <<
                   V3D_TFU_IOA_FORMAT_SHIFT;
      tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

      /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
       * OPAD field for the destination (how many extra UIF blocks beyond
       * those necessary to cover the height).
       */
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
         uint32_t implicit_padded_height = align(height, uif_block_h);
         uint32_t icfg =
            (slice->padded_height - implicit_padded_height) / uif_block_h;
         tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
      }

      v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
   }

   return true;
}
13857ec681f3Smrg
13867ec681f3Smrg/**
13877ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
13887ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
13897ec681f3Smrg */
13907ec681f3Smrgstatic bool
13917ec681f3Smrgcopy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
13927ec681f3Smrg                         struct v3dv_image *image,
13937ec681f3Smrg                         struct v3dv_buffer *buffer,
13947ec681f3Smrg                         const VkBufferImageCopy2KHR *region)
13957ec681f3Smrg{
13967ec681f3Smrg   VkFormat fb_format;
13977ec681f3Smrg   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
13987ec681f3Smrg      return false;
13997ec681f3Smrg
14007ec681f3Smrg   uint32_t internal_type, internal_bpp;
14017ec681f3Smrg   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
14027ec681f3Smrg      (fb_format, region->imageSubresource.aspectMask,
14037ec681f3Smrg       &internal_type, &internal_bpp);
14047ec681f3Smrg
14057ec681f3Smrg   uint32_t num_layers;
14067ec681f3Smrg   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
14077ec681f3Smrg      num_layers = region->imageSubresource.layerCount;
14087ec681f3Smrg   else
14097ec681f3Smrg      num_layers = region->imageExtent.depth;
14107ec681f3Smrg   assert(num_layers > 0);
14117ec681f3Smrg
14127ec681f3Smrg   struct v3dv_job *job =
14137ec681f3Smrg      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
14147ec681f3Smrg   if (!job)
14157ec681f3Smrg      return true;
14167ec681f3Smrg
14177ec681f3Smrg   /* Handle copy to compressed format using a compatible format */
14187ec681f3Smrg   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
14197ec681f3Smrg   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
14207ec681f3Smrg   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
14217ec681f3Smrg   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
14227ec681f3Smrg
14237ec681f3Smrg   v3dv_job_start_frame(job, width, height, num_layers, false,
14247ec681f3Smrg                        1, internal_bpp, false);
14257ec681f3Smrg
14267ec681f3Smrg   struct v3dv_meta_framebuffer framebuffer;
14277ec681f3Smrg   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
14287ec681f3Smrg                                              internal_type, &job->frame_tiling);
14297ec681f3Smrg
14307ec681f3Smrg   v3dv_X(job->device, job_emit_binning_flush)(job);
14317ec681f3Smrg   v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
14327ec681f3Smrg      (job, image, buffer, &framebuffer, region);
14337ec681f3Smrg
14347ec681f3Smrg   v3dv_cmd_buffer_finish_job(cmd_buffer);
14357ec681f3Smrg
14367ec681f3Smrg   return true;
14377ec681f3Smrg}
14387ec681f3Smrg
14397ec681f3Smrgstatic bool
14407ec681f3Smrgcreate_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
14417ec681f3Smrg                               struct v3dv_image *image,
14427ec681f3Smrg                               struct v3dv_buffer *buffer,
14437ec681f3Smrg                               const VkBufferImageCopy2KHR *region)
14447ec681f3Smrg{
14457ec681f3Smrg   if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
14467ec681f3Smrg      return true;
14477ec681f3Smrg   if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
14487ec681f3Smrg      return true;
14497ec681f3Smrg   return false;
14507ec681f3Smrg}
14517ec681f3Smrg
14527ec681f3Smrgstatic VkResult
14537ec681f3Smrgcreate_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
14547ec681f3Smrg{
14557ec681f3Smrg   /* If this is not the first pool we create for this command buffer
14567ec681f3Smrg    * size it based on the size of the currently exhausted pool.
14577ec681f3Smrg    */
14587ec681f3Smrg   uint32_t descriptor_count = 64;
14597ec681f3Smrg   if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
14607ec681f3Smrg      struct v3dv_descriptor_pool *exhausted_pool =
14617ec681f3Smrg         v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
14627ec681f3Smrg      descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
14637ec681f3Smrg   }
14647ec681f3Smrg
14657ec681f3Smrg   /* Create the descriptor pool */
14667ec681f3Smrg   cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
14677ec681f3Smrg   VkDescriptorPoolSize pool_size = {
14687ec681f3Smrg      .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
14697ec681f3Smrg      .descriptorCount = descriptor_count,
14707ec681f3Smrg   };
14717ec681f3Smrg   VkDescriptorPoolCreateInfo info = {
14727ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
14737ec681f3Smrg      .maxSets = descriptor_count,
14747ec681f3Smrg      .poolSizeCount = 1,
14757ec681f3Smrg      .pPoolSizes = &pool_size,
14767ec681f3Smrg      .flags = 0,
14777ec681f3Smrg   };
14787ec681f3Smrg   VkResult result =
14797ec681f3Smrg      v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
14807ec681f3Smrg                                &info,
14817ec681f3Smrg                                &cmd_buffer->device->vk.alloc,
14827ec681f3Smrg                                &cmd_buffer->meta.texel_buffer_copy.dspool);
14837ec681f3Smrg
14847ec681f3Smrg   if (result == VK_SUCCESS) {
14857ec681f3Smrg      assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
14867ec681f3Smrg      const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
14877ec681f3Smrg
14887ec681f3Smrg      v3dv_cmd_buffer_add_private_obj(
14897ec681f3Smrg         cmd_buffer, (uintptr_t) _pool,
14907ec681f3Smrg         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
14917ec681f3Smrg
14927ec681f3Smrg      struct v3dv_descriptor_pool *pool =
14937ec681f3Smrg         v3dv_descriptor_pool_from_handle(_pool);
14947ec681f3Smrg      pool->is_driver_internal = true;
14957ec681f3Smrg   }
14967ec681f3Smrg
14977ec681f3Smrg   return result;
14987ec681f3Smrg}
14997ec681f3Smrg
15007ec681f3Smrgstatic VkResult
15017ec681f3Smrgallocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
15027ec681f3Smrg                                          VkDescriptorSet *set)
15037ec681f3Smrg{
15047ec681f3Smrg   /* Make sure we have a descriptor pool */
15057ec681f3Smrg   VkResult result;
15067ec681f3Smrg   if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
15077ec681f3Smrg      result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
15087ec681f3Smrg      if (result != VK_SUCCESS)
15097ec681f3Smrg         return result;
15107ec681f3Smrg   }
15117ec681f3Smrg   assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
15127ec681f3Smrg
15137ec681f3Smrg   /* Allocate descriptor set */
15147ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
15157ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
15167ec681f3Smrg   VkDescriptorSetAllocateInfo info = {
15177ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
15187ec681f3Smrg      .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
15197ec681f3Smrg      .descriptorSetCount = 1,
15207ec681f3Smrg      .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
15217ec681f3Smrg   };
15227ec681f3Smrg   result = v3dv_AllocateDescriptorSets(_device, &info, set);
15237ec681f3Smrg
15247ec681f3Smrg   /* If we ran out of pool space, grow the pool and try again */
15257ec681f3Smrg   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
15267ec681f3Smrg      result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
15277ec681f3Smrg      if (result == VK_SUCCESS) {
15287ec681f3Smrg         info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
15297ec681f3Smrg         result = v3dv_AllocateDescriptorSets(_device, &info, set);
15307ec681f3Smrg      }
15317ec681f3Smrg   }
15327ec681f3Smrg
15337ec681f3Smrg   return result;
15347ec681f3Smrg}
15357ec681f3Smrg
15367ec681f3Smrgstatic void
15377ec681f3Smrgget_texel_buffer_copy_pipeline_cache_key(VkFormat format,
15387ec681f3Smrg                                         VkColorComponentFlags cmask,
15397ec681f3Smrg                                         VkComponentMapping *cswizzle,
15407ec681f3Smrg                                         bool is_layered,
15417ec681f3Smrg                                         uint8_t *key)
15427ec681f3Smrg{
15437ec681f3Smrg   memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
15447ec681f3Smrg
15457ec681f3Smrg   uint32_t *p = (uint32_t *) key;
15467ec681f3Smrg
15477ec681f3Smrg   *p = format;
15487ec681f3Smrg   p++;
15497ec681f3Smrg
15507ec681f3Smrg   *p = cmask;
15517ec681f3Smrg   p++;
15527ec681f3Smrg
15537ec681f3Smrg   /* Note that that we are using a single byte for this, so we could pack
15547ec681f3Smrg    * more data into this 32-bit slot in the future.
15557ec681f3Smrg    */
15567ec681f3Smrg   *p = is_layered ? 1 : 0;
15577ec681f3Smrg   p++;
15587ec681f3Smrg
15597ec681f3Smrg   memcpy(p, cswizzle, sizeof(VkComponentMapping));
15607ec681f3Smrg   p += sizeof(VkComponentMapping) / sizeof(uint32_t);
15617ec681f3Smrg
15627ec681f3Smrg   assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
15637ec681f3Smrg}
15647ec681f3Smrg
15657ec681f3Smrgstatic bool
15667ec681f3Smrgcreate_blit_render_pass(struct v3dv_device *device,
15677ec681f3Smrg                        VkFormat dst_format,
15687ec681f3Smrg                        VkFormat src_format,
15697ec681f3Smrg                        VkRenderPass *pass_load,
15707ec681f3Smrg                        VkRenderPass *pass_no_load);
15717ec681f3Smrg
15727ec681f3Smrgstatic nir_ssa_def *gen_rect_vertices(nir_builder *b);
15737ec681f3Smrg
15747ec681f3Smrgstatic bool
15757ec681f3Smrgcreate_pipeline(struct v3dv_device *device,
15767ec681f3Smrg                struct v3dv_render_pass *pass,
15777ec681f3Smrg                struct nir_shader *vs_nir,
15787ec681f3Smrg                struct nir_shader *gs_nir,
15797ec681f3Smrg                struct nir_shader *fs_nir,
15807ec681f3Smrg                const VkPipelineVertexInputStateCreateInfo *vi_state,
15817ec681f3Smrg                const VkPipelineDepthStencilStateCreateInfo *ds_state,
15827ec681f3Smrg                const VkPipelineColorBlendStateCreateInfo *cb_state,
15837ec681f3Smrg                const VkPipelineMultisampleStateCreateInfo *ms_state,
15847ec681f3Smrg                const VkPipelineLayout layout,
15857ec681f3Smrg                VkPipeline *pipeline);
15867ec681f3Smrg
15877ec681f3Smrgstatic nir_shader *
15887ec681f3Smrgget_texel_buffer_copy_vs()
15897ec681f3Smrg{
15907ec681f3Smrg   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
15917ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
15927ec681f3Smrg                                                  "meta texel buffer copy vs");
15937ec681f3Smrg   nir_variable *vs_out_pos =
15947ec681f3Smrg      nir_variable_create(b.shader, nir_var_shader_out,
15957ec681f3Smrg                          glsl_vec4_type(), "gl_Position");
15967ec681f3Smrg   vs_out_pos->data.location = VARYING_SLOT_POS;
15977ec681f3Smrg
15987ec681f3Smrg   nir_ssa_def *pos = gen_rect_vertices(&b);
15997ec681f3Smrg   nir_store_var(&b, vs_out_pos, pos, 0xf);
16007ec681f3Smrg
16017ec681f3Smrg   return b.shader;
16027ec681f3Smrg}
16037ec681f3Smrg
/* Builds a passthrough geometry shader for layered texel buffer copies: it
 * forwards the incoming triangle's positions and writes gl_Layer for every
 * vertex from a push constant, so each draw call targets one layer.
 */
static nir_shader *
get_texel_buffer_copy_gs()
{
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers however, if we were to do this, we would need to
    * be careful not to exceed the maximum number of output vertices allowed in
    * a geometry shader.
    */
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta texel buffer copy gs");
   nir_shader *nir = b.shader;
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = GL_TRIANGLES;
   nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "in_gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "out_gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants */
      nir_ssa_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
                                .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
16687ec681f3Smrg
16697ec681f3Smrgstatic nir_ssa_def *
16707ec681f3Smrgload_frag_coord(nir_builder *b)
16717ec681f3Smrg{
16727ec681f3Smrg   nir_foreach_shader_in_variable(var, b->shader) {
16737ec681f3Smrg      if (var->data.location == VARYING_SLOT_POS)
16747ec681f3Smrg         return nir_load_var(b, var);
16757ec681f3Smrg   }
16767ec681f3Smrg   nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
16777ec681f3Smrg                                           glsl_vec4_type(), NULL);
16787ec681f3Smrg   pos->data.location = VARYING_SLOT_POS;
16797ec681f3Smrg   return nir_load_var(b, pos);
16807ec681f3Smrg}
16817ec681f3Smrg
16827ec681f3Smrgstatic uint32_t
16837ec681f3Smrgcomponent_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
16847ec681f3Smrg{
16857ec681f3Smrg   if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
16867ec681f3Smrg      swz = comp;
16877ec681f3Smrg
16887ec681f3Smrg   switch (swz) {
16897ec681f3Smrg   case VK_COMPONENT_SWIZZLE_R:
16907ec681f3Smrg      return 0;
16917ec681f3Smrg   case VK_COMPONENT_SWIZZLE_G:
16927ec681f3Smrg      return 1;
16937ec681f3Smrg   case VK_COMPONENT_SWIZZLE_B:
16947ec681f3Smrg      return 2;
16957ec681f3Smrg   case VK_COMPONENT_SWIZZLE_A:
16967ec681f3Smrg      return 3;
16977ec681f3Smrg   default:
16987ec681f3Smrg      unreachable("Invalid swizzle");
16997ec681f3Smrg   };
17007ec681f3Smrg}
17017ec681f3Smrg
/* Builds the fragment shader for texel buffer copies: each fragment fetches
 * one texel from a uniform texel buffer (addressed via push-constant box,
 * stride and offset) and writes it, swizzled per 'cswizzle', to the color
 * output.
 */
static nir_shader *
get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
                         VkComponentMapping *cswizzle)
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
                                                  "meta texel buffer copy fs");

   /* We only use the copy from texel buffer shader to implement
    * copy_buffer_to_image_shader, which always selects a compatible integer
    * format for the copy.
    */
   assert(vk_format_is_int(format));

   /* Fragment shader output color */
   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_uvec4_type(), "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0;

   /* Texel buffer input */
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
   nir_variable *sampler =
      nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   /* Load the box describing the pixel region we want to copy from the
    * texel buffer.
    */
   nir_ssa_def *box =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
                             .range = 16);

   /* Load the buffer stride (this comes in texel units) */
   nir_ssa_def *stride =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
                             .range = 4);

   /* Load the buffer offset (this comes in texel units) */
   nir_ssa_def *offset =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
                             .range = 4);

   nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));

   /* Load pixel data from texel buffer based on the x,y offset of the pixel
    * within the box. Texel buffers are 1D arrays of texels.
    *
    * Notice that we already make sure that we only generate fragments that are
    * inside the box through the scissor/viewport state, so our offset into the
    * texel buffer should always be within its bounds and we don't need
    * to add a check for that here.
    */
   nir_ssa_def *x_offset =
      nir_isub(&b, nir_channel(&b, coord, 0),
                   nir_channel(&b, box, 0));
   nir_ssa_def *y_offset =
      nir_isub(&b, nir_channel(&b, coord, 1),
                   nir_channel(&b, box, 1));
   nir_ssa_def *texel_offset =
      nir_iadd(&b, nir_iadd(&b, offset, x_offset),
                   nir_imul(&b, y_offset, stride));

   /* Fetch the texel with txf (no filtering) from the uniform texel buffer */
   nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(texel_offset);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->dest_type = nir_type_uint32;
   tex->is_array = false;
   tex->coord_components = 1;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
   nir_builder_instr_insert(&b, &tex->instr);

   /* Apply the requested component swizzle before writing the output color */
   uint32_t swiz[4];
   swiz[0] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
   swiz[1] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
   swiz[2] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
   swiz[3] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
   nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
   nir_store_var(&b, fs_out_color, s, 0xf);

   return b.shader;
}
17987ec681f3Smrg
17997ec681f3Smrgstatic bool
18007ec681f3Smrgcreate_texel_buffer_copy_pipeline(struct v3dv_device *device,
18017ec681f3Smrg                                  VkFormat format,
18027ec681f3Smrg                                  VkColorComponentFlags cmask,
18037ec681f3Smrg                                  VkComponentMapping *cswizzle,
18047ec681f3Smrg                                  bool is_layered,
18057ec681f3Smrg                                  VkRenderPass _pass,
18067ec681f3Smrg                                  VkPipelineLayout pipeline_layout,
18077ec681f3Smrg                                  VkPipeline *pipeline)
18087ec681f3Smrg{
18097ec681f3Smrg   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
18107ec681f3Smrg
18117ec681f3Smrg   assert(vk_format_is_color(format));
18127ec681f3Smrg
18137ec681f3Smrg   nir_shader *vs_nir = get_texel_buffer_copy_vs();
18147ec681f3Smrg   nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
18157ec681f3Smrg   nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
18167ec681f3Smrg
18177ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo vi_state = {
18187ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
18197ec681f3Smrg      .vertexBindingDescriptionCount = 0,
18207ec681f3Smrg      .vertexAttributeDescriptionCount = 0,
18217ec681f3Smrg   };
18227ec681f3Smrg
18237ec681f3Smrg   VkPipelineDepthStencilStateCreateInfo ds_state = {
18247ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
18257ec681f3Smrg   };
18267ec681f3Smrg
18277ec681f3Smrg   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
18287ec681f3Smrg   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
18297ec681f3Smrg      .blendEnable = false,
18307ec681f3Smrg      .colorWriteMask = cmask,
18317ec681f3Smrg   };
18327ec681f3Smrg
18337ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo cb_state = {
18347ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
18357ec681f3Smrg      .logicOpEnable = false,
18367ec681f3Smrg      .attachmentCount = 1,
18377ec681f3Smrg      .pAttachments = blend_att_state
18387ec681f3Smrg   };
18397ec681f3Smrg
18407ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo ms_state = {
18417ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
18427ec681f3Smrg      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
18437ec681f3Smrg      .sampleShadingEnable = false,
18447ec681f3Smrg      .pSampleMask = NULL,
18457ec681f3Smrg      .alphaToCoverageEnable = false,
18467ec681f3Smrg      .alphaToOneEnable = false,
18477ec681f3Smrg   };
18487ec681f3Smrg
18497ec681f3Smrg   return create_pipeline(device,
18507ec681f3Smrg                          pass,
18517ec681f3Smrg                          vs_nir, gs_nir, fs_nir,
18527ec681f3Smrg                          &vi_state,
18537ec681f3Smrg                          &ds_state,
18547ec681f3Smrg                          &cb_state,
18557ec681f3Smrg                          &ms_state,
18567ec681f3Smrg                          pipeline_layout,
18577ec681f3Smrg                          pipeline);
18587ec681f3Smrg}
18597ec681f3Smrg
18607ec681f3Smrgstatic bool
18617ec681f3Smrgget_copy_texel_buffer_pipeline(
18627ec681f3Smrg   struct v3dv_device *device,
18637ec681f3Smrg   VkFormat format,
18647ec681f3Smrg   VkColorComponentFlags cmask,
18657ec681f3Smrg   VkComponentMapping *cswizzle,
18667ec681f3Smrg   VkImageType image_type,
18677ec681f3Smrg   bool is_layered,
18687ec681f3Smrg   struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
18697ec681f3Smrg{
18707ec681f3Smrg   bool ok = true;
18717ec681f3Smrg
18727ec681f3Smrg   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
18737ec681f3Smrg   get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
18747ec681f3Smrg                                            key);
18757ec681f3Smrg
18767ec681f3Smrg   mtx_lock(&device->meta.mtx);
18777ec681f3Smrg   struct hash_entry *entry =
18787ec681f3Smrg      _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
18797ec681f3Smrg                              &key);
18807ec681f3Smrg   if (entry) {
18817ec681f3Smrg      mtx_unlock(&device->meta.mtx);
18827ec681f3Smrg      *pipeline = entry->data;
18837ec681f3Smrg      return true;
18847ec681f3Smrg   }
18857ec681f3Smrg
18867ec681f3Smrg   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
18877ec681f3Smrg                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
18887ec681f3Smrg
18897ec681f3Smrg   if (*pipeline == NULL)
18907ec681f3Smrg      goto fail;
18917ec681f3Smrg
18927ec681f3Smrg   /* The blit render pass is compatible */
18937ec681f3Smrg   ok = create_blit_render_pass(device, format, format,
18947ec681f3Smrg                                &(*pipeline)->pass,
18957ec681f3Smrg                                &(*pipeline)->pass_no_load);
18967ec681f3Smrg   if (!ok)
18977ec681f3Smrg      goto fail;
18987ec681f3Smrg
18997ec681f3Smrg   ok =
19007ec681f3Smrg      create_texel_buffer_copy_pipeline(device,
19017ec681f3Smrg                                        format, cmask, cswizzle, is_layered,
19027ec681f3Smrg                                        (*pipeline)->pass,
19037ec681f3Smrg                                        device->meta.texel_buffer_copy.p_layout,
19047ec681f3Smrg                                        &(*pipeline)->pipeline);
19057ec681f3Smrg   if (!ok)
19067ec681f3Smrg      goto fail;
19077ec681f3Smrg
19087ec681f3Smrg   _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
19097ec681f3Smrg                           &key, *pipeline);
19107ec681f3Smrg
19117ec681f3Smrg   mtx_unlock(&device->meta.mtx);
19127ec681f3Smrg   return true;
19137ec681f3Smrg
19147ec681f3Smrgfail:
19157ec681f3Smrg   mtx_unlock(&device->meta.mtx);
19167ec681f3Smrg
19177ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
19187ec681f3Smrg   if (*pipeline) {
19197ec681f3Smrg      if ((*pipeline)->pass)
19207ec681f3Smrg         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
19217ec681f3Smrg      if ((*pipeline)->pipeline)
19227ec681f3Smrg         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
19237ec681f3Smrg      vk_free(&device->vk.alloc, *pipeline);
19247ec681f3Smrg      *pipeline = NULL;
19257ec681f3Smrg   }
19267ec681f3Smrg
19277ec681f3Smrg   return false;
19287ec681f3Smrg}
19297ec681f3Smrg
/**
 * Copies buffer data into an image by binding the buffer as a uniform texel
 * buffer and rendering into the image with a fragment shader that fetches
 * from it.
 *
 * Returns true if this path handles the requested copy — including the case
 * where it accepts the copy but then hits an unexpected runtime error such
 * as OOM — and false if the caller must fall back to another copy path.
 *
 * Only color-aspect, uncompressed destination images are handled; callers
 * copy D/S aspects by passing a compatible color format (optionally with a
 * cmask/cswizzle for D24 formats). When region_count > 1 all regions must
 * share the same image subresource.
 */
static bool
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
                         VkImageAspectFlags aspect,
                         struct v3dv_image *image,
                         VkFormat dst_format,
                         VkFormat src_format,
                         struct v3dv_buffer *buffer,
                         uint32_t buffer_bpp,
                         VkColorComponentFlags cmask,
                         VkComponentMapping *cswizzle,
                         uint32_t region_count,
                         const VkBufferImageCopy2KHR *regions)
{
   VkResult result;
   bool handled = false;

   assert(cswizzle);

   /* This is a copy path, so we don't handle format conversions. The only
    * exception are stencil to D24S8 copies, which are handled as a color
    * masked R8->RGBA8 copy.
    */
   assert(src_format == dst_format ||
          (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
           src_format == VK_FORMAT_R8_UINT &&
           cmask == VK_COLOR_COMPONENT_R_BIT));

   /* We only handle color copies. Callers can copy D/S aspects by using
    * a compatible color format and maybe a cmask/cswizzle for D24 formats.
    */
   if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)
      return handled;

   /* FIXME: we only handle uncompressed images for now. */
   if (vk_format_is_compressed(image->vk.format))
      return handled;

   /* cmask == 0 means "write all channels". */
   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
    * so we can bind it as a texel buffer. Otherwise, the buffer view
    * we create below won't setup the texture state that we need for this.
    */
   if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
      if (v3dv_buffer_format_supports_features(
             cmd_buffer->device, src_format,
             VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
         /* Patch the usage in-place so the buffer view below is valid. */
         buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
      } else {
         return handled;
      }
   }

   /* At this point we should be able to handle the copy unless an unexpected
    * error occurs, such as an OOM.
    */
   handled = true;


   /* Compute the number of layers to copy.
    *
    * If we are batching (region_count > 1) all our regions have the same
    * image subresource so we can take this from the first region. For 3D
    * images we require the same depth extent.
    */
   const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      num_layers = resource->layerCount;
   } else {
      assert(region_count == 1);
      num_layers = regions[0].imageExtent.depth;
   }
   assert(num_layers > 0);

   /* Get the texel buffer copy pipeline */
   struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
   bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
                                            dst_format, cmask, cswizzle,
                                            image->vk.image_type, num_layers > 1,
                                            &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline && pipeline->pass);

   /* Setup descriptor set for the source texel buffer. We don't have to
    * register the descriptor as a private command buffer object since
    * all descriptors will be freed automatically with the descriptor
    * pool.
    */
   VkDescriptorSet set;
   result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
   if (result != VK_SUCCESS)
      return handled;

   /* FIXME: for some reason passing region->bufferOffset here for the
    * offset field doesn't work, making the following CTS tests fail:
    *
    * dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*
    *
    * So instead we pass 0 here and we pass the offset in texels as a push
    * constant to the shader, which seems to work correctly.
    */
   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
   VkBufferViewCreateInfo buffer_view_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
      .buffer = v3dv_buffer_to_handle(buffer),
      .format = src_format,
      .offset = 0,
      .range = VK_WHOLE_SIZE,
   };

   VkBufferView texel_buffer_view;
   result = v3dv_CreateBufferView(_device, &buffer_view_info,
                                  &cmd_buffer->device->vk.alloc,
                                  &texel_buffer_view);
   if (result != VK_SUCCESS)
      return handled;

   /* Tie the view's lifetime to the command buffer so it is destroyed
    * automatically when the command buffer is.
    */
   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)texel_buffer_view,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);

   VkWriteDescriptorSet write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = set,
      .dstBinding = 0,
      .dstArrayElement = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
      .pTexelBufferView = &texel_buffer_view,
   };
   v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

   /* Push command buffer state before starting meta operation */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
   uint32_t dirty_dynamic_state = 0;

   /* Bind common state for all layers and regions  */
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   v3dv_CmdBindDescriptorSets(_cmd_buffer,
                              VK_PIPELINE_BIND_POINT_GRAPHICS,
                              cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                              0, 1, &set,
                              0, NULL);

   /* Setup framebuffer.
    *
    * For 3D images, this creates a layered framebuffer with a number of
    * layers matching the depth extent of the 3D image.
    */
   uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
   uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
   VkImageViewCreateInfo image_view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = v3dv_image_to_handle(image),
      .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
      .format = dst_format,
      .subresourceRange = {
         .aspectMask = aspect,
         .baseMipLevel = resource->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = resource->baseArrayLayer,
         .layerCount = num_layers,
      },
   };
   VkImageView image_view;
   result = v3dv_CreateImageView(_device, &image_view_info,
                                 &cmd_buffer->device->vk.alloc, &image_view);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)image_view,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

   VkFramebufferCreateInfo fb_info = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      .renderPass = pipeline->pass,
      .attachmentCount = 1,
      .pAttachments = &image_view,
      .width = fb_width,
      .height = fb_height,
      .layers = num_layers,
   };

   VkFramebuffer fb;
   result = v3dv_CreateFramebuffer(_device, &fb_info,
                                   &cmd_buffer->device->vk.alloc, &fb);
   if (result != VK_SUCCESS)
      goto fail;

    v3dv_cmd_buffer_add_private_obj(
       cmd_buffer, (uintptr_t)fb,
       (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

   /* For each layer */
   for (uint32_t l = 0; l < num_layers; l++) {
       /* Start render pass for this layer.
        *
        * If the we only have one region to copy, then we might be able to
        * skip the TLB load if it is aligned to tile boundaries. All layers
        * copy the same area, so we only need to check this once.
        */
      bool can_skip_tlb_load = false;
      VkRect2D render_area;
      if (region_count == 1) {
         render_area.offset.x = regions[0].imageOffset.x;
         render_area.offset.y = regions[0].imageOffset.y;
         render_area.extent.width = regions[0].imageExtent.width;
         render_area.extent.height = regions[0].imageExtent.height;

         if (l == 0) {
            struct v3dv_render_pass *pipeline_pass =
               v3dv_render_pass_from_handle(pipeline->pass);
            /* The no-load pass is only safe if we overwrite every channel
             * of every pixel in each tile touched by the render area.
             */
            can_skip_tlb_load =
               cmask == full_cmask &&
               v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                                 v3dv_framebuffer_from_handle(fb),
                                                 pipeline_pass, 0);
         }
      } else {
         /* Batched regions: render over the whole framebuffer. */
         render_area.offset.x = 0;
         render_area.offset.y = 0;
         render_area.extent.width = fb_width;
         render_area.extent.height = fb_height;
      }

      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* If we are using a layered copy we need to specify the layer for the
       * Geometry Shader.
       */
      if (num_layers > 1) {
         uint32_t layer = resource->baseArrayLayer + l;
         /* GS push constant: layer index at byte offset 24, after the 6
          * dwords of fragment push data below.
          */
         v3dv_CmdPushConstants(_cmd_buffer,
                               cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                               VK_SHADER_STAGE_GEOMETRY_BIT,
                               24, 4, &layer);
      }

      /* For each region */
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
      for (uint32_t r = 0; r < region_count; r++) {
         const VkBufferImageCopy2KHR *region = &regions[r];

         /* Obtain the 2D buffer region spec */
         uint32_t buf_width, buf_height;
         if (region->bufferRowLength == 0)
             buf_width = region->imageExtent.width;
         else
             buf_width = region->bufferRowLength;

         if (region->bufferImageHeight == 0)
             buf_height = region->imageExtent.height;
         else
             buf_height = region->bufferImageHeight;

         /* Restrict rasterization to this region only. */
         const VkViewport viewport = {
            .x = region->imageOffset.x,
            .y = region->imageOffset.y,
            .width = region->imageExtent.width,
            .height = region->imageExtent.height,
            .minDepth = 0.0f,
            .maxDepth = 1.0f
         };
         v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
         const VkRect2D scissor = {
            .offset = { region->imageOffset.x, region->imageOffset.y },
            .extent = { region->imageExtent.width, region->imageExtent.height }
         };
         v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

         /* Buffer offset in texels for this layer (see FIXME above: the
          * buffer view itself is created with offset 0).
          */
         const VkDeviceSize buf_offset =
            region->bufferOffset / buffer_bpp  + l * buf_height * buf_width;
         /* FS push constants (6 dwords at offset 0): copy rectangle bounds
          * (x0, y0, x1, y1 inclusive), buffer row stride in texels, and the
          * texel offset into the buffer.
          *
          * NOTE(review): buf_offset is VkDeviceSize truncated to a 32-bit
          * push constant — assumes the offset in texels fits in 32 bits;
          * confirm for very large buffers.
          */
         uint32_t push_data[6] = {
            region->imageOffset.x,
            region->imageOffset.y,
            region->imageOffset.x + region->imageExtent.width - 1,
            region->imageOffset.y + region->imageExtent.height - 1,
            buf_width,
            buf_offset,
         };

         v3dv_CmdPushConstants(_cmd_buffer,
                               cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                               VK_SHADER_STAGE_FRAGMENT_BIT,
                               0, sizeof(push_data), &push_data);

         v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
      } /* For each region */

      v3dv_CmdEndRenderPass(_cmd_buffer);
   } /* For each layer */

/* Shared epilogue: the success path falls through here too, so the meta
 * state pop runs on both success and failure, and 'handled' (already true
 * by this point) is returned either way per this function's contract.
 */
fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
   return handled;
}
22507ec681f3Smrg
22517ec681f3Smrg/**
22527ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
22537ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
22547ec681f3Smrg */
22557ec681f3Smrgstatic bool
22567ec681f3Smrgcopy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
22577ec681f3Smrg                          VkImageAspectFlags aspect,
22587ec681f3Smrg                          struct v3dv_image *image,
22597ec681f3Smrg                          VkFormat dst_format,
22607ec681f3Smrg                          VkFormat src_format,
22617ec681f3Smrg                          struct v3dv_buffer *buffer,
22627ec681f3Smrg                          uint32_t buffer_bpp,
22637ec681f3Smrg                          VkColorComponentFlags cmask,
22647ec681f3Smrg                          VkComponentMapping *cswizzle,
22657ec681f3Smrg                          uint32_t region_count,
22667ec681f3Smrg                          const VkBufferImageCopy2KHR *regions)
22677ec681f3Smrg{
22687ec681f3Smrg   /* Since we can't sample linear images we need to upload the linear
22697ec681f3Smrg    * buffer to a tiled image that we can use as a blit source, which
22707ec681f3Smrg    * is slow.
22717ec681f3Smrg    */
22727ec681f3Smrg   perf_debug("Falling back to blit path for buffer to image copy.\n");
22737ec681f3Smrg
22747ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
22757ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
22767ec681f3Smrg   bool handled = true;
22777ec681f3Smrg
22787ec681f3Smrg   /* Allocate memory for the tiled image. Since we copy layer by layer
22797ec681f3Smrg    * we allocate memory to hold a full layer, which is the worse case.
22807ec681f3Smrg    * For that we create a dummy image with that spec, get memory requirements
22817ec681f3Smrg    * for it and use that information to create the memory allocation.
22827ec681f3Smrg    * We will then reuse this memory store for all the regions we want to
22837ec681f3Smrg    * copy.
22847ec681f3Smrg    */
22857ec681f3Smrg   VkImage dummy_image;
22867ec681f3Smrg   VkImageCreateInfo dummy_info = {
22877ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
22887ec681f3Smrg      .imageType = VK_IMAGE_TYPE_2D,
22897ec681f3Smrg      .format = src_format,
22907ec681f3Smrg      .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
22917ec681f3Smrg      .mipLevels = 1,
22927ec681f3Smrg      .arrayLayers = 1,
22937ec681f3Smrg      .samples = VK_SAMPLE_COUNT_1_BIT,
22947ec681f3Smrg      .tiling = VK_IMAGE_TILING_OPTIMAL,
22957ec681f3Smrg      .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
22967ec681f3Smrg               VK_IMAGE_USAGE_TRANSFER_DST_BIT,
22977ec681f3Smrg      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
22987ec681f3Smrg      .queueFamilyIndexCount = 0,
22997ec681f3Smrg      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
23007ec681f3Smrg   };
23017ec681f3Smrg   VkResult result =
23027ec681f3Smrg      v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
23037ec681f3Smrg   if (result != VK_SUCCESS)
23047ec681f3Smrg      return handled;
23057ec681f3Smrg
23067ec681f3Smrg   VkMemoryRequirements reqs;
23077ec681f3Smrg   vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
23087ec681f3Smrg   v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
23097ec681f3Smrg
23107ec681f3Smrg   VkDeviceMemory mem;
23117ec681f3Smrg   VkMemoryAllocateInfo alloc_info = {
23127ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
23137ec681f3Smrg      .allocationSize = reqs.size,
23147ec681f3Smrg      .memoryTypeIndex = 0,
23157ec681f3Smrg   };
23167ec681f3Smrg   result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
23177ec681f3Smrg   if (result != VK_SUCCESS)
23187ec681f3Smrg      return handled;
23197ec681f3Smrg
23207ec681f3Smrg   v3dv_cmd_buffer_add_private_obj(
23217ec681f3Smrg      cmd_buffer, (uintptr_t)mem,
23227ec681f3Smrg      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
23237ec681f3Smrg
23247ec681f3Smrg   /* Obtain the layer count.
23257ec681f3Smrg    *
23267ec681f3Smrg    * If we are batching (region_count > 1) all our regions have the same
23277ec681f3Smrg    * image subresource so we can take this from the first region.
23287ec681f3Smrg    */
23297ec681f3Smrg   uint32_t num_layers;
23307ec681f3Smrg   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
23317ec681f3Smrg      num_layers = regions[0].imageSubresource.layerCount;
23327ec681f3Smrg   else
23337ec681f3Smrg      num_layers = regions[0].imageExtent.depth;
23347ec681f3Smrg   assert(num_layers > 0);
23357ec681f3Smrg
23367ec681f3Smrg   /* Sanity check: we can only batch multiple regions together if they have
23377ec681f3Smrg    * the same framebuffer (so the same layer).
23387ec681f3Smrg    */
23397ec681f3Smrg   assert(num_layers == 1 || region_count == 1);
23407ec681f3Smrg
23417ec681f3Smrg   const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
23427ec681f3Smrg   const uint32_t block_height = vk_format_get_blockheight(image->vk.format);
23437ec681f3Smrg
23447ec681f3Smrg   /* Copy regions by uploading each region to a temporary tiled image using
23457ec681f3Smrg    * the memory we have just allocated as storage.
23467ec681f3Smrg    */
23477ec681f3Smrg   for (uint32_t r = 0; r < region_count; r++) {
23487ec681f3Smrg      const VkBufferImageCopy2KHR *region = &regions[r];
23497ec681f3Smrg
23507ec681f3Smrg      /* Obtain the 2D buffer region spec */
23517ec681f3Smrg      uint32_t buf_width, buf_height;
23527ec681f3Smrg      if (region->bufferRowLength == 0)
23537ec681f3Smrg          buf_width = region->imageExtent.width;
23547ec681f3Smrg      else
23557ec681f3Smrg          buf_width = region->bufferRowLength;
23567ec681f3Smrg
23577ec681f3Smrg      if (region->bufferImageHeight == 0)
23587ec681f3Smrg          buf_height = region->imageExtent.height;
23597ec681f3Smrg      else
23607ec681f3Smrg          buf_height = region->bufferImageHeight;
23617ec681f3Smrg
23627ec681f3Smrg      /* If the image is compressed, the bpp refers to blocks, not pixels */
23637ec681f3Smrg      buf_width = buf_width / block_width;
23647ec681f3Smrg      buf_height = buf_height / block_height;
23657ec681f3Smrg
23667ec681f3Smrg      for (uint32_t i = 0; i < num_layers; i++) {
23677ec681f3Smrg         /* Create the tiled image */
23687ec681f3Smrg         VkImageCreateInfo image_info = {
23697ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
23707ec681f3Smrg            .imageType = VK_IMAGE_TYPE_2D,
23717ec681f3Smrg            .format = src_format,
23727ec681f3Smrg            .extent = { buf_width, buf_height, 1 },
23737ec681f3Smrg            .mipLevels = 1,
23747ec681f3Smrg            .arrayLayers = 1,
23757ec681f3Smrg            .samples = VK_SAMPLE_COUNT_1_BIT,
23767ec681f3Smrg            .tiling = VK_IMAGE_TILING_OPTIMAL,
23777ec681f3Smrg            .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
23787ec681f3Smrg                     VK_IMAGE_USAGE_TRANSFER_DST_BIT,
23797ec681f3Smrg            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
23807ec681f3Smrg            .queueFamilyIndexCount = 0,
23817ec681f3Smrg            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
23827ec681f3Smrg         };
23837ec681f3Smrg
23847ec681f3Smrg         VkImage buffer_image;
23857ec681f3Smrg         VkResult result =
23867ec681f3Smrg            v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
23877ec681f3Smrg                             &buffer_image);
23887ec681f3Smrg         if (result != VK_SUCCESS)
23897ec681f3Smrg            return handled;
23907ec681f3Smrg
23917ec681f3Smrg         v3dv_cmd_buffer_add_private_obj(
23927ec681f3Smrg            cmd_buffer, (uintptr_t)buffer_image,
23937ec681f3Smrg            (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
23947ec681f3Smrg
23957ec681f3Smrg         result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
23967ec681f3Smrg         if (result != VK_SUCCESS)
23977ec681f3Smrg            return handled;
23987ec681f3Smrg
23997ec681f3Smrg         /* Upload buffer contents for the selected layer */
24007ec681f3Smrg         const VkDeviceSize buf_offset_bytes =
24017ec681f3Smrg            region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
24027ec681f3Smrg         const VkBufferImageCopy2KHR buffer_image_copy = {
24037ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,
24047ec681f3Smrg            .bufferOffset = buf_offset_bytes,
24057ec681f3Smrg            .bufferRowLength = region->bufferRowLength / block_width,
24067ec681f3Smrg            .bufferImageHeight = region->bufferImageHeight / block_height,
24077ec681f3Smrg            .imageSubresource = {
24087ec681f3Smrg               .aspectMask = aspect,
24097ec681f3Smrg               .mipLevel = 0,
24107ec681f3Smrg               .baseArrayLayer = 0,
24117ec681f3Smrg               .layerCount = 1,
24127ec681f3Smrg            },
24137ec681f3Smrg            .imageOffset = { 0, 0, 0 },
24147ec681f3Smrg            .imageExtent = { buf_width, buf_height, 1 }
24157ec681f3Smrg         };
24167ec681f3Smrg         handled =
24177ec681f3Smrg            create_tiled_image_from_buffer(cmd_buffer,
24187ec681f3Smrg                                           v3dv_image_from_handle(buffer_image),
24197ec681f3Smrg                                           buffer, &buffer_image_copy);
24207ec681f3Smrg         if (!handled) {
24217ec681f3Smrg            /* This is unexpected, we should have setup the upload to be
24227ec681f3Smrg             * conformant to a TFU or TLB copy.
24237ec681f3Smrg             */
24247ec681f3Smrg            unreachable("Unable to copy buffer to image through TLB");
24257ec681f3Smrg            return false;
24267ec681f3Smrg         }
24277ec681f3Smrg
24287ec681f3Smrg         /* Blit-copy the requested image extent from the buffer image to the
24297ec681f3Smrg          * destination image.
24307ec681f3Smrg          *
24317ec681f3Smrg          * Since we are copying, the blit must use the same format on the
24327ec681f3Smrg          * destination and source images to avoid format conversions. The
24337ec681f3Smrg          * only exception is copying stencil, which we upload to a R8UI source
24347ec681f3Smrg          * image, but that we need to blit to a S8D24 destination (the only
24357ec681f3Smrg          * stencil format we support).
24367ec681f3Smrg          */
24377ec681f3Smrg         const VkImageBlit2KHR blit_region = {
24387ec681f3Smrg            .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
24397ec681f3Smrg            .srcSubresource = {
24407ec681f3Smrg               .aspectMask = aspect,
24417ec681f3Smrg               .mipLevel = 0,
24427ec681f3Smrg               .baseArrayLayer = 0,
24437ec681f3Smrg               .layerCount = 1,
24447ec681f3Smrg            },
24457ec681f3Smrg            .srcOffsets = {
24467ec681f3Smrg               { 0, 0, 0 },
24477ec681f3Smrg               { region->imageExtent.width, region->imageExtent.height, 1 },
24487ec681f3Smrg            },
24497ec681f3Smrg            .dstSubresource = {
24507ec681f3Smrg               .aspectMask = aspect,
24517ec681f3Smrg               .mipLevel = region->imageSubresource.mipLevel,
24527ec681f3Smrg               .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
24537ec681f3Smrg               .layerCount = 1,
24547ec681f3Smrg            },
24557ec681f3Smrg            .dstOffsets = {
24567ec681f3Smrg               {
24577ec681f3Smrg                  DIV_ROUND_UP(region->imageOffset.x, block_width),
24587ec681f3Smrg                  DIV_ROUND_UP(region->imageOffset.y, block_height),
24597ec681f3Smrg                  region->imageOffset.z + i,
24607ec681f3Smrg               },
24617ec681f3Smrg               {
24627ec681f3Smrg                  DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
24637ec681f3Smrg                               block_width),
24647ec681f3Smrg                  DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
24657ec681f3Smrg                               block_height),
24667ec681f3Smrg                  region->imageOffset.z + i + 1,
24677ec681f3Smrg               },
24687ec681f3Smrg            },
24697ec681f3Smrg         };
24707ec681f3Smrg
24717ec681f3Smrg         handled = blit_shader(cmd_buffer,
24727ec681f3Smrg                               image, dst_format,
24737ec681f3Smrg                               v3dv_image_from_handle(buffer_image), src_format,
24747ec681f3Smrg                               cmask, cswizzle,
24757ec681f3Smrg                               &blit_region, VK_FILTER_NEAREST, true);
24767ec681f3Smrg         if (!handled) {
24777ec681f3Smrg            /* This is unexpected, we should have a supported blit spec */
24787ec681f3Smrg            unreachable("Unable to blit buffer to destination image");
24797ec681f3Smrg            return false;
24807ec681f3Smrg         }
24817ec681f3Smrg      }
24827ec681f3Smrg   }
24837ec681f3Smrg
24847ec681f3Smrg   return handled;
24857ec681f3Smrg}
24867ec681f3Smrg
24877ec681f3Smrg/**
24887ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
24897ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
24907ec681f3Smrg */
24917ec681f3Smrgstatic bool
24927ec681f3Smrgcopy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
24937ec681f3Smrg                            struct v3dv_image *image,
24947ec681f3Smrg                            struct v3dv_buffer *buffer,
24957ec681f3Smrg                            uint32_t region_count,
24967ec681f3Smrg                            const VkBufferImageCopy2KHR *regions,
24977ec681f3Smrg                            bool use_texel_buffer)
24987ec681f3Smrg{
24997ec681f3Smrg   /* We can only call this with region_count > 1 if we can batch the regions
25007ec681f3Smrg    * together, in which case they share the same image subresource, and so
25017ec681f3Smrg    * the same aspect.
25027ec681f3Smrg    */
25037ec681f3Smrg   VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
25047ec681f3Smrg
25057ec681f3Smrg   /* Generally, the bpp of the data in the buffer matches that of the
25067ec681f3Smrg    * destination image. The exception is the case where we are uploading
25077ec681f3Smrg    * stencil (8bpp) to a combined d24s8 image (32bpp).
25087ec681f3Smrg    */
25097ec681f3Smrg   uint32_t buf_bpp = image->cpp;
25107ec681f3Smrg
25117ec681f3Smrg   /* We are about to upload the buffer data to an image so we can then
25127ec681f3Smrg    * blit that to our destination region. Because we are going to implement
25137ec681f3Smrg    * the copy as a blit, we want our blit source and destination formats to be
25147ec681f3Smrg    * the same (to avoid any format conversions), so we choose a canonical
25157ec681f3Smrg    * format that matches the destination image bpp.
25167ec681f3Smrg    */
25177ec681f3Smrg   VkComponentMapping ident_swizzle = {
25187ec681f3Smrg      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
25197ec681f3Smrg      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
25207ec681f3Smrg      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
25217ec681f3Smrg      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
25227ec681f3Smrg   };
25237ec681f3Smrg
25247ec681f3Smrg   VkComponentMapping cswizzle = ident_swizzle;
25257ec681f3Smrg   VkColorComponentFlags cmask = 0; /* Write all components */
25267ec681f3Smrg   VkFormat src_format;
25277ec681f3Smrg   VkFormat dst_format;
25287ec681f3Smrg   switch (buf_bpp) {
25297ec681f3Smrg   case 16:
25307ec681f3Smrg      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
25317ec681f3Smrg      src_format = VK_FORMAT_R32G32B32A32_UINT;
25327ec681f3Smrg      dst_format = src_format;
25337ec681f3Smrg      break;
25347ec681f3Smrg   case 8:
25357ec681f3Smrg      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
25367ec681f3Smrg      src_format = VK_FORMAT_R16G16B16A16_UINT;
25377ec681f3Smrg      dst_format = src_format;
25387ec681f3Smrg      break;
25397ec681f3Smrg   case 4:
25407ec681f3Smrg      switch (aspect) {
25417ec681f3Smrg      case VK_IMAGE_ASPECT_COLOR_BIT:
25427ec681f3Smrg         src_format = VK_FORMAT_R8G8B8A8_UINT;
25437ec681f3Smrg         dst_format = src_format;
25447ec681f3Smrg         break;
25457ec681f3Smrg      case VK_IMAGE_ASPECT_DEPTH_BIT:
25467ec681f3Smrg         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
25477ec681f3Smrg                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
25487ec681f3Smrg                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
25497ec681f3Smrg         src_format = VK_FORMAT_R8G8B8A8_UINT;
25507ec681f3Smrg         dst_format = src_format;
25517ec681f3Smrg         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
25527ec681f3Smrg
25537ec681f3Smrg         /* For D24 formats, the Vulkan spec states that the depth component
25547ec681f3Smrg          * in the buffer is stored in the 24-LSB, but V3D wants it in the
25557ec681f3Smrg          * 24-MSB.
25567ec681f3Smrg          */
25577ec681f3Smrg         if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
25587ec681f3Smrg             image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
25597ec681f3Smrg            cmask = VK_COLOR_COMPONENT_G_BIT |
25607ec681f3Smrg                    VK_COLOR_COMPONENT_B_BIT |
25617ec681f3Smrg                    VK_COLOR_COMPONENT_A_BIT;
25627ec681f3Smrg            cswizzle.r = VK_COMPONENT_SWIZZLE_R;
25637ec681f3Smrg            cswizzle.g = VK_COMPONENT_SWIZZLE_R;
25647ec681f3Smrg            cswizzle.b = VK_COMPONENT_SWIZZLE_G;
25657ec681f3Smrg            cswizzle.a = VK_COMPONENT_SWIZZLE_B;
25667ec681f3Smrg         }
25677ec681f3Smrg         break;
25687ec681f3Smrg      case VK_IMAGE_ASPECT_STENCIL_BIT:
25697ec681f3Smrg         /* Since we don't support separate stencil this is always a stencil
25707ec681f3Smrg          * copy to a combined depth/stencil image. Because we don't support
25717ec681f3Smrg          * separate stencil images, we interpret the buffer data as a
25727ec681f3Smrg          * color R8UI image, and implement the blit as a compatible color
25737ec681f3Smrg          * blit to an RGBA8UI destination masking out writes to components
25747ec681f3Smrg          * GBA (which map to the D24 component of a S8D24 image).
25757ec681f3Smrg          */
25767ec681f3Smrg         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
25777ec681f3Smrg         buf_bpp = 1;
25787ec681f3Smrg         src_format = VK_FORMAT_R8_UINT;
25797ec681f3Smrg         dst_format = VK_FORMAT_R8G8B8A8_UINT;
25807ec681f3Smrg         cmask = VK_COLOR_COMPONENT_R_BIT;
25817ec681f3Smrg         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
25827ec681f3Smrg         break;
25837ec681f3Smrg      default:
25847ec681f3Smrg         unreachable("unsupported aspect");
25857ec681f3Smrg         return false;
25867ec681f3Smrg      };
25877ec681f3Smrg      break;
25887ec681f3Smrg   case 2:
25897ec681f3Smrg      aspect = VK_IMAGE_ASPECT_COLOR_BIT;
25907ec681f3Smrg      src_format = VK_FORMAT_R16_UINT;
25917ec681f3Smrg      dst_format = src_format;
25927ec681f3Smrg      break;
25937ec681f3Smrg   case 1:
25947ec681f3Smrg      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
25957ec681f3Smrg      src_format = VK_FORMAT_R8_UINT;
25967ec681f3Smrg      dst_format = src_format;
25977ec681f3Smrg      break;
25987ec681f3Smrg   default:
25997ec681f3Smrg      unreachable("unsupported bit-size");
26007ec681f3Smrg      return false;
26017ec681f3Smrg   }
26027ec681f3Smrg
26037ec681f3Smrg   if (use_texel_buffer) {
26047ec681f3Smrg      return texel_buffer_shader_copy(cmd_buffer, aspect, image,
26057ec681f3Smrg                                      dst_format, src_format,
26067ec681f3Smrg                                      buffer, buf_bpp,
26077ec681f3Smrg                                      cmask, &cswizzle,
26087ec681f3Smrg                                      region_count, regions);
26097ec681f3Smrg   } else {
26107ec681f3Smrg      return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
26117ec681f3Smrg                                       dst_format, src_format,
26127ec681f3Smrg                                       buffer, buf_bpp,
26137ec681f3Smrg                                       cmask, &cswizzle,
26147ec681f3Smrg                                       region_count, regions);
26157ec681f3Smrg   }
26167ec681f3Smrg}
26177ec681f3Smrg
26187ec681f3Smrg/**
26197ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
26207ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
26217ec681f3Smrg */
26227ec681f3Smrgstatic bool
26237ec681f3Smrgcopy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
26247ec681f3Smrg                         struct v3dv_image *image,
26257ec681f3Smrg                         struct v3dv_buffer *buffer,
26267ec681f3Smrg                         const VkBufferImageCopy2KHR *region)
26277ec681f3Smrg{
26287ec681f3Smrg   /* FIXME */
26297ec681f3Smrg   if (vk_format_is_depth_or_stencil(image->vk.format))
26307ec681f3Smrg      return false;
26317ec681f3Smrg
26327ec681f3Smrg   if (vk_format_is_compressed(image->vk.format))
26337ec681f3Smrg      return false;
26347ec681f3Smrg
26357ec681f3Smrg   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
26367ec681f3Smrg      return false;
26377ec681f3Smrg
26387ec681f3Smrg   uint32_t buffer_width, buffer_height;
26397ec681f3Smrg   if (region->bufferRowLength == 0)
26407ec681f3Smrg      buffer_width = region->imageExtent.width;
26417ec681f3Smrg   else
26427ec681f3Smrg      buffer_width = region->bufferRowLength;
26437ec681f3Smrg
26447ec681f3Smrg   if (region->bufferImageHeight == 0)
26457ec681f3Smrg      buffer_height = region->imageExtent.height;
26467ec681f3Smrg   else
26477ec681f3Smrg      buffer_height = region->bufferImageHeight;
26487ec681f3Smrg
26497ec681f3Smrg   uint32_t buffer_stride = buffer_width * image->cpp;
26507ec681f3Smrg   uint32_t buffer_layer_stride = buffer_stride * buffer_height;
26517ec681f3Smrg
26527ec681f3Smrg   uint32_t num_layers;
26537ec681f3Smrg   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
26547ec681f3Smrg      num_layers = region->imageSubresource.layerCount;
26557ec681f3Smrg   else
26567ec681f3Smrg      num_layers = region->imageExtent.depth;
26577ec681f3Smrg   assert(num_layers > 0);
26587ec681f3Smrg
26597ec681f3Smrg   struct v3dv_job *job =
26607ec681f3Smrg      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
26617ec681f3Smrg                                     V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
26627ec681f3Smrg                                     cmd_buffer, -1);
26637ec681f3Smrg   if (!job)
26647ec681f3Smrg      return true;
26657ec681f3Smrg
26667ec681f3Smrg   job->cpu.copy_buffer_to_image.image = image;
26677ec681f3Smrg   job->cpu.copy_buffer_to_image.buffer = buffer;
26687ec681f3Smrg   job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
26697ec681f3Smrg   job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
26707ec681f3Smrg   job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
26717ec681f3Smrg   job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
26727ec681f3Smrg   job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
26737ec681f3Smrg   job->cpu.copy_buffer_to_image.mip_level =
26747ec681f3Smrg      region->imageSubresource.mipLevel;
26757ec681f3Smrg   job->cpu.copy_buffer_to_image.base_layer =
26767ec681f3Smrg      region->imageSubresource.baseArrayLayer;
26777ec681f3Smrg   job->cpu.copy_buffer_to_image.layer_count = num_layers;
26787ec681f3Smrg
26797ec681f3Smrg   list_addtail(&job->list_link, &cmd_buffer->jobs);
26807ec681f3Smrg
26817ec681f3Smrg   return true;
26827ec681f3Smrg}
26837ec681f3Smrg
26847ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
26857ec681f3Smrgv3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
26867ec681f3Smrg                              const VkCopyBufferToImageInfo2KHR *info)
26877ec681f3Smrg{
26887ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
26897ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
26907ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
26917ec681f3Smrg
26927ec681f3Smrg   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
26937ec681f3Smrg
26947ec681f3Smrg   uint32_t r = 0;
26957ec681f3Smrg   while (r < info->regionCount) {
26967ec681f3Smrg      /* The TFU and TLB paths can only copy one region at a time and the region
26977ec681f3Smrg       * needs to start at the origin. We try these first for the common case
26987ec681f3Smrg       * where we are copying full images, since they should be the fastest.
26997ec681f3Smrg       */
27007ec681f3Smrg      uint32_t batch_size = 1;
27017ec681f3Smrg      if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
27027ec681f3Smrg         goto handled;
27037ec681f3Smrg
27047ec681f3Smrg      if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
27057ec681f3Smrg         goto handled;
27067ec681f3Smrg
27077ec681f3Smrg      /* Otherwise, we are copying subrects, so we fallback to copying
27087ec681f3Smrg       * via shader and texel buffers and we try to batch the regions
27097ec681f3Smrg       * if possible. We can only batch copies if they have the same
27107ec681f3Smrg       * framebuffer spec, which is mostly determined by the image
27117ec681f3Smrg       * subresource of the region.
27127ec681f3Smrg       */
27137ec681f3Smrg      const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
27147ec681f3Smrg      for (uint32_t s = r + 1; s < info->regionCount; s++) {
27157ec681f3Smrg         const VkImageSubresourceLayers *rsc_s =
27167ec681f3Smrg            &info->pRegions[s].imageSubresource;
27177ec681f3Smrg
27187ec681f3Smrg         if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
27197ec681f3Smrg            break;
27207ec681f3Smrg
27217ec681f3Smrg         /* For 3D images we also need to check the depth extent */
27227ec681f3Smrg         if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
27237ec681f3Smrg             info->pRegions[s].imageExtent.depth !=
27247ec681f3Smrg             info->pRegions[r].imageExtent.depth) {
27257ec681f3Smrg               break;
27267ec681f3Smrg         }
27277ec681f3Smrg
27287ec681f3Smrg         batch_size++;
27297ec681f3Smrg      }
27307ec681f3Smrg
27317ec681f3Smrg      if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
27327ec681f3Smrg                                      batch_size, &info->pRegions[r], true)) {
27337ec681f3Smrg         goto handled;
27347ec681f3Smrg      }
27357ec681f3Smrg
27367ec681f3Smrg      /* If we still could not copy, fallback to slower paths.
27377ec681f3Smrg       *
27387ec681f3Smrg       * FIXME: we could try to batch these too, but since they are bound to be
27397ec681f3Smrg       * slow it might not be worth it and we should instead put more effort
27407ec681f3Smrg       * in handling more cases with the other paths.
27417ec681f3Smrg       */
27427ec681f3Smrg      if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
27437ec681f3Smrg                                   &info->pRegions[r])) {
27447ec681f3Smrg         batch_size = 1;
27457ec681f3Smrg         goto handled;
27467ec681f3Smrg      }
27477ec681f3Smrg
27487ec681f3Smrg      if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
27497ec681f3Smrg                                      batch_size, &info->pRegions[r], false)) {
27507ec681f3Smrg         goto handled;
27517ec681f3Smrg      }
27527ec681f3Smrg
27537ec681f3Smrg      unreachable("Unsupported buffer to image copy.");
27547ec681f3Smrg
27557ec681f3Smrghandled:
27567ec681f3Smrg      r += batch_size;
27577ec681f3Smrg   }
27587ec681f3Smrg}
27597ec681f3Smrg
27607ec681f3Smrgstatic void
27617ec681f3Smrgcompute_blit_3d_layers(const VkOffset3D *offsets,
27627ec681f3Smrg                       uint32_t *min_layer, uint32_t *max_layer,
27637ec681f3Smrg                       bool *mirror_z);
27647ec681f3Smrg
27657ec681f3Smrg/**
27667ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
27677ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
27687ec681f3Smrg *
27697ec681f3Smrg * The TFU blit path doesn't handle scaling so the blit filter parameter can
27707ec681f3Smrg * be ignored.
27717ec681f3Smrg */
27727ec681f3Smrgstatic bool
27737ec681f3Smrgblit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
27747ec681f3Smrg         struct v3dv_image *dst,
27757ec681f3Smrg         struct v3dv_image *src,
27767ec681f3Smrg         const VkImageBlit2KHR *region)
27777ec681f3Smrg{
27787ec681f3Smrg   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
27797ec681f3Smrg   assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
27807ec681f3Smrg
27817ec681f3Smrg   /* Format must match */
27827ec681f3Smrg   if (src->vk.format != dst->vk.format)
27837ec681f3Smrg      return false;
27847ec681f3Smrg
27857ec681f3Smrg   /* Destination can't be raster format */
27867ec681f3Smrg   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
27877ec681f3Smrg      return false;
27887ec681f3Smrg
27897ec681f3Smrg   /* Source region must start at (0,0) */
27907ec681f3Smrg   if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
27917ec681f3Smrg      return false;
27927ec681f3Smrg
27937ec681f3Smrg   /* Destination image must be complete */
27947ec681f3Smrg   if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
27957ec681f3Smrg      return false;
27967ec681f3Smrg
27977ec681f3Smrg   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
27987ec681f3Smrg   const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
27997ec681f3Smrg   const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
28007ec681f3Smrg   if (region->dstOffsets[1].x < dst_width - 1||
28017ec681f3Smrg       region->dstOffsets[1].y < dst_height - 1) {
28027ec681f3Smrg      return false;
28037ec681f3Smrg   }
28047ec681f3Smrg
28057ec681f3Smrg   /* No XY scaling */
28067ec681f3Smrg   if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
28077ec681f3Smrg       region->srcOffsets[1].y != region->dstOffsets[1].y) {
28087ec681f3Smrg      return false;
28097ec681f3Smrg   }
28107ec681f3Smrg
28117ec681f3Smrg   /* If the format is D24S8 both aspects need to be copied, since the TFU
28127ec681f3Smrg    * can't be programmed to copy only one aspect of the image.
28137ec681f3Smrg    */
28147ec681f3Smrg   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
28157ec681f3Smrg       const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
28167ec681f3Smrg                                             VK_IMAGE_ASPECT_STENCIL_BIT;
28177ec681f3Smrg       if (region->dstSubresource.aspectMask != ds_aspects)
28187ec681f3Smrg          return false;
28197ec681f3Smrg   }
28207ec681f3Smrg
28217ec681f3Smrg   /* Our TFU blits only handle exact copies (it requires same formats
28227ec681f3Smrg    * on input and output, no scaling, etc), so there is no pixel format
28237ec681f3Smrg    * conversions and we can rewrite the format to use one that is TFU
28247ec681f3Smrg    * compatible based on its texel size.
28257ec681f3Smrg    */
28267ec681f3Smrg   const struct v3dv_format *format =
28277ec681f3Smrg      v3dv_get_compatible_tfu_format(cmd_buffer->device,
28287ec681f3Smrg                                     dst->cpp, NULL);
28297ec681f3Smrg
28307ec681f3Smrg   /* Emit a TFU job for each layer to blit */
28317ec681f3Smrg   assert(region->dstSubresource.layerCount ==
28327ec681f3Smrg          region->srcSubresource.layerCount);
28337ec681f3Smrg
28347ec681f3Smrg   uint32_t min_dst_layer;
28357ec681f3Smrg   uint32_t max_dst_layer;
28367ec681f3Smrg   bool dst_mirror_z = false;
28377ec681f3Smrg   if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
28387ec681f3Smrg      compute_blit_3d_layers(region->dstOffsets,
28397ec681f3Smrg                             &min_dst_layer, &max_dst_layer,
28407ec681f3Smrg                             &dst_mirror_z);
28417ec681f3Smrg   } else {
28427ec681f3Smrg      min_dst_layer = region->dstSubresource.baseArrayLayer;
28437ec681f3Smrg      max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
28447ec681f3Smrg   }
28457ec681f3Smrg
28467ec681f3Smrg   uint32_t min_src_layer;
28477ec681f3Smrg   uint32_t max_src_layer;
28487ec681f3Smrg   bool src_mirror_z = false;
28497ec681f3Smrg   if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
28507ec681f3Smrg      compute_blit_3d_layers(region->srcOffsets,
28517ec681f3Smrg                             &min_src_layer, &max_src_layer,
28527ec681f3Smrg                             &src_mirror_z);
28537ec681f3Smrg   } else {
28547ec681f3Smrg      min_src_layer = region->srcSubresource.baseArrayLayer;
28557ec681f3Smrg      max_src_layer = min_src_layer + region->srcSubresource.layerCount;
28567ec681f3Smrg   }
28577ec681f3Smrg
28587ec681f3Smrg   /* No Z scaling for 3D images (for non-3D images both src and dst must
28597ec681f3Smrg    * have the same layerCount).
28607ec681f3Smrg    */
28617ec681f3Smrg   if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
28627ec681f3Smrg      return false;
28637ec681f3Smrg
28647ec681f3Smrg   const uint32_t layer_count = max_dst_layer - min_dst_layer;
28657ec681f3Smrg   const uint32_t src_mip_level = region->srcSubresource.mipLevel;
28667ec681f3Smrg   for (uint32_t i = 0; i < layer_count; i++) {
28677ec681f3Smrg      /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
28687ec681f3Smrg       * only involves reversing the order of the slices.
28697ec681f3Smrg       */
28707ec681f3Smrg      const uint32_t dst_layer =
28717ec681f3Smrg         dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i;
28727ec681f3Smrg      const uint32_t src_layer =
28737ec681f3Smrg         src_mirror_z ? max_src_layer - i - 1: min_src_layer + i;
28747ec681f3Smrg      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
28757ec681f3Smrg         (cmd_buffer, dst, dst_mip_level, dst_layer,
28767ec681f3Smrg          src, src_mip_level, src_layer,
28777ec681f3Smrg          dst_width, dst_height, format);
28787ec681f3Smrg   }
28797ec681f3Smrg
28807ec681f3Smrg   return true;
28817ec681f3Smrg}
28827ec681f3Smrg
28837ec681f3Smrgstatic bool
28847ec681f3Smrgformat_needs_software_int_clamp(VkFormat format)
28857ec681f3Smrg{
28867ec681f3Smrg   switch (format) {
28877ec681f3Smrg      case VK_FORMAT_A2R10G10B10_UINT_PACK32:
28887ec681f3Smrg      case VK_FORMAT_A2R10G10B10_SINT_PACK32:
28897ec681f3Smrg      case VK_FORMAT_A2B10G10R10_UINT_PACK32:
28907ec681f3Smrg      case VK_FORMAT_A2B10G10R10_SINT_PACK32:
28917ec681f3Smrg         return true;
28927ec681f3Smrg      default:
28937ec681f3Smrg         return false;
28947ec681f3Smrg   };
28957ec681f3Smrg}
28967ec681f3Smrg
28977ec681f3Smrgstatic void
28987ec681f3Smrgget_blit_pipeline_cache_key(VkFormat dst_format,
28997ec681f3Smrg                            VkFormat src_format,
29007ec681f3Smrg                            VkColorComponentFlags cmask,
29017ec681f3Smrg                            VkSampleCountFlagBits dst_samples,
29027ec681f3Smrg                            VkSampleCountFlagBits src_samples,
29037ec681f3Smrg                            uint8_t *key)
29047ec681f3Smrg{
29057ec681f3Smrg   memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
29067ec681f3Smrg
29077ec681f3Smrg   uint32_t *p = (uint32_t *) key;
29087ec681f3Smrg
29097ec681f3Smrg   *p = dst_format;
29107ec681f3Smrg   p++;
29117ec681f3Smrg
29127ec681f3Smrg   /* Generally, when blitting from a larger format to a smaller format
29137ec681f3Smrg    * the hardware takes care of clamping the source to the RT range.
29147ec681f3Smrg    * Specifically, for integer formats, this is done by using
29157ec681f3Smrg    * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
29167ec681f3Smrg    * clamps to the bit-size of the render type, and some formats, such as
29177ec681f3Smrg    * rgb10a2_uint have a 16-bit type, so it won't do what we need and we
29187ec681f3Smrg    * require to clamp in software. In these cases, we need to amend the blit
29197ec681f3Smrg    * shader with clamp code that depends on both the src and dst formats, so
29207ec681f3Smrg    * we need the src format to be part of the key.
29217ec681f3Smrg    */
29227ec681f3Smrg   *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
29237ec681f3Smrg   p++;
29247ec681f3Smrg
29257ec681f3Smrg   *p = cmask;
29267ec681f3Smrg   p++;
29277ec681f3Smrg
29287ec681f3Smrg   *p = (dst_samples << 8) | src_samples;
29297ec681f3Smrg   p++;
29307ec681f3Smrg
29317ec681f3Smrg   assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
29327ec681f3Smrg}
29337ec681f3Smrg
29347ec681f3Smrgstatic bool
29357ec681f3Smrgcreate_blit_render_pass(struct v3dv_device *device,
29367ec681f3Smrg                        VkFormat dst_format,
29377ec681f3Smrg                        VkFormat src_format,
29387ec681f3Smrg                        VkRenderPass *pass_load,
29397ec681f3Smrg                        VkRenderPass *pass_no_load)
29407ec681f3Smrg{
29417ec681f3Smrg   const bool is_color_blit = vk_format_is_color(dst_format);
29427ec681f3Smrg
29437ec681f3Smrg   /* Attachment load operation is specified below */
29447ec681f3Smrg   VkAttachmentDescription att = {
29457ec681f3Smrg      .format = dst_format,
29467ec681f3Smrg      .samples = VK_SAMPLE_COUNT_1_BIT,
29477ec681f3Smrg      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
29487ec681f3Smrg      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
29497ec681f3Smrg      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
29507ec681f3Smrg   };
29517ec681f3Smrg
29527ec681f3Smrg   VkAttachmentReference att_ref = {
29537ec681f3Smrg      .attachment = 0,
29547ec681f3Smrg      .layout = VK_IMAGE_LAYOUT_GENERAL,
29557ec681f3Smrg   };
29567ec681f3Smrg
29577ec681f3Smrg   VkSubpassDescription subpass = {
29587ec681f3Smrg      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
29597ec681f3Smrg      .inputAttachmentCount = 0,
29607ec681f3Smrg      .colorAttachmentCount = is_color_blit ? 1 : 0,
29617ec681f3Smrg      .pColorAttachments = is_color_blit ? &att_ref : NULL,
29627ec681f3Smrg      .pResolveAttachments = NULL,
29637ec681f3Smrg      .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
29647ec681f3Smrg      .preserveAttachmentCount = 0,
29657ec681f3Smrg      .pPreserveAttachments = NULL,
29667ec681f3Smrg   };
29677ec681f3Smrg
29687ec681f3Smrg   VkRenderPassCreateInfo info = {
29697ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
29707ec681f3Smrg      .attachmentCount = 1,
29717ec681f3Smrg      .pAttachments = &att,
29727ec681f3Smrg      .subpassCount = 1,
29737ec681f3Smrg      .pSubpasses = &subpass,
29747ec681f3Smrg      .dependencyCount = 0,
29757ec681f3Smrg      .pDependencies = NULL,
29767ec681f3Smrg   };
29777ec681f3Smrg
29787ec681f3Smrg   VkResult result;
29797ec681f3Smrg   att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
29807ec681f3Smrg   result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
29817ec681f3Smrg                                  &info, &device->vk.alloc, pass_load);
29827ec681f3Smrg   if (result != VK_SUCCESS)
29837ec681f3Smrg      return false;
29847ec681f3Smrg
29857ec681f3Smrg   att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
29867ec681f3Smrg   result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
29877ec681f3Smrg                                  &info, &device->vk.alloc, pass_no_load);
29887ec681f3Smrg   return result == VK_SUCCESS;
29897ec681f3Smrg}
29907ec681f3Smrg
29917ec681f3Smrgstatic nir_ssa_def *
29927ec681f3Smrggen_rect_vertices(nir_builder *b)
29937ec681f3Smrg{
29947ec681f3Smrg   nir_ssa_def *vertex_id = nir_load_vertex_id(b);
29957ec681f3Smrg
29967ec681f3Smrg   /* vertex 0: -1.0, -1.0
29977ec681f3Smrg    * vertex 1: -1.0,  1.0
29987ec681f3Smrg    * vertex 2:  1.0, -1.0
29997ec681f3Smrg    * vertex 3:  1.0,  1.0
30007ec681f3Smrg    *
30017ec681f3Smrg    * so:
30027ec681f3Smrg    *
30037ec681f3Smrg    * channel 0 is vertex_id < 2 ? -1.0 :  1.0
30047ec681f3Smrg    * channel 1 is vertex id & 1 ?  1.0 : -1.0
30057ec681f3Smrg    */
30067ec681f3Smrg
30077ec681f3Smrg   nir_ssa_def *one = nir_imm_int(b, 1);
30087ec681f3Smrg   nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
30097ec681f3Smrg   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
30107ec681f3Smrg
30117ec681f3Smrg   nir_ssa_def *comp[4];
30127ec681f3Smrg   comp[0] = nir_bcsel(b, c0cmp,
30137ec681f3Smrg                       nir_imm_float(b, -1.0f),
30147ec681f3Smrg                       nir_imm_float(b, 1.0f));
30157ec681f3Smrg
30167ec681f3Smrg   comp[1] = nir_bcsel(b, c1cmp,
30177ec681f3Smrg                       nir_imm_float(b, 1.0f),
30187ec681f3Smrg                       nir_imm_float(b, -1.0f));
30197ec681f3Smrg   comp[2] = nir_imm_float(b, 0.0f);
30207ec681f3Smrg   comp[3] = nir_imm_float(b, 1.0f);
30217ec681f3Smrg   return nir_vec(b, comp, 4);
30227ec681f3Smrg}
30237ec681f3Smrg
30247ec681f3Smrgstatic nir_ssa_def *
30257ec681f3Smrggen_tex_coords(nir_builder *b)
30267ec681f3Smrg{
30277ec681f3Smrg   nir_ssa_def *tex_box =
30287ec681f3Smrg      nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
30297ec681f3Smrg
30307ec681f3Smrg   nir_ssa_def *tex_z =
30317ec681f3Smrg      nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
30327ec681f3Smrg
30337ec681f3Smrg   nir_ssa_def *vertex_id = nir_load_vertex_id(b);
30347ec681f3Smrg
30357ec681f3Smrg   /* vertex 0: src0_x, src0_y
30367ec681f3Smrg    * vertex 1: src0_x, src1_y
30377ec681f3Smrg    * vertex 2: src1_x, src0_y
30387ec681f3Smrg    * vertex 3: src1_x, src1_y
30397ec681f3Smrg    *
30407ec681f3Smrg    * So:
30417ec681f3Smrg    *
30427ec681f3Smrg    * channel 0 is vertex_id < 2 ? src0_x : src1_x
30437ec681f3Smrg    * channel 1 is vertex id & 1 ? src1_y : src0_y
30447ec681f3Smrg    */
30457ec681f3Smrg
30467ec681f3Smrg   nir_ssa_def *one = nir_imm_int(b, 1);
30477ec681f3Smrg   nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
30487ec681f3Smrg   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
30497ec681f3Smrg
30507ec681f3Smrg   nir_ssa_def *comp[4];
30517ec681f3Smrg   comp[0] = nir_bcsel(b, c0cmp,
30527ec681f3Smrg                       nir_channel(b, tex_box, 0),
30537ec681f3Smrg                       nir_channel(b, tex_box, 2));
30547ec681f3Smrg
30557ec681f3Smrg   comp[1] = nir_bcsel(b, c1cmp,
30567ec681f3Smrg                       nir_channel(b, tex_box, 3),
30577ec681f3Smrg                       nir_channel(b, tex_box, 1));
30587ec681f3Smrg   comp[2] = tex_z;
30597ec681f3Smrg   comp[3] = nir_imm_float(b, 1.0f);
30607ec681f3Smrg   return nir_vec(b, comp, 4);
30617ec681f3Smrg}
30627ec681f3Smrg
30637ec681f3Smrgstatic nir_ssa_def *
30647ec681f3Smrgbuild_nir_tex_op_read(struct nir_builder *b,
30657ec681f3Smrg                      nir_ssa_def *tex_pos,
30667ec681f3Smrg                      enum glsl_base_type tex_type,
30677ec681f3Smrg                      enum glsl_sampler_dim dim)
30687ec681f3Smrg{
30697ec681f3Smrg   assert(dim != GLSL_SAMPLER_DIM_MS);
30707ec681f3Smrg
30717ec681f3Smrg   const struct glsl_type *sampler_type =
30727ec681f3Smrg      glsl_sampler_type(dim, false, false, tex_type);
30737ec681f3Smrg   nir_variable *sampler =
30747ec681f3Smrg      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
30757ec681f3Smrg   sampler->data.descriptor_set = 0;
30767ec681f3Smrg   sampler->data.binding = 0;
30777ec681f3Smrg
30787ec681f3Smrg   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
30797ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
30807ec681f3Smrg   tex->sampler_dim = dim;
30817ec681f3Smrg   tex->op = nir_texop_tex;
30827ec681f3Smrg   tex->src[0].src_type = nir_tex_src_coord;
30837ec681f3Smrg   tex->src[0].src = nir_src_for_ssa(tex_pos);
30847ec681f3Smrg   tex->src[1].src_type = nir_tex_src_texture_deref;
30857ec681f3Smrg   tex->src[1].src = nir_src_for_ssa(tex_deref);
30867ec681f3Smrg   tex->src[2].src_type = nir_tex_src_sampler_deref;
30877ec681f3Smrg   tex->src[2].src = nir_src_for_ssa(tex_deref);
30887ec681f3Smrg   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
30897ec681f3Smrg   tex->is_array = glsl_sampler_type_is_array(sampler_type);
30907ec681f3Smrg   tex->coord_components = tex_pos->num_components;
30917ec681f3Smrg
30927ec681f3Smrg   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
30937ec681f3Smrg   nir_builder_instr_insert(b, &tex->instr);
30947ec681f3Smrg   return &tex->dest.ssa;
30957ec681f3Smrg}
30967ec681f3Smrg
30977ec681f3Smrgstatic nir_ssa_def *
30987ec681f3Smrgbuild_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
30997ec681f3Smrg                                 nir_variable *sampler,
31007ec681f3Smrg                                 nir_ssa_def *tex_deref,
31017ec681f3Smrg                                 enum glsl_base_type tex_type,
31027ec681f3Smrg                                 nir_ssa_def *tex_pos,
31037ec681f3Smrg                                 nir_ssa_def *sample_idx)
31047ec681f3Smrg{
31057ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
31067ec681f3Smrg   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
31077ec681f3Smrg   tex->op = nir_texop_txf_ms;
31087ec681f3Smrg   tex->src[0].src_type = nir_tex_src_coord;
31097ec681f3Smrg   tex->src[0].src = nir_src_for_ssa(tex_pos);
31107ec681f3Smrg   tex->src[1].src_type = nir_tex_src_texture_deref;
31117ec681f3Smrg   tex->src[1].src = nir_src_for_ssa(tex_deref);
31127ec681f3Smrg   tex->src[2].src_type = nir_tex_src_sampler_deref;
31137ec681f3Smrg   tex->src[2].src = nir_src_for_ssa(tex_deref);
31147ec681f3Smrg   tex->src[3].src_type = nir_tex_src_ms_index;
31157ec681f3Smrg   tex->src[3].src = nir_src_for_ssa(sample_idx);
31167ec681f3Smrg   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
31177ec681f3Smrg   tex->is_array = false;
31187ec681f3Smrg   tex->coord_components = tex_pos->num_components;
31197ec681f3Smrg
31207ec681f3Smrg   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
31217ec681f3Smrg   nir_builder_instr_insert(b, &tex->instr);
31227ec681f3Smrg   return &tex->dest.ssa;
31237ec681f3Smrg}
31247ec681f3Smrg
31257ec681f3Smrg/* Fetches all samples at the given position and averages them */
31267ec681f3Smrgstatic nir_ssa_def *
31277ec681f3Smrgbuild_nir_tex_op_ms_resolve(struct nir_builder *b,
31287ec681f3Smrg                            nir_ssa_def *tex_pos,
31297ec681f3Smrg                            enum glsl_base_type tex_type,
31307ec681f3Smrg                            VkSampleCountFlagBits src_samples)
31317ec681f3Smrg{
31327ec681f3Smrg   assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
31337ec681f3Smrg   const struct glsl_type *sampler_type =
31347ec681f3Smrg      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
31357ec681f3Smrg   nir_variable *sampler =
31367ec681f3Smrg      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
31377ec681f3Smrg   sampler->data.descriptor_set = 0;
31387ec681f3Smrg   sampler->data.binding = 0;
31397ec681f3Smrg
31407ec681f3Smrg   const bool is_int = glsl_base_type_is_integer(tex_type);
31417ec681f3Smrg
31427ec681f3Smrg   nir_ssa_def *tmp = NULL;
31437ec681f3Smrg   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
31447ec681f3Smrg   for (uint32_t i = 0; i < src_samples; i++) {
31457ec681f3Smrg      nir_ssa_def *s =
31467ec681f3Smrg         build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
31477ec681f3Smrg                                          tex_type, tex_pos,
31487ec681f3Smrg                                          nir_imm_int(b, i));
31497ec681f3Smrg
31507ec681f3Smrg      /* For integer formats, the multisample resolve operation is expected to
31517ec681f3Smrg       * return one of the samples, we just return the first one.
31527ec681f3Smrg       */
31537ec681f3Smrg      if (is_int)
31547ec681f3Smrg         return s;
31557ec681f3Smrg
31567ec681f3Smrg      tmp = i == 0 ? s : nir_fadd(b, tmp, s);
31577ec681f3Smrg   }
31587ec681f3Smrg
31597ec681f3Smrg   assert(!is_int);
31607ec681f3Smrg   return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
31617ec681f3Smrg}
31627ec681f3Smrg
31637ec681f3Smrg/* Fetches the current sample (gl_SampleID) at the given position */
31647ec681f3Smrgstatic nir_ssa_def *
31657ec681f3Smrgbuild_nir_tex_op_ms_read(struct nir_builder *b,
31667ec681f3Smrg                         nir_ssa_def *tex_pos,
31677ec681f3Smrg                         enum glsl_base_type tex_type)
31687ec681f3Smrg{
31697ec681f3Smrg   const struct glsl_type *sampler_type =
31707ec681f3Smrg      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
31717ec681f3Smrg   nir_variable *sampler =
31727ec681f3Smrg      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
31737ec681f3Smrg   sampler->data.descriptor_set = 0;
31747ec681f3Smrg   sampler->data.binding = 0;
31757ec681f3Smrg
31767ec681f3Smrg   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
31777ec681f3Smrg
31787ec681f3Smrg   return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
31797ec681f3Smrg                                           tex_type, tex_pos,
31807ec681f3Smrg                                           nir_load_sample_id(b));
31817ec681f3Smrg}
31827ec681f3Smrg
31837ec681f3Smrgstatic nir_ssa_def *
31847ec681f3Smrgbuild_nir_tex_op(struct nir_builder *b,
31857ec681f3Smrg                 struct v3dv_device *device,
31867ec681f3Smrg                 nir_ssa_def *tex_pos,
31877ec681f3Smrg                 enum glsl_base_type tex_type,
31887ec681f3Smrg                 VkSampleCountFlagBits dst_samples,
31897ec681f3Smrg                 VkSampleCountFlagBits src_samples,
31907ec681f3Smrg                 enum glsl_sampler_dim dim)
31917ec681f3Smrg{
31927ec681f3Smrg   switch (dim) {
31937ec681f3Smrg   case GLSL_SAMPLER_DIM_MS:
31947ec681f3Smrg      assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
31957ec681f3Smrg      /* For multisampled texture sources we need to use fetching instead of
31967ec681f3Smrg       * normalized texture coordinates. We already configured our blit
31977ec681f3Smrg       * coordinates to be in texel units, but here we still need to convert
31987ec681f3Smrg       * them from floating point to integer.
31997ec681f3Smrg       */
32007ec681f3Smrg      tex_pos = nir_f2i32(b, tex_pos);
32017ec681f3Smrg
32027ec681f3Smrg      if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
32037ec681f3Smrg         return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
32047ec681f3Smrg      else
32057ec681f3Smrg         return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
32067ec681f3Smrg   default:
32077ec681f3Smrg      assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
32087ec681f3Smrg      return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
32097ec681f3Smrg   }
32107ec681f3Smrg}
32117ec681f3Smrg
32127ec681f3Smrgstatic nir_shader *
32137ec681f3Smrgget_blit_vs()
32147ec681f3Smrg{
32157ec681f3Smrg   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
32167ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
32177ec681f3Smrg                                                  "meta blit vs");
32187ec681f3Smrg
32197ec681f3Smrg   const struct glsl_type *vec4 = glsl_vec4_type();
32207ec681f3Smrg
32217ec681f3Smrg   nir_variable *vs_out_pos =
32227ec681f3Smrg      nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
32237ec681f3Smrg   vs_out_pos->data.location = VARYING_SLOT_POS;
32247ec681f3Smrg
32257ec681f3Smrg   nir_variable *vs_out_tex_coord =
32267ec681f3Smrg      nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
32277ec681f3Smrg   vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
32287ec681f3Smrg   vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
32297ec681f3Smrg
32307ec681f3Smrg   nir_ssa_def *pos = gen_rect_vertices(&b);
32317ec681f3Smrg   nir_store_var(&b, vs_out_pos, pos, 0xf);
32327ec681f3Smrg
32337ec681f3Smrg   nir_ssa_def *tex_coord = gen_tex_coords(&b);
32347ec681f3Smrg   nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
32357ec681f3Smrg
32367ec681f3Smrg   return b.shader;
32377ec681f3Smrg}
32387ec681f3Smrg
32397ec681f3Smrgstatic uint32_t
32407ec681f3Smrgget_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
32417ec681f3Smrg{
32427ec681f3Smrg   switch (sampler_dim) {
32437ec681f3Smrg   case GLSL_SAMPLER_DIM_1D: return 0x1;
32447ec681f3Smrg   case GLSL_SAMPLER_DIM_2D: return 0x3;
32457ec681f3Smrg   case GLSL_SAMPLER_DIM_MS: return 0x3;
32467ec681f3Smrg   case GLSL_SAMPLER_DIM_3D: return 0x7;
32477ec681f3Smrg   default:
32487ec681f3Smrg      unreachable("invalid sampler dim");
32497ec681f3Smrg   };
32507ec681f3Smrg}
32517ec681f3Smrg
32527ec681f3Smrgstatic nir_shader *
32537ec681f3Smrgget_color_blit_fs(struct v3dv_device *device,
32547ec681f3Smrg                  VkFormat dst_format,
32557ec681f3Smrg                  VkFormat src_format,
32567ec681f3Smrg                  VkSampleCountFlagBits dst_samples,
32577ec681f3Smrg                  VkSampleCountFlagBits src_samples,
32587ec681f3Smrg                  enum glsl_sampler_dim sampler_dim)
32597ec681f3Smrg{
32607ec681f3Smrg   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
32617ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
32627ec681f3Smrg                                                  "meta blit fs");
32637ec681f3Smrg
32647ec681f3Smrg   const struct glsl_type *vec4 = glsl_vec4_type();
32657ec681f3Smrg
32667ec681f3Smrg   nir_variable *fs_in_tex_coord =
32677ec681f3Smrg      nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
32687ec681f3Smrg   fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
32697ec681f3Smrg
32707ec681f3Smrg   const struct glsl_type *fs_out_type =
32717ec681f3Smrg      vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
32727ec681f3Smrg      vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
32737ec681f3Smrg                                      glsl_vec4_type();
32747ec681f3Smrg
32757ec681f3Smrg   enum glsl_base_type src_base_type =
32767ec681f3Smrg      vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
32777ec681f3Smrg      vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
32787ec681f3Smrg                                      GLSL_TYPE_FLOAT;
32797ec681f3Smrg
32807ec681f3Smrg   nir_variable *fs_out_color =
32817ec681f3Smrg      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
32827ec681f3Smrg   fs_out_color->data.location = FRAG_RESULT_DATA0;
32837ec681f3Smrg
32847ec681f3Smrg   nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
32857ec681f3Smrg   const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
32867ec681f3Smrg   tex_coord = nir_channels(&b, tex_coord, channel_mask);
32877ec681f3Smrg
32887ec681f3Smrg   nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
32897ec681f3Smrg                                         dst_samples, src_samples, sampler_dim);
32907ec681f3Smrg
32917ec681f3Smrg   /* For integer textures, if the bit-size of the destination is too small to
32927ec681f3Smrg    * hold source value, Vulkan (CTS) expects the implementation to clamp to the
32937ec681f3Smrg    * maximum value the destination can hold. The hardware can clamp to the
32947ec681f3Smrg    * render target type, which usually matches the component bit-size, but
32957ec681f3Smrg    * there are some cases that won't match, such as rgb10a2, which has a 16-bit
32967ec681f3Smrg    * render target type, so in these cases we need to clamp manually.
32977ec681f3Smrg    */
32987ec681f3Smrg   if (format_needs_software_int_clamp(dst_format)) {
32997ec681f3Smrg      assert(vk_format_is_int(dst_format));
33007ec681f3Smrg      enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
33017ec681f3Smrg      enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
33027ec681f3Smrg
33037ec681f3Smrg      nir_ssa_def *c[4];
33047ec681f3Smrg      for (uint32_t i = 0; i < 4; i++) {
33057ec681f3Smrg         c[i] = nir_channel(&b, color, i);
33067ec681f3Smrg
33077ec681f3Smrg         const uint32_t src_bit_size =
33087ec681f3Smrg            util_format_get_component_bits(src_pformat,
33097ec681f3Smrg                                           UTIL_FORMAT_COLORSPACE_RGB,
33107ec681f3Smrg                                           i);
33117ec681f3Smrg         const uint32_t dst_bit_size =
33127ec681f3Smrg            util_format_get_component_bits(dst_pformat,
33137ec681f3Smrg                                           UTIL_FORMAT_COLORSPACE_RGB,
33147ec681f3Smrg                                           i);
33157ec681f3Smrg
33167ec681f3Smrg         if (dst_bit_size >= src_bit_size)
33177ec681f3Smrg            continue;
33187ec681f3Smrg
33197ec681f3Smrg         assert(dst_bit_size > 0);
33207ec681f3Smrg         if (util_format_is_pure_uint(dst_pformat)) {
33217ec681f3Smrg            nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
33227ec681f3Smrg            c[i] = nir_umin(&b, c[i], max);
33237ec681f3Smrg         } else {
33247ec681f3Smrg            nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
33257ec681f3Smrg            nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
33267ec681f3Smrg            c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
33277ec681f3Smrg         }
33287ec681f3Smrg      }
33297ec681f3Smrg
33307ec681f3Smrg      color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
33317ec681f3Smrg   }
33327ec681f3Smrg
33337ec681f3Smrg   nir_store_var(&b, fs_out_color, color, 0xf);
33347ec681f3Smrg
33357ec681f3Smrg   return b.shader;
33367ec681f3Smrg}
33377ec681f3Smrg
33387ec681f3Smrgstatic bool
33397ec681f3Smrgcreate_pipeline(struct v3dv_device *device,
33407ec681f3Smrg                struct v3dv_render_pass *pass,
33417ec681f3Smrg                struct nir_shader *vs_nir,
33427ec681f3Smrg                struct nir_shader *gs_nir,
33437ec681f3Smrg                struct nir_shader *fs_nir,
33447ec681f3Smrg                const VkPipelineVertexInputStateCreateInfo *vi_state,
33457ec681f3Smrg                const VkPipelineDepthStencilStateCreateInfo *ds_state,
33467ec681f3Smrg                const VkPipelineColorBlendStateCreateInfo *cb_state,
33477ec681f3Smrg                const VkPipelineMultisampleStateCreateInfo *ms_state,
33487ec681f3Smrg                const VkPipelineLayout layout,
33497ec681f3Smrg                VkPipeline *pipeline)
33507ec681f3Smrg{
33517ec681f3Smrg   struct vk_shader_module vs_m;
33527ec681f3Smrg   struct vk_shader_module gs_m;
33537ec681f3Smrg   struct vk_shader_module fs_m;
33547ec681f3Smrg
33557ec681f3Smrg   uint32_t num_stages = gs_nir ? 3 : 2;
33567ec681f3Smrg
33577ec681f3Smrg   v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
33587ec681f3Smrg   v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
33597ec681f3Smrg
33607ec681f3Smrg   VkPipelineShaderStageCreateInfo stages[3] = {
33617ec681f3Smrg      {
33627ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
33637ec681f3Smrg         .stage = VK_SHADER_STAGE_VERTEX_BIT,
33647ec681f3Smrg         .module = vk_shader_module_to_handle(&vs_m),
33657ec681f3Smrg         .pName = "main",
33667ec681f3Smrg      },
33677ec681f3Smrg      {
33687ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
33697ec681f3Smrg         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
33707ec681f3Smrg         .module = vk_shader_module_to_handle(&fs_m),
33717ec681f3Smrg         .pName = "main",
33727ec681f3Smrg      },
33737ec681f3Smrg      {
33747ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
33757ec681f3Smrg         .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
33767ec681f3Smrg         .module = VK_NULL_HANDLE,
33777ec681f3Smrg         .pName = "main",
33787ec681f3Smrg      },
33797ec681f3Smrg   };
33807ec681f3Smrg
33817ec681f3Smrg   if (gs_nir) {
33827ec681f3Smrg      v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
33837ec681f3Smrg      stages[2].module = vk_shader_module_to_handle(&gs_m);
33847ec681f3Smrg   }
33857ec681f3Smrg
33867ec681f3Smrg   VkGraphicsPipelineCreateInfo info = {
33877ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
33887ec681f3Smrg
33897ec681f3Smrg      .stageCount = num_stages,
33907ec681f3Smrg      .pStages = stages,
33917ec681f3Smrg
33927ec681f3Smrg      .pVertexInputState = vi_state,
33937ec681f3Smrg
33947ec681f3Smrg      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
33957ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
33967ec681f3Smrg         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
33977ec681f3Smrg         .primitiveRestartEnable = false,
33987ec681f3Smrg      },
33997ec681f3Smrg
34007ec681f3Smrg      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
34017ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
34027ec681f3Smrg         .viewportCount = 1,
34037ec681f3Smrg         .scissorCount = 1,
34047ec681f3Smrg      },
34057ec681f3Smrg
34067ec681f3Smrg      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
34077ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
34087ec681f3Smrg         .rasterizerDiscardEnable = false,
34097ec681f3Smrg         .polygonMode = VK_POLYGON_MODE_FILL,
34107ec681f3Smrg         .cullMode = VK_CULL_MODE_NONE,
34117ec681f3Smrg         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
34127ec681f3Smrg         .depthBiasEnable = false,
34137ec681f3Smrg      },
34147ec681f3Smrg
34157ec681f3Smrg      .pMultisampleState = ms_state,
34167ec681f3Smrg
34177ec681f3Smrg      .pDepthStencilState = ds_state,
34187ec681f3Smrg
34197ec681f3Smrg      .pColorBlendState = cb_state,
34207ec681f3Smrg
34217ec681f3Smrg      /* The meta clear pipeline declares all state as dynamic.
34227ec681f3Smrg       * As a consequence, vkCmdBindPipeline writes no dynamic state
34237ec681f3Smrg       * to the cmd buffer. Therefore, at the end of the meta clear,
34247ec681f3Smrg       * we need only restore dynamic state that was vkCmdSet.
34257ec681f3Smrg       */
34267ec681f3Smrg      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
34277ec681f3Smrg         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
34287ec681f3Smrg         .dynamicStateCount = 6,
34297ec681f3Smrg         .pDynamicStates = (VkDynamicState[]) {
34307ec681f3Smrg            VK_DYNAMIC_STATE_VIEWPORT,
34317ec681f3Smrg            VK_DYNAMIC_STATE_SCISSOR,
34327ec681f3Smrg            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
34337ec681f3Smrg            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
34347ec681f3Smrg            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
34357ec681f3Smrg            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
34367ec681f3Smrg            VK_DYNAMIC_STATE_DEPTH_BIAS,
34377ec681f3Smrg            VK_DYNAMIC_STATE_LINE_WIDTH,
34387ec681f3Smrg         },
34397ec681f3Smrg      },
34407ec681f3Smrg
34417ec681f3Smrg      .flags = 0,
34427ec681f3Smrg      .layout = layout,
34437ec681f3Smrg      .renderPass = v3dv_render_pass_to_handle(pass),
34447ec681f3Smrg      .subpass = 0,
34457ec681f3Smrg   };
34467ec681f3Smrg
34477ec681f3Smrg   VkResult result =
34487ec681f3Smrg      v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
34497ec681f3Smrg                                   VK_NULL_HANDLE,
34507ec681f3Smrg                                   1, &info,
34517ec681f3Smrg                                   &device->vk.alloc,
34527ec681f3Smrg                                   pipeline);
34537ec681f3Smrg
34547ec681f3Smrg   ralloc_free(vs_nir);
34557ec681f3Smrg   ralloc_free(fs_nir);
34567ec681f3Smrg
34577ec681f3Smrg   return result == VK_SUCCESS;
34587ec681f3Smrg}
34597ec681f3Smrg
34607ec681f3Smrgstatic enum glsl_sampler_dim
34617ec681f3Smrgget_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
34627ec681f3Smrg{
34637ec681f3Smrg   /* From the Vulkan 1.0 spec, VkImageCreateInfo Validu Usage:
34647ec681f3Smrg    *
34657ec681f3Smrg    *   "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
34667ec681f3Smrg    *    VK_IMAGE_TYPE_2D, ..."
34677ec681f3Smrg    */
34687ec681f3Smrg   assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
34697ec681f3Smrg
34707ec681f3Smrg   switch (type) {
34717ec681f3Smrg   case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
34727ec681f3Smrg   case VK_IMAGE_TYPE_2D:
34737ec681f3Smrg      return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
34747ec681f3Smrg                                                    GLSL_SAMPLER_DIM_MS;
34757ec681f3Smrg   case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
34767ec681f3Smrg   default:
34777ec681f3Smrg      unreachable("Invalid image type");
34787ec681f3Smrg   }
34797ec681f3Smrg}
34807ec681f3Smrg
34817ec681f3Smrgstatic bool
34827ec681f3Smrgcreate_blit_pipeline(struct v3dv_device *device,
34837ec681f3Smrg                     VkFormat dst_format,
34847ec681f3Smrg                     VkFormat src_format,
34857ec681f3Smrg                     VkColorComponentFlags cmask,
34867ec681f3Smrg                     VkImageType src_type,
34877ec681f3Smrg                     VkSampleCountFlagBits dst_samples,
34887ec681f3Smrg                     VkSampleCountFlagBits src_samples,
34897ec681f3Smrg                     VkRenderPass _pass,
34907ec681f3Smrg                     VkPipelineLayout pipeline_layout,
34917ec681f3Smrg                     VkPipeline *pipeline)
34927ec681f3Smrg{
34937ec681f3Smrg   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
34947ec681f3Smrg
34957ec681f3Smrg   /* We always rewrite depth/stencil blits to compatible color blits */
34967ec681f3Smrg   assert(vk_format_is_color(dst_format));
34977ec681f3Smrg   assert(vk_format_is_color(src_format));
34987ec681f3Smrg
34997ec681f3Smrg   const enum glsl_sampler_dim sampler_dim =
35007ec681f3Smrg      get_sampler_dim(src_type, src_samples);
35017ec681f3Smrg
35027ec681f3Smrg   nir_shader *vs_nir = get_blit_vs();
35037ec681f3Smrg   nir_shader *fs_nir =
35047ec681f3Smrg      get_color_blit_fs(device, dst_format, src_format,
35057ec681f3Smrg                        dst_samples, src_samples, sampler_dim);
35067ec681f3Smrg
35077ec681f3Smrg   const VkPipelineVertexInputStateCreateInfo vi_state = {
35087ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
35097ec681f3Smrg      .vertexBindingDescriptionCount = 0,
35107ec681f3Smrg      .vertexAttributeDescriptionCount = 0,
35117ec681f3Smrg   };
35127ec681f3Smrg
35137ec681f3Smrg   VkPipelineDepthStencilStateCreateInfo ds_state = {
35147ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
35157ec681f3Smrg   };
35167ec681f3Smrg
35177ec681f3Smrg   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
35187ec681f3Smrg   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
35197ec681f3Smrg      .blendEnable = false,
35207ec681f3Smrg      .colorWriteMask = cmask,
35217ec681f3Smrg   };
35227ec681f3Smrg
35237ec681f3Smrg   const VkPipelineColorBlendStateCreateInfo cb_state = {
35247ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
35257ec681f3Smrg      .logicOpEnable = false,
35267ec681f3Smrg      .attachmentCount = 1,
35277ec681f3Smrg      .pAttachments = blend_att_state
35287ec681f3Smrg   };
35297ec681f3Smrg
35307ec681f3Smrg   const VkPipelineMultisampleStateCreateInfo ms_state = {
35317ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
35327ec681f3Smrg      .rasterizationSamples = dst_samples,
35337ec681f3Smrg      .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
35347ec681f3Smrg      .pSampleMask = NULL,
35357ec681f3Smrg      .alphaToCoverageEnable = false,
35367ec681f3Smrg      .alphaToOneEnable = false,
35377ec681f3Smrg   };
35387ec681f3Smrg
35397ec681f3Smrg   return create_pipeline(device,
35407ec681f3Smrg                          pass,
35417ec681f3Smrg                          vs_nir, NULL, fs_nir,
35427ec681f3Smrg                          &vi_state,
35437ec681f3Smrg                          &ds_state,
35447ec681f3Smrg                          &cb_state,
35457ec681f3Smrg                          &ms_state,
35467ec681f3Smrg                          pipeline_layout,
35477ec681f3Smrg                          pipeline);
35487ec681f3Smrg}
35497ec681f3Smrg
35507ec681f3Smrg/**
35517ec681f3Smrg * Return a pipeline suitable for blitting the requested aspect given the
35527ec681f3Smrg * destination and source formats.
35537ec681f3Smrg */
35547ec681f3Smrgstatic bool
35557ec681f3Smrgget_blit_pipeline(struct v3dv_device *device,
35567ec681f3Smrg                  VkFormat dst_format,
35577ec681f3Smrg                  VkFormat src_format,
35587ec681f3Smrg                  VkColorComponentFlags cmask,
35597ec681f3Smrg                  VkImageType src_type,
35607ec681f3Smrg                  VkSampleCountFlagBits dst_samples,
35617ec681f3Smrg                  VkSampleCountFlagBits src_samples,
35627ec681f3Smrg                  struct v3dv_meta_blit_pipeline **pipeline)
35637ec681f3Smrg{
35647ec681f3Smrg   bool ok = true;
35657ec681f3Smrg
35667ec681f3Smrg   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
35677ec681f3Smrg   get_blit_pipeline_cache_key(dst_format, src_format, cmask,
35687ec681f3Smrg                               dst_samples, src_samples, key);
35697ec681f3Smrg   mtx_lock(&device->meta.mtx);
35707ec681f3Smrg   struct hash_entry *entry =
35717ec681f3Smrg      _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
35727ec681f3Smrg   if (entry) {
35737ec681f3Smrg      mtx_unlock(&device->meta.mtx);
35747ec681f3Smrg      *pipeline = entry->data;
35757ec681f3Smrg      return true;
35767ec681f3Smrg   }
35777ec681f3Smrg
35787ec681f3Smrg   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
35797ec681f3Smrg                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
35807ec681f3Smrg
35817ec681f3Smrg   if (*pipeline == NULL)
35827ec681f3Smrg      goto fail;
35837ec681f3Smrg
35847ec681f3Smrg   ok = create_blit_render_pass(device, dst_format, src_format,
35857ec681f3Smrg                                &(*pipeline)->pass,
35867ec681f3Smrg                                &(*pipeline)->pass_no_load);
35877ec681f3Smrg   if (!ok)
35887ec681f3Smrg      goto fail;
35897ec681f3Smrg
35907ec681f3Smrg   /* Create the pipeline using one of the render passes, they are both
35917ec681f3Smrg    * compatible, so we don't care which one we use here.
35927ec681f3Smrg    */
35937ec681f3Smrg   ok = create_blit_pipeline(device,
35947ec681f3Smrg                             dst_format,
35957ec681f3Smrg                             src_format,
35967ec681f3Smrg                             cmask,
35977ec681f3Smrg                             src_type,
35987ec681f3Smrg                             dst_samples,
35997ec681f3Smrg                             src_samples,
36007ec681f3Smrg                             (*pipeline)->pass,
36017ec681f3Smrg                             device->meta.blit.p_layout,
36027ec681f3Smrg                             &(*pipeline)->pipeline);
36037ec681f3Smrg   if (!ok)
36047ec681f3Smrg      goto fail;
36057ec681f3Smrg
36067ec681f3Smrg   memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
36077ec681f3Smrg   _mesa_hash_table_insert(device->meta.blit.cache[src_type],
36087ec681f3Smrg                           &(*pipeline)->key, *pipeline);
36097ec681f3Smrg
36107ec681f3Smrg   mtx_unlock(&device->meta.mtx);
36117ec681f3Smrg   return true;
36127ec681f3Smrg
36137ec681f3Smrgfail:
36147ec681f3Smrg   mtx_unlock(&device->meta.mtx);
36157ec681f3Smrg
36167ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
36177ec681f3Smrg   if (*pipeline) {
36187ec681f3Smrg      if ((*pipeline)->pass)
36197ec681f3Smrg         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
36207ec681f3Smrg      if ((*pipeline)->pass_no_load)
36217ec681f3Smrg         v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
36227ec681f3Smrg      if ((*pipeline)->pipeline)
36237ec681f3Smrg         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
36247ec681f3Smrg      vk_free(&device->vk.alloc, *pipeline);
36257ec681f3Smrg      *pipeline = NULL;
36267ec681f3Smrg   }
36277ec681f3Smrg
36287ec681f3Smrg   return false;
36297ec681f3Smrg}
36307ec681f3Smrg
36317ec681f3Smrgstatic void
36327ec681f3Smrgcompute_blit_box(const VkOffset3D *offsets,
36337ec681f3Smrg                 uint32_t image_w, uint32_t image_h,
36347ec681f3Smrg                 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
36357ec681f3Smrg                 bool *mirror_x, bool *mirror_y)
36367ec681f3Smrg{
36377ec681f3Smrg   if (offsets[1].x >= offsets[0].x) {
36387ec681f3Smrg      *mirror_x = false;
36397ec681f3Smrg      *x = MIN2(offsets[0].x, image_w - 1);
36407ec681f3Smrg      *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
36417ec681f3Smrg   } else {
36427ec681f3Smrg      *mirror_x = true;
36437ec681f3Smrg      *x = MIN2(offsets[1].x, image_w - 1);
36447ec681f3Smrg      *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
36457ec681f3Smrg   }
36467ec681f3Smrg   if (offsets[1].y >= offsets[0].y) {
36477ec681f3Smrg      *mirror_y = false;
36487ec681f3Smrg      *y = MIN2(offsets[0].y, image_h - 1);
36497ec681f3Smrg      *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
36507ec681f3Smrg   } else {
36517ec681f3Smrg      *mirror_y = true;
36527ec681f3Smrg      *y = MIN2(offsets[1].y, image_h - 1);
36537ec681f3Smrg      *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
36547ec681f3Smrg   }
36557ec681f3Smrg}
36567ec681f3Smrg
36577ec681f3Smrgstatic void
36587ec681f3Smrgcompute_blit_3d_layers(const VkOffset3D *offsets,
36597ec681f3Smrg                       uint32_t *min_layer, uint32_t *max_layer,
36607ec681f3Smrg                       bool *mirror_z)
36617ec681f3Smrg{
36627ec681f3Smrg   if (offsets[1].z >= offsets[0].z) {
36637ec681f3Smrg      *mirror_z = false;
36647ec681f3Smrg      *min_layer = offsets[0].z;
36657ec681f3Smrg      *max_layer = offsets[1].z;
36667ec681f3Smrg   } else {
36677ec681f3Smrg      *mirror_z = true;
36687ec681f3Smrg      *min_layer = offsets[1].z;
36697ec681f3Smrg      *max_layer = offsets[0].z;
36707ec681f3Smrg   }
36717ec681f3Smrg}
36727ec681f3Smrg
36737ec681f3Smrgstatic VkResult
36747ec681f3Smrgcreate_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
36757ec681f3Smrg{
36767ec681f3Smrg   /* If this is not the first pool we create for this command buffer
36777ec681f3Smrg    * size it based on the size of the currently exhausted pool.
36787ec681f3Smrg    */
36797ec681f3Smrg   uint32_t descriptor_count = 64;
36807ec681f3Smrg   if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
36817ec681f3Smrg      struct v3dv_descriptor_pool *exhausted_pool =
36827ec681f3Smrg         v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
36837ec681f3Smrg      descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
36847ec681f3Smrg   }
36857ec681f3Smrg
36867ec681f3Smrg   /* Create the descriptor pool */
36877ec681f3Smrg   cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
36887ec681f3Smrg   VkDescriptorPoolSize pool_size = {
36897ec681f3Smrg      .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
36907ec681f3Smrg      .descriptorCount = descriptor_count,
36917ec681f3Smrg   };
36927ec681f3Smrg   VkDescriptorPoolCreateInfo info = {
36937ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
36947ec681f3Smrg      .maxSets = descriptor_count,
36957ec681f3Smrg      .poolSizeCount = 1,
36967ec681f3Smrg      .pPoolSizes = &pool_size,
36977ec681f3Smrg      .flags = 0,
36987ec681f3Smrg   };
36997ec681f3Smrg   VkResult result =
37007ec681f3Smrg      v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
37017ec681f3Smrg                                &info,
37027ec681f3Smrg                                &cmd_buffer->device->vk.alloc,
37037ec681f3Smrg                                &cmd_buffer->meta.blit.dspool);
37047ec681f3Smrg
37057ec681f3Smrg   if (result == VK_SUCCESS) {
37067ec681f3Smrg      assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
37077ec681f3Smrg      const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
37087ec681f3Smrg
37097ec681f3Smrg      v3dv_cmd_buffer_add_private_obj(
37107ec681f3Smrg         cmd_buffer, (uintptr_t) _pool,
37117ec681f3Smrg         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
37127ec681f3Smrg
37137ec681f3Smrg      struct v3dv_descriptor_pool *pool =
37147ec681f3Smrg         v3dv_descriptor_pool_from_handle(_pool);
37157ec681f3Smrg      pool->is_driver_internal = true;
37167ec681f3Smrg   }
37177ec681f3Smrg
37187ec681f3Smrg   return result;
37197ec681f3Smrg}
37207ec681f3Smrg
37217ec681f3Smrgstatic VkResult
37227ec681f3Smrgallocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
37237ec681f3Smrg                                    VkDescriptorSet *set)
37247ec681f3Smrg{
37257ec681f3Smrg   /* Make sure we have a descriptor pool */
37267ec681f3Smrg   VkResult result;
37277ec681f3Smrg   if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
37287ec681f3Smrg      result = create_blit_descriptor_pool(cmd_buffer);
37297ec681f3Smrg      if (result != VK_SUCCESS)
37307ec681f3Smrg         return result;
37317ec681f3Smrg   }
37327ec681f3Smrg   assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
37337ec681f3Smrg
37347ec681f3Smrg   /* Allocate descriptor set */
37357ec681f3Smrg   struct v3dv_device *device = cmd_buffer->device;
37367ec681f3Smrg   VkDevice _device = v3dv_device_to_handle(device);
37377ec681f3Smrg   VkDescriptorSetAllocateInfo info = {
37387ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
37397ec681f3Smrg      .descriptorPool = cmd_buffer->meta.blit.dspool,
37407ec681f3Smrg      .descriptorSetCount = 1,
37417ec681f3Smrg      .pSetLayouts = &device->meta.blit.ds_layout,
37427ec681f3Smrg   };
37437ec681f3Smrg   result = v3dv_AllocateDescriptorSets(_device, &info, set);
37447ec681f3Smrg
37457ec681f3Smrg   /* If we ran out of pool space, grow the pool and try again */
37467ec681f3Smrg   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
37477ec681f3Smrg      result = create_blit_descriptor_pool(cmd_buffer);
37487ec681f3Smrg      if (result == VK_SUCCESS) {
37497ec681f3Smrg         info.descriptorPool = cmd_buffer->meta.blit.dspool;
37507ec681f3Smrg         result = v3dv_AllocateDescriptorSets(_device, &info, set);
37517ec681f3Smrg      }
37527ec681f3Smrg   }
37537ec681f3Smrg
37547ec681f3Smrg   return result;
37557ec681f3Smrg}
37567ec681f3Smrg
37577ec681f3Smrg/**
37587ec681f3Smrg * Returns true if the implementation supports the requested operation (even if
37597ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error).
37607ec681f3Smrg *
37617ec681f3Smrg * The caller can specify the channels on the destination to be written via the
37627ec681f3Smrg * cmask parameter (which can be 0 to default to all channels), as well as a
37637ec681f3Smrg * swizzle to apply to the source via the cswizzle parameter  (which can be NULL
37647ec681f3Smrg * to use the default identity swizzle).
37657ec681f3Smrg */
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *_region,
            VkFilter filter,
            bool dst_is_padded_image)
{
   bool handled = true;
   VkResult result;
   uint32_t dirty_dynamic_state = 0;

   /* We don't support rendering to linear depth/stencil, this should have
    * been rewritten to a compatible color blit by the caller.
    */
   assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
          !vk_format_is_depth_or_stencil(dst_format));

   /* Can't sample from linear images */
   if (src->vk.tiling == VK_IMAGE_TILING_LINEAR && src->vk.image_type != VK_IMAGE_TYPE_1D)
      return false;

   VkImageBlit2KHR region = *_region;
   /* Rewrite combined D/S blits to compatible color blits */
   if (vk_format_is_depth_or_stencil(dst_format)) {
      assert(src_format == dst_format);
      assert(cmask == 0);
      switch(dst_format) {
      case VK_FORMAT_D16_UNORM:
         dst_format = VK_FORMAT_R16_UINT;
         break;
      case VK_FORMAT_D32_SFLOAT:
         dst_format = VK_FORMAT_R32_UINT;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* When reinterpreted as RGBA8UI, only write the color channels
          * that back the aspects being blitted so the other aspect is
          * preserved in the destination.
          */
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            cmask |= VK_COLOR_COMPONENT_G_BIT |
                     VK_COLOR_COMPONENT_B_BIT |
                     VK_COLOR_COMPONENT_A_BIT;
         }
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
            cmask |= VK_COLOR_COMPONENT_R_BIT;
         }
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      default:
         unreachable("Unsupported depth/stencil format");
      };
      src_format = dst_format;
      region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
      region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   /* Default to the identity swizzle when the caller doesn't provide one */
   VkComponentMapping ident_swizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   if (!cswizzle)
      cswizzle = &ident_swizzle;

   /* When we get here from a copy between compressed / uncompressed images
    * we choose to specify the destination blit region based on the size
    * semantics of the source image of the copy (see copy_image_blit), so we
    * need to apply those same semantics here when we compute the size of the
    * destination image level.
    */
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_level_w =
      u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
               region.dstSubresource.mipLevel);
   const uint32_t dst_level_h =
      u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
               region.dstSubresource.mipLevel);

   const uint32_t src_level_w =
      u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
   const uint32_t src_level_h =
      u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
   const uint32_t src_level_d =
      u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel);

   /* Clamped blit boxes and mirror flags for both images */
   uint32_t dst_x, dst_y, dst_w, dst_h;
   bool dst_mirror_x, dst_mirror_y;
   compute_blit_box(region.dstOffsets,
                    dst_level_w, dst_level_h,
                    &dst_x, &dst_y, &dst_w, &dst_h,
                    &dst_mirror_x, &dst_mirror_y);

   uint32_t src_x, src_y, src_w, src_h;
   bool src_mirror_x, src_mirror_y;
   compute_blit_box(region.srcOffsets,
                    src_level_w, src_level_h,
                    &src_x, &src_y, &src_w, &src_h,
                    &src_mirror_x, &src_mirror_y);

   /* For 3D images the layer range comes from the Z offsets; for 1D/2D it
    * comes from the subresource's array layers.
    */
   uint32_t min_dst_layer;
   uint32_t max_dst_layer;
   bool dst_mirror_z = false;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_dst_layer = region.dstSubresource.baseArrayLayer;
      max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.dstOffsets,
                             &min_dst_layer, &max_dst_layer,
                             &dst_mirror_z);
   }

   uint32_t min_src_layer;
   uint32_t max_src_layer;
   bool src_mirror_z = false;
   if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_src_layer = region.srcSubresource.baseArrayLayer;
      max_src_layer = min_src_layer + region.srcSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.srcOffsets,
                             &min_src_layer, &max_src_layer,
                             &src_mirror_z);
   }

   uint32_t layer_count = max_dst_layer - min_dst_layer;

   /* Translate source blit coordinates to normalized texture coordinates for
    * single sampled textures. For multisampled textures we require
    * unnormalized coordinates, since we can only do texelFetch on them.
    */
   float coords[4] =  {
      (float)src_x,
      (float)src_y,
      (float)(src_x + src_w),
      (float)(src_y + src_h),
   };

   if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
      coords[0] /= (float)src_level_w;
      coords[1] /= (float)src_level_h;
      coords[2] /= (float)src_level_w;
      coords[3] /= (float)src_level_h;
   }

   /* Handle mirroring: an axis is mirrored overall when exactly one of the
    * source/destination boxes is reversed along it.
    */
   const bool mirror_x = dst_mirror_x != src_mirror_x;
   const bool mirror_y = dst_mirror_y != src_mirror_y;
   const bool mirror_z = dst_mirror_z != src_mirror_z;
   float tex_coords[5] = {
      !mirror_x ? coords[0] : coords[2],
      !mirror_y ? coords[1] : coords[3],
      !mirror_x ? coords[2] : coords[0],
      !mirror_y ? coords[3] : coords[1],
      /* Z coordinate for 3D blit sources, to be filled for each
       * destination layer
       */
      0.0f
   };

   /* For blits from 3D images we also need to compute the slice coordinate to
    * sample from, which will change for each layer in the destination.
    * Compute the step we should increase for each iteration.
    */
   const float src_z_step =
      (float)(max_src_layer - min_src_layer) / (float)layer_count;

   /* Get the blit pipeline */
   struct v3dv_meta_blit_pipeline *pipeline = NULL;
   bool ok = get_blit_pipeline(cmd_buffer->device,
                               dst_format, src_format, cmask, src->vk.image_type,
                               dst->vk.samples, src->vk.samples,
                               &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline &&
          pipeline->pass && pipeline->pass_no_load);

   struct v3dv_device *device = cmd_buffer->device;
   assert(device->meta.blit.ds_layout);

   VkDevice _device = v3dv_device_to_handle(device);
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

   /* Create sampler for blit source image */
   VkSamplerCreateInfo sampler_info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .magFilter = filter,
      .minFilter = filter,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
   };
   VkSampler sampler;
   result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
                               &sampler);
   /* NOTE(review): this goto runs before v3dv_cmd_buffer_meta_state_push()
    * below, yet the fail label unconditionally pops meta state — verify
    * that an unbalanced pop is safe on this path.
    */
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)sampler,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);

   /* Push command buffer state before starting meta operation */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

   /* Push state that is common for all layers */
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   const VkViewport viewport = {
      .x = dst_x,
      .y = dst_y,
      .width = dst_w,
      .height = dst_h,
      .minDepth = 0.0f,
      .maxDepth = 1.0f
   };
   v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);

   const VkRect2D scissor = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h }
   };
   v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

   bool can_skip_tlb_load = false;
   const VkRect2D render_area = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h },
   };

   /* Record per-layer commands */
   VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
   for (uint32_t i = 0; i < layer_count; i++) {
      /* Setup framebuffer */
      VkImageViewCreateInfo dst_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(dst),
         .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
         .format = dst_format,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.dstSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer = min_dst_layer + i,
            .layerCount = 1
         },
      };
      VkImageView dst_image_view;
      result = v3dv_CreateImageView(_device, &dst_image_view_info,
                                    &device->vk.alloc, &dst_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)dst_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkFramebufferCreateInfo fb_info = {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .renderPass = pipeline->pass,
         .attachmentCount = 1,
         .pAttachments = &dst_image_view,
         .width = dst_x + dst_w,
         .height = dst_y + dst_h,
         .layers = 1,
      };

      VkFramebuffer fb;
      result = v3dv_CreateFramebuffer(_device, &fb_info,
                                      &cmd_buffer->device->vk.alloc, &fb);
      if (result != VK_SUCCESS)
         goto fail;

      /* Edge padding only applies when the framebuffer covers the full
       * destination level and the caller says the image is padded.
       */
      struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
      framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
                                      fb_info.height == dst_level_h &&
                                      dst_is_padded_image;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

      /* Setup descriptor set for blit source texture. We don't have to
       * register the descriptor as a private command buffer object since
       * all descriptors will be freed automatically with the descriptor
       * pool.
       */
      VkDescriptorSet set;
      result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
      if (result != VK_SUCCESS)
         goto fail;

      VkImageViewCreateInfo src_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(src),
         .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
         .format = src_format,
         .components = *cswizzle,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.srcSubresource.mipLevel,
            .levelCount = 1,
            /* 3D sources are sampled by Z coordinate, not by array layer */
            .baseArrayLayer =
               src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
            .layerCount = 1
         },
      };
      VkImageView src_image_view;
      result = v3dv_CreateImageView(_device, &src_image_view_info,
                                    &device->vk.alloc, &src_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)src_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkDescriptorImageInfo image_info = {
         .sampler = sampler,
         .imageView = src_image_view,
         .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      };
      VkWriteDescriptorSet write = {
         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
         .dstSet = set,
         .dstBinding = 0,
         .dstArrayElement = 0,
         .descriptorCount = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .pImageInfo = &image_info,
      };
      v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

      v3dv_CmdBindDescriptorSets(_cmd_buffer,
                                 VK_PIPELINE_BIND_POINT_GRAPHICS,
                                 device->meta.blit.p_layout,
                                 0, 1, &set,
                                 0, NULL);

      /* If the region we are about to blit is tile-aligned, then we can
       * use the render pass version that won't pre-load the tile buffer
       * with the dst image contents before the blit. The exception is when we
       * don't have a full color mask, since in that case we need to preserve
       * the original value of some of the color components.
       *
       * Since all layers have the same area, we only need to compute this for
       * the first.
       */
      if (i == 0) {
         struct v3dv_render_pass *pipeline_pass =
            v3dv_render_pass_from_handle(pipeline->pass);
         can_skip_tlb_load =
            cmask == full_cmask &&
            v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                              framebuffer, pipeline_pass, 0);
      }

      /* Record blit */
      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* For 3D blits we need to compute the source slice to blit from (the Z
       * coordinate of the source sample operation). We want to choose this
       * based on the ratio of the depth of the source and the destination
       * images, picking the coordinate in the middle of each step.
       */
      if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
         tex_coords[4] =
            !mirror_z ?
            (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
            (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
      }

      /* Upload the 5 blit texture coordinates (20 bytes) */
      v3dv_CmdPushConstants(_cmd_buffer,
                            device->meta.blit.p_layout,
                            VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
                            &tex_coords);

      v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);

      v3dv_CmdEndRenderPass(_cmd_buffer);
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
   }

fail:
   /* The success path falls through here too: meta state is always popped */
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);

   return handled;
}
41827ec681f3Smrg
41837ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
41847ec681f3Smrgv3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
41857ec681f3Smrg                      const VkBlitImageInfo2KHR *pBlitImageInfo)
41867ec681f3Smrg{
41877ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
41887ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
41897ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
41907ec681f3Smrg
41917ec681f3Smrg    /* This command can only happen outside a render pass */
41927ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
41937ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
41947ec681f3Smrg
41957ec681f3Smrg   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
41967ec681f3Smrg   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
41977ec681f3Smrg          src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
41987ec681f3Smrg
41997ec681f3Smrg   /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
42007ec681f3Smrg   assert(!vk_format_is_compressed(dst->vk.format));
42017ec681f3Smrg
42027ec681f3Smrg   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
42037ec681f3Smrg      if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
42047ec681f3Smrg         continue;
42057ec681f3Smrg      if (blit_shader(cmd_buffer,
42067ec681f3Smrg                      dst, dst->vk.format,
42077ec681f3Smrg                      src, src->vk.format,
42087ec681f3Smrg                      0, NULL,
42097ec681f3Smrg                      &pBlitImageInfo->pRegions[i],
42107ec681f3Smrg                      pBlitImageInfo->filter, true)) {
42117ec681f3Smrg         continue;
42127ec681f3Smrg      }
42137ec681f3Smrg      unreachable("Unsupported blit operation");
42147ec681f3Smrg   }
42157ec681f3Smrg}
42167ec681f3Smrg
/* Tries to resolve a multisampled image region using the TLB (tile buffer)
 * hardware path. Returns false when this path cannot handle the request
 * (non-TLB-compatible offsets or a format without TLB resolve support) so
 * the caller can fall back to another implementation; returns true once the
 * operation has been handled (even if starting the job failed).
 */
static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                  struct v3dv_image *dst,
                  struct v3dv_image *src,
                  const VkImageResolve2KHR *region)
{
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
      return false;
   }

   if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
      return false;

   const VkFormat fb_format = src->vk.format;

   /* For 3D destinations each depth slice is a layer; otherwise the layer
    * count comes from the destination subresource.
    */
   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Frame dimensions in destination blocks */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->srcSubresource.aspectMask,
       &internal_type, &internal_bpp);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, true);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   /* Emit the binning flush and the resolve RCL, then close the job */
   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
                                                    &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);
   return true;
}
42697ec681f3Smrg
42707ec681f3Smrgstatic bool
42717ec681f3Smrgresolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
42727ec681f3Smrg                   struct v3dv_image *dst,
42737ec681f3Smrg                   struct v3dv_image *src,
42747ec681f3Smrg                   const VkImageResolve2KHR *region)
42757ec681f3Smrg{
42767ec681f3Smrg   const VkImageBlit2KHR blit_region = {
42777ec681f3Smrg      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
42787ec681f3Smrg      .srcSubresource = region->srcSubresource,
42797ec681f3Smrg      .srcOffsets = {
42807ec681f3Smrg         region->srcOffset,
42817ec681f3Smrg         {
42827ec681f3Smrg            region->srcOffset.x + region->extent.width,
42837ec681f3Smrg            region->srcOffset.y + region->extent.height,
42847ec681f3Smrg         }
42857ec681f3Smrg      },
42867ec681f3Smrg      .dstSubresource = region->dstSubresource,
42877ec681f3Smrg      .dstOffsets = {
42887ec681f3Smrg         region->dstOffset,
42897ec681f3Smrg         {
42907ec681f3Smrg            region->dstOffset.x + region->extent.width,
42917ec681f3Smrg            region->dstOffset.y + region->extent.height,
42927ec681f3Smrg         }
42937ec681f3Smrg      },
42947ec681f3Smrg   };
42957ec681f3Smrg   return blit_shader(cmd_buffer,
42967ec681f3Smrg                      dst, dst->vk.format,
42977ec681f3Smrg                      src, src->vk.format,
42987ec681f3Smrg                      0, NULL,
42997ec681f3Smrg                      &blit_region, VK_FILTER_NEAREST, true);
43007ec681f3Smrg}
43017ec681f3Smrg
43027ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
43037ec681f3Smrgv3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
43047ec681f3Smrg                         const VkResolveImageInfo2KHR *info)
43057ec681f3Smrg
43067ec681f3Smrg{
43077ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
43087ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
43097ec681f3Smrg   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
43107ec681f3Smrg
43117ec681f3Smrg    /* This command can only happen outside a render pass */
43127ec681f3Smrg   assert(cmd_buffer->state.pass == NULL);
43137ec681f3Smrg   assert(cmd_buffer->state.job == NULL);
43147ec681f3Smrg
43157ec681f3Smrg   assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
43167ec681f3Smrg   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
43177ec681f3Smrg
43187ec681f3Smrg   for (uint32_t i = 0; i < info->regionCount; i++) {
43197ec681f3Smrg      if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
43207ec681f3Smrg         continue;
43217ec681f3Smrg      if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
43227ec681f3Smrg         continue;
43237ec681f3Smrg      unreachable("Unsupported multismaple resolve operation");
43247ec681f3Smrg   }
43257ec681f3Smrg}
4326