/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "compiler/nir/nir_builder.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
#include "vulkan/util/vk_common_entrypoints.h"

static uint32_t
meta_blit_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}

static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}

void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}

static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}

static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkResult result;

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding ds_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &ds_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &ds_layout_info,
                                        &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
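   /* Push constant layout shared by all texel buffer copy pipelines, as
    * described by the offsets below: the fragment stage reads a 4-word copy
    * box at offset 0, the buffer stride at offset 16 and the buffer offset
    * at offset 20 (24 bytes in total), while the geometry stage, when
    * present, reads the target layer at offset 24.
    */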
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
   VkPushConstantRange ranges[2] = {
      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
   };

   VkPipelineLayoutCreateInfo p_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 2,
      .pPushConstantRanges = ranges,
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &p_layout_info,
                                &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}

void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}

static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
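    *
    * For example, a 64x64 ETC2_R8G8B8A8 image (128 bits per 4x4 block) is
    * handled as a 16x16 R32G32B32A32_UINT render target where each 128-bit
    * texel holds one compressed block.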
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}

/**
 * Checks if we can implement an image copy or clear operation using the TLB
 * hardware.
 */
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
                      const VkOffset3D *offset,
                      VkFormat *compat_format)
{
   if (offset->x != 0 || offset->y != 0)
      return false;

   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->vk.format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
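    *
    * For example, R8G8B8A8_SNORM is not directly renderable via the TLB, but
    * its raw bits can be stored through the bit-compatible R8G8B8A8_UINT
    * format that get_compatible_tlb_format() returns for it.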
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->vk.format);
      if (*compat_format != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}

/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *region,
            VkFilter filter,
            bool dst_is_padded_image);

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2KHR *region)
{
   bool handled = false;

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to set up
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24 bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
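             *
             * With this swizzle, bytes 1-3 of each source texel (the 24 depth
             * bits) end up in bytes 0-2 of the buffer texel, and the color
             * mask below leaves the buffer's fourth byte unwritten.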
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return handled;
      };
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return handled;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
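    * (For example, the D32_SFLOAT depth case above becomes a plain R32_UINT
    * color copy of the raw 32-bit values.)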
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
   uint32_t block_height = vk_format_get_blockheight(image->vk.format);
   buf_width = buf_width / block_width;
   buf_height = buf_height / block_height;

   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Our blit interface can see the real format of the images to detect
    * copies between compressed and uncompressed images and adapt the
    * blit region accordingly. Here we are just doing a raw copy of
    * compressed data, but we are passing an uncompressed view of the
    * buffer for the blit destination image (since compressed formats are
    * not renderable), so we also want to provide an uncompressed view of
    * the source image.
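    *
    * The view below reuses the canonical dst_format chosen above from the
    * block size (e.g. R32G32B32A32_UINT for 128-bit blocks), with an extent
    * measured in whole blocks (buf_width x buf_height).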
    */
   VkResult result;
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   if (vk_format_is_compressed(image->vk.format)) {
      VkImage uiview;
      VkImageCreateInfo uiview_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_3D,
         .format = dst_format,
         .extent = { buf_width, buf_height, image->vk.extent.depth },
         .mipLevels = image->vk.mip_levels,
         .arrayLayers = image->vk.array_layers,
         .samples = image->vk.samples,
         .tiling = image->vk.tiling,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };
      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)uiview,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      result =
         vk_common_BindImageMemory(_device, uiview,
                                   v3dv_device_memory_to_handle(image->mem),
                                   image->mem_offset);
      if (result != VK_SUCCESS)
         return handled;

      image = v3dv_image_from_handle(uiview);
   }

   /* Copy requested layers */
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
      result =
         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)buffer_image,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
         i * buf_width * buf_height * buffer_bpp;
      result =
         vk_common_BindImageMemory(_device, buffer_image,
                                   v3dv_device_memory_to_handle(buffer->mem),
                                   buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit2KHR blit_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
         },
         .srcOffsets = {
            {
               DIV_ROUND_UP(region->imageOffset.x, block_width),
               DIV_ROUND_UP(region->imageOffset.y, block_height),
               region->imageOffset.z + i,
            },
            {
               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                            block_width),
               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                            block_height),
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            {
               DIV_ROUND_UP(region->imageExtent.width, block_width),
               DIV_ROUND_UP(region->imageExtent.height, block_height),
               1
            },
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
                            &blit_region, VK_FILTER_NEAREST, false);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);

   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      unreachable("Unsupported image to buffer copy.");
   }
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Destination can't be raster format */
   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
      if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk.format) !=
       vk_format_is_compressed(src->vk.format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
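    *
    * For example, a full copy between two 64x64 images with 4x4 compressed
    * blocks specifies extent 64x64 in texels; the division below turns that
    * into a 16x16 job measured in blocks.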
    */
   const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
   assert(dst->vk.samples == src->vk.samples);
   if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->cpp == src->cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
                                region->dstSubresource.layerCount :
                                region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
         (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
          src, src_mip_level, base_src_layer + i,
          width, height, format);
   }

   return true;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
      return false;
   }

   /* From the Vulkan spec, VkImageCopy valid usage:
    *
    *   "If neither the calling command’s srcImage nor the calling command’s
    *    dstImage has a multi-planar image format then the aspectMask member
    *    of srcSubresource and dstSubresource must match."
    */
   assert(region->dstSubresource.aspectMask ==
          region->srcSubresource.aspectMask);
   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->dstSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
    *
    *   "The number of slices of the extent (for 3D) or layers of the
    *    srcSubresource (for non-3D) must match the number of slices of the
    *    extent (for 3D) or layers of the dstSubresource (for non-3D)."
    */
   assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->srcSubresource.layerCount : region->extent.depth) ==
          (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->dstSubresource.layerCount : region->extent.depth));
   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed image using compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
                        src->vk.samples > VK_SAMPLE_COUNT_1_BIT);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

/**
 * Takes the image provided as argument and creates a new image that has
 * the same specification and aliases the same memory storage, except that:
 *
 * - It has the uncompressed format passed in.
 * - Its original width/height are scaled by the factors passed in.
 *
 * This is useful to implement copies from compressed images using the blit
 * path. The idea is that we create uncompressed "image views" of both the
 * source and destination images using the uncompressed format and then we
 * define the copy blit in terms of that format.
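 *
 * The alias does not allocate new storage: it simply reuses the source
 * image's memory binding (mem / mem_offset), so creating it moves no data.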
 */
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *src,
                   float width_scale,
                   float height_scale,
                   VkFormat format)
{
   assert(!vk_format_is_compressed(format));

   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);

   VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = src->vk.image_type,
      .format = format,
      .extent = {
         .width = src->vk.extent.width * width_scale,
         .height = src->vk.extent.height * height_scale,
         .depth = src->vk.extent.depth,
      },
      .mipLevels = src->vk.mip_levels,
      .arrayLayers = src->vk.array_layers,
      .samples = src->vk.samples,
      .tiling = src->vk.tiling,
      .usage = src->vk.usage,
   };

   VkImage _image;
   VkResult result =
      v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
   if (result != VK_SUCCESS) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   struct v3dv_image *image = v3dv_image_from_handle(_image);
   image->mem = src->mem;
   image->mem_offset = src->mem_offset;
   return image;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2KHR *region)
{
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const float block_scale_w = (float)src_block_w / (float)dst_block_w;
   const float block_scale_h = (float)src_block_h / (float)dst_block_h;

   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we
    * are going to blit, we need to make sure that the selected format can
    * be both rendered to and textured from.
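    * (For the uncompressed path below, renderability is checked via rt_type
    * and texturing via tex_type.)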
    */
   VkFormat format;
   float src_scale_w = 1.0f;
   float src_scale_h = 1.0f;
   float dst_scale_w = block_scale_w;
   float dst_scale_h = block_scale_h;
   if (vk_format_is_compressed(src->vk.format)) {
      /* If we are copying from a compressed format we should be aware that we
       * are going to texture from the source image, and the texture setup
       * knows the actual size of the image, so we need to choose a format
       * that has a per-texel (not per-block) bpp that is compatible for that
       * image size. For example, for a source image with size Bw*WxBh*H
       * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
       * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
       * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
       * so we could specify a blit with size Bw*WxBh*H and a format with
       * a bpp of 8-bit per texel (R8_UINT).
       *
       * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
       * which uses 64-bit blocks (4 bits per texel), we would need a 4-bit
       * format, which we don't have, so instead we still choose an 8-bit
       * format, but we apply a divisor to the row dimensions of the blit,
       * since we are copying two texels per item.
       *
       * Generally, we can choose any format so long as we compute appropriate
       * divisors for the width and height depending on the source image's
       * bpp.
       */
      assert(src->cpp == dst->cpp);

      format = VK_FORMAT_R32G32_UINT;
      switch (src->cpp) {
      case 16:
         format = VK_FORMAT_R32G32B32A32_UINT;
         break;
      case 8:
         format = VK_FORMAT_R16G16B16A16_UINT;
         break;
      default:
         unreachable("Unsupported compressed format");
      }

      /* Create image views of the src/dst images that we can interpret in
       * terms of the canonical format.
       */
      src_scale_w /= src_block_w;
      src_scale_h /= src_block_h;
      dst_scale_w /= src_block_w;
      dst_scale_h /= src_block_h;

      src = create_image_alias(cmd_buffer, src,
                               src_scale_w, src_scale_h, format);

      dst = create_image_alias(cmd_buffer, dst,
                               dst_scale_w, dst_scale_h, format);
   } else {
      format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
         src->vk.format : get_compatible_tlb_format(src->vk.format);
      if (format == VK_FORMAT_UNDEFINED)
         return false;

      const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
      if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
         return false;
   }

   /* Given an uncompressed image with size WxH, if we copy it to a compressed
    * image, it will result in an image with size W*bWxH*bH, where bW and bH
    * are the compressed format's block width and height. This means that
    * copies between compressed and uncompressed images involve different
    * image sizes, and therefore, we need to take that into account when
    * setting up the source and destination blit regions below, so they are
    * consistent from the point of view of the single compatible format
    * selected for the copy.
    *
    * We should take into account that the dimensions of the region provided
    * to the copy command are specified in terms of the source image. With that
    * in mind, below we adjust the blit destination region to be consistent with
    * the source region for the compatible format, so basically, we apply
    * the block scale factor to the destination offset provided by the copy
    * command (because it is specified in terms of the destination image, not
    * the source), and then we just add the region copy dimensions to that
    * (since the region dimensions are already specified in terms of the source
    * image).
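    *
    * For example, with an uncompressed source and a destination using 4x4
    * blocks, a dstOffset of (64, 32) is scaled to (16, 8) for the blit, and
    * the source-space copy extent is then added to it unchanged.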
    */
   const VkOffset3D src_start = {
      region->srcOffset.x * src_scale_w,
      region->srcOffset.y * src_scale_h,
      region->srcOffset.z,
   };
   const VkOffset3D src_end = {
      src_start.x + region->extent.width * src_scale_w,
      src_start.y + region->extent.height * src_scale_h,
      src_start.z + region->extent.depth,
   };

   const VkOffset3D dst_start = {
      region->dstOffset.x * dst_scale_w,
      region->dstOffset.y * dst_scale_h,
      region->dstOffset.z,
   };
   const VkOffset3D dst_end = {
      dst_start.x + region->extent.width * src_scale_w,
      dst_start.y + region->extent.height * src_scale_h,
      dst_start.z + region->extent.depth,
   };

   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = { src_start, src_end },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = { dst_start, dst_end },
   };
   bool handled = blit_shader(cmd_buffer,
                              dst, format,
                              src, format,
                              0, NULL,
                              &blit_region, VK_FILTER_NEAREST, true);

   /* We should have selected formats that we can blit */
   assert(handled);
   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                      const VkCopyImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   assert(src->vk.samples == dst->vk.samples);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Image copy not supported");
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
         (cmd_buffer,
          dst_buffer->mem->bo, dst_buffer->mem_offset,
          src_buffer->mem->bo, src_buffer->mem_offset,
          &pCopyBufferInfo->pRegions[i]);
   }
}

static void
destroy_update_buffer_cb(VkDevice _device,
                         uint64_t pobj,
                         VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
   v3dv_bo_free(device, bo);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                     VkBuffer dstBuffer,
                     VkDeviceSize dstOffset,
                     VkDeviceSize dataSize,
                     const void *pData)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *src_bo =
      v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
   if (!src_bo) {
      fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
      return;
   }

   bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
   if (!ok) {
      fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
      return;
   }

   memcpy(src_bo->map, pData, dataSize);

   v3dv_bo_unmap(cmd_buffer->device, src_bo);

   VkBufferCopy2KHR region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
      .srcOffset = 0,
      .dstOffset = dstOffset,
      .size = dataSize,
   };
   struct v3dv_job *copy_job =
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
         (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
          src_bo, 0, &region);

   if (!copy_job)
      return;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize size,
                   uint32_t data)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *bo = dst_buffer->mem->bo;

   /* From the Vulkan spec:
    *
    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
    *    a multiple of 4, then the nearest smaller multiple is used."
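    *
    * For example, if 10 bytes remain past dstOffset, only 8 of them are
    * filled.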
    */
   if (size == VK_WHOLE_SIZE) {
      size = dst_buffer->size - dstOffset;
      size -= size % 4;
   }

   v3dv_X(cmd_buffer->device, meta_fill_buffer)
      (cmd_buffer, bo, dstOffset, size, data);
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* Destination can't be raster format */
   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can't copy D24S8 because buffer to image copies only copy one aspect
    * at a time, and the TFU copies full images. Also, the V3D depth bits for
    * both D24S8 and D24X8 are stored in the 24-bit MSB of each 32-bit word,
    * but the Vulkan spec has the buffer data specified the other way around,
    * so it is not a straight copy: we would have to swizzle the channels,
    * which the TFU can't do.
    */
   if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
       image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      return false;
   }

   /* Region must include full slice */
   const uint32_t offset_x = region->imageOffset.x;
   const uint32_t offset_y = region->imageOffset.y;
   if (offset_x != 0 || offset_y != 0)
      return false;

   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   if (width != image->vk.extent.width || height != image->vk.extent.height)
      return false;

   /* Handle region semantics for compressed images */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   width = DIV_ROUND_UP(width, block_w);
   height = DIV_ROUND_UP(height, block_h);

   /* Format must be supported for texturing via the TFU. Since we are just
    * copying raw data and not converting between pixel formats, we can ignore
    * the image's format and choose a compatible TFU format for the image
    * texel size instead, which expands the list of formats we can handle here.
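    *
    * For example, any 4-cpp color image can be handled with whatever 32-bit
    * texel type v3dv_get_compatible_tfu_format() returns for cpp = 4, since
    * this path only moves raw bytes and never interprets them.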
13117ec681f3Smrg */ 13127ec681f3Smrg const struct v3dv_format *format = 13137ec681f3Smrg v3dv_get_compatible_tfu_format(cmd_buffer->device, 13147ec681f3Smrg image->cpp, NULL); 13157ec681f3Smrg 13167ec681f3Smrg const uint32_t mip_level = region->imageSubresource.mipLevel; 13177ec681f3Smrg const struct v3d_resource_slice *slice = &image->slices[mip_level]; 13187ec681f3Smrg 13197ec681f3Smrg uint32_t num_layers; 13207ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) 13217ec681f3Smrg num_layers = region->imageSubresource.layerCount; 13227ec681f3Smrg else 13237ec681f3Smrg num_layers = region->imageExtent.depth; 13247ec681f3Smrg assert(num_layers > 0); 13257ec681f3Smrg 13267ec681f3Smrg assert(image->mem && image->mem->bo); 13277ec681f3Smrg const struct v3dv_bo *dst_bo = image->mem->bo; 13287ec681f3Smrg 13297ec681f3Smrg assert(buffer->mem && buffer->mem->bo); 13307ec681f3Smrg const struct v3dv_bo *src_bo = buffer->mem->bo; 13317ec681f3Smrg 13327ec681f3Smrg /* Emit a TFU job per layer to copy */ 13337ec681f3Smrg const uint32_t buffer_stride = width * image->cpp; 13347ec681f3Smrg for (int i = 0; i < num_layers; i++) { 13357ec681f3Smrg uint32_t layer; 13367ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) 13377ec681f3Smrg layer = region->imageSubresource.baseArrayLayer + i; 13387ec681f3Smrg else 13397ec681f3Smrg layer = region->imageOffset.z + i; 13407ec681f3Smrg 13417ec681f3Smrg struct drm_v3d_submit_tfu tfu = { 13427ec681f3Smrg .ios = (height << 16) | width, 13437ec681f3Smrg .bo_handles = { 13447ec681f3Smrg dst_bo->handle, 13457ec681f3Smrg src_bo->handle != dst_bo->handle ? src_bo->handle : 0 13467ec681f3Smrg }, 13477ec681f3Smrg }; 13487ec681f3Smrg 13497ec681f3Smrg const uint32_t buffer_offset = 13507ec681f3Smrg buffer->mem_offset + region->bufferOffset + 13517ec681f3Smrg height * buffer_stride * i; 13527ec681f3Smrg 13537ec681f3Smrg const uint32_t src_offset = src_bo->offset + buffer_offset; 13547ec681f3Smrg tfu.iia |= src_offset; 13557ec681f3Smrg tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT; 13567ec681f3Smrg tfu.iis |= width; 13577ec681f3Smrg 13587ec681f3Smrg const uint32_t dst_offset = 13597ec681f3Smrg dst_bo->offset + v3dv_layer_offset(image, mip_level, layer); 13607ec681f3Smrg tfu.ioa |= dst_offset; 13617ec681f3Smrg 13627ec681f3Smrg tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + 13637ec681f3Smrg (slice->tiling - V3D_TILING_LINEARTILE)) << 13647ec681f3Smrg V3D_TFU_IOA_FORMAT_SHIFT; 13657ec681f3Smrg tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; 13667ec681f3Smrg 13677ec681f3Smrg /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the 13687ec681f3Smrg * OPAD field for the destination (how many extra UIF blocks beyond 13697ec681f3Smrg * those necessary to cover the height). 
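       *
       * Illustrative example (hypothetical numbers): if uif_block_h below
       * works out to 8 rows, a 100-row destination has an implicit padded
       * height of 104, and a slice padded_height of 112 then yields an
       * OPAD of 1.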
13707ec681f3Smrg */ 13717ec681f3Smrg if (slice->tiling == V3D_TILING_UIF_NO_XOR || 13727ec681f3Smrg slice->tiling == V3D_TILING_UIF_XOR) { 13737ec681f3Smrg uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp); 13747ec681f3Smrg uint32_t implicit_padded_height = align(height, uif_block_h); 13757ec681f3Smrg uint32_t icfg = 13767ec681f3Smrg (slice->padded_height - implicit_padded_height) / uif_block_h; 13777ec681f3Smrg tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; 13787ec681f3Smrg } 13797ec681f3Smrg 13807ec681f3Smrg v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); 13817ec681f3Smrg } 13827ec681f3Smrg 13837ec681f3Smrg return true; 13847ec681f3Smrg} 13857ec681f3Smrg 13867ec681f3Smrg/** 13877ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 13887ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 13897ec681f3Smrg */ 13907ec681f3Smrgstatic bool 13917ec681f3Smrgcopy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, 13927ec681f3Smrg struct v3dv_image *image, 13937ec681f3Smrg struct v3dv_buffer *buffer, 13947ec681f3Smrg const VkBufferImageCopy2KHR *region) 13957ec681f3Smrg{ 13967ec681f3Smrg VkFormat fb_format; 13977ec681f3Smrg if (!v3dv_meta_can_use_tlb(image, ®ion->imageOffset, &fb_format)) 13987ec681f3Smrg return false; 13997ec681f3Smrg 14007ec681f3Smrg uint32_t internal_type, internal_bpp; 14017ec681f3Smrg v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects) 14027ec681f3Smrg (fb_format, region->imageSubresource.aspectMask, 14037ec681f3Smrg &internal_type, &internal_bpp); 14047ec681f3Smrg 14057ec681f3Smrg uint32_t num_layers; 14067ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) 14077ec681f3Smrg num_layers = region->imageSubresource.layerCount; 14087ec681f3Smrg else 14097ec681f3Smrg num_layers = region->imageExtent.depth; 14107ec681f3Smrg assert(num_layers > 0); 14117ec681f3Smrg 14127ec681f3Smrg struct v3dv_job *job = 14137ec681f3Smrg v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); 14147ec681f3Smrg if (!job) 14157ec681f3Smrg return true; 14167ec681f3Smrg 14177ec681f3Smrg /* Handle copy to compressed format using a compatible format */ 14187ec681f3Smrg const uint32_t block_w = vk_format_get_blockwidth(image->vk.format); 14197ec681f3Smrg const uint32_t block_h = vk_format_get_blockheight(image->vk.format); 14207ec681f3Smrg const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); 14217ec681f3Smrg const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); 14227ec681f3Smrg 14237ec681f3Smrg v3dv_job_start_frame(job, width, height, num_layers, false, 14247ec681f3Smrg 1, internal_bpp, false); 14257ec681f3Smrg 14267ec681f3Smrg struct v3dv_meta_framebuffer framebuffer; 14277ec681f3Smrg v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, 14287ec681f3Smrg internal_type, &job->frame_tiling); 14297ec681f3Smrg 14307ec681f3Smrg v3dv_X(job->device, job_emit_binning_flush)(job); 14317ec681f3Smrg v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl) 14327ec681f3Smrg (job, image, buffer, &framebuffer, region); 14337ec681f3Smrg 14347ec681f3Smrg v3dv_cmd_buffer_finish_job(cmd_buffer); 14357ec681f3Smrg 14367ec681f3Smrg return true; 14377ec681f3Smrg} 14387ec681f3Smrg 14397ec681f3Smrgstatic bool 14407ec681f3Smrgcreate_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer, 14417ec681f3Smrg struct v3dv_image *image, 14427ec681f3Smrg struct v3dv_buffer *buffer, 14437ec681f3Smrg const VkBufferImageCopy2KHR *region) 14447ec681f3Smrg{ 14457ec681f3Smrg if 
(copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region)) 14467ec681f3Smrg return true; 14477ec681f3Smrg if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region)) 14487ec681f3Smrg return true; 14497ec681f3Smrg return false; 14507ec681f3Smrg} 14517ec681f3Smrg 14527ec681f3Smrgstatic VkResult 14537ec681f3Smrgcreate_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer) 14547ec681f3Smrg{ 14557ec681f3Smrg /* If this is not the first pool we create for this command buffer 14567ec681f3Smrg * size it based on the size of the currently exhausted pool. 14577ec681f3Smrg */ 14587ec681f3Smrg uint32_t descriptor_count = 64; 14597ec681f3Smrg if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) { 14607ec681f3Smrg struct v3dv_descriptor_pool *exhausted_pool = 14617ec681f3Smrg v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool); 14627ec681f3Smrg descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024); 14637ec681f3Smrg } 14647ec681f3Smrg 14657ec681f3Smrg /* Create the descriptor pool */ 14667ec681f3Smrg cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE; 14677ec681f3Smrg VkDescriptorPoolSize pool_size = { 14687ec681f3Smrg .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 14697ec681f3Smrg .descriptorCount = descriptor_count, 14707ec681f3Smrg }; 14717ec681f3Smrg VkDescriptorPoolCreateInfo info = { 14727ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 14737ec681f3Smrg .maxSets = descriptor_count, 14747ec681f3Smrg .poolSizeCount = 1, 14757ec681f3Smrg .pPoolSizes = &pool_size, 14767ec681f3Smrg .flags = 0, 14777ec681f3Smrg }; 14787ec681f3Smrg VkResult result = 14797ec681f3Smrg v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device), 14807ec681f3Smrg &info, 14817ec681f3Smrg &cmd_buffer->device->vk.alloc, 14827ec681f3Smrg &cmd_buffer->meta.texel_buffer_copy.dspool); 14837ec681f3Smrg 14847ec681f3Smrg if (result == VK_SUCCESS) { 14857ec681f3Smrg assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE); 14867ec681f3Smrg const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool; 14877ec681f3Smrg 14887ec681f3Smrg v3dv_cmd_buffer_add_private_obj( 14897ec681f3Smrg cmd_buffer, (uintptr_t) _pool, 14907ec681f3Smrg (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool); 14917ec681f3Smrg 14927ec681f3Smrg struct v3dv_descriptor_pool *pool = 14937ec681f3Smrg v3dv_descriptor_pool_from_handle(_pool); 14947ec681f3Smrg pool->is_driver_internal = true; 14957ec681f3Smrg } 14967ec681f3Smrg 14977ec681f3Smrg return result; 14987ec681f3Smrg} 14997ec681f3Smrg 15007ec681f3Smrgstatic VkResult 15017ec681f3Smrgallocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer, 15027ec681f3Smrg VkDescriptorSet *set) 15037ec681f3Smrg{ 15047ec681f3Smrg /* Make sure we have a descriptor pool */ 15057ec681f3Smrg VkResult result; 15067ec681f3Smrg if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) { 15077ec681f3Smrg result = create_texel_buffer_copy_descriptor_pool(cmd_buffer); 15087ec681f3Smrg if (result != VK_SUCCESS) 15097ec681f3Smrg return result; 15107ec681f3Smrg } 15117ec681f3Smrg assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE); 15127ec681f3Smrg 15137ec681f3Smrg /* Allocate descriptor set */ 15147ec681f3Smrg struct v3dv_device *device = cmd_buffer->device; 15157ec681f3Smrg VkDevice _device = v3dv_device_to_handle(device); 15167ec681f3Smrg VkDescriptorSetAllocateInfo info = { 15177ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 
      .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
      .descriptorSetCount = 1,
      .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
   };
   result = v3dv_AllocateDescriptorSets(_device, &info, set);

   /* If we ran out of pool space, grow the pool and try again */
   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
      result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
      if (result == VK_SUCCESS) {
         info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
         result = v3dv_AllocateDescriptorSets(_device, &info, set);
      }
   }

   return result;
}

static void
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
                                         VkColorComponentFlags cmask,
                                         VkComponentMapping *cswizzle,
                                         bool is_layered,
                                         uint8_t *key)
{
   memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);

   uint32_t *p = (uint32_t *) key;

   *p = format;
   p++;

   *p = cmask;
   p++;

   /* Note that we are using a single byte for this, so we could pack
    * more data into this 32-bit slot in the future.
    */
   *p = is_layered ? 1 : 0;
   p++;

   memcpy(p, cswizzle, sizeof(VkComponentMapping));
   p += sizeof(VkComponentMapping) / sizeof(uint32_t);

   assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
create_blit_render_pass(struct v3dv_device *device,
                        VkFormat dst_format,
                        VkFormat src_format,
                        VkRenderPass *pass_load,
                        VkRenderPass *pass_no_load);

static nir_ssa_def *gen_rect_vertices(nir_builder *b);

static bool
create_pipeline(struct v3dv_device *device,
                struct v3dv_render_pass *pass,
                struct nir_shader *vs_nir,
                struct nir_shader *gs_nir,
                struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineMultisampleStateCreateInfo *ms_state,
                const VkPipelineLayout layout,
                VkPipeline *pipeline);

static nir_shader *
get_texel_buffer_copy_vs()
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
                                                  "meta texel buffer copy vs");
   nir_variable *vs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vec4_type(), "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;
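   /* gen_rect_vertices() (forward-declared above and shared with the blit
    * path) is expected to produce the rectangle corners consumed by the
    * 4-vertex draw issued from texel_buffer_shader_copy(); the viewport and
    * scissor set there clamp rasterization to the copy region.
    */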
15977ec681f3Smrg 15987ec681f3Smrg nir_ssa_def *pos = gen_rect_vertices(&b); 15997ec681f3Smrg nir_store_var(&b, vs_out_pos, pos, 0xf); 16007ec681f3Smrg 16017ec681f3Smrg return b.shader; 16027ec681f3Smrg} 16037ec681f3Smrg 16047ec681f3Smrgstatic nir_shader * 16057ec681f3Smrgget_texel_buffer_copy_gs() 16067ec681f3Smrg{ 16077ec681f3Smrg /* FIXME: this creates a geometry shader that takes the index of a single 16087ec681f3Smrg * layer to clear from push constants, so we need to emit a draw call for 16097ec681f3Smrg * each layer that we want to clear. We could actually do better and have it 16107ec681f3Smrg * take a range of layers however, if we were to do this, we would need to 16117ec681f3Smrg * be careful not to exceed the maximum number of output vertices allowed in 16127ec681f3Smrg * a geometry shader. 16137ec681f3Smrg */ 16147ec681f3Smrg const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 16157ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, 16167ec681f3Smrg "meta texel buffer copy gs"); 16177ec681f3Smrg nir_shader *nir = b.shader; 16187ec681f3Smrg nir->info.inputs_read = 1ull << VARYING_SLOT_POS; 16197ec681f3Smrg nir->info.outputs_written = (1ull << VARYING_SLOT_POS) | 16207ec681f3Smrg (1ull << VARYING_SLOT_LAYER); 16217ec681f3Smrg nir->info.gs.input_primitive = GL_TRIANGLES; 16227ec681f3Smrg nir->info.gs.output_primitive = GL_TRIANGLE_STRIP; 16237ec681f3Smrg nir->info.gs.vertices_in = 3; 16247ec681f3Smrg nir->info.gs.vertices_out = 3; 16257ec681f3Smrg nir->info.gs.invocations = 1; 16267ec681f3Smrg nir->info.gs.active_stream_mask = 0x1; 16277ec681f3Smrg 16287ec681f3Smrg /* in vec4 gl_Position[3] */ 16297ec681f3Smrg nir_variable *gs_in_pos = 16307ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_in, 16317ec681f3Smrg glsl_array_type(glsl_vec4_type(), 3, 0), 16327ec681f3Smrg "in_gl_Position"); 16337ec681f3Smrg gs_in_pos->data.location = VARYING_SLOT_POS; 16347ec681f3Smrg 16357ec681f3Smrg /* out vec4 gl_Position */ 16367ec681f3Smrg nir_variable *gs_out_pos = 16377ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 16387ec681f3Smrg "out_gl_Position"); 16397ec681f3Smrg gs_out_pos->data.location = VARYING_SLOT_POS; 16407ec681f3Smrg 16417ec681f3Smrg /* out float gl_Layer */ 16427ec681f3Smrg nir_variable *gs_out_layer = 16437ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(), 16447ec681f3Smrg "out_gl_Layer"); 16457ec681f3Smrg gs_out_layer->data.location = VARYING_SLOT_LAYER; 16467ec681f3Smrg 16477ec681f3Smrg /* Emit output triangle */ 16487ec681f3Smrg for (uint32_t i = 0; i < 3; i++) { 16497ec681f3Smrg /* gl_Position from shader input */ 16507ec681f3Smrg nir_deref_instr *in_pos_i = 16517ec681f3Smrg nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i); 16527ec681f3Smrg nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i); 16537ec681f3Smrg 16547ec681f3Smrg /* gl_Layer from push constants */ 16557ec681f3Smrg nir_ssa_def *layer = 16567ec681f3Smrg nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), 16577ec681f3Smrg .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET, 16587ec681f3Smrg .range = 4); 16597ec681f3Smrg nir_store_var(&b, gs_out_layer, layer, 0x1); 16607ec681f3Smrg 16617ec681f3Smrg nir_emit_vertex(&b, 0); 16627ec681f3Smrg } 16637ec681f3Smrg 16647ec681f3Smrg nir_end_primitive(&b, 0); 16657ec681f3Smrg 16667ec681f3Smrg return nir; 16677ec681f3Smrg} 16687ec681f3Smrg 16697ec681f3Smrgstatic nir_ssa_def * 16707ec681f3Smrgload_frag_coord(nir_builder 
*b) 16717ec681f3Smrg{ 16727ec681f3Smrg nir_foreach_shader_in_variable(var, b->shader) { 16737ec681f3Smrg if (var->data.location == VARYING_SLOT_POS) 16747ec681f3Smrg return nir_load_var(b, var); 16757ec681f3Smrg } 16767ec681f3Smrg nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in, 16777ec681f3Smrg glsl_vec4_type(), NULL); 16787ec681f3Smrg pos->data.location = VARYING_SLOT_POS; 16797ec681f3Smrg return nir_load_var(b, pos); 16807ec681f3Smrg} 16817ec681f3Smrg 16827ec681f3Smrgstatic uint32_t 16837ec681f3Smrgcomponent_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz) 16847ec681f3Smrg{ 16857ec681f3Smrg if (swz == VK_COMPONENT_SWIZZLE_IDENTITY) 16867ec681f3Smrg swz = comp; 16877ec681f3Smrg 16887ec681f3Smrg switch (swz) { 16897ec681f3Smrg case VK_COMPONENT_SWIZZLE_R: 16907ec681f3Smrg return 0; 16917ec681f3Smrg case VK_COMPONENT_SWIZZLE_G: 16927ec681f3Smrg return 1; 16937ec681f3Smrg case VK_COMPONENT_SWIZZLE_B: 16947ec681f3Smrg return 2; 16957ec681f3Smrg case VK_COMPONENT_SWIZZLE_A: 16967ec681f3Smrg return 3; 16977ec681f3Smrg default: 16987ec681f3Smrg unreachable("Invalid swizzle"); 16997ec681f3Smrg }; 17007ec681f3Smrg} 17017ec681f3Smrg 17027ec681f3Smrgstatic nir_shader * 17037ec681f3Smrgget_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format, 17047ec681f3Smrg VkComponentMapping *cswizzle) 17057ec681f3Smrg{ 17067ec681f3Smrg const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 17077ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options, 17087ec681f3Smrg "meta texel buffer copy fs"); 17097ec681f3Smrg 17107ec681f3Smrg /* We only use the copy from texel buffer shader to implement 17117ec681f3Smrg * copy_buffer_to_image_shader, which always selects a compatible integer 17127ec681f3Smrg * format for the copy. 17137ec681f3Smrg */ 17147ec681f3Smrg assert(vk_format_is_int(format)); 17157ec681f3Smrg 17167ec681f3Smrg /* Fragment shader output color */ 17177ec681f3Smrg nir_variable *fs_out_color = 17187ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, 17197ec681f3Smrg glsl_uvec4_type(), "out_color"); 17207ec681f3Smrg fs_out_color->data.location = FRAG_RESULT_DATA0; 17217ec681f3Smrg 17227ec681f3Smrg /* Texel buffer input */ 17237ec681f3Smrg const struct glsl_type *sampler_type = 17247ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT); 17257ec681f3Smrg nir_variable *sampler = 17267ec681f3Smrg nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf"); 17277ec681f3Smrg sampler->data.descriptor_set = 0; 17287ec681f3Smrg sampler->data.binding = 0; 17297ec681f3Smrg 17307ec681f3Smrg /* Load the box describing the pixel region we want to copy from the 17317ec681f3Smrg * texel buffer. 
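    *
    * The layout is expected to match the push_data words set up in
    * texel_buffer_shader_copy(): (x0, y0, x1, y1) in framebuffer pixels,
    * where x1/y1 are the last pixel covered by the region (inclusive).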
    */
   nir_ssa_def *box =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
                             .range = 16);

   /* Load the buffer stride (this comes in texel units) */
   nir_ssa_def *stride =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
                             .range = 4);

   /* Load the buffer offset (this comes in texel units) */
   nir_ssa_def *offset =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
                             .range = 4);

   nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));

   /* Load pixel data from texel buffer based on the x,y offset of the pixel
    * within the box. Texel buffers are 1D arrays of texels.
    *
    * Notice that we already make sure that we only generate fragments that are
    * inside the box through the scissor/viewport state, so our offset into the
    * texel buffer should always be within its bounds and we don't need
    * to add a check for that here.
    */
   nir_ssa_def *x_offset =
      nir_isub(&b, nir_channel(&b, coord, 0),
               nir_channel(&b, box, 0));
   nir_ssa_def *y_offset =
      nir_isub(&b, nir_channel(&b, coord, 1),
               nir_channel(&b, box, 1));
   nir_ssa_def *texel_offset =
      nir_iadd(&b, nir_iadd(&b, offset, x_offset),
               nir_imul(&b, y_offset, stride));

   nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(texel_offset);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->dest_type = nir_type_uint32;
   tex->is_array = false;
   tex->coord_components = 1;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
   nir_builder_instr_insert(&b, &tex->instr);

   uint32_t swiz[4];
   swiz[0] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
   swiz[1] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
   swiz[2] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
   swiz[3] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
   nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
   nir_store_var(&b, fs_out_color, s, 0xf);

   return b.shader;
}

static bool
create_texel_buffer_copy_pipeline(struct v3dv_device *device,
18017ec681f3Smrg VkFormat format, 18027ec681f3Smrg VkColorComponentFlags cmask, 18037ec681f3Smrg VkComponentMapping *cswizzle, 18047ec681f3Smrg bool is_layered, 18057ec681f3Smrg VkRenderPass _pass, 18067ec681f3Smrg VkPipelineLayout pipeline_layout, 18077ec681f3Smrg VkPipeline *pipeline) 18087ec681f3Smrg{ 18097ec681f3Smrg struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass); 18107ec681f3Smrg 18117ec681f3Smrg assert(vk_format_is_color(format)); 18127ec681f3Smrg 18137ec681f3Smrg nir_shader *vs_nir = get_texel_buffer_copy_vs(); 18147ec681f3Smrg nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle); 18157ec681f3Smrg nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL; 18167ec681f3Smrg 18177ec681f3Smrg const VkPipelineVertexInputStateCreateInfo vi_state = { 18187ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 18197ec681f3Smrg .vertexBindingDescriptionCount = 0, 18207ec681f3Smrg .vertexAttributeDescriptionCount = 0, 18217ec681f3Smrg }; 18227ec681f3Smrg 18237ec681f3Smrg VkPipelineDepthStencilStateCreateInfo ds_state = { 18247ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 18257ec681f3Smrg }; 18267ec681f3Smrg 18277ec681f3Smrg VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 }; 18287ec681f3Smrg blend_att_state[0] = (VkPipelineColorBlendAttachmentState) { 18297ec681f3Smrg .blendEnable = false, 18307ec681f3Smrg .colorWriteMask = cmask, 18317ec681f3Smrg }; 18327ec681f3Smrg 18337ec681f3Smrg const VkPipelineColorBlendStateCreateInfo cb_state = { 18347ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 18357ec681f3Smrg .logicOpEnable = false, 18367ec681f3Smrg .attachmentCount = 1, 18377ec681f3Smrg .pAttachments = blend_att_state 18387ec681f3Smrg }; 18397ec681f3Smrg 18407ec681f3Smrg const VkPipelineMultisampleStateCreateInfo ms_state = { 18417ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 18427ec681f3Smrg .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, 18437ec681f3Smrg .sampleShadingEnable = false, 18447ec681f3Smrg .pSampleMask = NULL, 18457ec681f3Smrg .alphaToCoverageEnable = false, 18467ec681f3Smrg .alphaToOneEnable = false, 18477ec681f3Smrg }; 18487ec681f3Smrg 18497ec681f3Smrg return create_pipeline(device, 18507ec681f3Smrg pass, 18517ec681f3Smrg vs_nir, gs_nir, fs_nir, 18527ec681f3Smrg &vi_state, 18537ec681f3Smrg &ds_state, 18547ec681f3Smrg &cb_state, 18557ec681f3Smrg &ms_state, 18567ec681f3Smrg pipeline_layout, 18577ec681f3Smrg pipeline); 18587ec681f3Smrg} 18597ec681f3Smrg 18607ec681f3Smrgstatic bool 18617ec681f3Smrgget_copy_texel_buffer_pipeline( 18627ec681f3Smrg struct v3dv_device *device, 18637ec681f3Smrg VkFormat format, 18647ec681f3Smrg VkColorComponentFlags cmask, 18657ec681f3Smrg VkComponentMapping *cswizzle, 18667ec681f3Smrg VkImageType image_type, 18677ec681f3Smrg bool is_layered, 18687ec681f3Smrg struct v3dv_meta_texel_buffer_copy_pipeline **pipeline) 18697ec681f3Smrg{ 18707ec681f3Smrg bool ok = true; 18717ec681f3Smrg 18727ec681f3Smrg uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE]; 18737ec681f3Smrg get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered, 18747ec681f3Smrg key); 18757ec681f3Smrg 18767ec681f3Smrg mtx_lock(&device->meta.mtx); 18777ec681f3Smrg struct hash_entry *entry = 18787ec681f3Smrg _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type], 18797ec681f3Smrg &key); 18807ec681f3Smrg if (entry) { 18817ec681f3Smrg mtx_unlock(&device->meta.mtx); 
18827ec681f3Smrg *pipeline = entry->data; 18837ec681f3Smrg return true; 18847ec681f3Smrg } 18857ec681f3Smrg 18867ec681f3Smrg *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8, 18877ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 18887ec681f3Smrg 18897ec681f3Smrg if (*pipeline == NULL) 18907ec681f3Smrg goto fail; 18917ec681f3Smrg 18927ec681f3Smrg /* The blit render pass is compatible */ 18937ec681f3Smrg ok = create_blit_render_pass(device, format, format, 18947ec681f3Smrg &(*pipeline)->pass, 18957ec681f3Smrg &(*pipeline)->pass_no_load); 18967ec681f3Smrg if (!ok) 18977ec681f3Smrg goto fail; 18987ec681f3Smrg 18997ec681f3Smrg ok = 19007ec681f3Smrg create_texel_buffer_copy_pipeline(device, 19017ec681f3Smrg format, cmask, cswizzle, is_layered, 19027ec681f3Smrg (*pipeline)->pass, 19037ec681f3Smrg device->meta.texel_buffer_copy.p_layout, 19047ec681f3Smrg &(*pipeline)->pipeline); 19057ec681f3Smrg if (!ok) 19067ec681f3Smrg goto fail; 19077ec681f3Smrg 19087ec681f3Smrg _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type], 19097ec681f3Smrg &key, *pipeline); 19107ec681f3Smrg 19117ec681f3Smrg mtx_unlock(&device->meta.mtx); 19127ec681f3Smrg return true; 19137ec681f3Smrg 19147ec681f3Smrgfail: 19157ec681f3Smrg mtx_unlock(&device->meta.mtx); 19167ec681f3Smrg 19177ec681f3Smrg VkDevice _device = v3dv_device_to_handle(device); 19187ec681f3Smrg if (*pipeline) { 19197ec681f3Smrg if ((*pipeline)->pass) 19207ec681f3Smrg v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc); 19217ec681f3Smrg if ((*pipeline)->pipeline) 19227ec681f3Smrg v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc); 19237ec681f3Smrg vk_free(&device->vk.alloc, *pipeline); 19247ec681f3Smrg *pipeline = NULL; 19257ec681f3Smrg } 19267ec681f3Smrg 19277ec681f3Smrg return false; 19287ec681f3Smrg} 19297ec681f3Smrg 19307ec681f3Smrgstatic bool 19317ec681f3Smrgtexel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, 19327ec681f3Smrg VkImageAspectFlags aspect, 19337ec681f3Smrg struct v3dv_image *image, 19347ec681f3Smrg VkFormat dst_format, 19357ec681f3Smrg VkFormat src_format, 19367ec681f3Smrg struct v3dv_buffer *buffer, 19377ec681f3Smrg uint32_t buffer_bpp, 19387ec681f3Smrg VkColorComponentFlags cmask, 19397ec681f3Smrg VkComponentMapping *cswizzle, 19407ec681f3Smrg uint32_t region_count, 19417ec681f3Smrg const VkBufferImageCopy2KHR *regions) 19427ec681f3Smrg{ 19437ec681f3Smrg VkResult result; 19447ec681f3Smrg bool handled = false; 19457ec681f3Smrg 19467ec681f3Smrg assert(cswizzle); 19477ec681f3Smrg 19487ec681f3Smrg /* This is a copy path, so we don't handle format conversions. The only 19497ec681f3Smrg * exception are stencil to D24S8 copies, which are handled as a color 19507ec681f3Smrg * masked R8->RGBA8 copy. 19517ec681f3Smrg */ 19527ec681f3Smrg assert(src_format == dst_format || 19537ec681f3Smrg (dst_format == VK_FORMAT_R8G8B8A8_UINT && 19547ec681f3Smrg src_format == VK_FORMAT_R8_UINT && 19557ec681f3Smrg cmask == VK_COLOR_COMPONENT_R_BIT)); 19567ec681f3Smrg 19577ec681f3Smrg /* We only handle color copies. Callers can copy D/S aspects by using 19587ec681f3Smrg * a compatible color format and maybe a cmask/cswizzle for D24 formats. 19597ec681f3Smrg */ 19607ec681f3Smrg if (aspect != VK_IMAGE_ASPECT_COLOR_BIT) 19617ec681f3Smrg return handled; 19627ec681f3Smrg 19637ec681f3Smrg /* FIXME: we only handle uncompressed images for now. 
*/ 19647ec681f3Smrg if (vk_format_is_compressed(image->vk.format)) 19657ec681f3Smrg return handled; 19667ec681f3Smrg 19677ec681f3Smrg const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT | 19687ec681f3Smrg VK_COLOR_COMPONENT_G_BIT | 19697ec681f3Smrg VK_COLOR_COMPONENT_B_BIT | 19707ec681f3Smrg VK_COLOR_COMPONENT_A_BIT; 19717ec681f3Smrg if (cmask == 0) 19727ec681f3Smrg cmask = full_cmask; 19737ec681f3Smrg 19747ec681f3Smrg /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT 19757ec681f3Smrg * so we can bind it as a texel buffer. Otherwise, the buffer view 19767ec681f3Smrg * we create below won't setup the texture state that we need for this. 19777ec681f3Smrg */ 19787ec681f3Smrg if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) { 19797ec681f3Smrg if (v3dv_buffer_format_supports_features( 19807ec681f3Smrg cmd_buffer->device, src_format, 19817ec681f3Smrg VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) { 19827ec681f3Smrg buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; 19837ec681f3Smrg } else { 19847ec681f3Smrg return handled; 19857ec681f3Smrg } 19867ec681f3Smrg } 19877ec681f3Smrg 19887ec681f3Smrg /* At this point we should be able to handle the copy unless an unexpected 19897ec681f3Smrg * error occurs, such as an OOM. 19907ec681f3Smrg */ 19917ec681f3Smrg handled = true; 19927ec681f3Smrg 19937ec681f3Smrg 19947ec681f3Smrg /* Compute the number of layers to copy. 19957ec681f3Smrg * 19967ec681f3Smrg * If we are batching (region_count > 1) all our regions have the same 19977ec681f3Smrg * image subresource so we can take this from the first region. For 3D 19987ec681f3Smrg * images we require the same depth extent. 19997ec681f3Smrg */ 20007ec681f3Smrg const VkImageSubresourceLayers *resource = ®ions[0].imageSubresource; 20017ec681f3Smrg uint32_t num_layers; 20027ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) { 20037ec681f3Smrg num_layers = resource->layerCount; 20047ec681f3Smrg } else { 20057ec681f3Smrg assert(region_count == 1); 20067ec681f3Smrg num_layers = regions[0].imageExtent.depth; 20077ec681f3Smrg } 20087ec681f3Smrg assert(num_layers > 0); 20097ec681f3Smrg 20107ec681f3Smrg /* Get the texel buffer copy pipeline */ 20117ec681f3Smrg struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL; 20127ec681f3Smrg bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device, 20137ec681f3Smrg dst_format, cmask, cswizzle, 20147ec681f3Smrg image->vk.image_type, num_layers > 1, 20157ec681f3Smrg &pipeline); 20167ec681f3Smrg if (!ok) 20177ec681f3Smrg return handled; 20187ec681f3Smrg assert(pipeline && pipeline->pipeline && pipeline->pass); 20197ec681f3Smrg 20207ec681f3Smrg /* Setup descriptor set for the source texel buffer. We don't have to 20217ec681f3Smrg * register the descriptor as a private command buffer object since 20227ec681f3Smrg * all descriptors will be freed automatically with the descriptor 20237ec681f3Smrg * pool. 
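    *
    * The pool itself is registered as a private object of the command buffer
    * when it is created (see create_texel_buffer_copy_descriptor_pool()), so
    * it is cleaned up together with the command buffer as well.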
20247ec681f3Smrg */ 20257ec681f3Smrg VkDescriptorSet set; 20267ec681f3Smrg result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set); 20277ec681f3Smrg if (result != VK_SUCCESS) 20287ec681f3Smrg return handled; 20297ec681f3Smrg 20307ec681f3Smrg /* FIXME: for some reason passing region->bufferOffset here for the 20317ec681f3Smrg * offset field doesn't work, making the following CTS tests fail: 20327ec681f3Smrg * 20337ec681f3Smrg * dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset* 20347ec681f3Smrg * 20357ec681f3Smrg * So instead we pass 0 here and we pass the offset in texels as a push 20367ec681f3Smrg * constant to the shader, which seems to work correctly. 20377ec681f3Smrg */ 20387ec681f3Smrg VkDevice _device = v3dv_device_to_handle(cmd_buffer->device); 20397ec681f3Smrg VkBufferViewCreateInfo buffer_view_info = { 20407ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 20417ec681f3Smrg .buffer = v3dv_buffer_to_handle(buffer), 20427ec681f3Smrg .format = src_format, 20437ec681f3Smrg .offset = 0, 20447ec681f3Smrg .range = VK_WHOLE_SIZE, 20457ec681f3Smrg }; 20467ec681f3Smrg 20477ec681f3Smrg VkBufferView texel_buffer_view; 20487ec681f3Smrg result = v3dv_CreateBufferView(_device, &buffer_view_info, 20497ec681f3Smrg &cmd_buffer->device->vk.alloc, 20507ec681f3Smrg &texel_buffer_view); 20517ec681f3Smrg if (result != VK_SUCCESS) 20527ec681f3Smrg return handled; 20537ec681f3Smrg 20547ec681f3Smrg v3dv_cmd_buffer_add_private_obj( 20557ec681f3Smrg cmd_buffer, (uintptr_t)texel_buffer_view, 20567ec681f3Smrg (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView); 20577ec681f3Smrg 20587ec681f3Smrg VkWriteDescriptorSet write = { 20597ec681f3Smrg .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 20607ec681f3Smrg .dstSet = set, 20617ec681f3Smrg .dstBinding = 0, 20627ec681f3Smrg .dstArrayElement = 0, 20637ec681f3Smrg .descriptorCount = 1, 20647ec681f3Smrg .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 20657ec681f3Smrg .pTexelBufferView = &texel_buffer_view, 20667ec681f3Smrg }; 20677ec681f3Smrg v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL); 20687ec681f3Smrg 20697ec681f3Smrg /* Push command buffer state before starting meta operation */ 20707ec681f3Smrg v3dv_cmd_buffer_meta_state_push(cmd_buffer, true); 20717ec681f3Smrg uint32_t dirty_dynamic_state = 0; 20727ec681f3Smrg 20737ec681f3Smrg /* Bind common state for all layers and regions */ 20747ec681f3Smrg VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer); 20757ec681f3Smrg v3dv_CmdBindPipeline(_cmd_buffer, 20767ec681f3Smrg VK_PIPELINE_BIND_POINT_GRAPHICS, 20777ec681f3Smrg pipeline->pipeline); 20787ec681f3Smrg 20797ec681f3Smrg v3dv_CmdBindDescriptorSets(_cmd_buffer, 20807ec681f3Smrg VK_PIPELINE_BIND_POINT_GRAPHICS, 20817ec681f3Smrg cmd_buffer->device->meta.texel_buffer_copy.p_layout, 20827ec681f3Smrg 0, 1, &set, 20837ec681f3Smrg 0, NULL); 20847ec681f3Smrg 20857ec681f3Smrg /* Setup framebuffer. 20867ec681f3Smrg * 20877ec681f3Smrg * For 3D images, this creates a layered framebuffer with a number of 20887ec681f3Smrg * layers matching the depth extent of the 3D image. 
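    *
    * The framebuffer dimensions below are minified to the destination mip
    * level, and the image view covers num_layers layers starting at the
    * region's baseArrayLayer.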
    */
   uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
   uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
   VkImageViewCreateInfo image_view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = v3dv_image_to_handle(image),
      .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
      .format = dst_format,
      .subresourceRange = {
         .aspectMask = aspect,
         .baseMipLevel = resource->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = resource->baseArrayLayer,
         .layerCount = num_layers,
      },
   };
   VkImageView image_view;
   result = v3dv_CreateImageView(_device, &image_view_info,
                                 &cmd_buffer->device->vk.alloc, &image_view);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)image_view,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

   VkFramebufferCreateInfo fb_info = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      .renderPass = pipeline->pass,
      .attachmentCount = 1,
      .pAttachments = &image_view,
      .width = fb_width,
      .height = fb_height,
      .layers = num_layers,
   };

   VkFramebuffer fb;
   result = v3dv_CreateFramebuffer(_device, &fb_info,
                                   &cmd_buffer->device->vk.alloc, &fb);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)fb,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

   /* For each layer */
   for (uint32_t l = 0; l < num_layers; l++) {
      /* Start render pass for this layer.
       *
       * If we only have one region to copy, then we might be able to
       * skip the TLB load if it is aligned to tile boundaries. All layers
       * copy the same area, so we only need to check this once.
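       *
       * A typical case where the load can be skipped is a single region
       * covering the whole framebuffer with a full color write mask, since
       * its render area is then trivially tile aligned.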
21427ec681f3Smrg */ 21437ec681f3Smrg bool can_skip_tlb_load = false; 21447ec681f3Smrg VkRect2D render_area; 21457ec681f3Smrg if (region_count == 1) { 21467ec681f3Smrg render_area.offset.x = regions[0].imageOffset.x; 21477ec681f3Smrg render_area.offset.y = regions[0].imageOffset.y; 21487ec681f3Smrg render_area.extent.width = regions[0].imageExtent.width; 21497ec681f3Smrg render_area.extent.height = regions[0].imageExtent.height; 21507ec681f3Smrg 21517ec681f3Smrg if (l == 0) { 21527ec681f3Smrg struct v3dv_render_pass *pipeline_pass = 21537ec681f3Smrg v3dv_render_pass_from_handle(pipeline->pass); 21547ec681f3Smrg can_skip_tlb_load = 21557ec681f3Smrg cmask == full_cmask && 21567ec681f3Smrg v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area, 21577ec681f3Smrg v3dv_framebuffer_from_handle(fb), 21587ec681f3Smrg pipeline_pass, 0); 21597ec681f3Smrg } 21607ec681f3Smrg } else { 21617ec681f3Smrg render_area.offset.x = 0; 21627ec681f3Smrg render_area.offset.y = 0; 21637ec681f3Smrg render_area.extent.width = fb_width; 21647ec681f3Smrg render_area.extent.height = fb_height; 21657ec681f3Smrg } 21667ec681f3Smrg 21677ec681f3Smrg VkRenderPassBeginInfo rp_info = { 21687ec681f3Smrg .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 21697ec681f3Smrg .renderPass = can_skip_tlb_load ? pipeline->pass_no_load : 21707ec681f3Smrg pipeline->pass, 21717ec681f3Smrg .framebuffer = fb, 21727ec681f3Smrg .renderArea = render_area, 21737ec681f3Smrg .clearValueCount = 0, 21747ec681f3Smrg }; 21757ec681f3Smrg 21767ec681f3Smrg v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE); 21777ec681f3Smrg struct v3dv_job *job = cmd_buffer->state.job; 21787ec681f3Smrg if (!job) 21797ec681f3Smrg goto fail; 21807ec681f3Smrg 21817ec681f3Smrg /* If we are using a layered copy we need to specify the layer for the 21827ec681f3Smrg * Geometry Shader. 
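       *
       * The 24-byte offset used in the push constant update below is assumed
       * to correspond to TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET, which is where
       * the geometry shader built in get_texel_buffer_copy_gs() reads the
       * layer index from.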
21837ec681f3Smrg */ 21847ec681f3Smrg if (num_layers > 1) { 21857ec681f3Smrg uint32_t layer = resource->baseArrayLayer + l; 21867ec681f3Smrg v3dv_CmdPushConstants(_cmd_buffer, 21877ec681f3Smrg cmd_buffer->device->meta.texel_buffer_copy.p_layout, 21887ec681f3Smrg VK_SHADER_STAGE_GEOMETRY_BIT, 21897ec681f3Smrg 24, 4, &layer); 21907ec681f3Smrg } 21917ec681f3Smrg 21927ec681f3Smrg /* For each region */ 21937ec681f3Smrg dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR; 21947ec681f3Smrg for (uint32_t r = 0; r < region_count; r++) { 21957ec681f3Smrg const VkBufferImageCopy2KHR *region = ®ions[r]; 21967ec681f3Smrg 21977ec681f3Smrg /* Obtain the 2D buffer region spec */ 21987ec681f3Smrg uint32_t buf_width, buf_height; 21997ec681f3Smrg if (region->bufferRowLength == 0) 22007ec681f3Smrg buf_width = region->imageExtent.width; 22017ec681f3Smrg else 22027ec681f3Smrg buf_width = region->bufferRowLength; 22037ec681f3Smrg 22047ec681f3Smrg if (region->bufferImageHeight == 0) 22057ec681f3Smrg buf_height = region->imageExtent.height; 22067ec681f3Smrg else 22077ec681f3Smrg buf_height = region->bufferImageHeight; 22087ec681f3Smrg 22097ec681f3Smrg const VkViewport viewport = { 22107ec681f3Smrg .x = region->imageOffset.x, 22117ec681f3Smrg .y = region->imageOffset.y, 22127ec681f3Smrg .width = region->imageExtent.width, 22137ec681f3Smrg .height = region->imageExtent.height, 22147ec681f3Smrg .minDepth = 0.0f, 22157ec681f3Smrg .maxDepth = 1.0f 22167ec681f3Smrg }; 22177ec681f3Smrg v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport); 22187ec681f3Smrg const VkRect2D scissor = { 22197ec681f3Smrg .offset = { region->imageOffset.x, region->imageOffset.y }, 22207ec681f3Smrg .extent = { region->imageExtent.width, region->imageExtent.height } 22217ec681f3Smrg }; 22227ec681f3Smrg v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor); 22237ec681f3Smrg 22247ec681f3Smrg const VkDeviceSize buf_offset = 22257ec681f3Smrg region->bufferOffset / buffer_bpp + l * buf_height * buf_width; 22267ec681f3Smrg uint32_t push_data[6] = { 22277ec681f3Smrg region->imageOffset.x, 22287ec681f3Smrg region->imageOffset.y, 22297ec681f3Smrg region->imageOffset.x + region->imageExtent.width - 1, 22307ec681f3Smrg region->imageOffset.y + region->imageExtent.height - 1, 22317ec681f3Smrg buf_width, 22327ec681f3Smrg buf_offset, 22337ec681f3Smrg }; 22347ec681f3Smrg 22357ec681f3Smrg v3dv_CmdPushConstants(_cmd_buffer, 22367ec681f3Smrg cmd_buffer->device->meta.texel_buffer_copy.p_layout, 22377ec681f3Smrg VK_SHADER_STAGE_FRAGMENT_BIT, 22387ec681f3Smrg 0, sizeof(push_data), &push_data); 22397ec681f3Smrg 22407ec681f3Smrg v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0); 22417ec681f3Smrg } /* For each region */ 22427ec681f3Smrg 22437ec681f3Smrg v3dv_CmdEndRenderPass(_cmd_buffer); 22447ec681f3Smrg } /* For each layer */ 22457ec681f3Smrg 22467ec681f3Smrgfail: 22477ec681f3Smrg v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true); 22487ec681f3Smrg return handled; 22497ec681f3Smrg} 22507ec681f3Smrg 22517ec681f3Smrg/** 22527ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 22537ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 
 */
static bool
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          VkImageAspectFlags aspect,
                          struct v3dv_image *image,
                          VkFormat dst_format,
                          VkFormat src_format,
                          struct v3dv_buffer *buffer,
                          uint32_t buffer_bpp,
                          VkColorComponentFlags cmask,
                          VkComponentMapping *cswizzle,
                          uint32_t region_count,
                          const VkBufferImageCopy2KHR *regions)
{
   /* Since we can't sample linear images we need to upload the linear
    * buffer to a tiled image that we can use as a blit source, which
    * is slow.
    */
   perf_debug("Falling back to blit path for buffer to image copy.\n");

   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   bool handled = true;

   /* Allocate memory for the tiled image. Since we copy layer by layer
    * we allocate memory to hold a full layer, which is the worst case.
    * For that we create a dummy image with that spec, get its memory
    * requirements, and use that information to create the memory allocation.
    * We will then reuse this memory store for all the regions we want to
    * copy.
    */
   VkImage dummy_image;
   VkImageCreateInfo dummy_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = VK_IMAGE_TYPE_2D,
      .format = src_format,
      .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_OPTIMAL,
      .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
               VK_IMAGE_USAGE_TRANSFER_DST_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .queueFamilyIndexCount = 0,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
   };
   VkResult result =
      v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
   if (result != VK_SUCCESS)
      return handled;

   VkMemoryRequirements reqs;
   vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
   v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);

   VkDeviceMemory mem;
   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = reqs.size,
      .memoryTypeIndex = 0,
   };
   result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
   if (result != VK_SUCCESS)
      return handled;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)mem,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);

   /* Obtain the layer count.
23257ec681f3Smrg * 23267ec681f3Smrg * If we are batching (region_count > 1) all our regions have the same 23277ec681f3Smrg * image subresource so we can take this from the first region. 23287ec681f3Smrg */ 23297ec681f3Smrg uint32_t num_layers; 23307ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) 23317ec681f3Smrg num_layers = regions[0].imageSubresource.layerCount; 23327ec681f3Smrg else 23337ec681f3Smrg num_layers = regions[0].imageExtent.depth; 23347ec681f3Smrg assert(num_layers > 0); 23357ec681f3Smrg 23367ec681f3Smrg /* Sanity check: we can only batch multiple regions together if they have 23377ec681f3Smrg * the same framebuffer (so the same layer). 23387ec681f3Smrg */ 23397ec681f3Smrg assert(num_layers == 1 || region_count == 1); 23407ec681f3Smrg 23417ec681f3Smrg const uint32_t block_width = vk_format_get_blockwidth(image->vk.format); 23427ec681f3Smrg const uint32_t block_height = vk_format_get_blockheight(image->vk.format); 23437ec681f3Smrg 23447ec681f3Smrg /* Copy regions by uploading each region to a temporary tiled image using 23457ec681f3Smrg * the memory we have just allocated as storage. 23467ec681f3Smrg */ 23477ec681f3Smrg for (uint32_t r = 0; r < region_count; r++) { 23487ec681f3Smrg const VkBufferImageCopy2KHR *region = ®ions[r]; 23497ec681f3Smrg 23507ec681f3Smrg /* Obtain the 2D buffer region spec */ 23517ec681f3Smrg uint32_t buf_width, buf_height; 23527ec681f3Smrg if (region->bufferRowLength == 0) 23537ec681f3Smrg buf_width = region->imageExtent.width; 23547ec681f3Smrg else 23557ec681f3Smrg buf_width = region->bufferRowLength; 23567ec681f3Smrg 23577ec681f3Smrg if (region->bufferImageHeight == 0) 23587ec681f3Smrg buf_height = region->imageExtent.height; 23597ec681f3Smrg else 23607ec681f3Smrg buf_height = region->bufferImageHeight; 23617ec681f3Smrg 23627ec681f3Smrg /* If the image is compressed, the bpp refers to blocks, not pixels */ 23637ec681f3Smrg buf_width = buf_width / block_width; 23647ec681f3Smrg buf_height = buf_height / block_height; 23657ec681f3Smrg 23667ec681f3Smrg for (uint32_t i = 0; i < num_layers; i++) { 23677ec681f3Smrg /* Create the tiled image */ 23687ec681f3Smrg VkImageCreateInfo image_info = { 23697ec681f3Smrg .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 23707ec681f3Smrg .imageType = VK_IMAGE_TYPE_2D, 23717ec681f3Smrg .format = src_format, 23727ec681f3Smrg .extent = { buf_width, buf_height, 1 }, 23737ec681f3Smrg .mipLevels = 1, 23747ec681f3Smrg .arrayLayers = 1, 23757ec681f3Smrg .samples = VK_SAMPLE_COUNT_1_BIT, 23767ec681f3Smrg .tiling = VK_IMAGE_TILING_OPTIMAL, 23777ec681f3Smrg .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 23787ec681f3Smrg VK_IMAGE_USAGE_TRANSFER_DST_BIT, 23797ec681f3Smrg .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 23807ec681f3Smrg .queueFamilyIndexCount = 0, 23817ec681f3Smrg .initialLayout = VK_IMAGE_LAYOUT_GENERAL, 23827ec681f3Smrg }; 23837ec681f3Smrg 23847ec681f3Smrg VkImage buffer_image; 23857ec681f3Smrg VkResult result = 23867ec681f3Smrg v3dv_CreateImage(_device, &image_info, &device->vk.alloc, 23877ec681f3Smrg &buffer_image); 23887ec681f3Smrg if (result != VK_SUCCESS) 23897ec681f3Smrg return handled; 23907ec681f3Smrg 23917ec681f3Smrg v3dv_cmd_buffer_add_private_obj( 23927ec681f3Smrg cmd_buffer, (uintptr_t)buffer_image, 23937ec681f3Smrg (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage); 23947ec681f3Smrg 23957ec681f3Smrg result = vk_common_BindImageMemory(_device, buffer_image, mem, 0); 23967ec681f3Smrg if (result != VK_SUCCESS) 23977ec681f3Smrg return handled; 23987ec681f3Smrg 23997ec681f3Smrg /* Upload buffer contents 
for the selected layer */ 24007ec681f3Smrg const VkDeviceSize buf_offset_bytes = 24017ec681f3Smrg region->bufferOffset + i * buf_height * buf_width * buffer_bpp; 24027ec681f3Smrg const VkBufferImageCopy2KHR buffer_image_copy = { 24037ec681f3Smrg .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR, 24047ec681f3Smrg .bufferOffset = buf_offset_bytes, 24057ec681f3Smrg .bufferRowLength = region->bufferRowLength / block_width, 24067ec681f3Smrg .bufferImageHeight = region->bufferImageHeight / block_height, 24077ec681f3Smrg .imageSubresource = { 24087ec681f3Smrg .aspectMask = aspect, 24097ec681f3Smrg .mipLevel = 0, 24107ec681f3Smrg .baseArrayLayer = 0, 24117ec681f3Smrg .layerCount = 1, 24127ec681f3Smrg }, 24137ec681f3Smrg .imageOffset = { 0, 0, 0 }, 24147ec681f3Smrg .imageExtent = { buf_width, buf_height, 1 } 24157ec681f3Smrg }; 24167ec681f3Smrg handled = 24177ec681f3Smrg create_tiled_image_from_buffer(cmd_buffer, 24187ec681f3Smrg v3dv_image_from_handle(buffer_image), 24197ec681f3Smrg buffer, &buffer_image_copy); 24207ec681f3Smrg if (!handled) { 24217ec681f3Smrg /* This is unexpected, we should have setup the upload to be 24227ec681f3Smrg * conformant to a TFU or TLB copy. 24237ec681f3Smrg */ 24247ec681f3Smrg unreachable("Unable to copy buffer to image through TLB"); 24257ec681f3Smrg return false; 24267ec681f3Smrg } 24277ec681f3Smrg 24287ec681f3Smrg /* Blit-copy the requested image extent from the buffer image to the 24297ec681f3Smrg * destination image. 24307ec681f3Smrg * 24317ec681f3Smrg * Since we are copying, the blit must use the same format on the 24327ec681f3Smrg * destination and source images to avoid format conversions. The 24337ec681f3Smrg * only exception is copying stencil, which we upload to a R8UI source 24347ec681f3Smrg * image, but that we need to blit to a S8D24 destination (the only 24357ec681f3Smrg * stencil format we support). 
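          *
          * For the stencil case the caller passes cmask = VK_COLOR_COMPONENT_R_BIT,
          * so the masked blit below only writes the stencil byte and leaves
          * the D24 bits of the destination untouched.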
24367ec681f3Smrg */ 24377ec681f3Smrg const VkImageBlit2KHR blit_region = { 24387ec681f3Smrg .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR, 24397ec681f3Smrg .srcSubresource = { 24407ec681f3Smrg .aspectMask = aspect, 24417ec681f3Smrg .mipLevel = 0, 24427ec681f3Smrg .baseArrayLayer = 0, 24437ec681f3Smrg .layerCount = 1, 24447ec681f3Smrg }, 24457ec681f3Smrg .srcOffsets = { 24467ec681f3Smrg { 0, 0, 0 }, 24477ec681f3Smrg { region->imageExtent.width, region->imageExtent.height, 1 }, 24487ec681f3Smrg }, 24497ec681f3Smrg .dstSubresource = { 24507ec681f3Smrg .aspectMask = aspect, 24517ec681f3Smrg .mipLevel = region->imageSubresource.mipLevel, 24527ec681f3Smrg .baseArrayLayer = region->imageSubresource.baseArrayLayer + i, 24537ec681f3Smrg .layerCount = 1, 24547ec681f3Smrg }, 24557ec681f3Smrg .dstOffsets = { 24567ec681f3Smrg { 24577ec681f3Smrg DIV_ROUND_UP(region->imageOffset.x, block_width), 24587ec681f3Smrg DIV_ROUND_UP(region->imageOffset.y, block_height), 24597ec681f3Smrg region->imageOffset.z + i, 24607ec681f3Smrg }, 24617ec681f3Smrg { 24627ec681f3Smrg DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width, 24637ec681f3Smrg block_width), 24647ec681f3Smrg DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height, 24657ec681f3Smrg block_height), 24667ec681f3Smrg region->imageOffset.z + i + 1, 24677ec681f3Smrg }, 24687ec681f3Smrg }, 24697ec681f3Smrg }; 24707ec681f3Smrg 24717ec681f3Smrg handled = blit_shader(cmd_buffer, 24727ec681f3Smrg image, dst_format, 24737ec681f3Smrg v3dv_image_from_handle(buffer_image), src_format, 24747ec681f3Smrg cmask, cswizzle, 24757ec681f3Smrg &blit_region, VK_FILTER_NEAREST, true); 24767ec681f3Smrg if (!handled) { 24777ec681f3Smrg /* This is unexpected, we should have a supported blit spec */ 24787ec681f3Smrg unreachable("Unable to blit buffer to destination image"); 24797ec681f3Smrg return false; 24807ec681f3Smrg } 24817ec681f3Smrg } 24827ec681f3Smrg } 24837ec681f3Smrg 24847ec681f3Smrg return handled; 24857ec681f3Smrg} 24867ec681f3Smrg 24877ec681f3Smrg/** 24887ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 24897ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 24907ec681f3Smrg */ 24917ec681f3Smrgstatic bool 24927ec681f3Smrgcopy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, 24937ec681f3Smrg struct v3dv_image *image, 24947ec681f3Smrg struct v3dv_buffer *buffer, 24957ec681f3Smrg uint32_t region_count, 24967ec681f3Smrg const VkBufferImageCopy2KHR *regions, 24977ec681f3Smrg bool use_texel_buffer) 24987ec681f3Smrg{ 24997ec681f3Smrg /* We can only call this with region_count > 1 if we can batch the regions 25007ec681f3Smrg * together, in which case they share the same image subresource, and so 25017ec681f3Smrg * the same aspect. 25027ec681f3Smrg */ 25037ec681f3Smrg VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask; 25047ec681f3Smrg 25057ec681f3Smrg /* Generally, the bpp of the data in the buffer matches that of the 25067ec681f3Smrg * destination image. The exception is the case where we are uploading 25077ec681f3Smrg * stencil (8bpp) to a combined d24s8 image (32bpp). 25087ec681f3Smrg */ 25097ec681f3Smrg uint32_t buf_bpp = image->cpp; 25107ec681f3Smrg 25117ec681f3Smrg /* We are about to upload the buffer data to an image so we can then 25127ec681f3Smrg * blit that to our destination region. 
Because we are going to implement 25137ec681f3Smrg * the copy as a blit, we want our blit source and destination formats to be 25147ec681f3Smrg * the same (to avoid any format conversions), so we choose a canonical 25157ec681f3Smrg * format that matches the destination image bpp. 25167ec681f3Smrg */ 25177ec681f3Smrg VkComponentMapping ident_swizzle = { 25187ec681f3Smrg .r = VK_COMPONENT_SWIZZLE_IDENTITY, 25197ec681f3Smrg .g = VK_COMPONENT_SWIZZLE_IDENTITY, 25207ec681f3Smrg .b = VK_COMPONENT_SWIZZLE_IDENTITY, 25217ec681f3Smrg .a = VK_COMPONENT_SWIZZLE_IDENTITY, 25227ec681f3Smrg }; 25237ec681f3Smrg 25247ec681f3Smrg VkComponentMapping cswizzle = ident_swizzle; 25257ec681f3Smrg VkColorComponentFlags cmask = 0; /* Write all components */ 25267ec681f3Smrg VkFormat src_format; 25277ec681f3Smrg VkFormat dst_format; 25287ec681f3Smrg switch (buf_bpp) { 25297ec681f3Smrg case 16: 25307ec681f3Smrg assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); 25317ec681f3Smrg src_format = VK_FORMAT_R32G32B32A32_UINT; 25327ec681f3Smrg dst_format = src_format; 25337ec681f3Smrg break; 25347ec681f3Smrg case 8: 25357ec681f3Smrg assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); 25367ec681f3Smrg src_format = VK_FORMAT_R16G16B16A16_UINT; 25377ec681f3Smrg dst_format = src_format; 25387ec681f3Smrg break; 25397ec681f3Smrg case 4: 25407ec681f3Smrg switch (aspect) { 25417ec681f3Smrg case VK_IMAGE_ASPECT_COLOR_BIT: 25427ec681f3Smrg src_format = VK_FORMAT_R8G8B8A8_UINT; 25437ec681f3Smrg dst_format = src_format; 25447ec681f3Smrg break; 25457ec681f3Smrg case VK_IMAGE_ASPECT_DEPTH_BIT: 25467ec681f3Smrg assert(image->vk.format == VK_FORMAT_D32_SFLOAT || 25477ec681f3Smrg image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || 25487ec681f3Smrg image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32); 25497ec681f3Smrg src_format = VK_FORMAT_R8G8B8A8_UINT; 25507ec681f3Smrg dst_format = src_format; 25517ec681f3Smrg aspect = VK_IMAGE_ASPECT_COLOR_BIT; 25527ec681f3Smrg 25537ec681f3Smrg /* For D24 formats, the Vulkan spec states that the depth component 25547ec681f3Smrg * in the buffer is stored in the 24-LSB, but V3D wants it in the 25557ec681f3Smrg * 24-MSB. 25567ec681f3Smrg */ 25577ec681f3Smrg if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || 25587ec681f3Smrg image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) { 25597ec681f3Smrg cmask = VK_COLOR_COMPONENT_G_BIT | 25607ec681f3Smrg VK_COLOR_COMPONENT_B_BIT | 25617ec681f3Smrg VK_COLOR_COMPONENT_A_BIT; 25627ec681f3Smrg cswizzle.r = VK_COMPONENT_SWIZZLE_R; 25637ec681f3Smrg cswizzle.g = VK_COMPONENT_SWIZZLE_R; 25647ec681f3Smrg cswizzle.b = VK_COMPONENT_SWIZZLE_G; 25657ec681f3Smrg cswizzle.a = VK_COMPONENT_SWIZZLE_B; 25667ec681f3Smrg } 25677ec681f3Smrg break; 25687ec681f3Smrg case VK_IMAGE_ASPECT_STENCIL_BIT: 25697ec681f3Smrg /* Since we don't support separate stencil this is always a stencil 25707ec681f3Smrg * copy to a combined depth/stencil image. Because we don't support 25717ec681f3Smrg * separate stencil images, we interpret the buffer data as a 25727ec681f3Smrg * color R8UI image, and implement the blit as a compatible color 25737ec681f3Smrg * blit to an RGBA8UI destination masking out writes to components 25747ec681f3Smrg * GBA (which map to the D24 component of a S8D24 image). 
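 *
 * In other words, the R component of the RGBA8 view corresponds to the S8
 * bits of the S8D24 layout, while GBA correspond to the D24 bits, which is
 * why masking the write to R only touches stencil.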
25757ec681f3Smrg */ 25767ec681f3Smrg assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT); 25777ec681f3Smrg buf_bpp = 1; 25787ec681f3Smrg src_format = VK_FORMAT_R8_UINT; 25797ec681f3Smrg dst_format = VK_FORMAT_R8G8B8A8_UINT; 25807ec681f3Smrg cmask = VK_COLOR_COMPONENT_R_BIT; 25817ec681f3Smrg aspect = VK_IMAGE_ASPECT_COLOR_BIT; 25827ec681f3Smrg break; 25837ec681f3Smrg default: 25847ec681f3Smrg unreachable("unsupported aspect"); 25857ec681f3Smrg return false; 25867ec681f3Smrg }; 25877ec681f3Smrg break; 25887ec681f3Smrg case 2: 25897ec681f3Smrg aspect = VK_IMAGE_ASPECT_COLOR_BIT; 25907ec681f3Smrg src_format = VK_FORMAT_R16_UINT; 25917ec681f3Smrg dst_format = src_format; 25927ec681f3Smrg break; 25937ec681f3Smrg case 1: 25947ec681f3Smrg assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); 25957ec681f3Smrg src_format = VK_FORMAT_R8_UINT; 25967ec681f3Smrg dst_format = src_format; 25977ec681f3Smrg break; 25987ec681f3Smrg default: 25997ec681f3Smrg unreachable("unsupported bit-size"); 26007ec681f3Smrg return false; 26017ec681f3Smrg } 26027ec681f3Smrg 26037ec681f3Smrg if (use_texel_buffer) { 26047ec681f3Smrg return texel_buffer_shader_copy(cmd_buffer, aspect, image, 26057ec681f3Smrg dst_format, src_format, 26067ec681f3Smrg buffer, buf_bpp, 26077ec681f3Smrg cmask, &cswizzle, 26087ec681f3Smrg region_count, regions); 26097ec681f3Smrg } else { 26107ec681f3Smrg return copy_buffer_to_image_blit(cmd_buffer, aspect, image, 26117ec681f3Smrg dst_format, src_format, 26127ec681f3Smrg buffer, buf_bpp, 26137ec681f3Smrg cmask, &cswizzle, 26147ec681f3Smrg region_count, regions); 26157ec681f3Smrg } 26167ec681f3Smrg} 26177ec681f3Smrg 26187ec681f3Smrg/** 26197ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 26207ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 
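 *
 * This path does not touch the GPU at record time: it only queues a
 * V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE job that performs the copy on the
 * CPU when the command buffer is submitted. It also bails out early
 * (returns false) for depth/stencil, compressed and linear destination
 * images, which are left to the other paths.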
26217ec681f3Smrg */ 26227ec681f3Smrgstatic bool 26237ec681f3Smrgcopy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, 26247ec681f3Smrg struct v3dv_image *image, 26257ec681f3Smrg struct v3dv_buffer *buffer, 26267ec681f3Smrg const VkBufferImageCopy2KHR *region) 26277ec681f3Smrg{ 26287ec681f3Smrg /* FIXME */ 26297ec681f3Smrg if (vk_format_is_depth_or_stencil(image->vk.format)) 26307ec681f3Smrg return false; 26317ec681f3Smrg 26327ec681f3Smrg if (vk_format_is_compressed(image->vk.format)) 26337ec681f3Smrg return false; 26347ec681f3Smrg 26357ec681f3Smrg if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) 26367ec681f3Smrg return false; 26377ec681f3Smrg 26387ec681f3Smrg uint32_t buffer_width, buffer_height; 26397ec681f3Smrg if (region->bufferRowLength == 0) 26407ec681f3Smrg buffer_width = region->imageExtent.width; 26417ec681f3Smrg else 26427ec681f3Smrg buffer_width = region->bufferRowLength; 26437ec681f3Smrg 26447ec681f3Smrg if (region->bufferImageHeight == 0) 26457ec681f3Smrg buffer_height = region->imageExtent.height; 26467ec681f3Smrg else 26477ec681f3Smrg buffer_height = region->bufferImageHeight; 26487ec681f3Smrg 26497ec681f3Smrg uint32_t buffer_stride = buffer_width * image->cpp; 26507ec681f3Smrg uint32_t buffer_layer_stride = buffer_stride * buffer_height; 26517ec681f3Smrg 26527ec681f3Smrg uint32_t num_layers; 26537ec681f3Smrg if (image->vk.image_type != VK_IMAGE_TYPE_3D) 26547ec681f3Smrg num_layers = region->imageSubresource.layerCount; 26557ec681f3Smrg else 26567ec681f3Smrg num_layers = region->imageExtent.depth; 26577ec681f3Smrg assert(num_layers > 0); 26587ec681f3Smrg 26597ec681f3Smrg struct v3dv_job *job = 26607ec681f3Smrg v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 26617ec681f3Smrg V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE, 26627ec681f3Smrg cmd_buffer, -1); 26637ec681f3Smrg if (!job) 26647ec681f3Smrg return true; 26657ec681f3Smrg 26667ec681f3Smrg job->cpu.copy_buffer_to_image.image = image; 26677ec681f3Smrg job->cpu.copy_buffer_to_image.buffer = buffer; 26687ec681f3Smrg job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride; 26697ec681f3Smrg job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride; 26707ec681f3Smrg job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset; 26717ec681f3Smrg job->cpu.copy_buffer_to_image.image_extent = region->imageExtent; 26727ec681f3Smrg job->cpu.copy_buffer_to_image.image_offset = region->imageOffset; 26737ec681f3Smrg job->cpu.copy_buffer_to_image.mip_level = 26747ec681f3Smrg region->imageSubresource.mipLevel; 26757ec681f3Smrg job->cpu.copy_buffer_to_image.base_layer = 26767ec681f3Smrg region->imageSubresource.baseArrayLayer; 26777ec681f3Smrg job->cpu.copy_buffer_to_image.layer_count = num_layers; 26787ec681f3Smrg 26797ec681f3Smrg list_addtail(&job->list_link, &cmd_buffer->jobs); 26807ec681f3Smrg 26817ec681f3Smrg return true; 26827ec681f3Smrg} 26837ec681f3Smrg 26847ec681f3SmrgVKAPI_ATTR void VKAPI_CALL 26857ec681f3Smrgv3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, 26867ec681f3Smrg const VkCopyBufferToImageInfo2KHR *info) 26877ec681f3Smrg{ 26887ec681f3Smrg V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 26897ec681f3Smrg V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer); 26907ec681f3Smrg V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage); 26917ec681f3Smrg 26927ec681f3Smrg assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT); 26937ec681f3Smrg 26947ec681f3Smrg uint32_t r = 0; 26957ec681f3Smrg while (r < info->regionCount) { 26967ec681f3Smrg /* The TFU and TLB paths can only copy one 
region at a time and the region 26977ec681f3Smrg * needs to start at the origin. We try these first for the common case 26987ec681f3Smrg * where we are copying full images, since they should be the fastest. 26997ec681f3Smrg */ 27007ec681f3Smrg uint32_t batch_size = 1; 27017ec681f3Smrg if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r])) 27027ec681f3Smrg goto handled; 27037ec681f3Smrg 27047ec681f3Smrg if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r])) 27057ec681f3Smrg goto handled; 27067ec681f3Smrg 27077ec681f3Smrg /* Otherwise, we are copying subrects, so we fallback to copying 27087ec681f3Smrg * via shader and texel buffers and we try to batch the regions 27097ec681f3Smrg * if possible. We can only batch copies if they have the same 27107ec681f3Smrg * framebuffer spec, which is mostly determined by the image 27117ec681f3Smrg * subresource of the region. 27127ec681f3Smrg */ 27137ec681f3Smrg const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource; 27147ec681f3Smrg for (uint32_t s = r + 1; s < info->regionCount; s++) { 27157ec681f3Smrg const VkImageSubresourceLayers *rsc_s = 27167ec681f3Smrg &info->pRegions[s].imageSubresource; 27177ec681f3Smrg 27187ec681f3Smrg if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0) 27197ec681f3Smrg break; 27207ec681f3Smrg 27217ec681f3Smrg /* For 3D images we also need to check the depth extent */ 27227ec681f3Smrg if (image->vk.image_type == VK_IMAGE_TYPE_3D && 27237ec681f3Smrg info->pRegions[s].imageExtent.depth != 27247ec681f3Smrg info->pRegions[r].imageExtent.depth) { 27257ec681f3Smrg break; 27267ec681f3Smrg } 27277ec681f3Smrg 27287ec681f3Smrg batch_size++; 27297ec681f3Smrg } 27307ec681f3Smrg 27317ec681f3Smrg if (copy_buffer_to_image_shader(cmd_buffer, image, buffer, 27327ec681f3Smrg batch_size, &info->pRegions[r], true)) { 27337ec681f3Smrg goto handled; 27347ec681f3Smrg } 27357ec681f3Smrg 27367ec681f3Smrg /* If we still could not copy, fallback to slower paths. 27377ec681f3Smrg * 27387ec681f3Smrg * FIXME: we could try to batch these too, but since they are bound to be 27397ec681f3Smrg * slow it might not be worth it and we should instead put more effort 27407ec681f3Smrg * in handling more cases with the other paths. 27417ec681f3Smrg */ 27427ec681f3Smrg if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, 27437ec681f3Smrg &info->pRegions[r])) { 27447ec681f3Smrg batch_size = 1; 27457ec681f3Smrg goto handled; 27467ec681f3Smrg } 27477ec681f3Smrg 27487ec681f3Smrg if (copy_buffer_to_image_shader(cmd_buffer, image, buffer, 27497ec681f3Smrg batch_size, &info->pRegions[r], false)) { 27507ec681f3Smrg goto handled; 27517ec681f3Smrg } 27527ec681f3Smrg 27537ec681f3Smrg unreachable("Unsupported buffer to image copy."); 27547ec681f3Smrg 27557ec681f3Smrghandled: 27567ec681f3Smrg r += batch_size; 27577ec681f3Smrg } 27587ec681f3Smrg} 27597ec681f3Smrg 27607ec681f3Smrgstatic void 27617ec681f3Smrgcompute_blit_3d_layers(const VkOffset3D *offsets, 27627ec681f3Smrg uint32_t *min_layer, uint32_t *max_layer, 27637ec681f3Smrg bool *mirror_z); 27647ec681f3Smrg 27657ec681f3Smrg/** 27667ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 27677ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 27687ec681f3Smrg * 27697ec681f3Smrg * The TFU blit path doesn't handle scaling so the blit filter parameter can 27707ec681f3Smrg * be ignored. 
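 *
 * In practice this means the TFU path only handles "full copy" blits:
 * identical source and destination formats, a tiled destination, regions
 * starting at (0,0) that cover the whole destination level, matching layer
 * counts, and both aspects at once for D24S8. Anything else is left to the
 * other blit paths.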
27717ec681f3Smrg */ 27727ec681f3Smrgstatic bool 27737ec681f3Smrgblit_tfu(struct v3dv_cmd_buffer *cmd_buffer, 27747ec681f3Smrg struct v3dv_image *dst, 27757ec681f3Smrg struct v3dv_image *src, 27767ec681f3Smrg const VkImageBlit2KHR *region) 27777ec681f3Smrg{ 27787ec681f3Smrg assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT); 27797ec681f3Smrg assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT); 27807ec681f3Smrg 27817ec681f3Smrg /* Format must match */ 27827ec681f3Smrg if (src->vk.format != dst->vk.format) 27837ec681f3Smrg return false; 27847ec681f3Smrg 27857ec681f3Smrg /* Destination can't be raster format */ 27867ec681f3Smrg if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR) 27877ec681f3Smrg return false; 27887ec681f3Smrg 27897ec681f3Smrg /* Source region must start at (0,0) */ 27907ec681f3Smrg if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0) 27917ec681f3Smrg return false; 27927ec681f3Smrg 27937ec681f3Smrg /* Destination image must be complete */ 27947ec681f3Smrg if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0) 27957ec681f3Smrg return false; 27967ec681f3Smrg 27977ec681f3Smrg const uint32_t dst_mip_level = region->dstSubresource.mipLevel; 27987ec681f3Smrg const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level); 27997ec681f3Smrg const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level); 28007ec681f3Smrg if (region->dstOffsets[1].x < dst_width - 1|| 28017ec681f3Smrg region->dstOffsets[1].y < dst_height - 1) { 28027ec681f3Smrg return false; 28037ec681f3Smrg } 28047ec681f3Smrg 28057ec681f3Smrg /* No XY scaling */ 28067ec681f3Smrg if (region->srcOffsets[1].x != region->dstOffsets[1].x || 28077ec681f3Smrg region->srcOffsets[1].y != region->dstOffsets[1].y) { 28087ec681f3Smrg return false; 28097ec681f3Smrg } 28107ec681f3Smrg 28117ec681f3Smrg /* If the format is D24S8 both aspects need to be copied, since the TFU 28127ec681f3Smrg * can't be programmed to copy only one aspect of the image. 28137ec681f3Smrg */ 28147ec681f3Smrg if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) { 28157ec681f3Smrg const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | 28167ec681f3Smrg VK_IMAGE_ASPECT_STENCIL_BIT; 28177ec681f3Smrg if (region->dstSubresource.aspectMask != ds_aspects) 28187ec681f3Smrg return false; 28197ec681f3Smrg } 28207ec681f3Smrg 28217ec681f3Smrg /* Our TFU blits only handle exact copies (it requires same formats 28227ec681f3Smrg * on input and output, no scaling, etc), so there is no pixel format 28237ec681f3Smrg * conversions and we can rewrite the format to use one that is TFU 28247ec681f3Smrg * compatible based on its texel size. 
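 *
 * For example, a blit between two 4-byte-per-texel images can be emitted
 * with whatever 32bpp format v3dv_get_compatible_tfu_format() returns,
 * since the data is moved verbatim and only the texel size has to match.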
28257ec681f3Smrg */ 28267ec681f3Smrg const struct v3dv_format *format = 28277ec681f3Smrg v3dv_get_compatible_tfu_format(cmd_buffer->device, 28287ec681f3Smrg dst->cpp, NULL); 28297ec681f3Smrg 28307ec681f3Smrg /* Emit a TFU job for each layer to blit */ 28317ec681f3Smrg assert(region->dstSubresource.layerCount == 28327ec681f3Smrg region->srcSubresource.layerCount); 28337ec681f3Smrg 28347ec681f3Smrg uint32_t min_dst_layer; 28357ec681f3Smrg uint32_t max_dst_layer; 28367ec681f3Smrg bool dst_mirror_z = false; 28377ec681f3Smrg if (dst->vk.image_type == VK_IMAGE_TYPE_3D) { 28387ec681f3Smrg compute_blit_3d_layers(region->dstOffsets, 28397ec681f3Smrg &min_dst_layer, &max_dst_layer, 28407ec681f3Smrg &dst_mirror_z); 28417ec681f3Smrg } else { 28427ec681f3Smrg min_dst_layer = region->dstSubresource.baseArrayLayer; 28437ec681f3Smrg max_dst_layer = min_dst_layer + region->dstSubresource.layerCount; 28447ec681f3Smrg } 28457ec681f3Smrg 28467ec681f3Smrg uint32_t min_src_layer; 28477ec681f3Smrg uint32_t max_src_layer; 28487ec681f3Smrg bool src_mirror_z = false; 28497ec681f3Smrg if (src->vk.image_type == VK_IMAGE_TYPE_3D) { 28507ec681f3Smrg compute_blit_3d_layers(region->srcOffsets, 28517ec681f3Smrg &min_src_layer, &max_src_layer, 28527ec681f3Smrg &src_mirror_z); 28537ec681f3Smrg } else { 28547ec681f3Smrg min_src_layer = region->srcSubresource.baseArrayLayer; 28557ec681f3Smrg max_src_layer = min_src_layer + region->srcSubresource.layerCount; 28567ec681f3Smrg } 28577ec681f3Smrg 28587ec681f3Smrg /* No Z scaling for 3D images (for non-3D images both src and dst must 28597ec681f3Smrg * have the same layerCount). 28607ec681f3Smrg */ 28617ec681f3Smrg if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer) 28627ec681f3Smrg return false; 28637ec681f3Smrg 28647ec681f3Smrg const uint32_t layer_count = max_dst_layer - min_dst_layer; 28657ec681f3Smrg const uint32_t src_mip_level = region->srcSubresource.mipLevel; 28667ec681f3Smrg for (uint32_t i = 0; i < layer_count; i++) { 28677ec681f3Smrg /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images 28687ec681f3Smrg * only involves reversing the order of the slices. 28697ec681f3Smrg */ 28707ec681f3Smrg const uint32_t dst_layer = 28717ec681f3Smrg dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i; 28727ec681f3Smrg const uint32_t src_layer = 28737ec681f3Smrg src_mirror_z ? 
max_src_layer - i - 1: min_src_layer + i; 28747ec681f3Smrg v3dv_X(cmd_buffer->device, meta_emit_tfu_job) 28757ec681f3Smrg (cmd_buffer, dst, dst_mip_level, dst_layer, 28767ec681f3Smrg src, src_mip_level, src_layer, 28777ec681f3Smrg dst_width, dst_height, format); 28787ec681f3Smrg } 28797ec681f3Smrg 28807ec681f3Smrg return true; 28817ec681f3Smrg} 28827ec681f3Smrg 28837ec681f3Smrgstatic bool 28847ec681f3Smrgformat_needs_software_int_clamp(VkFormat format) 28857ec681f3Smrg{ 28867ec681f3Smrg switch (format) { 28877ec681f3Smrg case VK_FORMAT_A2R10G10B10_UINT_PACK32: 28887ec681f3Smrg case VK_FORMAT_A2R10G10B10_SINT_PACK32: 28897ec681f3Smrg case VK_FORMAT_A2B10G10R10_UINT_PACK32: 28907ec681f3Smrg case VK_FORMAT_A2B10G10R10_SINT_PACK32: 28917ec681f3Smrg return true; 28927ec681f3Smrg default: 28937ec681f3Smrg return false; 28947ec681f3Smrg }; 28957ec681f3Smrg} 28967ec681f3Smrg 28977ec681f3Smrgstatic void 28987ec681f3Smrgget_blit_pipeline_cache_key(VkFormat dst_format, 28997ec681f3Smrg VkFormat src_format, 29007ec681f3Smrg VkColorComponentFlags cmask, 29017ec681f3Smrg VkSampleCountFlagBits dst_samples, 29027ec681f3Smrg VkSampleCountFlagBits src_samples, 29037ec681f3Smrg uint8_t *key) 29047ec681f3Smrg{ 29057ec681f3Smrg memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE); 29067ec681f3Smrg 29077ec681f3Smrg uint32_t *p = (uint32_t *) key; 29087ec681f3Smrg 29097ec681f3Smrg *p = dst_format; 29107ec681f3Smrg p++; 29117ec681f3Smrg 29127ec681f3Smrg /* Generally, when blitting from a larger format to a smaller format 29137ec681f3Smrg * the hardware takes care of clamping the source to the RT range. 29147ec681f3Smrg * Specifically, for integer formats, this is done by using 29157ec681f3Smrg * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this 29167ec681f3Smrg * clamps to the bit-size of the render type, and some formats, such as 29177ec681f3Smrg * rgb10a2_uint have a 16-bit type, so it won't do what we need and we 29187ec681f3Smrg * require to clamp in software. In these cases, we need to amend the blit 29197ec681f3Smrg * shader with clamp code that depends on both the src and dst formats, so 29207ec681f3Smrg * we need the src format to be part of the key. 29217ec681f3Smrg */ 29227ec681f3Smrg *p = format_needs_software_int_clamp(dst_format) ? 
src_format : 0; 29237ec681f3Smrg p++; 29247ec681f3Smrg 29257ec681f3Smrg *p = cmask; 29267ec681f3Smrg p++; 29277ec681f3Smrg 29287ec681f3Smrg *p = (dst_samples << 8) | src_samples; 29297ec681f3Smrg p++; 29307ec681f3Smrg 29317ec681f3Smrg assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE); 29327ec681f3Smrg} 29337ec681f3Smrg 29347ec681f3Smrgstatic bool 29357ec681f3Smrgcreate_blit_render_pass(struct v3dv_device *device, 29367ec681f3Smrg VkFormat dst_format, 29377ec681f3Smrg VkFormat src_format, 29387ec681f3Smrg VkRenderPass *pass_load, 29397ec681f3Smrg VkRenderPass *pass_no_load) 29407ec681f3Smrg{ 29417ec681f3Smrg const bool is_color_blit = vk_format_is_color(dst_format); 29427ec681f3Smrg 29437ec681f3Smrg /* Attachment load operation is specified below */ 29447ec681f3Smrg VkAttachmentDescription att = { 29457ec681f3Smrg .format = dst_format, 29467ec681f3Smrg .samples = VK_SAMPLE_COUNT_1_BIT, 29477ec681f3Smrg .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 29487ec681f3Smrg .initialLayout = VK_IMAGE_LAYOUT_GENERAL, 29497ec681f3Smrg .finalLayout = VK_IMAGE_LAYOUT_GENERAL, 29507ec681f3Smrg }; 29517ec681f3Smrg 29527ec681f3Smrg VkAttachmentReference att_ref = { 29537ec681f3Smrg .attachment = 0, 29547ec681f3Smrg .layout = VK_IMAGE_LAYOUT_GENERAL, 29557ec681f3Smrg }; 29567ec681f3Smrg 29577ec681f3Smrg VkSubpassDescription subpass = { 29587ec681f3Smrg .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 29597ec681f3Smrg .inputAttachmentCount = 0, 29607ec681f3Smrg .colorAttachmentCount = is_color_blit ? 1 : 0, 29617ec681f3Smrg .pColorAttachments = is_color_blit ? &att_ref : NULL, 29627ec681f3Smrg .pResolveAttachments = NULL, 29637ec681f3Smrg .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref, 29647ec681f3Smrg .preserveAttachmentCount = 0, 29657ec681f3Smrg .pPreserveAttachments = NULL, 29667ec681f3Smrg }; 29677ec681f3Smrg 29687ec681f3Smrg VkRenderPassCreateInfo info = { 29697ec681f3Smrg .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, 29707ec681f3Smrg .attachmentCount = 1, 29717ec681f3Smrg .pAttachments = &att, 29727ec681f3Smrg .subpassCount = 1, 29737ec681f3Smrg .pSubpasses = &subpass, 29747ec681f3Smrg .dependencyCount = 0, 29757ec681f3Smrg .pDependencies = NULL, 29767ec681f3Smrg }; 29777ec681f3Smrg 29787ec681f3Smrg VkResult result; 29797ec681f3Smrg att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 29807ec681f3Smrg result = v3dv_CreateRenderPass(v3dv_device_to_handle(device), 29817ec681f3Smrg &info, &device->vk.alloc, pass_load); 29827ec681f3Smrg if (result != VK_SUCCESS) 29837ec681f3Smrg return false; 29847ec681f3Smrg 29857ec681f3Smrg att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; 29867ec681f3Smrg result = v3dv_CreateRenderPass(v3dv_device_to_handle(device), 29877ec681f3Smrg &info, &device->vk.alloc, pass_no_load); 29887ec681f3Smrg return result == VK_SUCCESS; 29897ec681f3Smrg} 29907ec681f3Smrg 29917ec681f3Smrgstatic nir_ssa_def * 29927ec681f3Smrggen_rect_vertices(nir_builder *b) 29937ec681f3Smrg{ 29947ec681f3Smrg nir_ssa_def *vertex_id = nir_load_vertex_id(b); 29957ec681f3Smrg 29967ec681f3Smrg /* vertex 0: -1.0, -1.0 29977ec681f3Smrg * vertex 1: -1.0, 1.0 29987ec681f3Smrg * vertex 2: 1.0, -1.0 29997ec681f3Smrg * vertex 3: 1.0, 1.0 30007ec681f3Smrg * 30017ec681f3Smrg * so: 30027ec681f3Smrg * 30037ec681f3Smrg * channel 0 is vertex_id < 2 ? -1.0 : 1.0 30047ec681f3Smrg * channel 1 is vertex id & 1 ? 
1.0 : -1.0 30057ec681f3Smrg */ 30067ec681f3Smrg 30077ec681f3Smrg nir_ssa_def *one = nir_imm_int(b, 1); 30087ec681f3Smrg nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2)); 30097ec681f3Smrg nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one); 30107ec681f3Smrg 30117ec681f3Smrg nir_ssa_def *comp[4]; 30127ec681f3Smrg comp[0] = nir_bcsel(b, c0cmp, 30137ec681f3Smrg nir_imm_float(b, -1.0f), 30147ec681f3Smrg nir_imm_float(b, 1.0f)); 30157ec681f3Smrg 30167ec681f3Smrg comp[1] = nir_bcsel(b, c1cmp, 30177ec681f3Smrg nir_imm_float(b, 1.0f), 30187ec681f3Smrg nir_imm_float(b, -1.0f)); 30197ec681f3Smrg comp[2] = nir_imm_float(b, 0.0f); 30207ec681f3Smrg comp[3] = nir_imm_float(b, 1.0f); 30217ec681f3Smrg return nir_vec(b, comp, 4); 30227ec681f3Smrg} 30237ec681f3Smrg 30247ec681f3Smrgstatic nir_ssa_def * 30257ec681f3Smrggen_tex_coords(nir_builder *b) 30267ec681f3Smrg{ 30277ec681f3Smrg nir_ssa_def *tex_box = 30287ec681f3Smrg nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16); 30297ec681f3Smrg 30307ec681f3Smrg nir_ssa_def *tex_z = 30317ec681f3Smrg nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4); 30327ec681f3Smrg 30337ec681f3Smrg nir_ssa_def *vertex_id = nir_load_vertex_id(b); 30347ec681f3Smrg 30357ec681f3Smrg /* vertex 0: src0_x, src0_y 30367ec681f3Smrg * vertex 1: src0_x, src1_y 30377ec681f3Smrg * vertex 2: src1_x, src0_y 30387ec681f3Smrg * vertex 3: src1_x, src1_y 30397ec681f3Smrg * 30407ec681f3Smrg * So: 30417ec681f3Smrg * 30427ec681f3Smrg * channel 0 is vertex_id < 2 ? src0_x : src1_x 30437ec681f3Smrg * channel 1 is vertex id & 1 ? src1_y : src0_y 30447ec681f3Smrg */ 30457ec681f3Smrg 30467ec681f3Smrg nir_ssa_def *one = nir_imm_int(b, 1); 30477ec681f3Smrg nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2)); 30487ec681f3Smrg nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one); 30497ec681f3Smrg 30507ec681f3Smrg nir_ssa_def *comp[4]; 30517ec681f3Smrg comp[0] = nir_bcsel(b, c0cmp, 30527ec681f3Smrg nir_channel(b, tex_box, 0), 30537ec681f3Smrg nir_channel(b, tex_box, 2)); 30547ec681f3Smrg 30557ec681f3Smrg comp[1] = nir_bcsel(b, c1cmp, 30567ec681f3Smrg nir_channel(b, tex_box, 3), 30577ec681f3Smrg nir_channel(b, tex_box, 1)); 30587ec681f3Smrg comp[2] = tex_z; 30597ec681f3Smrg comp[3] = nir_imm_float(b, 1.0f); 30607ec681f3Smrg return nir_vec(b, comp, 4); 30617ec681f3Smrg} 30627ec681f3Smrg 30637ec681f3Smrgstatic nir_ssa_def * 30647ec681f3Smrgbuild_nir_tex_op_read(struct nir_builder *b, 30657ec681f3Smrg nir_ssa_def *tex_pos, 30667ec681f3Smrg enum glsl_base_type tex_type, 30677ec681f3Smrg enum glsl_sampler_dim dim) 30687ec681f3Smrg{ 30697ec681f3Smrg assert(dim != GLSL_SAMPLER_DIM_MS); 30707ec681f3Smrg 30717ec681f3Smrg const struct glsl_type *sampler_type = 30727ec681f3Smrg glsl_sampler_type(dim, false, false, tex_type); 30737ec681f3Smrg nir_variable *sampler = 30747ec681f3Smrg nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); 30757ec681f3Smrg sampler->data.descriptor_set = 0; 30767ec681f3Smrg sampler->data.binding = 0; 30777ec681f3Smrg 30787ec681f3Smrg nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa; 30797ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3); 30807ec681f3Smrg tex->sampler_dim = dim; 30817ec681f3Smrg tex->op = nir_texop_tex; 30827ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 30837ec681f3Smrg tex->src[0].src = nir_src_for_ssa(tex_pos); 30847ec681f3Smrg tex->src[1].src_type = nir_tex_src_texture_deref; 30857ec681f3Smrg tex->src[1].src = 
nir_src_for_ssa(tex_deref); 30867ec681f3Smrg tex->src[2].src_type = nir_tex_src_sampler_deref; 30877ec681f3Smrg tex->src[2].src = nir_src_for_ssa(tex_deref); 30887ec681f3Smrg tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type); 30897ec681f3Smrg tex->is_array = glsl_sampler_type_is_array(sampler_type); 30907ec681f3Smrg tex->coord_components = tex_pos->num_components; 30917ec681f3Smrg 30927ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 30937ec681f3Smrg nir_builder_instr_insert(b, &tex->instr); 30947ec681f3Smrg return &tex->dest.ssa; 30957ec681f3Smrg} 30967ec681f3Smrg 30977ec681f3Smrgstatic nir_ssa_def * 30987ec681f3Smrgbuild_nir_tex_op_ms_fetch_sample(struct nir_builder *b, 30997ec681f3Smrg nir_variable *sampler, 31007ec681f3Smrg nir_ssa_def *tex_deref, 31017ec681f3Smrg enum glsl_base_type tex_type, 31027ec681f3Smrg nir_ssa_def *tex_pos, 31037ec681f3Smrg nir_ssa_def *sample_idx) 31047ec681f3Smrg{ 31057ec681f3Smrg nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4); 31067ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_MS; 31077ec681f3Smrg tex->op = nir_texop_txf_ms; 31087ec681f3Smrg tex->src[0].src_type = nir_tex_src_coord; 31097ec681f3Smrg tex->src[0].src = nir_src_for_ssa(tex_pos); 31107ec681f3Smrg tex->src[1].src_type = nir_tex_src_texture_deref; 31117ec681f3Smrg tex->src[1].src = nir_src_for_ssa(tex_deref); 31127ec681f3Smrg tex->src[2].src_type = nir_tex_src_sampler_deref; 31137ec681f3Smrg tex->src[2].src = nir_src_for_ssa(tex_deref); 31147ec681f3Smrg tex->src[3].src_type = nir_tex_src_ms_index; 31157ec681f3Smrg tex->src[3].src = nir_src_for_ssa(sample_idx); 31167ec681f3Smrg tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type); 31177ec681f3Smrg tex->is_array = false; 31187ec681f3Smrg tex->coord_components = tex_pos->num_components; 31197ec681f3Smrg 31207ec681f3Smrg nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 31217ec681f3Smrg nir_builder_instr_insert(b, &tex->instr); 31227ec681f3Smrg return &tex->dest.ssa; 31237ec681f3Smrg} 31247ec681f3Smrg 31257ec681f3Smrg/* Fetches all samples at the given position and averages them */ 31267ec681f3Smrgstatic nir_ssa_def * 31277ec681f3Smrgbuild_nir_tex_op_ms_resolve(struct nir_builder *b, 31287ec681f3Smrg nir_ssa_def *tex_pos, 31297ec681f3Smrg enum glsl_base_type tex_type, 31307ec681f3Smrg VkSampleCountFlagBits src_samples) 31317ec681f3Smrg{ 31327ec681f3Smrg assert(src_samples > VK_SAMPLE_COUNT_1_BIT); 31337ec681f3Smrg const struct glsl_type *sampler_type = 31347ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type); 31357ec681f3Smrg nir_variable *sampler = 31367ec681f3Smrg nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); 31377ec681f3Smrg sampler->data.descriptor_set = 0; 31387ec681f3Smrg sampler->data.binding = 0; 31397ec681f3Smrg 31407ec681f3Smrg const bool is_int = glsl_base_type_is_integer(tex_type); 31417ec681f3Smrg 31427ec681f3Smrg nir_ssa_def *tmp = NULL; 31437ec681f3Smrg nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa; 31447ec681f3Smrg for (uint32_t i = 0; i < src_samples; i++) { 31457ec681f3Smrg nir_ssa_def *s = 31467ec681f3Smrg build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref, 31477ec681f3Smrg tex_type, tex_pos, 31487ec681f3Smrg nir_imm_int(b, i)); 31497ec681f3Smrg 31507ec681f3Smrg /* For integer formats, the multisample resolve operation is expected to 31517ec681f3Smrg * return one of the samples, we just return the first one. 
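 *
 * (The Vulkan spec only requires that resolving an integer format selects
 * a single sample value per pixel, and which sample is selected is
 * implementation-defined, so returning sample 0 is valid and lets us skip
 * the averaging below.)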
31527ec681f3Smrg */ 31537ec681f3Smrg if (is_int) 31547ec681f3Smrg return s; 31557ec681f3Smrg 31567ec681f3Smrg tmp = i == 0 ? s : nir_fadd(b, tmp, s); 31577ec681f3Smrg } 31587ec681f3Smrg 31597ec681f3Smrg assert(!is_int); 31607ec681f3Smrg return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples)); 31617ec681f3Smrg} 31627ec681f3Smrg 31637ec681f3Smrg/* Fetches the current sample (gl_SampleID) at the given position */ 31647ec681f3Smrgstatic nir_ssa_def * 31657ec681f3Smrgbuild_nir_tex_op_ms_read(struct nir_builder *b, 31667ec681f3Smrg nir_ssa_def *tex_pos, 31677ec681f3Smrg enum glsl_base_type tex_type) 31687ec681f3Smrg{ 31697ec681f3Smrg const struct glsl_type *sampler_type = 31707ec681f3Smrg glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type); 31717ec681f3Smrg nir_variable *sampler = 31727ec681f3Smrg nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); 31737ec681f3Smrg sampler->data.descriptor_set = 0; 31747ec681f3Smrg sampler->data.binding = 0; 31757ec681f3Smrg 31767ec681f3Smrg nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa; 31777ec681f3Smrg 31787ec681f3Smrg return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref, 31797ec681f3Smrg tex_type, tex_pos, 31807ec681f3Smrg nir_load_sample_id(b)); 31817ec681f3Smrg} 31827ec681f3Smrg 31837ec681f3Smrgstatic nir_ssa_def * 31847ec681f3Smrgbuild_nir_tex_op(struct nir_builder *b, 31857ec681f3Smrg struct v3dv_device *device, 31867ec681f3Smrg nir_ssa_def *tex_pos, 31877ec681f3Smrg enum glsl_base_type tex_type, 31887ec681f3Smrg VkSampleCountFlagBits dst_samples, 31897ec681f3Smrg VkSampleCountFlagBits src_samples, 31907ec681f3Smrg enum glsl_sampler_dim dim) 31917ec681f3Smrg{ 31927ec681f3Smrg switch (dim) { 31937ec681f3Smrg case GLSL_SAMPLER_DIM_MS: 31947ec681f3Smrg assert(src_samples == VK_SAMPLE_COUNT_4_BIT); 31957ec681f3Smrg /* For multisampled texture sources we need to use fetching instead of 31967ec681f3Smrg * normalized texture coordinates. We already configured our blit 31977ec681f3Smrg * coordinates to be in texel units, but here we still need to convert 31987ec681f3Smrg * them from floating point to integer. 
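 *
 * (A rough example: an interpolated coordinate of (12.0, 7.0) in texel
 * units simply becomes the integer texel address (12, 7) expected by
 * txf_ms.)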
31997ec681f3Smrg */ 32007ec681f3Smrg tex_pos = nir_f2i32(b, tex_pos); 32017ec681f3Smrg 32027ec681f3Smrg if (dst_samples == VK_SAMPLE_COUNT_1_BIT) 32037ec681f3Smrg return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples); 32047ec681f3Smrg else 32057ec681f3Smrg return build_nir_tex_op_ms_read(b, tex_pos, tex_type); 32067ec681f3Smrg default: 32077ec681f3Smrg assert(src_samples == VK_SAMPLE_COUNT_1_BIT); 32087ec681f3Smrg return build_nir_tex_op_read(b, tex_pos, tex_type, dim); 32097ec681f3Smrg } 32107ec681f3Smrg} 32117ec681f3Smrg 32127ec681f3Smrgstatic nir_shader * 32137ec681f3Smrgget_blit_vs() 32147ec681f3Smrg{ 32157ec681f3Smrg const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 32167ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options, 32177ec681f3Smrg "meta blit vs"); 32187ec681f3Smrg 32197ec681f3Smrg const struct glsl_type *vec4 = glsl_vec4_type(); 32207ec681f3Smrg 32217ec681f3Smrg nir_variable *vs_out_pos = 32227ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position"); 32237ec681f3Smrg vs_out_pos->data.location = VARYING_SLOT_POS; 32247ec681f3Smrg 32257ec681f3Smrg nir_variable *vs_out_tex_coord = 32267ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord"); 32277ec681f3Smrg vs_out_tex_coord->data.location = VARYING_SLOT_VAR0; 32287ec681f3Smrg vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH; 32297ec681f3Smrg 32307ec681f3Smrg nir_ssa_def *pos = gen_rect_vertices(&b); 32317ec681f3Smrg nir_store_var(&b, vs_out_pos, pos, 0xf); 32327ec681f3Smrg 32337ec681f3Smrg nir_ssa_def *tex_coord = gen_tex_coords(&b); 32347ec681f3Smrg nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf); 32357ec681f3Smrg 32367ec681f3Smrg return b.shader; 32377ec681f3Smrg} 32387ec681f3Smrg 32397ec681f3Smrgstatic uint32_t 32407ec681f3Smrgget_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim) 32417ec681f3Smrg{ 32427ec681f3Smrg switch (sampler_dim) { 32437ec681f3Smrg case GLSL_SAMPLER_DIM_1D: return 0x1; 32447ec681f3Smrg case GLSL_SAMPLER_DIM_2D: return 0x3; 32457ec681f3Smrg case GLSL_SAMPLER_DIM_MS: return 0x3; 32467ec681f3Smrg case GLSL_SAMPLER_DIM_3D: return 0x7; 32477ec681f3Smrg default: 32487ec681f3Smrg unreachable("invalid sampler dim"); 32497ec681f3Smrg }; 32507ec681f3Smrg} 32517ec681f3Smrg 32527ec681f3Smrgstatic nir_shader * 32537ec681f3Smrgget_color_blit_fs(struct v3dv_device *device, 32547ec681f3Smrg VkFormat dst_format, 32557ec681f3Smrg VkFormat src_format, 32567ec681f3Smrg VkSampleCountFlagBits dst_samples, 32577ec681f3Smrg VkSampleCountFlagBits src_samples, 32587ec681f3Smrg enum glsl_sampler_dim sampler_dim) 32597ec681f3Smrg{ 32607ec681f3Smrg const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); 32617ec681f3Smrg nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options, 32627ec681f3Smrg "meta blit fs"); 32637ec681f3Smrg 32647ec681f3Smrg const struct glsl_type *vec4 = glsl_vec4_type(); 32657ec681f3Smrg 32667ec681f3Smrg nir_variable *fs_in_tex_coord = 32677ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord"); 32687ec681f3Smrg fs_in_tex_coord->data.location = VARYING_SLOT_VAR0; 32697ec681f3Smrg 32707ec681f3Smrg const struct glsl_type *fs_out_type = 32717ec681f3Smrg vk_format_is_sint(dst_format) ? glsl_ivec4_type() : 32727ec681f3Smrg vk_format_is_uint(dst_format) ? 
glsl_uvec4_type() : 32737ec681f3Smrg glsl_vec4_type(); 32747ec681f3Smrg 32757ec681f3Smrg enum glsl_base_type src_base_type = 32767ec681f3Smrg vk_format_is_sint(src_format) ? GLSL_TYPE_INT : 32777ec681f3Smrg vk_format_is_uint(src_format) ? GLSL_TYPE_UINT : 32787ec681f3Smrg GLSL_TYPE_FLOAT; 32797ec681f3Smrg 32807ec681f3Smrg nir_variable *fs_out_color = 32817ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color"); 32827ec681f3Smrg fs_out_color->data.location = FRAG_RESULT_DATA0; 32837ec681f3Smrg 32847ec681f3Smrg nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord); 32857ec681f3Smrg const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim); 32867ec681f3Smrg tex_coord = nir_channels(&b, tex_coord, channel_mask); 32877ec681f3Smrg 32887ec681f3Smrg nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type, 32897ec681f3Smrg dst_samples, src_samples, sampler_dim); 32907ec681f3Smrg 32917ec681f3Smrg /* For integer textures, if the bit-size of the destination is too small to 32927ec681f3Smrg * hold source value, Vulkan (CTS) expects the implementation to clamp to the 32937ec681f3Smrg * maximum value the destination can hold. The hardware can clamp to the 32947ec681f3Smrg * render target type, which usually matches the component bit-size, but 32957ec681f3Smrg * there are some cases that won't match, such as rgb10a2, which has a 16-bit 32967ec681f3Smrg * render target type, so in these cases we need to clamp manually. 32977ec681f3Smrg */ 32987ec681f3Smrg if (format_needs_software_int_clamp(dst_format)) { 32997ec681f3Smrg assert(vk_format_is_int(dst_format)); 33007ec681f3Smrg enum pipe_format src_pformat = vk_format_to_pipe_format(src_format); 33017ec681f3Smrg enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format); 33027ec681f3Smrg 33037ec681f3Smrg nir_ssa_def *c[4]; 33047ec681f3Smrg for (uint32_t i = 0; i < 4; i++) { 33057ec681f3Smrg c[i] = nir_channel(&b, color, i); 33067ec681f3Smrg 33077ec681f3Smrg const uint32_t src_bit_size = 33087ec681f3Smrg util_format_get_component_bits(src_pformat, 33097ec681f3Smrg UTIL_FORMAT_COLORSPACE_RGB, 33107ec681f3Smrg i); 33117ec681f3Smrg const uint32_t dst_bit_size = 33127ec681f3Smrg util_format_get_component_bits(dst_pformat, 33137ec681f3Smrg UTIL_FORMAT_COLORSPACE_RGB, 33147ec681f3Smrg i); 33157ec681f3Smrg 33167ec681f3Smrg if (dst_bit_size >= src_bit_size) 33177ec681f3Smrg continue; 33187ec681f3Smrg 33197ec681f3Smrg assert(dst_bit_size > 0); 33207ec681f3Smrg if (util_format_is_pure_uint(dst_pformat)) { 33217ec681f3Smrg nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1); 33227ec681f3Smrg c[i] = nir_umin(&b, c[i], max); 33237ec681f3Smrg } else { 33247ec681f3Smrg nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1); 33257ec681f3Smrg nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1))); 33267ec681f3Smrg c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min); 33277ec681f3Smrg } 33287ec681f3Smrg } 33297ec681f3Smrg 33307ec681f3Smrg color = nir_vec4(&b, c[0], c[1], c[2], c[3]); 33317ec681f3Smrg } 33327ec681f3Smrg 33337ec681f3Smrg nir_store_var(&b, fs_out_color, color, 0xf); 33347ec681f3Smrg 33357ec681f3Smrg return b.shader; 33367ec681f3Smrg} 33377ec681f3Smrg 33387ec681f3Smrgstatic bool 33397ec681f3Smrgcreate_pipeline(struct v3dv_device *device, 33407ec681f3Smrg struct v3dv_render_pass *pass, 33417ec681f3Smrg struct nir_shader *vs_nir, 33427ec681f3Smrg struct nir_shader *gs_nir, 33437ec681f3Smrg struct nir_shader *fs_nir, 33447ec681f3Smrg const 
VkPipelineVertexInputStateCreateInfo *vi_state, 33457ec681f3Smrg const VkPipelineDepthStencilStateCreateInfo *ds_state, 33467ec681f3Smrg const VkPipelineColorBlendStateCreateInfo *cb_state, 33477ec681f3Smrg const VkPipelineMultisampleStateCreateInfo *ms_state, 33487ec681f3Smrg const VkPipelineLayout layout, 33497ec681f3Smrg VkPipeline *pipeline) 33507ec681f3Smrg{ 33517ec681f3Smrg struct vk_shader_module vs_m; 33527ec681f3Smrg struct vk_shader_module gs_m; 33537ec681f3Smrg struct vk_shader_module fs_m; 33547ec681f3Smrg 33557ec681f3Smrg uint32_t num_stages = gs_nir ? 3 : 2; 33567ec681f3Smrg 33577ec681f3Smrg v3dv_shader_module_internal_init(device, &vs_m, vs_nir); 33587ec681f3Smrg v3dv_shader_module_internal_init(device, &fs_m, fs_nir); 33597ec681f3Smrg 33607ec681f3Smrg VkPipelineShaderStageCreateInfo stages[3] = { 33617ec681f3Smrg { 33627ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 33637ec681f3Smrg .stage = VK_SHADER_STAGE_VERTEX_BIT, 33647ec681f3Smrg .module = vk_shader_module_to_handle(&vs_m), 33657ec681f3Smrg .pName = "main", 33667ec681f3Smrg }, 33677ec681f3Smrg { 33687ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 33697ec681f3Smrg .stage = VK_SHADER_STAGE_FRAGMENT_BIT, 33707ec681f3Smrg .module = vk_shader_module_to_handle(&fs_m), 33717ec681f3Smrg .pName = "main", 33727ec681f3Smrg }, 33737ec681f3Smrg { 33747ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 33757ec681f3Smrg .stage = VK_SHADER_STAGE_GEOMETRY_BIT, 33767ec681f3Smrg .module = VK_NULL_HANDLE, 33777ec681f3Smrg .pName = "main", 33787ec681f3Smrg }, 33797ec681f3Smrg }; 33807ec681f3Smrg 33817ec681f3Smrg if (gs_nir) { 33827ec681f3Smrg v3dv_shader_module_internal_init(device, &gs_m, gs_nir); 33837ec681f3Smrg stages[2].module = vk_shader_module_to_handle(&gs_m); 33847ec681f3Smrg } 33857ec681f3Smrg 33867ec681f3Smrg VkGraphicsPipelineCreateInfo info = { 33877ec681f3Smrg .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 33887ec681f3Smrg 33897ec681f3Smrg .stageCount = num_stages, 33907ec681f3Smrg .pStages = stages, 33917ec681f3Smrg 33927ec681f3Smrg .pVertexInputState = vi_state, 33937ec681f3Smrg 33947ec681f3Smrg .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { 33957ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 33967ec681f3Smrg .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 33977ec681f3Smrg .primitiveRestartEnable = false, 33987ec681f3Smrg }, 33997ec681f3Smrg 34007ec681f3Smrg .pViewportState = &(VkPipelineViewportStateCreateInfo) { 34017ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 34027ec681f3Smrg .viewportCount = 1, 34037ec681f3Smrg .scissorCount = 1, 34047ec681f3Smrg }, 34057ec681f3Smrg 34067ec681f3Smrg .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { 34077ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 34087ec681f3Smrg .rasterizerDiscardEnable = false, 34097ec681f3Smrg .polygonMode = VK_POLYGON_MODE_FILL, 34107ec681f3Smrg .cullMode = VK_CULL_MODE_NONE, 34117ec681f3Smrg .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, 34127ec681f3Smrg .depthBiasEnable = false, 34137ec681f3Smrg }, 34147ec681f3Smrg 34157ec681f3Smrg .pMultisampleState = ms_state, 34167ec681f3Smrg 34177ec681f3Smrg .pDepthStencilState = ds_state, 34187ec681f3Smrg 34197ec681f3Smrg .pColorBlendState = cb_state, 34207ec681f3Smrg 34217ec681f3Smrg /* The meta clear pipeline declares all state as dynamic. 
34227ec681f3Smrg * As a consequence, vkCmdBindPipeline writes no dynamic state 34237ec681f3Smrg * to the cmd buffer. Therefore, at the end of the meta clear, 34247ec681f3Smrg * we need only restore dynamic state that was vkCmdSet. 34257ec681f3Smrg */ 34267ec681f3Smrg .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { 34277ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 34287ec681f3Smrg .dynamicStateCount = 6, 34297ec681f3Smrg .pDynamicStates = (VkDynamicState[]) { 34307ec681f3Smrg VK_DYNAMIC_STATE_VIEWPORT, 34317ec681f3Smrg VK_DYNAMIC_STATE_SCISSOR, 34327ec681f3Smrg VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 34337ec681f3Smrg VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 34347ec681f3Smrg VK_DYNAMIC_STATE_STENCIL_REFERENCE, 34357ec681f3Smrg VK_DYNAMIC_STATE_BLEND_CONSTANTS, 34367ec681f3Smrg VK_DYNAMIC_STATE_DEPTH_BIAS, 34377ec681f3Smrg VK_DYNAMIC_STATE_LINE_WIDTH, 34387ec681f3Smrg }, 34397ec681f3Smrg }, 34407ec681f3Smrg 34417ec681f3Smrg .flags = 0, 34427ec681f3Smrg .layout = layout, 34437ec681f3Smrg .renderPass = v3dv_render_pass_to_handle(pass), 34447ec681f3Smrg .subpass = 0, 34457ec681f3Smrg }; 34467ec681f3Smrg 34477ec681f3Smrg VkResult result = 34487ec681f3Smrg v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device), 34497ec681f3Smrg VK_NULL_HANDLE, 34507ec681f3Smrg 1, &info, 34517ec681f3Smrg &device->vk.alloc, 34527ec681f3Smrg pipeline); 34537ec681f3Smrg 34547ec681f3Smrg ralloc_free(vs_nir); 34557ec681f3Smrg ralloc_free(fs_nir); 34567ec681f3Smrg 34577ec681f3Smrg return result == VK_SUCCESS; 34587ec681f3Smrg} 34597ec681f3Smrg 34607ec681f3Smrgstatic enum glsl_sampler_dim 34617ec681f3Smrgget_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples) 34627ec681f3Smrg{ 34637ec681f3Smrg /* From the Vulkan 1.0 spec, VkImageCreateInfo Validu Usage: 34647ec681f3Smrg * 34657ec681f3Smrg * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be 34667ec681f3Smrg * VK_IMAGE_TYPE_2D, ..." 34677ec681f3Smrg */ 34687ec681f3Smrg assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D); 34697ec681f3Smrg 34707ec681f3Smrg switch (type) { 34717ec681f3Smrg case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D; 34727ec681f3Smrg case VK_IMAGE_TYPE_2D: 34737ec681f3Smrg return src_samples == VK_SAMPLE_COUNT_1_BIT ? 
GLSL_SAMPLER_DIM_2D : 34747ec681f3Smrg GLSL_SAMPLER_DIM_MS; 34757ec681f3Smrg case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D; 34767ec681f3Smrg default: 34777ec681f3Smrg unreachable("Invalid image type"); 34787ec681f3Smrg } 34797ec681f3Smrg} 34807ec681f3Smrg 34817ec681f3Smrgstatic bool 34827ec681f3Smrgcreate_blit_pipeline(struct v3dv_device *device, 34837ec681f3Smrg VkFormat dst_format, 34847ec681f3Smrg VkFormat src_format, 34857ec681f3Smrg VkColorComponentFlags cmask, 34867ec681f3Smrg VkImageType src_type, 34877ec681f3Smrg VkSampleCountFlagBits dst_samples, 34887ec681f3Smrg VkSampleCountFlagBits src_samples, 34897ec681f3Smrg VkRenderPass _pass, 34907ec681f3Smrg VkPipelineLayout pipeline_layout, 34917ec681f3Smrg VkPipeline *pipeline) 34927ec681f3Smrg{ 34937ec681f3Smrg struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass); 34947ec681f3Smrg 34957ec681f3Smrg /* We always rewrite depth/stencil blits to compatible color blits */ 34967ec681f3Smrg assert(vk_format_is_color(dst_format)); 34977ec681f3Smrg assert(vk_format_is_color(src_format)); 34987ec681f3Smrg 34997ec681f3Smrg const enum glsl_sampler_dim sampler_dim = 35007ec681f3Smrg get_sampler_dim(src_type, src_samples); 35017ec681f3Smrg 35027ec681f3Smrg nir_shader *vs_nir = get_blit_vs(); 35037ec681f3Smrg nir_shader *fs_nir = 35047ec681f3Smrg get_color_blit_fs(device, dst_format, src_format, 35057ec681f3Smrg dst_samples, src_samples, sampler_dim); 35067ec681f3Smrg 35077ec681f3Smrg const VkPipelineVertexInputStateCreateInfo vi_state = { 35087ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 35097ec681f3Smrg .vertexBindingDescriptionCount = 0, 35107ec681f3Smrg .vertexAttributeDescriptionCount = 0, 35117ec681f3Smrg }; 35127ec681f3Smrg 35137ec681f3Smrg VkPipelineDepthStencilStateCreateInfo ds_state = { 35147ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 35157ec681f3Smrg }; 35167ec681f3Smrg 35177ec681f3Smrg VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 }; 35187ec681f3Smrg blend_att_state[0] = (VkPipelineColorBlendAttachmentState) { 35197ec681f3Smrg .blendEnable = false, 35207ec681f3Smrg .colorWriteMask = cmask, 35217ec681f3Smrg }; 35227ec681f3Smrg 35237ec681f3Smrg const VkPipelineColorBlendStateCreateInfo cb_state = { 35247ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 35257ec681f3Smrg .logicOpEnable = false, 35267ec681f3Smrg .attachmentCount = 1, 35277ec681f3Smrg .pAttachments = blend_att_state 35287ec681f3Smrg }; 35297ec681f3Smrg 35307ec681f3Smrg const VkPipelineMultisampleStateCreateInfo ms_state = { 35317ec681f3Smrg .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 35327ec681f3Smrg .rasterizationSamples = dst_samples, 35337ec681f3Smrg .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT, 35347ec681f3Smrg .pSampleMask = NULL, 35357ec681f3Smrg .alphaToCoverageEnable = false, 35367ec681f3Smrg .alphaToOneEnable = false, 35377ec681f3Smrg }; 35387ec681f3Smrg 35397ec681f3Smrg return create_pipeline(device, 35407ec681f3Smrg pass, 35417ec681f3Smrg vs_nir, NULL, fs_nir, 35427ec681f3Smrg &vi_state, 35437ec681f3Smrg &ds_state, 35447ec681f3Smrg &cb_state, 35457ec681f3Smrg &ms_state, 35467ec681f3Smrg pipeline_layout, 35477ec681f3Smrg pipeline); 35487ec681f3Smrg} 35497ec681f3Smrg 35507ec681f3Smrg/** 35517ec681f3Smrg * Return a pipeline suitable for blitting the requested aspect given the 35527ec681f3Smrg * destination and source formats. 
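 *
 * Pipelines are cached per source image type (1D/2D/3D) under a key built
 * from the destination format, the source format (only when software
 * integer clamping is required), the color write mask and the sample
 * counts; lookups and insertions are serialized with the device meta
 * mutex.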
35537ec681f3Smrg */ 35547ec681f3Smrgstatic bool 35557ec681f3Smrgget_blit_pipeline(struct v3dv_device *device, 35567ec681f3Smrg VkFormat dst_format, 35577ec681f3Smrg VkFormat src_format, 35587ec681f3Smrg VkColorComponentFlags cmask, 35597ec681f3Smrg VkImageType src_type, 35607ec681f3Smrg VkSampleCountFlagBits dst_samples, 35617ec681f3Smrg VkSampleCountFlagBits src_samples, 35627ec681f3Smrg struct v3dv_meta_blit_pipeline **pipeline) 35637ec681f3Smrg{ 35647ec681f3Smrg bool ok = true; 35657ec681f3Smrg 35667ec681f3Smrg uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE]; 35677ec681f3Smrg get_blit_pipeline_cache_key(dst_format, src_format, cmask, 35687ec681f3Smrg dst_samples, src_samples, key); 35697ec681f3Smrg mtx_lock(&device->meta.mtx); 35707ec681f3Smrg struct hash_entry *entry = 35717ec681f3Smrg _mesa_hash_table_search(device->meta.blit.cache[src_type], &key); 35727ec681f3Smrg if (entry) { 35737ec681f3Smrg mtx_unlock(&device->meta.mtx); 35747ec681f3Smrg *pipeline = entry->data; 35757ec681f3Smrg return true; 35767ec681f3Smrg } 35777ec681f3Smrg 35787ec681f3Smrg *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8, 35797ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 35807ec681f3Smrg 35817ec681f3Smrg if (*pipeline == NULL) 35827ec681f3Smrg goto fail; 35837ec681f3Smrg 35847ec681f3Smrg ok = create_blit_render_pass(device, dst_format, src_format, 35857ec681f3Smrg &(*pipeline)->pass, 35867ec681f3Smrg &(*pipeline)->pass_no_load); 35877ec681f3Smrg if (!ok) 35887ec681f3Smrg goto fail; 35897ec681f3Smrg 35907ec681f3Smrg /* Create the pipeline using one of the render passes, they are both 35917ec681f3Smrg * compatible, so we don't care which one we use here. 35927ec681f3Smrg */ 35937ec681f3Smrg ok = create_blit_pipeline(device, 35947ec681f3Smrg dst_format, 35957ec681f3Smrg src_format, 35967ec681f3Smrg cmask, 35977ec681f3Smrg src_type, 35987ec681f3Smrg dst_samples, 35997ec681f3Smrg src_samples, 36007ec681f3Smrg (*pipeline)->pass, 36017ec681f3Smrg device->meta.blit.p_layout, 36027ec681f3Smrg &(*pipeline)->pipeline); 36037ec681f3Smrg if (!ok) 36047ec681f3Smrg goto fail; 36057ec681f3Smrg 36067ec681f3Smrg memcpy((*pipeline)->key, key, sizeof((*pipeline)->key)); 36077ec681f3Smrg _mesa_hash_table_insert(device->meta.blit.cache[src_type], 36087ec681f3Smrg &(*pipeline)->key, *pipeline); 36097ec681f3Smrg 36107ec681f3Smrg mtx_unlock(&device->meta.mtx); 36117ec681f3Smrg return true; 36127ec681f3Smrg 36137ec681f3Smrgfail: 36147ec681f3Smrg mtx_unlock(&device->meta.mtx); 36157ec681f3Smrg 36167ec681f3Smrg VkDevice _device = v3dv_device_to_handle(device); 36177ec681f3Smrg if (*pipeline) { 36187ec681f3Smrg if ((*pipeline)->pass) 36197ec681f3Smrg v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc); 36207ec681f3Smrg if ((*pipeline)->pass_no_load) 36217ec681f3Smrg v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc); 36227ec681f3Smrg if ((*pipeline)->pipeline) 36237ec681f3Smrg v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc); 36247ec681f3Smrg vk_free(&device->vk.alloc, *pipeline); 36257ec681f3Smrg *pipeline = NULL; 36267ec681f3Smrg } 36277ec681f3Smrg 36287ec681f3Smrg return false; 36297ec681f3Smrg} 36307ec681f3Smrg 36317ec681f3Smrgstatic void 36327ec681f3Smrgcompute_blit_box(const VkOffset3D *offsets, 36337ec681f3Smrg uint32_t image_w, uint32_t image_h, 36347ec681f3Smrg uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h, 36357ec681f3Smrg bool *mirror_x, bool *mirror_y) 36367ec681f3Smrg{ 36377ec681f3Smrg if (offsets[1].x >= offsets[0].x) { 
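      /* Ascending X offsets: no mirroring along X; clamp the start and
       * width so the box never reaches past the last column of the image.
       */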
36387ec681f3Smrg *mirror_x = false; 36397ec681f3Smrg *x = MIN2(offsets[0].x, image_w - 1); 36407ec681f3Smrg *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x); 36417ec681f3Smrg } else { 36427ec681f3Smrg *mirror_x = true; 36437ec681f3Smrg *x = MIN2(offsets[1].x, image_w - 1); 36447ec681f3Smrg *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x); 36457ec681f3Smrg } 36467ec681f3Smrg if (offsets[1].y >= offsets[0].y) { 36477ec681f3Smrg *mirror_y = false; 36487ec681f3Smrg *y = MIN2(offsets[0].y, image_h - 1); 36497ec681f3Smrg *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y); 36507ec681f3Smrg } else { 36517ec681f3Smrg *mirror_y = true; 36527ec681f3Smrg *y = MIN2(offsets[1].y, image_h - 1); 36537ec681f3Smrg *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y); 36547ec681f3Smrg } 36557ec681f3Smrg} 36567ec681f3Smrg 36577ec681f3Smrgstatic void 36587ec681f3Smrgcompute_blit_3d_layers(const VkOffset3D *offsets, 36597ec681f3Smrg uint32_t *min_layer, uint32_t *max_layer, 36607ec681f3Smrg bool *mirror_z) 36617ec681f3Smrg{ 36627ec681f3Smrg if (offsets[1].z >= offsets[0].z) { 36637ec681f3Smrg *mirror_z = false; 36647ec681f3Smrg *min_layer = offsets[0].z; 36657ec681f3Smrg *max_layer = offsets[1].z; 36667ec681f3Smrg } else { 36677ec681f3Smrg *mirror_z = true; 36687ec681f3Smrg *min_layer = offsets[1].z; 36697ec681f3Smrg *max_layer = offsets[0].z; 36707ec681f3Smrg } 36717ec681f3Smrg} 36727ec681f3Smrg 36737ec681f3Smrgstatic VkResult 36747ec681f3Smrgcreate_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer) 36757ec681f3Smrg{ 36767ec681f3Smrg /* If this is not the first pool we create for this command buffer 36777ec681f3Smrg * size it based on the size of the currently exhausted pool. 36787ec681f3Smrg */ 36797ec681f3Smrg uint32_t descriptor_count = 64; 36807ec681f3Smrg if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) { 36817ec681f3Smrg struct v3dv_descriptor_pool *exhausted_pool = 36827ec681f3Smrg v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool); 36837ec681f3Smrg descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024); 36847ec681f3Smrg } 36857ec681f3Smrg 36867ec681f3Smrg /* Create the descriptor pool */ 36877ec681f3Smrg cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE; 36887ec681f3Smrg VkDescriptorPoolSize pool_size = { 36897ec681f3Smrg .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 36907ec681f3Smrg .descriptorCount = descriptor_count, 36917ec681f3Smrg }; 36927ec681f3Smrg VkDescriptorPoolCreateInfo info = { 36937ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 36947ec681f3Smrg .maxSets = descriptor_count, 36957ec681f3Smrg .poolSizeCount = 1, 36967ec681f3Smrg .pPoolSizes = &pool_size, 36977ec681f3Smrg .flags = 0, 36987ec681f3Smrg }; 36997ec681f3Smrg VkResult result = 37007ec681f3Smrg v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device), 37017ec681f3Smrg &info, 37027ec681f3Smrg &cmd_buffer->device->vk.alloc, 37037ec681f3Smrg &cmd_buffer->meta.blit.dspool); 37047ec681f3Smrg 37057ec681f3Smrg if (result == VK_SUCCESS) { 37067ec681f3Smrg assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE); 37077ec681f3Smrg const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool; 37087ec681f3Smrg 37097ec681f3Smrg v3dv_cmd_buffer_add_private_obj( 37107ec681f3Smrg cmd_buffer, (uintptr_t) _pool, 37117ec681f3Smrg (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool); 37127ec681f3Smrg 37137ec681f3Smrg struct v3dv_descriptor_pool *pool = 37147ec681f3Smrg v3dv_descriptor_pool_from_handle(_pool); 37157ec681f3Smrg 
pool->is_driver_internal = true; 37167ec681f3Smrg } 37177ec681f3Smrg 37187ec681f3Smrg return result; 37197ec681f3Smrg} 37207ec681f3Smrg 37217ec681f3Smrgstatic VkResult 37227ec681f3Smrgallocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer, 37237ec681f3Smrg VkDescriptorSet *set) 37247ec681f3Smrg{ 37257ec681f3Smrg /* Make sure we have a descriptor pool */ 37267ec681f3Smrg VkResult result; 37277ec681f3Smrg if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) { 37287ec681f3Smrg result = create_blit_descriptor_pool(cmd_buffer); 37297ec681f3Smrg if (result != VK_SUCCESS) 37307ec681f3Smrg return result; 37317ec681f3Smrg } 37327ec681f3Smrg assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE); 37337ec681f3Smrg 37347ec681f3Smrg /* Allocate descriptor set */ 37357ec681f3Smrg struct v3dv_device *device = cmd_buffer->device; 37367ec681f3Smrg VkDevice _device = v3dv_device_to_handle(device); 37377ec681f3Smrg VkDescriptorSetAllocateInfo info = { 37387ec681f3Smrg .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 37397ec681f3Smrg .descriptorPool = cmd_buffer->meta.blit.dspool, 37407ec681f3Smrg .descriptorSetCount = 1, 37417ec681f3Smrg .pSetLayouts = &device->meta.blit.ds_layout, 37427ec681f3Smrg }; 37437ec681f3Smrg result = v3dv_AllocateDescriptorSets(_device, &info, set); 37447ec681f3Smrg 37457ec681f3Smrg /* If we ran out of pool space, grow the pool and try again */ 37467ec681f3Smrg if (result == VK_ERROR_OUT_OF_POOL_MEMORY) { 37477ec681f3Smrg result = create_blit_descriptor_pool(cmd_buffer); 37487ec681f3Smrg if (result == VK_SUCCESS) { 37497ec681f3Smrg info.descriptorPool = cmd_buffer->meta.blit.dspool; 37507ec681f3Smrg result = v3dv_AllocateDescriptorSets(_device, &info, set); 37517ec681f3Smrg } 37527ec681f3Smrg } 37537ec681f3Smrg 37547ec681f3Smrg return result; 37557ec681f3Smrg} 37567ec681f3Smrg 37577ec681f3Smrg/** 37587ec681f3Smrg * Returns true if the implementation supports the requested operation (even if 37597ec681f3Smrg * it failed to process it, for example, due to an out-of-memory error). 37607ec681f3Smrg * 37617ec681f3Smrg * The caller can specify the channels on the destination to be written via the 37627ec681f3Smrg * cmask parameter (which can be 0 to default to all channels), as well as a 37637ec681f3Smrg * swizzle to apply to the source via the cswizzle parameter (which can be NULL 37647ec681f3Smrg * to use the default identity swizzle). 37657ec681f3Smrg */ 37667ec681f3Smrgstatic bool 37677ec681f3Smrgblit_shader(struct v3dv_cmd_buffer *cmd_buffer, 37687ec681f3Smrg struct v3dv_image *dst, 37697ec681f3Smrg VkFormat dst_format, 37707ec681f3Smrg struct v3dv_image *src, 37717ec681f3Smrg VkFormat src_format, 37727ec681f3Smrg VkColorComponentFlags cmask, 37737ec681f3Smrg VkComponentMapping *cswizzle, 37747ec681f3Smrg const VkImageBlit2KHR *_region, 37757ec681f3Smrg VkFilter filter, 37767ec681f3Smrg bool dst_is_padded_image) 37777ec681f3Smrg{ 37787ec681f3Smrg bool handled = true; 37797ec681f3Smrg VkResult result; 37807ec681f3Smrg uint32_t dirty_dynamic_state = 0; 37817ec681f3Smrg 37827ec681f3Smrg /* We don't support rendering to linear depth/stencil, this should have 37837ec681f3Smrg * been rewritten to a compatible color blit by the caller. 
    */
   assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
          !vk_format_is_depth_or_stencil(dst_format));

   /* Can't sample from linear images */
   if (src->vk.tiling == VK_IMAGE_TILING_LINEAR &&
       src->vk.image_type != VK_IMAGE_TYPE_1D)
      return false;

   VkImageBlit2KHR region = *_region;
   /* Rewrite combined D/S blits to compatible color blits */
   if (vk_format_is_depth_or_stencil(dst_format)) {
      assert(src_format == dst_format);
      assert(cmask == 0);
      switch (dst_format) {
      case VK_FORMAT_D16_UNORM:
         dst_format = VK_FORMAT_R16_UINT;
         break;
      case VK_FORMAT_D32_SFLOAT:
         dst_format = VK_FORMAT_R32_UINT;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            cmask |= VK_COLOR_COMPONENT_G_BIT |
                     VK_COLOR_COMPONENT_B_BIT |
                     VK_COLOR_COMPONENT_A_BIT;
         }
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
            cmask |= VK_COLOR_COMPONENT_R_BIT;
         }
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      default:
         unreachable("Unsupported depth/stencil format");
      }
      src_format = dst_format;
      region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
      region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   VkComponentMapping ident_swizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   if (!cswizzle)
      cswizzle = &ident_swizzle;

   /* When we get here from a copy between compressed / uncompressed images
    * we choose to specify the destination blit region based on the size
    * semantics of the source image of the copy (see copy_image_blit), so we
    * need to apply those same semantics here when we compute the size of the
    * destination image level.
    */
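   /* As a purely hypothetical example of the arithmetic below: blitting from
    * a compressed source with 4x4 blocks into an uncompressed destination
    * (1x1 blocks) whose level-0 width is 16 gives
    * dst_level_w = u_minify(DIV_ROUND_UP(16 * 4, 1), 0) = 64, i.e. the
    * destination size expressed in the source image's texel units.
    */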
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_level_w =
      u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
               region.dstSubresource.mipLevel);
   const uint32_t dst_level_h =
      u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
               region.dstSubresource.mipLevel);

   const uint32_t src_level_w =
      u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
   const uint32_t src_level_h =
      u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
   const uint32_t src_level_d =
      u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel);

   uint32_t dst_x, dst_y, dst_w, dst_h;
   bool dst_mirror_x, dst_mirror_y;
   compute_blit_box(region.dstOffsets,
                    dst_level_w, dst_level_h,
                    &dst_x, &dst_y, &dst_w, &dst_h,
                    &dst_mirror_x, &dst_mirror_y);

   uint32_t src_x, src_y, src_w, src_h;
   bool src_mirror_x, src_mirror_y;
   compute_blit_box(region.srcOffsets,
                    src_level_w, src_level_h,
                    &src_x, &src_y, &src_w, &src_h,
                    &src_mirror_x, &src_mirror_y);

   uint32_t min_dst_layer;
   uint32_t max_dst_layer;
   bool dst_mirror_z = false;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_dst_layer = region.dstSubresource.baseArrayLayer;
      max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.dstOffsets,
                             &min_dst_layer, &max_dst_layer,
                             &dst_mirror_z);
   }

   uint32_t min_src_layer;
   uint32_t max_src_layer;
   bool src_mirror_z = false;
   if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_src_layer = region.srcSubresource.baseArrayLayer;
      max_src_layer = min_src_layer + region.srcSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.srcOffsets,
                             &min_src_layer, &max_src_layer,
                             &src_mirror_z);
   }

   uint32_t layer_count = max_dst_layer - min_dst_layer;

   /* Translate source blit coordinates to normalized texture coordinates for
    * single sampled textures. For multisampled textures we require
    * unnormalized coordinates, since we can only do texelFetch on them.
    */
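   /* For instance (hypothetical numbers): with src_x = 8, src_y = 0,
    * src_w = 16, src_h = 32 and a 64x64 single-sampled source level, the
    * normalization below yields coords = { 0.125, 0.0, 0.375, 0.5 }.
    */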
   float coords[4] =  {
      (float)src_x,
      (float)src_y,
      (float)(src_x + src_w),
      (float)(src_y + src_h),
   };

   if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
      coords[0] /= (float)src_level_w;
      coords[1] /= (float)src_level_h;
      coords[2] /= (float)src_level_w;
      coords[3] /= (float)src_level_h;
   }

   /* Handle mirroring */
   const bool mirror_x = dst_mirror_x != src_mirror_x;
   const bool mirror_y = dst_mirror_y != src_mirror_y;
   const bool mirror_z = dst_mirror_z != src_mirror_z;
   float tex_coords[5] = {
      !mirror_x ? coords[0] : coords[2],
      !mirror_y ? coords[1] : coords[3],
      !mirror_x ? coords[2] : coords[0],
      !mirror_y ? coords[3] : coords[1],
      /* Z coordinate for 3D blit sources, to be filled for each
       * destination layer
       */
      0.0f
   };

   /* For blits from 3D images we also need to compute the slice coordinate to
    * sample from, which will change for each layer in the destination.
    * Compute the step to advance by on each iteration.
    */
   const float src_z_step =
      (float)(max_src_layer - min_src_layer) / (float)layer_count;

   /* Get the blit pipeline */
   struct v3dv_meta_blit_pipeline *pipeline = NULL;
   bool ok = get_blit_pipeline(cmd_buffer->device,
                               dst_format, src_format, cmask, src->vk.image_type,
                               dst->vk.samples, src->vk.samples,
                               &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline &&
          pipeline->pass && pipeline->pass_no_load);

   struct v3dv_device *device = cmd_buffer->device;
   assert(device->meta.blit.ds_layout);

   VkDevice _device = v3dv_device_to_handle(device);
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

   /* Create sampler for blit source image */
   VkSamplerCreateInfo sampler_info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .magFilter = filter,
      .minFilter = filter,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
   };
   VkSampler sampler;
   result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
                               &sampler);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)sampler,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);

   /* Push command buffer state before starting meta operation */
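   /* The meta-state push below saves the current command buffer state so the
    * blit can freely bind its own pipeline, descriptor sets, viewport and
    * scissor; the matching pop at the end of this function restores it, with
    * dirty_dynamic_state flagging the viewport/scissor as needing re-emission.
    */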
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

   /* Push state that is common for all layers */
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   const VkViewport viewport = {
      .x = dst_x,
      .y = dst_y,
      .width = dst_w,
      .height = dst_h,
      .minDepth = 0.0f,
      .maxDepth = 1.0f
   };
   v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);

   const VkRect2D scissor = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h }
   };
   v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

   bool can_skip_tlb_load = false;
   const VkRect2D render_area = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h },
   };

   /* Record per-layer commands */
   VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
   for (uint32_t i = 0; i < layer_count; i++) {
      /* Setup framebuffer */
      VkImageViewCreateInfo dst_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(dst),
         .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
         .format = dst_format,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.dstSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer = min_dst_layer + i,
            .layerCount = 1
         },
      };
      VkImageView dst_image_view;
      result = v3dv_CreateImageView(_device, &dst_image_view_info,
                                    &device->vk.alloc, &dst_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)dst_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkFramebufferCreateInfo fb_info = {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .renderPass = pipeline->pass,
         .attachmentCount = 1,
         .pAttachments = &dst_image_view,
         .width = dst_x + dst_w,
         .height = dst_y + dst_h,
         .layers = 1,
      };

      VkFramebuffer fb;
      result = v3dv_CreateFramebuffer(_device, &fb_info,
                                      &cmd_buffer->device->vk.alloc, &fb);
      if (result != VK_SUCCESS)
         goto fail;

      struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
      framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
                                      fb_info.height == dst_level_h &&
                                      dst_is_padded_image;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

      /* Set up the descriptor set for the blit source texture. We don't have
       * to register the descriptor as a private command buffer object since
       * all descriptors will be freed automatically with the descriptor
       * pool.
       */
      VkDescriptorSet set;
      result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
      if (result != VK_SUCCESS)
         goto fail;

      VkImageViewCreateInfo src_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(src),
         .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
         .format = src_format,
         .components = *cswizzle,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.srcSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer =
               src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
            .layerCount = 1
         },
      };
      VkImageView src_image_view;
      result = v3dv_CreateImageView(_device, &src_image_view_info,
                                    &device->vk.alloc, &src_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)src_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkDescriptorImageInfo image_info = {
         .sampler = sampler,
         .imageView = src_image_view,
         .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      };
      VkWriteDescriptorSet write = {
         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
         .dstSet = set,
         .dstBinding = 0,
         .dstArrayElement = 0,
         .descriptorCount = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .pImageInfo = &image_info,
      };
      v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

      v3dv_CmdBindDescriptorSets(_cmd_buffer,
                                 VK_PIPELINE_BIND_POINT_GRAPHICS,
                                 device->meta.blit.p_layout,
                                 0, 1, &set,
                                 0, NULL);

      /* If the region we are about to blit is tile-aligned, then we can
       * use the render pass version that won't pre-load the tile buffer
       * with the dst image contents before the blit. The exception is when we
       * don't have a full color mask, since in that case we need to preserve
       * the original value of some of the color components.
       *
       * Since all layers have the same area, we only need to compute this for
       * the first.
       */
      if (i == 0) {
         struct v3dv_render_pass *pipeline_pass =
            v3dv_render_pass_from_handle(pipeline->pass);
         can_skip_tlb_load =
            cmask == full_cmask &&
            v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                              framebuffer, pipeline_pass, 0);
      }

      /* Record blit */
      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* For 3D blits we need to compute the source slice to blit from (the Z
       * coordinate of the source sample operation). We want to choose this
       * based on the ratio of the depth of the source and the destination
       * images, picking the coordinate in the middle of each step.
       */
      if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
         tex_coords[4] =
            !mirror_z ?
            (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
            (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
      }

      v3dv_CmdPushConstants(_cmd_buffer,
                            device->meta.blit.p_layout,
                            VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
                            &tex_coords);

      v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);

      v3dv_CmdEndRenderPass(_cmd_buffer);
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
   }

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);

   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                      const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
          src->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
   assert(!vk_format_is_compressed(dst->vk.format));

   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
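      /* Try the fixed-function TFU (transfer) path first; if it cannot handle
       * this particular region, fall back to the shader-based blit.
       */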
      if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
         continue;
      if (blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      &pBlitImageInfo->pRegions[i],
                      pBlitImageInfo->filter, true)) {
         continue;
      }
      unreachable("Unsupported blit operation");
   }
}

static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                  struct v3dv_image *dst,
                  struct v3dv_image *src,
                  const VkImageResolve2KHR *region)
{
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
      return false;
   }

   if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
      return false;

   const VkFormat fb_format = src->vk.format;

   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->srcSubresource.aspectMask,
       &internal_type, &internal_bpp);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, true);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
                                                    &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);
   return true;
}

static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *dst,
                   struct v3dv_image *src,
                   const VkImageResolve2KHR *region)
{
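   /* Express the resolve as a 1:1 (non-scaling) blit over the same region;
    * NEAREST filtering is sufficient since the source and destination
    * extents match.
    */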
   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = {
         region->srcOffset,
         {
            region->srcOffset.x + region->extent.width,
            region->srcOffset.y + region->extent.height,
         }
      },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = {
         region->dstOffset,
         {
            region->dstOffset.x + region->extent.width,
            region->dstOffset.y + region->extent.height,
         }
      },
   };
   return blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      &blit_region, VK_FILTER_NEAREST, true);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
                         const VkResolveImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Unsupported multisample resolve operation");
   }
}