/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
2601e04c3fSmrg */ 2701e04c3fSmrg 287ec681f3Smrg#include "ac_drm_fourcc.h" 297ec681f3Smrg#include "util/debug.h" 307ec681f3Smrg#include "util/u_atomic.h" 317ec681f3Smrg#include "vulkan/util/vk_format.h" 3201e04c3fSmrg#include "radv_debug.h" 3301e04c3fSmrg#include "radv_private.h" 3401e04c3fSmrg#include "radv_radeon_winsys.h" 3501e04c3fSmrg#include "sid.h" 367ec681f3Smrg#include "vk_format.h" 377ec681f3Smrg#include "vk_util.h" 387ec681f3Smrg 397ec681f3Smrg#include "gfx10_format_table.h" 407ec681f3Smrg 417ec681f3Smrgstatic const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS = 427ec681f3Smrg VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | 437ec681f3Smrg VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT; 447ec681f3Smrg 4501e04c3fSmrgstatic unsigned 467ec681f3Smrgradv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, 477ec681f3Smrg VkFormat format) 4801e04c3fSmrg{ 497ec681f3Smrg if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { 507ec681f3Smrg assert(pCreateInfo->samples <= 1); 517ec681f3Smrg return RADEON_SURF_MODE_LINEAR_ALIGNED; 527ec681f3Smrg } 537ec681f3Smrg 547ec681f3Smrg /* MSAA resources must be 2D tiled. */ 557ec681f3Smrg if (pCreateInfo->samples > 1) 567ec681f3Smrg return RADEON_SURF_MODE_2D; 577ec681f3Smrg 587ec681f3Smrg if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && 597ec681f3Smrg device->physical_device->rad_info.chip_class <= GFX8) { 607ec681f3Smrg /* this causes hangs in some VK CTS tests on GFX9. */ 617ec681f3Smrg /* Textures with a very small height are recommended to be linear. */ 627ec681f3Smrg if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || 637ec681f3Smrg /* Only very thin and long 2D textures should benefit from 647ec681f3Smrg * linear_aligned. 
*/ 657ec681f3Smrg (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2)) 667ec681f3Smrg return RADEON_SURF_MODE_LINEAR_ALIGNED; 677ec681f3Smrg } 687ec681f3Smrg 697ec681f3Smrg return RADEON_SURF_MODE_2D; 707ec681f3Smrg} 7101e04c3fSmrg 727ec681f3Smrgstatic bool 737ec681f3Smrgradv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, 747ec681f3Smrg VkFormat format) 757ec681f3Smrg{ 767ec681f3Smrg /* TC-compat HTILE is only available for GFX8+. */ 777ec681f3Smrg if (device->physical_device->rad_info.chip_class < GFX8) 787ec681f3Smrg return false; 797ec681f3Smrg 807ec681f3Smrg if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)) 817ec681f3Smrg return false; 827ec681f3Smrg 837ec681f3Smrg if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) 847ec681f3Smrg return false; 857ec681f3Smrg 867ec681f3Smrg /* Do not enable TC-compatible HTILE if the image isn't readable by a 877ec681f3Smrg * shader because no texture fetches will happen. 887ec681f3Smrg */ 897ec681f3Smrg if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | 907ec681f3Smrg VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) 917ec681f3Smrg return false; 927ec681f3Smrg 937ec681f3Smrg if (device->physical_device->rad_info.chip_class < GFX9) { 947ec681f3Smrg /* TC-compat HTILE for MSAA depth/stencil images is broken 957ec681f3Smrg * on GFX8 because the tiling doesn't match. 967ec681f3Smrg */ 977ec681f3Smrg if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT) 987ec681f3Smrg return false; 997ec681f3Smrg 1007ec681f3Smrg /* GFX9+ supports compression for both 32-bit and 16-bit depth 1017ec681f3Smrg * surfaces, while GFX8 only supports 32-bit natively. Though, 1027ec681f3Smrg * the driver allows TC-compat HTILE for 16-bit depth surfaces 1037ec681f3Smrg * with no Z planes compression. 
1047ec681f3Smrg */ 1057ec681f3Smrg if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && 1067ec681f3Smrg format != VK_FORMAT_D16_UNORM) 1077ec681f3Smrg return false; 1087ec681f3Smrg } 1097ec681f3Smrg 1107ec681f3Smrg return true; 1117ec681f3Smrg} 1127ec681f3Smrg 1137ec681f3Smrgstatic bool 1147ec681f3Smrgradv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info) 1157ec681f3Smrg{ 1167ec681f3Smrg if (info->bo_metadata) { 1177ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) 1187ec681f3Smrg return info->bo_metadata->u.gfx9.scanout; 1197ec681f3Smrg else 1207ec681f3Smrg return info->bo_metadata->u.legacy.scanout; 1217ec681f3Smrg } 1227ec681f3Smrg 1237ec681f3Smrg return info->scanout; 1247ec681f3Smrg} 1257ec681f3Smrg 1267ec681f3Smrgstatic bool 1277ec681f3Smrgradv_image_use_fast_clear_for_image_early(const struct radv_device *device, 1287ec681f3Smrg const struct radv_image *image) 1297ec681f3Smrg{ 1307ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) 1317ec681f3Smrg return true; 1327ec681f3Smrg 1337ec681f3Smrg if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) { 1347ec681f3Smrg /* Do not enable CMASK or DCC for small surfaces where the cost 1357ec681f3Smrg * of the eliminate pass can be higher than the benefit of fast 1367ec681f3Smrg * clear. RadeonSI does this, but the image threshold is 1377ec681f3Smrg * different. 
1387ec681f3Smrg */ 1397ec681f3Smrg return false; 1407ec681f3Smrg } 1417ec681f3Smrg 1427ec681f3Smrg return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrgstatic bool 1467ec681f3Smrgradv_image_use_fast_clear_for_image(const struct radv_device *device, 1477ec681f3Smrg const struct radv_image *image) 1487ec681f3Smrg{ 1497ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) 1507ec681f3Smrg return true; 1517ec681f3Smrg 1527ec681f3Smrg return radv_image_use_fast_clear_for_image_early(device, image) && 1537ec681f3Smrg (image->exclusive || 1547ec681f3Smrg /* Enable DCC for concurrent images if stores are 1557ec681f3Smrg * supported because that means we can keep DCC compressed on 1567ec681f3Smrg * all layouts/queues. 1577ec681f3Smrg */ 1587ec681f3Smrg radv_image_use_dcc_image_stores(device, image)); 1597ec681f3Smrg} 16001e04c3fSmrg 1617ec681f3Smrgbool 1627ec681f3Smrgradv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, 1637ec681f3Smrg VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret) 1647ec681f3Smrg{ 1657ec681f3Smrg bool blendable; 1667ec681f3Smrg 1677ec681f3Smrg if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable)) 1687ec681f3Smrg return false; 1697ec681f3Smrg 1707ec681f3Smrg if (sign_reinterpret != NULL) 1717ec681f3Smrg *sign_reinterpret = false; 1727ec681f3Smrg 1737ec681f3Smrg if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) { 1747ec681f3Smrg const struct VkImageFormatListCreateInfo *format_list = 1757ec681f3Smrg (const struct VkImageFormatListCreateInfo *)vk_find_struct_const( 1767ec681f3Smrg pNext, IMAGE_FORMAT_LIST_CREATE_INFO); 1777ec681f3Smrg 1787ec681f3Smrg /* We have to ignore the existence of the list if viewFormatCount = 0 */ 1797ec681f3Smrg if (format_list && format_list->viewFormatCount) { 1807ec681f3Smrg /* compatibility is transitive, so we only need to check 1817ec681f3Smrg * one format with everything else. 
*/ 1827ec681f3Smrg for (unsigned i = 0; i < format_list->viewFormatCount; ++i) { 1837ec681f3Smrg if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED) 1847ec681f3Smrg continue; 1857ec681f3Smrg 1867ec681f3Smrg if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i], 1877ec681f3Smrg sign_reinterpret)) 1887ec681f3Smrg return false; 1897ec681f3Smrg } 1907ec681f3Smrg } else { 1917ec681f3Smrg return false; 1927ec681f3Smrg } 1937ec681f3Smrg } 1947ec681f3Smrg 1957ec681f3Smrg return true; 1967ec681f3Smrg} 19701e04c3fSmrg 1987ec681f3Smrgstatic bool 1997ec681f3Smrgradv_format_is_atomic_allowed(struct radv_device *device, VkFormat format) 2007ec681f3Smrg{ 2017ec681f3Smrg if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics) 2027ec681f3Smrg return false; 20301e04c3fSmrg 2047ec681f3Smrg return radv_is_atomic_format_supported(format); 20501e04c3fSmrg} 20601e04c3fSmrg 20701e04c3fSmrgstatic bool 2087ec681f3Smrgradv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, 2097ec681f3Smrg VkImageCreateFlags flags) 2107ec681f3Smrg{ 2117ec681f3Smrg if (radv_format_is_atomic_allowed(device, format)) 2127ec681f3Smrg return true; 2137ec681f3Smrg 2147ec681f3Smrg if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) { 2157ec681f3Smrg const struct VkImageFormatListCreateInfo *format_list = 2167ec681f3Smrg (const struct VkImageFormatListCreateInfo *)vk_find_struct_const( 2177ec681f3Smrg pNext, IMAGE_FORMAT_LIST_CREATE_INFO); 2187ec681f3Smrg 2197ec681f3Smrg /* We have to ignore the existence of the list if viewFormatCount = 0 */ 2207ec681f3Smrg if (format_list && format_list->viewFormatCount) { 2217ec681f3Smrg for (unsigned i = 0; i < format_list->viewFormatCount; ++i) { 2227ec681f3Smrg if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i])) 2237ec681f3Smrg return true; 2247ec681f3Smrg } 2257ec681f3Smrg } 2267ec681f3Smrg } 2277ec681f3Smrg 2287ec681f3Smrg return false; 22901e04c3fSmrg} 23001e04c3fSmrg 
23101e04c3fSmrgstatic bool 2327ec681f3Smrgradv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, 2337ec681f3Smrg const VkImageCreateInfo *pCreateInfo, VkFormat format, 2347ec681f3Smrg bool *sign_reinterpret) 2357ec681f3Smrg{ 2367ec681f3Smrg /* DCC (Delta Color Compression) is only available for GFX8+. */ 2377ec681f3Smrg if (device->physical_device->rad_info.chip_class < GFX8) 2387ec681f3Smrg return false; 2397ec681f3Smrg 2407ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_NO_DCC) 2417ec681f3Smrg return false; 2427ec681f3Smrg 2437ec681f3Smrg if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) 2447ec681f3Smrg return false; 2457ec681f3Smrg 2467ec681f3Smrg /* 2477ec681f3Smrg * TODO: Enable DCC for storage images on GFX9 and earlier. 2487ec681f3Smrg * 2497ec681f3Smrg * Also disable DCC with atomics because even when DCC stores are 2507ec681f3Smrg * supported atomics will always decompress. So if we are 2517ec681f3Smrg * decompressing a lot anyway we might as well not have DCC. 2527ec681f3Smrg */ 2537ec681f3Smrg if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) && 2547ec681f3Smrg (device->physical_device->rad_info.chip_class < GFX10 || 2557ec681f3Smrg radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags))) 2567ec681f3Smrg return false; 2577ec681f3Smrg 2587ec681f3Smrg /* Do not enable DCC for fragment shading rate attachments. 
*/ 2597ec681f3Smrg if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) 2607ec681f3Smrg return false; 2617ec681f3Smrg 2627ec681f3Smrg if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) 2637ec681f3Smrg return false; 2647ec681f3Smrg 2657ec681f3Smrg if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1) 2667ec681f3Smrg return false; 2677ec681f3Smrg 2687ec681f3Smrg if (!radv_image_use_fast_clear_for_image_early(device, image) && 2697ec681f3Smrg image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) 2707ec681f3Smrg return false; 2717ec681f3Smrg 2727ec681f3Smrg /* Do not enable DCC for mipmapped arrays because performance is worse. */ 2737ec681f3Smrg if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1) 2747ec681f3Smrg return false; 2757ec681f3Smrg 2767ec681f3Smrg if (device->physical_device->rad_info.chip_class < GFX10) { 2777ec681f3Smrg /* TODO: Add support for DCC MSAA on GFX8-9. */ 2787ec681f3Smrg if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed) 2797ec681f3Smrg return false; 2807ec681f3Smrg 2817ec681f3Smrg /* TODO: Add support for DCC layers/mipmaps on GFX9. 
*/ 2827ec681f3Smrg if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && 2837ec681f3Smrg device->physical_device->rad_info.chip_class == GFX9) 2847ec681f3Smrg return false; 2857ec681f3Smrg } 2867ec681f3Smrg 2877ec681f3Smrg return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, 2887ec681f3Smrg pCreateInfo->flags, sign_reinterpret); 28901e04c3fSmrg} 29001e04c3fSmrg 2917ec681f3Smrgstatic bool 2927ec681f3Smrgradv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image) 29301e04c3fSmrg{ 2947ec681f3Smrg if (!radv_image_has_dcc(image)) 2957ec681f3Smrg return false; 2967ec681f3Smrg 2977ec681f3Smrg if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) 2987ec681f3Smrg return true; 2997ec681f3Smrg 3007ec681f3Smrg if (!radv_image_use_fast_clear_for_image(device, image)) 3017ec681f3Smrg return false; 3027ec681f3Smrg 3037ec681f3Smrg /* TODO: Fix storage images with DCC without DCC image stores. 3047ec681f3Smrg * Disabling it for now. */ 3057ec681f3Smrg if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image)) 3067ec681f3Smrg return false; 3077ec681f3Smrg 3087ec681f3Smrg return true; 30901e04c3fSmrg} 31001e04c3fSmrg 3117ec681f3Smrg/* 3127ec681f3Smrg * Whether to enable image stores with DCC compression for this image. If 3137ec681f3Smrg * this function returns false the image subresource should be decompressed 3147ec681f3Smrg * before using it with image stores. 3157ec681f3Smrg * 3167ec681f3Smrg * Note that this can have mixed performance implications, see 3177ec681f3Smrg * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299 3187ec681f3Smrg * 3197ec681f3Smrg * This function assumes the image uses DCC compression. 
3207ec681f3Smrg */ 3217ec681f3Smrgbool 3227ec681f3Smrgradv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image) 32301e04c3fSmrg{ 3247ec681f3Smrg return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class, 3257ec681f3Smrg &image->planes[0].surface); 32601e04c3fSmrg} 32701e04c3fSmrg 3287ec681f3Smrg/* 3297ec681f3Smrg * Whether to use a predicate to determine whether DCC is in a compressed 3307ec681f3Smrg * state. This can be used to avoid decompressing an image multiple times. 3317ec681f3Smrg */ 3327ec681f3Smrgbool 3337ec681f3Smrgradv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image) 3347ec681f3Smrg{ 3357ec681f3Smrg return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image); 3367ec681f3Smrg} 3377ec681f3Smrg 3387ec681f3Smrgstatic inline bool 3397ec681f3Smrgradv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image) 3407ec681f3Smrg{ 3417ec681f3Smrg return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) || 3427ec681f3Smrg (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)); 3437ec681f3Smrg} 3447ec681f3Smrg 3457ec681f3Smrgstatic inline bool 3467ec681f3Smrgradv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image) 3477ec681f3Smrg{ 3487ec681f3Smrg /* TODO: 3497ec681f3Smrg * - Investigate about mips+layers. 3507ec681f3Smrg * - Enable on other gens. 3517ec681f3Smrg */ 3527ec681f3Smrg bool use_htile_for_mips = 3537ec681f3Smrg image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10; 3547ec681f3Smrg 3557ec681f3Smrg /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. 
*/ 3567ec681f3Smrg if (device->physical_device->rad_info.chip_class == GFX10 && 3577ec681f3Smrg image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1) 3587ec681f3Smrg return false; 3597ec681f3Smrg 3607ec681f3Smrg /* Do not enable HTILE for very small images because it seems less performant but make sure it's 3617ec681f3Smrg * allowed with VRS attachments because we need HTILE. 3627ec681f3Smrg */ 3637ec681f3Smrg if (image->info.width * image->info.height < 8 * 8 && 3647ec681f3Smrg !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) && 3657ec681f3Smrg !device->attachment_vrs_enabled) 3667ec681f3Smrg return false; 3677ec681f3Smrg 3687ec681f3Smrg if (device->instance->disable_htile_layers && image->info.array_size > 1) 3697ec681f3Smrg return false; 3707ec681f3Smrg 3717ec681f3Smrg return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable; 3727ec681f3Smrg} 3737ec681f3Smrg 3747ec681f3Smrgstatic bool 3757ec681f3Smrgradv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image) 3767ec681f3Smrg{ 3777ec681f3Smrg /* TC-compat CMASK is only available for GFX8+. */ 3787ec681f3Smrg if (device->physical_device->rad_info.chip_class < GFX8) 3797ec681f3Smrg return false; 3807ec681f3Smrg 3817ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK) 3827ec681f3Smrg return false; 3837ec681f3Smrg 3847ec681f3Smrg if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) 3857ec681f3Smrg return false; 3867ec681f3Smrg 3877ec681f3Smrg /* Do not enable TC-compatible if the image isn't readable by a shader 3887ec681f3Smrg * because no texture fetches will happen. 3897ec681f3Smrg */ 3907ec681f3Smrg if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | 3917ec681f3Smrg VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) 3927ec681f3Smrg return false; 3937ec681f3Smrg 3947ec681f3Smrg /* If the image doesn't have FMASK, it can't be fetchable. 
*/ 3957ec681f3Smrg if (!radv_image_has_fmask(image)) 3967ec681f3Smrg return false; 3977ec681f3Smrg 3987ec681f3Smrg return true; 3997ec681f3Smrg} 4007ec681f3Smrg 4017ec681f3Smrgstatic uint32_t 4027ec681f3Smrgsi_get_bo_metadata_word1(const struct radv_device *device) 4037ec681f3Smrg{ 4047ec681f3Smrg return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id; 4057ec681f3Smrg} 4067ec681f3Smrg 4077ec681f3Smrgstatic bool 4087ec681f3Smrgradv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md) 4097ec681f3Smrg{ 4107ec681f3Smrg if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device)) 4117ec681f3Smrg return false; 4127ec681f3Smrg 4137ec681f3Smrg if (md->size_metadata < 40) 4147ec681f3Smrg return false; 4157ec681f3Smrg 4167ec681f3Smrg return true; 41701e04c3fSmrg} 41801e04c3fSmrg 41901e04c3fSmrgstatic void 4207ec681f3Smrgradv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface, 4217ec681f3Smrg const struct radeon_bo_metadata *md) 4227ec681f3Smrg{ 4237ec681f3Smrg surface->flags = RADEON_SURF_CLR(surface->flags, MODE); 4247ec681f3Smrg 4257ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 4267ec681f3Smrg if (md->u.gfx9.swizzle_mode > 0) 4277ec681f3Smrg surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); 4287ec681f3Smrg else 4297ec681f3Smrg surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE); 4307ec681f3Smrg 4317ec681f3Smrg surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode; 4327ec681f3Smrg } else { 4337ec681f3Smrg surface->u.legacy.pipe_config = md->u.legacy.pipe_config; 4347ec681f3Smrg surface->u.legacy.bankw = md->u.legacy.bankw; 4357ec681f3Smrg surface->u.legacy.bankh = md->u.legacy.bankh; 4367ec681f3Smrg surface->u.legacy.tile_split = md->u.legacy.tile_split; 4377ec681f3Smrg surface->u.legacy.mtilea = md->u.legacy.mtilea; 4387ec681f3Smrg surface->u.legacy.num_banks = md->u.legacy.num_banks; 4397ec681f3Smrg 
4407ec681f3Smrg if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) 4417ec681f3Smrg surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); 4427ec681f3Smrg else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED) 4437ec681f3Smrg surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE); 4447ec681f3Smrg else 4457ec681f3Smrg surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE); 4467ec681f3Smrg } 4477ec681f3Smrg} 4487ec681f3Smrg 4497ec681f3Smrgstatic VkResult 4507ec681f3Smrgradv_patch_image_dimensions(struct radv_device *device, struct radv_image *image, 4517ec681f3Smrg const struct radv_image_create_info *create_info, 4527ec681f3Smrg struct ac_surf_info *image_info) 4537ec681f3Smrg{ 4547ec681f3Smrg unsigned width = image->info.width; 4557ec681f3Smrg unsigned height = image->info.height; 4567ec681f3Smrg 4577ec681f3Smrg /* 4587ec681f3Smrg * minigbm sometimes allocates bigger images which is going to result in 4597ec681f3Smrg * weird strides and other properties. Lets be lenient where possible and 4607ec681f3Smrg * fail it on GFX10 (as we cannot cope there). 
4617ec681f3Smrg * 4627ec681f3Smrg * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/ 4637ec681f3Smrg */ 4647ec681f3Smrg if (create_info->bo_metadata && 4657ec681f3Smrg radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) { 4667ec681f3Smrg const struct radeon_bo_metadata *md = create_info->bo_metadata; 4677ec681f3Smrg 4687ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10) { 4697ec681f3Smrg width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1; 4707ec681f3Smrg height = G_00A008_HEIGHT(md->metadata[4]) + 1; 4717ec681f3Smrg } else { 4727ec681f3Smrg width = G_008F18_WIDTH(md->metadata[4]) + 1; 4737ec681f3Smrg height = G_008F18_HEIGHT(md->metadata[4]) + 1; 4747ec681f3Smrg } 4757ec681f3Smrg } 4767ec681f3Smrg 4777ec681f3Smrg if (image->info.width == width && image->info.height == height) 4787ec681f3Smrg return VK_SUCCESS; 4797ec681f3Smrg 4807ec681f3Smrg if (width < image->info.width || height < image->info.height) { 4817ec681f3Smrg fprintf(stderr, 4827ec681f3Smrg "The imported image has smaller dimensions than the internal\n" 4837ec681f3Smrg "dimensions. 
Using it is going to fail badly, so we reject\n" 4847ec681f3Smrg "this import.\n" 4857ec681f3Smrg "(internal dimensions: %d x %d, external dimensions: %d x %d)\n", 4867ec681f3Smrg image->info.width, image->info.height, width, height); 4877ec681f3Smrg return VK_ERROR_INVALID_EXTERNAL_HANDLE; 4887ec681f3Smrg } else if (device->physical_device->rad_info.chip_class >= GFX10) { 4897ec681f3Smrg fprintf(stderr, 4907ec681f3Smrg "Tried to import an image with inconsistent width on GFX10.\n" 4917ec681f3Smrg "As GFX10 has no separate stride fields we cannot cope with\n" 4927ec681f3Smrg "an inconsistency in width and will fail this import.\n" 4937ec681f3Smrg "(internal dimensions: %d x %d, external dimensions: %d x %d)\n", 4947ec681f3Smrg image->info.width, image->info.height, width, height); 4957ec681f3Smrg return VK_ERROR_INVALID_EXTERNAL_HANDLE; 4967ec681f3Smrg } else { 4977ec681f3Smrg fprintf(stderr, 4987ec681f3Smrg "Tried to import an image with inconsistent width on pre-GFX10.\n" 4997ec681f3Smrg "As GFX10 has no separate stride fields we cannot cope with\n" 5007ec681f3Smrg "an inconsistency and would fail on GFX10.\n" 5017ec681f3Smrg "(internal dimensions: %d x %d, external dimensions: %d x %d)\n", 5027ec681f3Smrg image->info.width, image->info.height, width, height); 5037ec681f3Smrg } 5047ec681f3Smrg image_info->width = width; 5057ec681f3Smrg image_info->height = height; 5067ec681f3Smrg 5077ec681f3Smrg return VK_SUCCESS; 5087ec681f3Smrg} 5097ec681f3Smrg 5107ec681f3Smrgstatic VkResult 5117ec681f3Smrgradv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image, 5127ec681f3Smrg const struct radv_image_create_info *create_info, 5137ec681f3Smrg struct ac_surf_info *image_info) 5147ec681f3Smrg{ 5157ec681f3Smrg VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info); 5167ec681f3Smrg if (result != VK_SUCCESS) 5177ec681f3Smrg return result; 5187ec681f3Smrg 5197ec681f3Smrg for (unsigned plane = 0; plane < image->plane_count; 
++plane) { 5207ec681f3Smrg if (create_info->bo_metadata) { 5217ec681f3Smrg radv_patch_surface_from_metadata(device, &image->planes[plane].surface, 5227ec681f3Smrg create_info->bo_metadata); 5237ec681f3Smrg } 5247ec681f3Smrg 5257ec681f3Smrg if (radv_surface_has_scanout(device, create_info)) { 5267ec681f3Smrg image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT; 5277ec681f3Smrg if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC) 5287ec681f3Smrg image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC; 5297ec681f3Smrg 5307ec681f3Smrg image->info.surf_index = NULL; 5317ec681f3Smrg } 5327ec681f3Smrg } 5337ec681f3Smrg return VK_SUCCESS; 5347ec681f3Smrg} 5357ec681f3Smrg 5367ec681f3Smrgstatic uint64_t 5377ec681f3Smrgradv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id, 5387ec681f3Smrg const VkImageCreateInfo *pCreateInfo, VkFormat image_format) 5397ec681f3Smrg{ 5407ec681f3Smrg uint64_t flags; 5417ec681f3Smrg unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format); 5427ec681f3Smrg VkFormat format = vk_format_get_plane_format(image_format, plane_id); 5437ec681f3Smrg const struct util_format_description *desc = vk_format_description(format); 5447ec681f3Smrg bool is_depth, is_stencil; 5457ec681f3Smrg 5467ec681f3Smrg is_depth = util_format_has_depth(desc); 5477ec681f3Smrg is_stencil = util_format_has_stencil(desc); 5487ec681f3Smrg 5497ec681f3Smrg flags = RADEON_SURF_SET(array_mode, MODE); 5507ec681f3Smrg 5517ec681f3Smrg switch (pCreateInfo->imageType) { 5527ec681f3Smrg case VK_IMAGE_TYPE_1D: 5537ec681f3Smrg if (pCreateInfo->arrayLayers > 1) 5547ec681f3Smrg flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); 5557ec681f3Smrg else 5567ec681f3Smrg flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); 5577ec681f3Smrg break; 5587ec681f3Smrg case VK_IMAGE_TYPE_2D: 5597ec681f3Smrg if (pCreateInfo->arrayLayers > 1) 5607ec681f3Smrg flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); 
5617ec681f3Smrg else 5627ec681f3Smrg flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); 5637ec681f3Smrg break; 5647ec681f3Smrg case VK_IMAGE_TYPE_3D: 5657ec681f3Smrg flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); 5667ec681f3Smrg break; 5677ec681f3Smrg default: 5687ec681f3Smrg unreachable("unhandled image type"); 5697ec681f3Smrg } 5707ec681f3Smrg 5717ec681f3Smrg /* Required for clearing/initializing a specific layer on GFX8. */ 5727ec681f3Smrg flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS; 5737ec681f3Smrg 5747ec681f3Smrg if (is_depth) { 5757ec681f3Smrg flags |= RADEON_SURF_ZBUFFER; 5767ec681f3Smrg 5777ec681f3Smrg if (radv_use_htile_for_image(device, image) && 5787ec681f3Smrg !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { 5797ec681f3Smrg if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format)) 5807ec681f3Smrg flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; 5817ec681f3Smrg } else { 5827ec681f3Smrg flags |= RADEON_SURF_NO_HTILE; 5837ec681f3Smrg } 5847ec681f3Smrg } 5857ec681f3Smrg 5867ec681f3Smrg if (is_stencil) 5877ec681f3Smrg flags |= RADEON_SURF_SBUFFER; 5887ec681f3Smrg 5897ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && 5907ec681f3Smrg pCreateInfo->imageType == VK_IMAGE_TYPE_3D && 5917ec681f3Smrg vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format)) 5927ec681f3Smrg flags |= RADEON_SURF_NO_RENDER_TARGET; 5937ec681f3Smrg 5947ec681f3Smrg if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, 5957ec681f3Smrg &image->dcc_sign_reinterpret)) 5967ec681f3Smrg flags |= RADEON_SURF_DISABLE_DCC; 5977ec681f3Smrg 5987ec681f3Smrg if (!radv_use_fmask_for_image(device, image)) 5997ec681f3Smrg flags |= RADEON_SURF_NO_FMASK; 6007ec681f3Smrg 6017ec681f3Smrg if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) { 6027ec681f3Smrg flags |= 6037ec681f3Smrg RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC; 6047ec681f3Smrg } 
6057ec681f3Smrg 6067ec681f3Smrg return flags; 6077ec681f3Smrg} 6087ec681f3Smrg 6097ec681f3Smrgstatic inline unsigned 6107ec681f3Smrgsi_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil) 6117ec681f3Smrg{ 6127ec681f3Smrg if (stencil) 6137ec681f3Smrg return plane->surface.u.legacy.zs.stencil_tiling_index[level]; 6147ec681f3Smrg else 6157ec681f3Smrg return plane->surface.u.legacy.tiling_index[level]; 6167ec681f3Smrg} 6177ec681f3Smrg 6187ec681f3Smrgstatic unsigned 6197ec681f3Smrgradv_map_swizzle(unsigned swizzle) 6207ec681f3Smrg{ 6217ec681f3Smrg switch (swizzle) { 6227ec681f3Smrg case PIPE_SWIZZLE_Y: 6237ec681f3Smrg return V_008F0C_SQ_SEL_Y; 6247ec681f3Smrg case PIPE_SWIZZLE_Z: 6257ec681f3Smrg return V_008F0C_SQ_SEL_Z; 6267ec681f3Smrg case PIPE_SWIZZLE_W: 6277ec681f3Smrg return V_008F0C_SQ_SEL_W; 6287ec681f3Smrg case PIPE_SWIZZLE_0: 6297ec681f3Smrg return V_008F0C_SQ_SEL_0; 6307ec681f3Smrg case PIPE_SWIZZLE_1: 6317ec681f3Smrg return V_008F0C_SQ_SEL_1; 6327ec681f3Smrg default: /* PIPE_SWIZZLE_X */ 6337ec681f3Smrg return V_008F0C_SQ_SEL_X; 6347ec681f3Smrg } 63501e04c3fSmrg} 63601e04c3fSmrg 63701e04c3fSmrgstatic void 6387ec681f3Smrgradv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping, 6397ec681f3Smrg enum pipe_swizzle swizzle[4]) 6407ec681f3Smrg{ 6417ec681f3Smrg if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) { 6427ec681f3Smrg /* 64-bit formats only support storage images and storage images 6437ec681f3Smrg * require identity component mappings. We use 32-bit 6447ec681f3Smrg * instructions to access 64-bit images, so we need a special 6457ec681f3Smrg * case here. 6467ec681f3Smrg * 6477ec681f3Smrg * The zw components are 1,0 so that they can be easily be used 6487ec681f3Smrg * by loads to create the w component, which has to be 0 for 6497ec681f3Smrg * NULL descriptors. 
6507ec681f3Smrg */ 6517ec681f3Smrg swizzle[0] = PIPE_SWIZZLE_X; 6527ec681f3Smrg swizzle[1] = PIPE_SWIZZLE_Y; 6537ec681f3Smrg swizzle[2] = PIPE_SWIZZLE_1; 6547ec681f3Smrg swizzle[3] = PIPE_SWIZZLE_0; 6557ec681f3Smrg } else if (!mapping) { 6567ec681f3Smrg for (unsigned i = 0; i < 4; i++) 6577ec681f3Smrg swizzle[i] = desc->swizzle[i]; 6587ec681f3Smrg } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 6597ec681f3Smrg const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, 6607ec681f3Smrg PIPE_SWIZZLE_1}; 6617ec681f3Smrg vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); 6627ec681f3Smrg } else { 6637ec681f3Smrg vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); 6647ec681f3Smrg } 66501e04c3fSmrg} 66601e04c3fSmrg 66701e04c3fSmrgstatic void 6687ec681f3Smrgradv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer, 6697ec681f3Smrg VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state) 6707ec681f3Smrg{ 6717ec681f3Smrg const struct util_format_description *desc; 6727ec681f3Smrg unsigned stride; 6737ec681f3Smrg uint64_t gpu_address = radv_buffer_get_va(buffer->bo); 6747ec681f3Smrg uint64_t va = gpu_address + buffer->offset; 6757ec681f3Smrg unsigned num_format, data_format; 6767ec681f3Smrg int first_non_void; 6777ec681f3Smrg enum pipe_swizzle swizzle[4]; 6787ec681f3Smrg desc = vk_format_description(vk_format); 6797ec681f3Smrg first_non_void = vk_format_get_first_non_void_channel(vk_format); 6807ec681f3Smrg stride = desc->block.bits / 8; 6817ec681f3Smrg 6827ec681f3Smrg radv_compose_swizzle(desc, NULL, swizzle); 6837ec681f3Smrg 6847ec681f3Smrg va += offset; 6857ec681f3Smrg state[0] = va; 6867ec681f3Smrg state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); 6877ec681f3Smrg 6887ec681f3Smrg if (device->physical_device->rad_info.chip_class != GFX8 && stride) { 6897ec681f3Smrg range /= stride; 6907ec681f3Smrg } 6917ec681f3Smrg 6927ec681f3Smrg state[2] = range; 
6937ec681f3Smrg state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 6947ec681f3Smrg S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 6957ec681f3Smrg S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 6967ec681f3Smrg S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3])); 6977ec681f3Smrg 6987ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10) { 6997ec681f3Smrg const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)]; 7007ec681f3Smrg 7017ec681f3Smrg /* OOB_SELECT chooses the out-of-bounds check: 7027ec681f3Smrg * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE) 7037ec681f3Smrg * - 1: index >= NUM_RECORDS 7047ec681f3Smrg * - 2: NUM_RECORDS == 0 7057ec681f3Smrg * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS 7067ec681f3Smrg * else: swizzle_address >= NUM_RECORDS 7077ec681f3Smrg */ 7087ec681f3Smrg state[3] |= S_008F0C_FORMAT(fmt->img_format) | 7097ec681f3Smrg S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | 7107ec681f3Smrg S_008F0C_RESOURCE_LEVEL(1); 7117ec681f3Smrg } else { 7127ec681f3Smrg num_format = radv_translate_buffer_numformat(desc, first_non_void); 7137ec681f3Smrg data_format = radv_translate_buffer_dataformat(desc, first_non_void); 7147ec681f3Smrg 7157ec681f3Smrg assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID); 7167ec681f3Smrg assert(num_format != ~0); 7177ec681f3Smrg 7187ec681f3Smrg state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 7197ec681f3Smrg } 72001e04c3fSmrg} 72101e04c3fSmrg 72201e04c3fSmrgstatic void 7237ec681f3Smrgsi_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image, 7247ec681f3Smrg const struct legacy_surf_level *base_level_info, unsigned plane_id, 7257ec681f3Smrg unsigned base_level, unsigned first_level, unsigned block_width, 7267ec681f3Smrg bool is_stencil, bool is_storage_image, bool disable_compression, 7277ec681f3Smrg bool enable_write_compression, uint32_t *state) 72801e04c3fSmrg{ 
7297ec681f3Smrg struct radv_image_plane *plane = &image->planes[plane_id]; 7307ec681f3Smrg uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0; 7317ec681f3Smrg uint64_t va = gpu_address; 7327ec681f3Smrg enum chip_class chip_class = device->physical_device->rad_info.chip_class; 7337ec681f3Smrg uint64_t meta_va = 0; 7347ec681f3Smrg if (chip_class >= GFX9) { 7357ec681f3Smrg if (is_stencil) 7367ec681f3Smrg va += plane->surface.u.gfx9.zs.stencil_offset; 7377ec681f3Smrg else 7387ec681f3Smrg va += plane->surface.u.gfx9.surf_offset; 7397ec681f3Smrg } else 7407ec681f3Smrg va += (uint64_t)base_level_info->offset_256B * 256; 7417ec681f3Smrg 7427ec681f3Smrg state[0] = va >> 8; 7437ec681f3Smrg if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) 7447ec681f3Smrg state[0] |= plane->surface.tile_swizzle; 7457ec681f3Smrg state[1] &= C_008F14_BASE_ADDRESS_HI; 7467ec681f3Smrg state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 7477ec681f3Smrg 7487ec681f3Smrg if (chip_class >= GFX8) { 7497ec681f3Smrg state[6] &= C_008F28_COMPRESSION_EN; 7507ec681f3Smrg state[7] = 0; 7517ec681f3Smrg if (!disable_compression && radv_dcc_enabled(image, first_level)) { 7527ec681f3Smrg meta_va = gpu_address + plane->surface.meta_offset; 7537ec681f3Smrg if (chip_class <= GFX8) 7547ec681f3Smrg meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset; 7557ec681f3Smrg 7567ec681f3Smrg unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8; 7577ec681f3Smrg dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1; 7587ec681f3Smrg meta_va |= dcc_tile_swizzle; 7597ec681f3Smrg } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) { 7607ec681f3Smrg meta_va = gpu_address + plane->surface.meta_offset; 7617ec681f3Smrg } 7627ec681f3Smrg 7637ec681f3Smrg if (meta_va) { 7647ec681f3Smrg state[6] |= S_008F28_COMPRESSION_EN(1); 7657ec681f3Smrg if (chip_class <= GFX9) 7667ec681f3Smrg state[7] = meta_va >> 8; 7677ec681f3Smrg } 
7687ec681f3Smrg } 7697ec681f3Smrg 7707ec681f3Smrg if (chip_class >= GFX10) { 7717ec681f3Smrg state[3] &= C_00A00C_SW_MODE; 7727ec681f3Smrg 7737ec681f3Smrg if (is_stencil) { 7747ec681f3Smrg state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode); 7757ec681f3Smrg } else { 7767ec681f3Smrg state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode); 7777ec681f3Smrg } 7787ec681f3Smrg 7797ec681f3Smrg state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED; 7807ec681f3Smrg 7817ec681f3Smrg if (meta_va) { 7827ec681f3Smrg struct gfx9_surf_meta_flags meta = { 7837ec681f3Smrg .rb_aligned = 1, 7847ec681f3Smrg .pipe_aligned = 1, 7857ec681f3Smrg }; 7867ec681f3Smrg 7877ec681f3Smrg if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER)) 7887ec681f3Smrg meta = plane->surface.u.gfx9.color.dcc; 7897ec681f3Smrg 7907ec681f3Smrg if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression) 7917ec681f3Smrg state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1); 7927ec681f3Smrg 7937ec681f3Smrg state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | 7947ec681f3Smrg S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8); 7957ec681f3Smrg } 7967ec681f3Smrg 7977ec681f3Smrg state[7] = meta_va >> 16; 7987ec681f3Smrg } else if (chip_class == GFX9) { 7997ec681f3Smrg state[3] &= C_008F1C_SW_MODE; 8007ec681f3Smrg state[4] &= C_008F20_PITCH; 8017ec681f3Smrg 8027ec681f3Smrg if (is_stencil) { 8037ec681f3Smrg state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode); 8047ec681f3Smrg state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch); 8057ec681f3Smrg } else { 8067ec681f3Smrg state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode); 8077ec681f3Smrg state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch); 8087ec681f3Smrg } 8097ec681f3Smrg 8107ec681f3Smrg state[5] &= 8117ec681f3Smrg C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED; 8127ec681f3Smrg if (meta_va) { 8137ec681f3Smrg 
struct gfx9_surf_meta_flags meta = { 8147ec681f3Smrg .rb_aligned = 1, 8157ec681f3Smrg .pipe_aligned = 1, 8167ec681f3Smrg }; 8177ec681f3Smrg 8187ec681f3Smrg if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER)) 8197ec681f3Smrg meta = plane->surface.u.gfx9.color.dcc; 8207ec681f3Smrg 8217ec681f3Smrg state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | 8227ec681f3Smrg S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | 8237ec681f3Smrg S_008F24_META_RB_ALIGNED(meta.rb_aligned); 8247ec681f3Smrg } 8257ec681f3Smrg } else { 8267ec681f3Smrg /* GFX6-GFX8 */ 8277ec681f3Smrg unsigned pitch = base_level_info->nblk_x * block_width; 8287ec681f3Smrg unsigned index = si_tile_mode_index(plane, base_level, is_stencil); 8297ec681f3Smrg 8307ec681f3Smrg state[3] &= C_008F1C_TILING_INDEX; 8317ec681f3Smrg state[3] |= S_008F1C_TILING_INDEX(index); 8327ec681f3Smrg state[4] &= C_008F20_PITCH; 8337ec681f3Smrg state[4] |= S_008F20_PITCH(pitch - 1); 8347ec681f3Smrg } 8357ec681f3Smrg} 83601e04c3fSmrg 8377ec681f3Smrgstatic unsigned 8387ec681f3Smrgradv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers, 8397ec681f3Smrg unsigned nr_samples, bool is_storage_image, bool gfx9) 8407ec681f3Smrg{ 8417ec681f3Smrg if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) 8427ec681f3Smrg return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE; 8437ec681f3Smrg 8447ec681f3Smrg /* GFX9 allocates 1D textures as 2D. */ 8457ec681f3Smrg if (gfx9 && image_type == VK_IMAGE_TYPE_1D) 8467ec681f3Smrg image_type = VK_IMAGE_TYPE_2D; 8477ec681f3Smrg switch (image_type) { 8487ec681f3Smrg case VK_IMAGE_TYPE_1D: 8497ec681f3Smrg return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D; 8507ec681f3Smrg case VK_IMAGE_TYPE_2D: 8517ec681f3Smrg if (nr_samples > 1) 8527ec681f3Smrg return nr_layers > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA; 8537ec681f3Smrg else 8547ec681f3Smrg return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D; 8557ec681f3Smrg case VK_IMAGE_TYPE_3D: 8567ec681f3Smrg if (view_type == VK_IMAGE_VIEW_TYPE_3D) 8577ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_3D; 8587ec681f3Smrg else 8597ec681f3Smrg return V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 8607ec681f3Smrg default: 8617ec681f3Smrg unreachable("illegal image type"); 8627ec681f3Smrg } 8637ec681f3Smrg} 86401e04c3fSmrg 8657ec681f3Smrgstatic unsigned 8667ec681f3Smrggfx9_border_color_swizzle(const struct util_format_description *desc) 8677ec681f3Smrg{ 8687ec681f3Smrg unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 8697ec681f3Smrg 8707ec681f3Smrg if (desc->swizzle[3] == PIPE_SWIZZLE_X) { 8717ec681f3Smrg /* For the pre-defined border color values (white, opaque 8727ec681f3Smrg * black, transparent black), the only thing that matters is 8737ec681f3Smrg * that the alpha channel winds up in the correct place 8747ec681f3Smrg * (because the RGB channels are all the same) so either of 8757ec681f3Smrg * these enumerations will work. 
8767ec681f3Smrg */ 8777ec681f3Smrg if (desc->swizzle[2] == PIPE_SWIZZLE_Y) 8787ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 8797ec681f3Smrg else 8807ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 8817ec681f3Smrg } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) { 8827ec681f3Smrg if (desc->swizzle[1] == PIPE_SWIZZLE_Y) 8837ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 8847ec681f3Smrg else 8857ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 8867ec681f3Smrg } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) { 8877ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 8887ec681f3Smrg } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) { 8897ec681f3Smrg bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 8907ec681f3Smrg } 8917ec681f3Smrg 8927ec681f3Smrg return bc_swizzle; 89301e04c3fSmrg} 89401e04c3fSmrg 8957ec681f3Smrgbool 8967ec681f3Smrgvi_alpha_is_on_msb(struct radv_device *device, VkFormat format) 8977ec681f3Smrg{ 8987ec681f3Smrg const struct util_format_description *desc = vk_format_description(format); 8997ec681f3Smrg 9007ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1) 9017ec681f3Smrg return desc->swizzle[3] == PIPE_SWIZZLE_X; 9027ec681f3Smrg 9037ec681f3Smrg return radv_translate_colorswap(format, false) <= 1; 9047ec681f3Smrg} 9057ec681f3Smrg/** 9067ec681f3Smrg * Build the sampler view descriptor for a texture (GFX10). 
9077ec681f3Smrg */ 90801e04c3fSmrgstatic void 9097ec681f3Smrggfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image, 9107ec681f3Smrg bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, 9117ec681f3Smrg const VkComponentMapping *mapping, unsigned first_level, 9127ec681f3Smrg unsigned last_level, unsigned first_layer, unsigned last_layer, 9137ec681f3Smrg unsigned width, unsigned height, unsigned depth, uint32_t *state, 9147ec681f3Smrg uint32_t *fmask_state) 91501e04c3fSmrg{ 9167ec681f3Smrg const struct util_format_description *desc; 9177ec681f3Smrg enum pipe_swizzle swizzle[4]; 9187ec681f3Smrg unsigned img_format; 9197ec681f3Smrg unsigned type; 9207ec681f3Smrg 9217ec681f3Smrg desc = vk_format_description(vk_format); 9227ec681f3Smrg img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format; 9237ec681f3Smrg 9247ec681f3Smrg radv_compose_swizzle(desc, mapping, swizzle); 9257ec681f3Smrg 9267ec681f3Smrg type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples, 9277ec681f3Smrg is_storage_image, device->physical_device->rad_info.chip_class == GFX9); 9287ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 9297ec681f3Smrg height = 1; 9307ec681f3Smrg depth = image->info.array_size; 9317ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 9327ec681f3Smrg if (view_type != VK_IMAGE_VIEW_TYPE_3D) 9337ec681f3Smrg depth = image->info.array_size; 9347ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 9357ec681f3Smrg depth = image->info.array_size / 6; 9367ec681f3Smrg 9377ec681f3Smrg state[0] = 0; 9387ec681f3Smrg state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 9397ec681f3Smrg state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 9407ec681f3Smrg S_00A008_RESOURCE_LEVEL(1); 9417ec681f3Smrg state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 9427ec681f3Smrg 
S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 9437ec681f3Smrg S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 9447ec681f3Smrg S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | 9457ec681f3Smrg S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) | 9467ec681f3Smrg S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples) 9477ec681f3Smrg : last_level) | 9487ec681f3Smrg S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type); 9497ec681f3Smrg /* Depth is the the last accessible layer on gfx9+. The hw doesn't need 9507ec681f3Smrg * to know the total number of layers. 9517ec681f3Smrg */ 9527ec681f3Smrg state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | 9537ec681f3Smrg S_00A010_BASE_ARRAY(first_layer); 9547ec681f3Smrg state[5] = S_00A014_ARRAY_PITCH(0) | 9557ec681f3Smrg S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples) 9567ec681f3Smrg : image->info.levels - 1) | 9577ec681f3Smrg S_00A014_PERF_MOD(4); 9587ec681f3Smrg state[6] = 0; 9597ec681f3Smrg state[7] = 0; 9607ec681f3Smrg 9617ec681f3Smrg if (radv_dcc_enabled(image, first_level)) { 9627ec681f3Smrg state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 9637ec681f3Smrg S_00A018_MAX_COMPRESSED_BLOCK_SIZE( 9647ec681f3Smrg image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) | 9657ec681f3Smrg S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); 9667ec681f3Smrg } 9677ec681f3Smrg 9687ec681f3Smrg if (radv_image_get_iterate256(device, image)) { 9697ec681f3Smrg state[6] |= S_00A018_ITERATE_256(1); 9707ec681f3Smrg } 9717ec681f3Smrg 9727ec681f3Smrg /* Initialize the sampler view for FMASK. 
*/ 9737ec681f3Smrg if (fmask_state) { 9747ec681f3Smrg if (radv_image_has_fmask(image)) { 9757ec681f3Smrg uint64_t gpu_address = radv_buffer_get_va(image->bo); 9767ec681f3Smrg uint32_t format; 9777ec681f3Smrg uint64_t va; 9787ec681f3Smrg 9797ec681f3Smrg assert(image->plane_count == 1); 9807ec681f3Smrg 9817ec681f3Smrg va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; 9827ec681f3Smrg 9837ec681f3Smrg switch (image->info.samples) { 9847ec681f3Smrg case 2: 9857ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 9867ec681f3Smrg break; 9877ec681f3Smrg case 4: 9887ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 9897ec681f3Smrg break; 9907ec681f3Smrg case 8: 9917ec681f3Smrg format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 9927ec681f3Smrg break; 9937ec681f3Smrg default: 9947ec681f3Smrg unreachable("invalid nr_samples"); 9957ec681f3Smrg } 9967ec681f3Smrg 9977ec681f3Smrg fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle; 9987ec681f3Smrg fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 9997ec681f3Smrg S_00A004_WIDTH_LO(width - 1); 10007ec681f3Smrg fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 10017ec681f3Smrg S_00A008_RESOURCE_LEVEL(1); 10027ec681f3Smrg fmask_state[3] = 10037ec681f3Smrg S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 10047ec681f3Smrg S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 10057ec681f3Smrg S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) | 10067ec681f3Smrg S_00A00C_TYPE( 10077ec681f3Smrg radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); 10087ec681f3Smrg fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 10097ec681f3Smrg fmask_state[5] = 0; 10107ec681f3Smrg fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 10117ec681f3Smrg fmask_state[7] = 0; 10127ec681f3Smrg 10137ec681f3Smrg if 
(radv_image_is_tc_compat_cmask(image)) { 10147ec681f3Smrg va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; 10157ec681f3Smrg 10167ec681f3Smrg fmask_state[6] |= S_00A018_COMPRESSION_EN(1); 10177ec681f3Smrg fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8); 10187ec681f3Smrg fmask_state[7] |= va >> 16; 10197ec681f3Smrg } 10207ec681f3Smrg } else 10217ec681f3Smrg memset(fmask_state, 0, 8 * 4); 10227ec681f3Smrg } 10237ec681f3Smrg} 102401e04c3fSmrg 10257ec681f3Smrg/** 10267ec681f3Smrg * Build the sampler view descriptor for a texture (SI-GFX9) 10277ec681f3Smrg */ 10287ec681f3Smrgstatic void 10297ec681f3Smrgsi_make_texture_descriptor(struct radv_device *device, struct radv_image *image, 10307ec681f3Smrg bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, 10317ec681f3Smrg const VkComponentMapping *mapping, unsigned first_level, 10327ec681f3Smrg unsigned last_level, unsigned first_layer, unsigned last_layer, 10337ec681f3Smrg unsigned width, unsigned height, unsigned depth, uint32_t *state, 10347ec681f3Smrg uint32_t *fmask_state) 10357ec681f3Smrg{ 10367ec681f3Smrg const struct util_format_description *desc; 10377ec681f3Smrg enum pipe_swizzle swizzle[4]; 10387ec681f3Smrg int first_non_void; 10397ec681f3Smrg unsigned num_format, data_format, type; 10407ec681f3Smrg 10417ec681f3Smrg desc = vk_format_description(vk_format); 10427ec681f3Smrg 10437ec681f3Smrg radv_compose_swizzle(desc, mapping, swizzle); 10447ec681f3Smrg 10457ec681f3Smrg first_non_void = vk_format_get_first_non_void_channel(vk_format); 10467ec681f3Smrg 10477ec681f3Smrg num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void); 10487ec681f3Smrg if (num_format == ~0) { 10497ec681f3Smrg num_format = 0; 10507ec681f3Smrg } 10517ec681f3Smrg 10527ec681f3Smrg data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void); 10537ec681f3Smrg if (data_format == ~0) { 10547ec681f3Smrg data_format = 0; 10557ec681f3Smrg } 10567ec681f3Smrg 
10577ec681f3Smrg /* S8 with either Z16 or Z32 HTILE need a special format. */ 10587ec681f3Smrg if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT && 10597ec681f3Smrg radv_image_is_tc_compat_htile(image)) { 10607ec681f3Smrg if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) 10617ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 10627ec681f3Smrg else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT) 10637ec681f3Smrg data_format = V_008F14_IMG_DATA_FORMAT_S8_16; 10647ec681f3Smrg } 10657ec681f3Smrg type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples, 10667ec681f3Smrg is_storage_image, device->physical_device->rad_info.chip_class == GFX9); 10677ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 10687ec681f3Smrg height = 1; 10697ec681f3Smrg depth = image->info.array_size; 10707ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 10717ec681f3Smrg if (view_type != VK_IMAGE_VIEW_TYPE_3D) 10727ec681f3Smrg depth = image->info.array_size; 10737ec681f3Smrg } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 10747ec681f3Smrg depth = image->info.array_size / 6; 10757ec681f3Smrg 10767ec681f3Smrg state[0] = 0; 10777ec681f3Smrg state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); 10787ec681f3Smrg state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); 10797ec681f3Smrg state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 10807ec681f3Smrg S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 10817ec681f3Smrg S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 10827ec681f3Smrg S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | 10837ec681f3Smrg S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) | 10847ec681f3Smrg S_008F1C_LAST_LEVEL(image->info.samples > 1 ? 
util_logbase2(image->info.samples) 10857ec681f3Smrg : last_level) | 10867ec681f3Smrg S_008F1C_TYPE(type)); 10877ec681f3Smrg state[4] = 0; 10887ec681f3Smrg state[5] = S_008F24_BASE_ARRAY(first_layer); 10897ec681f3Smrg state[6] = 0; 10907ec681f3Smrg state[7] = 0; 10917ec681f3Smrg 10927ec681f3Smrg if (device->physical_device->rad_info.chip_class == GFX9) { 10937ec681f3Smrg unsigned bc_swizzle = gfx9_border_color_swizzle(desc); 10947ec681f3Smrg 10957ec681f3Smrg /* Depth is the last accessible layer on Gfx9. 10967ec681f3Smrg * The hw doesn't need to know the total number of layers. 10977ec681f3Smrg */ 10987ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_3D) 10997ec681f3Smrg state[4] |= S_008F20_DEPTH(depth - 1); 11007ec681f3Smrg else 11017ec681f3Smrg state[4] |= S_008F20_DEPTH(last_layer); 11027ec681f3Smrg 11037ec681f3Smrg state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 11047ec681f3Smrg state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples) 11057ec681f3Smrg : image->info.levels - 1); 11067ec681f3Smrg } else { 11077ec681f3Smrg state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1); 11087ec681f3Smrg state[4] |= S_008F20_DEPTH(depth - 1); 11097ec681f3Smrg state[5] |= S_008F24_LAST_ARRAY(last_layer); 11107ec681f3Smrg } 11117ec681f3Smrg if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && 11127ec681f3Smrg image->planes[0].surface.meta_offset) { 11137ec681f3Smrg state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); 11147ec681f3Smrg } else { 11157ec681f3Smrg /* The last dword is unused by hw. The shader uses it to clear 11167ec681f3Smrg * bits in the first dword of sampler state. 
11177ec681f3Smrg */ 11187ec681f3Smrg if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) { 11197ec681f3Smrg if (first_level == last_level) 11207ec681f3Smrg state[7] = C_008F30_MAX_ANISO_RATIO; 11217ec681f3Smrg else 11227ec681f3Smrg state[7] = 0xffffffff; 11237ec681f3Smrg } 11247ec681f3Smrg } 11257ec681f3Smrg 11267ec681f3Smrg /* Initialize the sampler view for FMASK. */ 11277ec681f3Smrg if (fmask_state) { 11287ec681f3Smrg if (radv_image_has_fmask(image)) { 11297ec681f3Smrg uint32_t fmask_format; 11307ec681f3Smrg uint64_t gpu_address = radv_buffer_get_va(image->bo); 11317ec681f3Smrg uint64_t va; 11327ec681f3Smrg 11337ec681f3Smrg assert(image->plane_count == 1); 11347ec681f3Smrg 11357ec681f3Smrg va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; 11367ec681f3Smrg 11377ec681f3Smrg if (device->physical_device->rad_info.chip_class == GFX9) { 11387ec681f3Smrg fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; 11397ec681f3Smrg switch (image->info.samples) { 11407ec681f3Smrg case 2: 11417ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2; 11427ec681f3Smrg break; 11437ec681f3Smrg case 4: 11447ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4; 11457ec681f3Smrg break; 11467ec681f3Smrg case 8: 11477ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8; 11487ec681f3Smrg break; 11497ec681f3Smrg default: 11507ec681f3Smrg unreachable("invalid nr_samples"); 11517ec681f3Smrg } 11527ec681f3Smrg } else { 11537ec681f3Smrg switch (image->info.samples) { 11547ec681f3Smrg case 2: 11557ec681f3Smrg fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 11567ec681f3Smrg break; 11577ec681f3Smrg case 4: 11587ec681f3Smrg fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 11597ec681f3Smrg break; 11607ec681f3Smrg case 8: 11617ec681f3Smrg fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 11627ec681f3Smrg break; 11637ec681f3Smrg default: 11647ec681f3Smrg assert(0); 11657ec681f3Smrg fmask_format = 
V_008F14_IMG_DATA_FORMAT_INVALID; 11667ec681f3Smrg } 11677ec681f3Smrg num_format = V_008F14_IMG_NUM_FORMAT_UINT; 11687ec681f3Smrg } 11697ec681f3Smrg 11707ec681f3Smrg fmask_state[0] = va >> 8; 11717ec681f3Smrg fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle; 11727ec681f3Smrg fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) | 11737ec681f3Smrg S_008F14_NUM_FORMAT(num_format); 11747ec681f3Smrg fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); 11757ec681f3Smrg fmask_state[3] = 11767ec681f3Smrg S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 11777ec681f3Smrg S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 11787ec681f3Smrg S_008F1C_TYPE( 11797ec681f3Smrg radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); 11807ec681f3Smrg fmask_state[4] = 0; 11817ec681f3Smrg fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 11827ec681f3Smrg fmask_state[6] = 0; 11837ec681f3Smrg fmask_state[7] = 0; 11847ec681f3Smrg 11857ec681f3Smrg if (device->physical_device->rad_info.chip_class == GFX9) { 11867ec681f3Smrg fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode); 11877ec681f3Smrg fmask_state[4] |= S_008F20_DEPTH(last_layer) | 11887ec681f3Smrg S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch); 11897ec681f3Smrg fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1); 11907ec681f3Smrg 11917ec681f3Smrg if (radv_image_is_tc_compat_cmask(image)) { 11927ec681f3Smrg va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; 11937ec681f3Smrg 11947ec681f3Smrg fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40); 11957ec681f3Smrg fmask_state[6] |= S_008F28_COMPRESSION_EN(1); 11967ec681f3Smrg fmask_state[7] |= va >> 8; 11977ec681f3Smrg } 11987ec681f3Smrg } else { 11997ec681f3Smrg fmask_state[3] |= 12007ec681f3Smrg 
S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index); 12017ec681f3Smrg fmask_state[4] |= 12027ec681f3Smrg S_008F20_DEPTH(depth - 1) | 12037ec681f3Smrg S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1); 12047ec681f3Smrg fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 12057ec681f3Smrg 12067ec681f3Smrg if (radv_image_is_tc_compat_cmask(image)) { 12077ec681f3Smrg va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; 12087ec681f3Smrg 12097ec681f3Smrg fmask_state[6] |= S_008F28_COMPRESSION_EN(1); 12107ec681f3Smrg fmask_state[7] |= va >> 8; 12117ec681f3Smrg } 12127ec681f3Smrg } 12137ec681f3Smrg } else 12147ec681f3Smrg memset(fmask_state, 0, 8 * 4); 12157ec681f3Smrg } 121601e04c3fSmrg} 121701e04c3fSmrg 121801e04c3fSmrgstatic void 12197ec681f3Smrgradv_make_texture_descriptor(struct radv_device *device, struct radv_image *image, 12207ec681f3Smrg bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, 12217ec681f3Smrg const VkComponentMapping *mapping, unsigned first_level, 12227ec681f3Smrg unsigned last_level, unsigned first_layer, unsigned last_layer, 12237ec681f3Smrg unsigned width, unsigned height, unsigned depth, uint32_t *state, 12247ec681f3Smrg uint32_t *fmask_state) 12257ec681f3Smrg{ 12267ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10) { 12277ec681f3Smrg gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, 12287ec681f3Smrg first_level, last_level, first_layer, last_layer, width, height, 12297ec681f3Smrg depth, state, fmask_state); 12307ec681f3Smrg } else { 12317ec681f3Smrg si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, 12327ec681f3Smrg first_level, last_level, first_layer, last_layer, width, height, 12337ec681f3Smrg depth, state, fmask_state); 12347ec681f3Smrg } 123501e04c3fSmrg} 123601e04c3fSmrg 123701e04c3fSmrgstatic void 
12387ec681f3Smrgradv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, 12397ec681f3Smrg struct radeon_bo_metadata *md) 124001e04c3fSmrg{ 12417ec681f3Smrg static const VkComponentMapping fixedmapping; 12427ec681f3Smrg uint32_t desc[8]; 12437ec681f3Smrg 12447ec681f3Smrg assert(image->plane_count == 1); 124501e04c3fSmrg 12467ec681f3Smrg radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type, 12477ec681f3Smrg image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0, 12487ec681f3Smrg image->info.array_size - 1, image->info.width, image->info.height, 12497ec681f3Smrg image->info.depth, desc, NULL); 1250ed98bd31Smaya 12517ec681f3Smrg si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 12527ec681f3Smrg 0, image->planes[0].surface.blk_w, false, false, false, false, 12537ec681f3Smrg desc); 1254ed98bd31Smaya 12557ec681f3Smrg ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, 12567ec681f3Smrg image->info.levels, desc, &md->size_metadata, md->metadata); 125701e04c3fSmrg} 125801e04c3fSmrg 12597ec681f3Smrgvoid 12607ec681f3Smrgradv_init_metadata(struct radv_device *device, struct radv_image *image, 12617ec681f3Smrg struct radeon_bo_metadata *metadata) 126201e04c3fSmrg{ 12637ec681f3Smrg struct radeon_surf *surface = &image->planes[0].surface; 12647ec681f3Smrg 12657ec681f3Smrg memset(metadata, 0, sizeof(*metadata)); 12667ec681f3Smrg 12677ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 12687ec681f3Smrg uint64_t dcc_offset = 12697ec681f3Smrg image->offset + 12707ec681f3Smrg (surface->display_dcc_offset ? 
surface->display_dcc_offset : surface->meta_offset); 12717ec681f3Smrg metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode; 12727ec681f3Smrg metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8; 12737ec681f3Smrg metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max; 12747ec681f3Smrg metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks; 12757ec681f3Smrg metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks; 12767ec681f3Smrg metadata->u.gfx9.dcc_max_compressed_block_size = 12777ec681f3Smrg surface->u.gfx9.color.dcc.max_compressed_block_size; 12787ec681f3Smrg metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; 12797ec681f3Smrg } else { 12807ec681f3Smrg metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D 12817ec681f3Smrg ? RADEON_LAYOUT_TILED 12827ec681f3Smrg : RADEON_LAYOUT_LINEAR; 12837ec681f3Smrg metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D 12847ec681f3Smrg ? 
                                                RADEON_LAYOUT_TILED
                                                : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}

/* Override the offset/stride of the first plane's surface, forwarding to
 * ac_surface_override_offset_stride() with the image's mip count.
 * NOTE(review): only planes[0] is touched — presumably used for imported
 * single-plane images; confirm against callers. */
void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

/* Retroactively allocate a CMASK for a single-sample image that did not get
 * one from surface computation. Bails out when a CMASK already exists or has
 * no computed size, when the surface is unsuitable (bpe > 8, mipmapped,
 * depth > 1, DCC present), when fast clears are disabled for this image, or
 * for sparse images. The CMASK is appended after the surface data. */
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

   /* Append the CMASK at the end of the surface, respecting its alignment. */
   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size =
      surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

/* Reserve space at the end of the image for the small metadata words used by
 * clears/compression: FCE predicates, DCC predicates and clear values
 * (8 bytes per level each), plus the TC-compat zrange workaround words
 * (4 bytes per level). Offsets are recorded on the image; image->size grows
 * accordingly. */
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* images with modifiers can be potentially imported */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * have to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   /* Only meaningful on GFX10+; callers gate on the chip class. */
   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      /* NOTE(review): alignment heuristic derived from GB_ADDR_CONFIG
       * fields — confirm against the equivalent radeonsi logic when
       * updating. */
      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         /* Depth: only flagged when TC-compatible HTILE is in use. */
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}

/* Report whether writes to this image are coherent with the L2 cache, i.e.
 * whether L2 invalidation can be skipped between attachment writes and
 * shader access. */
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      return !device->physical_device->rad_info.tcc_rb_non_coherent &&
             !radv_image_is_pipe_misaligned(device, image);
   } else if (device->physical_device->rad_info.chip_class == GFX9) {
      if (image->info.samples == 1 &&
          (image->usage &
           (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
          !vk_format_has_stencil(image->vk_format)) {
         /* Single-sample color and single-sample depth
          * (not stencil) are coherent with shaders on
          * GFX9.
          */
         return true;
      }
   }

   return false;
}

/**
 * Determine if the given image can be fast cleared.
 */
static bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
      return false;

   if (vk_format_is_color(image->vk_format)) {
      /* Color fast clears need CMASK and/or DCC metadata. */
      if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
         return false;

      /* RB+ doesn't work with CMASK fast clear on Stoney. */
      if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
         return false;
   } else {
      /* Depth/stencil fast clears need HTILE. */
      if (!radv_image_has_htile(image))
         return false;
   }

   /* Do not fast clears 3D images. */
   if (image->type == VK_IMAGE_TYPE_3D)
      return false;

   return true;
}

/**
 * Determine if the given image can be fast cleared using comp-to-single.
 */
static bool
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
{
   /* comp-to-single is only available for GFX10+. */
   if (device->physical_device->rad_info.chip_class < GFX10)
      return false;

   /* If the image can't be fast cleared, comp-to-single can't be used.
    */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
   if (!radv_image_has_dcc(image))
      return false;

   /* It seems 8bpp and 16bpp require RB+ to work. */
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format);
   if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
      return false;

   return true;
}

/* Reset the image layout state (sizes, metadata offsets, per-plane surfaces)
 * before (re)computing the layout. Surface flags and the modifier survive
 * the reset; block dimensions and bpe are re-derived from each plane's
 * format. */
static void
radv_image_reset_layout(struct radv_image *image)
{
   image->size = 0;
   image->alignment = 1;

   image->tc_compatible_cmask = 0;
   image->fce_pred_offset = image->dcc_pred_offset = 0;
   image->clear_value_offset = image->tc_compat_zrange_offset = 0;

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(image->vk_format, i);
      if (vk_format_has_depth(format))
         format = vk_format_depth_only(format);

      /* Preserve flags/modifier across the memset: they were chosen at
       * image creation and must survive a layout recomputation. */
      uint64_t flags = image->planes[i].surface.flags;
      uint64_t modifier = image->planes[i].surface.modifier;
      memset(image->planes + i, 0, sizeof(image->planes[i]));

      image->planes[i].surface.flags = flags;
      image->planes[i].surface.modifier = modifier;
      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
      image->planes[i].surface.bpe = vk_format_get_blocksize(format);
      /* align byte per element on dword */
      if (image->planes[i].surface.bpe == 3) {
         image->planes[i].surface.bpe = 4;
      }
   }
}

/* Compute the full layout of the image: per-plane surfaces, plane offsets and
 * strides, an optional retroactive single-sample CMASK, and the trailing
 * metadata values. For explicit-modifier imports (mod_info != NULL) the
 * caller-provided plane layouts are validated rather than computed.
 *
 * Returns VK_ERROR_INVALID_EXTERNAL_HANDLE for bad imported BO metadata and
 * VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT for bad explicit
 * modifier layouts. */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(image);

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      /* Multi-plane images (and explicitly requested cases) get no
       * compression metadata at all. */
      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Apply imported UMD metadata; reject the import if it is invalid. */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* The row pitch must be a non-zero multiple of the element size. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout.
       */
      if (image->plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}

/* Destroy an image: release the sparse virtual BO (if any), free any memory
 * the image owns, then finish and free the object itself. */
static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_image *image)
{
   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
      device->ws->buffer_destroy(device->ws, image->bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      /* The image owns this allocation. NOTE(review): presumably the
       * Android hardware buffer path — confirm against callers. */
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

/* Dump the image layout (size/alignment plus per-plane surface info) to
 * stderr. Only reached when the RADV_DEBUG_IMG debug flag is set. */
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", "
           "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
           image->size, image->alignment, image->info.width, image->info.height, image->offset,
           image->info.array_size);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, 0);

      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}

/* Pick the DRM format modifier to use: walk the driver's supported modifier
 * list (which is in driver preference order) and return the first one that
 * also appears in the application-provided list. The app list must contain
 * at least one modifier we support (enforced by unreachable() below). */
static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = dev->physical_device;
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the application
    * is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   /* First call queries only the count. */
   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocations fail, fall back to a dumber solution.
    */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, mods);

   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   unreachable("App specified an invalid modifier");
}

/* Create a radv_image: allocate the object (with trailing per-plane array),
 * fill it from the VkImageCreateInfo, choose a modifier if requested, and
 * compute the layout (unless delayed for Android hardware buffers). A
 * sparse image additionally gets a virtual BO. */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   /* The plane array is allocated inline after the image struct. */
   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) *
plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign queues widen the mask to all queue families. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   /* A non-NULL external-memory chain entry marks the image shareable. */
   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   /* Android hardware buffers: the layout is computed later at bind time. */
   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);

   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      /* Sparse images get a page-aligned virtual BO covering the image. */
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}

/* Fill one plane's descriptor (sampled or storage flavor) of an image view,
 * including the mutable fields derived from the current tiling/compression
 * state. FMASK descriptors are only emitted for plane 0 of sampled views. */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   /* Scale the surface block width into units of the view format's blocks. */
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   /* On GFX9+ the hardware handles the mip offset itself, so the base mip
    * is programmed into the descriptor. */
   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);

   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }

   bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
   if (is_storage_image && !(enable_write_compression || enable_compression))
      disable_compression = true;
   si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
                                  iview->base_mip, blk_w, is_stencil, is_storage_image,
                                  disable_compression, enable_write_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}

/* Map an image/memory-plane aspect to its plane index (0 for everything that
 * is not an explicit plane-1/2/3 aspect). */
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
      return 2;
   case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
      return 3;
   default:
      return 0;
   }
}

/* Return the format of the selected aspect of the image: the per-plane format
 * for plane aspects, the depth-only/stencil-only variant for depth/stencil
 * aspects, and the image format otherwise. A combined depth|stencil aspect
 * yields the depth-only format. */
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk_format);
   default:
      return image->vk_format;
   }
}

/**
 * Determine if the given image view can be fast cleared.
 */
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
                               const struct radv_image_view *iview)
{
   struct radv_image *image;

   if (!iview)
      return false;
   image = iview->image;

   /* Only fast clear if the image itself can be fast cleared. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* Only fast clear if all layers are bound. */
   if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
      return false;

   /* Only fast clear if the view covers the whole image.
*/ 19267ec681f3Smrg if (!radv_image_extent_compare(image, &iview->extent)) 19277ec681f3Smrg return false; 19287ec681f3Smrg 19297ec681f3Smrg return true; 193001e04c3fSmrg} 193101e04c3fSmrg 19327ec681f3Smrgvoid 19337ec681f3Smrgradv_image_view_init(struct radv_image_view *iview, struct radv_device *device, 19347ec681f3Smrg const VkImageViewCreateInfo *pCreateInfo, 19357ec681f3Smrg const struct radv_image_view_extra_create_info *extra_create_info) 193601e04c3fSmrg{ 19377ec681f3Smrg RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); 19387ec681f3Smrg const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; 19397ec681f3Smrg uint32_t plane_count = 1; 19407ec681f3Smrg 19417ec681f3Smrg vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW); 19427ec681f3Smrg 19437ec681f3Smrg switch (image->type) { 19447ec681f3Smrg case VK_IMAGE_TYPE_1D: 19457ec681f3Smrg case VK_IMAGE_TYPE_2D: 19467ec681f3Smrg assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= 19477ec681f3Smrg image->info.array_size); 19487ec681f3Smrg break; 19497ec681f3Smrg case VK_IMAGE_TYPE_3D: 19507ec681f3Smrg assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= 19517ec681f3Smrg radv_minify(image->info.depth, range->baseMipLevel)); 19527ec681f3Smrg break; 19537ec681f3Smrg default: 19547ec681f3Smrg unreachable("bad VkImageType"); 19557ec681f3Smrg } 19567ec681f3Smrg iview->image = image; 19577ec681f3Smrg iview->type = pCreateInfo->viewType; 19587ec681f3Smrg iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask); 19597ec681f3Smrg iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; 19607ec681f3Smrg iview->base_layer = range->baseArrayLayer; 19617ec681f3Smrg iview->layer_count = radv_get_layerCount(image, range); 19627ec681f3Smrg iview->base_mip = range->baseMipLevel; 19637ec681f3Smrg iview->level_count = radv_get_levelCount(image, range); 19647ec681f3Smrg 19657ec681f3Smrg iview->vk_format = pCreateInfo->format; 
19667ec681f3Smrg 19677ec681f3Smrg /* If the image has an Android external format, pCreateInfo->format will be 19687ec681f3Smrg * VK_FORMAT_UNDEFINED. */ 19697ec681f3Smrg if (iview->vk_format == VK_FORMAT_UNDEFINED) 19707ec681f3Smrg iview->vk_format = image->vk_format; 19717ec681f3Smrg 19727ec681f3Smrg /* Split out the right aspect. Note that for internal meta code we sometimes 19737ec681f3Smrg * use an equivalent color format for the aspect so we first have to check 19747ec681f3Smrg * if we actually got depth/stencil formats. */ 19757ec681f3Smrg if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { 19767ec681f3Smrg if (vk_format_has_stencil(iview->vk_format)) 19777ec681f3Smrg iview->vk_format = vk_format_stencil_only(iview->vk_format); 19787ec681f3Smrg } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { 19797ec681f3Smrg if (vk_format_has_depth(iview->vk_format)) 19807ec681f3Smrg iview->vk_format = vk_format_depth_only(iview->vk_format); 19817ec681f3Smrg } 19827ec681f3Smrg 19837ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9) { 19847ec681f3Smrg iview->extent = (VkExtent3D){ 19857ec681f3Smrg .width = image->info.width, 19867ec681f3Smrg .height = image->info.height, 19877ec681f3Smrg .depth = image->info.depth, 19887ec681f3Smrg }; 19897ec681f3Smrg } else { 19907ec681f3Smrg iview->extent = (VkExtent3D){ 19917ec681f3Smrg .width = radv_minify(image->info.width, range->baseMipLevel), 19927ec681f3Smrg .height = radv_minify(image->info.height, range->baseMipLevel), 19937ec681f3Smrg .depth = radv_minify(image->info.depth, range->baseMipLevel), 19947ec681f3Smrg }; 19957ec681f3Smrg } 19967ec681f3Smrg 19977ec681f3Smrg if (iview->vk_format != image->planes[iview->plane_id].format) { 19987ec681f3Smrg unsigned view_bw = vk_format_get_blockwidth(iview->vk_format); 19997ec681f3Smrg unsigned view_bh = vk_format_get_blockheight(iview->vk_format); 20007ec681f3Smrg unsigned img_bw = vk_format_get_blockwidth(image->vk_format); 20017ec681f3Smrg unsigned 
img_bh = vk_format_get_blockheight(image->vk_format); 20027ec681f3Smrg 20037ec681f3Smrg iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw); 20047ec681f3Smrg iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh); 20057ec681f3Smrg 20067ec681f3Smrg /* Comment ported from amdvlk - 20077ec681f3Smrg * If we have the following image: 20087ec681f3Smrg * Uncompressed pixels Compressed block sizes (4x4) 20097ec681f3Smrg * mip0: 22 x 22 6 x 6 20107ec681f3Smrg * mip1: 11 x 11 3 x 3 20117ec681f3Smrg * mip2: 5 x 5 2 x 2 20127ec681f3Smrg * mip3: 2 x 2 1 x 1 20137ec681f3Smrg * mip4: 1 x 1 1 x 1 20147ec681f3Smrg * 20157ec681f3Smrg * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and 20167ec681f3Smrg * the HW is calculating the degradation of the block sizes down the mip-chain as follows 20177ec681f3Smrg * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1 20187ec681f3Smrg * 20197ec681f3Smrg * This means that mip2 will be missing texels. 20207ec681f3Smrg * 20217ec681f3Smrg * Fix this by calculating the base mip's width and height, then convert 20227ec681f3Smrg * that, and round it back up to get the level 0 size. Clamp the 20237ec681f3Smrg * converted size between the original values, and the physical extent 20247ec681f3Smrg * of the base mipmap. 20257ec681f3Smrg * 20267ec681f3Smrg * On GFX10 we have to take care to not go over the physical extent 20277ec681f3Smrg * of the base mipmap as otherwise the GPU computes a different layout. 20287ec681f3Smrg * Note that the GPU does use the same base-mip dimensions for both a 20297ec681f3Smrg * block compatible format and the compressed format, so even if we take 20307ec681f3Smrg * the plain converted dimensions the physical layout is correct. 
20317ec681f3Smrg */ 20327ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX9 && 20337ec681f3Smrg vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) { 20347ec681f3Smrg /* If we have multiple levels in the view we should ideally take the last level, 20357ec681f3Smrg * but the mip calculation has a max(..., 1) so walking back to the base mip in an 20367ec681f3Smrg * useful way is hard. */ 20377ec681f3Smrg if (iview->level_count > 1) { 20387ec681f3Smrg iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width; 20397ec681f3Smrg iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height; 20407ec681f3Smrg } else { 20417ec681f3Smrg unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel); 20427ec681f3Smrg unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel); 20437ec681f3Smrg 20447ec681f3Smrg lvl_width = round_up_u32(lvl_width * view_bw, img_bw); 20457ec681f3Smrg lvl_height = round_up_u32(lvl_height * view_bh, img_bh); 20467ec681f3Smrg 20477ec681f3Smrg lvl_width <<= range->baseMipLevel; 20487ec681f3Smrg lvl_height <<= range->baseMipLevel; 20497ec681f3Smrg 20507ec681f3Smrg iview->extent.width = CLAMP(lvl_width, iview->extent.width, 20517ec681f3Smrg iview->image->planes[0].surface.u.gfx9.base_mip_width); 20527ec681f3Smrg iview->extent.height = CLAMP(lvl_height, iview->extent.height, 20537ec681f3Smrg iview->image->planes[0].surface.u.gfx9.base_mip_height); 20547ec681f3Smrg } 20557ec681f3Smrg } 20567ec681f3Smrg } 20577ec681f3Smrg 20587ec681f3Smrg iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview); 20597ec681f3Smrg 20607ec681f3Smrg if (vk_format_get_plane_count(image->vk_format) > 1 && 20617ec681f3Smrg iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { 20627ec681f3Smrg plane_count = vk_format_get_plane_count(iview->vk_format); 20637ec681f3Smrg } 20647ec681f3Smrg 20657ec681f3Smrg bool disable_compression = extra_create_info ? 
extra_create_info->disable_compression : false; 20667ec681f3Smrg bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false; 20677ec681f3Smrg for (unsigned i = 0; i < plane_count; ++i) { 20687ec681f3Smrg VkFormat format = vk_format_get_plane_format(iview->vk_format, i); 20697ec681f3Smrg radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false, 20707ec681f3Smrg disable_compression, enable_compression, iview->plane_id + i, 20717ec681f3Smrg i); 20727ec681f3Smrg radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true, 20737ec681f3Smrg disable_compression, enable_compression, iview->plane_id + i, 20747ec681f3Smrg i); 20757ec681f3Smrg } 20767ec681f3Smrg} 207701e04c3fSmrg 20787ec681f3Smrgvoid 20797ec681f3Smrgradv_image_view_finish(struct radv_image_view *iview) 20807ec681f3Smrg{ 20817ec681f3Smrg vk_object_base_finish(&iview->base); 208201e04c3fSmrg} 208301e04c3fSmrg 20847ec681f3Smrgbool 20857ec681f3Smrgradv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, 20867ec681f3Smrg VkImageLayout layout, bool in_render_loop, unsigned queue_mask) 208701e04c3fSmrg{ 20887ec681f3Smrg switch (layout) { 20897ec681f3Smrg case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: 20907ec681f3Smrg case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR: 20917ec681f3Smrg case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR: 20927ec681f3Smrg return radv_image_has_htile(image); 20937ec681f3Smrg case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: 20947ec681f3Smrg return radv_image_is_tc_compat_htile(image) || 20957ec681f3Smrg (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL)); 20967ec681f3Smrg case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: 20977ec681f3Smrg case VK_IMAGE_LAYOUT_GENERAL: 20987ec681f3Smrg /* It should be safe to enable TC-compat HTILE with 20997ec681f3Smrg * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and 21007ec681f3Smrg * if the image 
doesn't have the storage bit set. This 21017ec681f3Smrg * improves performance for apps that use GENERAL for the main 21027ec681f3Smrg * depth pass because this allows compression and this reduces 21037ec681f3Smrg * the number of decompressions from/to GENERAL. 21047ec681f3Smrg */ 21057ec681f3Smrg /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute 21067ec681f3Smrg * queue is likely broken for eg. depth/stencil copies. 21077ec681f3Smrg */ 21087ec681f3Smrg if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) && 21097ec681f3Smrg !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) { 21107ec681f3Smrg return true; 21117ec681f3Smrg } else { 21127ec681f3Smrg return false; 21137ec681f3Smrg } 21147ec681f3Smrg case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: 21157ec681f3Smrg if (radv_image_is_tc_compat_htile(image) || 21167ec681f3Smrg (radv_image_has_htile(image) && 21177ec681f3Smrg !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) { 21187ec681f3Smrg /* Keep HTILE compressed if the image is only going to 21197ec681f3Smrg * be used as a depth/stencil read-only attachment. 
21207ec681f3Smrg */ 21217ec681f3Smrg return true; 21227ec681f3Smrg } else { 21237ec681f3Smrg return false; 21247ec681f3Smrg } 21257ec681f3Smrg break; 21267ec681f3Smrg default: 21277ec681f3Smrg return radv_image_is_tc_compat_htile(image); 21287ec681f3Smrg } 212901e04c3fSmrg} 213001e04c3fSmrg 21317ec681f3Smrgbool 21327ec681f3Smrgradv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, 21337ec681f3Smrg unsigned level, VkImageLayout layout, bool in_render_loop, 21347ec681f3Smrg unsigned queue_mask) 213501e04c3fSmrg{ 21367ec681f3Smrg if (radv_dcc_enabled(image, level) && 21377ec681f3Smrg !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask)) 21387ec681f3Smrg return false; 213901e04c3fSmrg 21407ec681f3Smrg if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS)) 21417ec681f3Smrg return false; 21427ec681f3Smrg 21437ec681f3Smrg if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) 21447ec681f3Smrg return false; 21457ec681f3Smrg 21467ec681f3Smrg /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent 21477ec681f3Smrg * images can only be fast-cleared if comp-to-single is supported because we don't yet support 21487ec681f3Smrg * FCE on the compute queue. 
21497ec681f3Smrg */ 21507ec681f3Smrg return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image); 215101e04c3fSmrg} 215201e04c3fSmrg 21537ec681f3Smrgbool 21547ec681f3Smrgradv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, 21557ec681f3Smrg unsigned level, VkImageLayout layout, bool in_render_loop, 21567ec681f3Smrg unsigned queue_mask) 21577ec681f3Smrg{ 21587ec681f3Smrg if (!radv_dcc_enabled(image, level)) 21597ec681f3Smrg return false; 21607ec681f3Smrg 21617ec681f3Smrg if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN)) 21627ec681f3Smrg return true; 21637ec681f3Smrg 21647ec681f3Smrg /* If the image is read-only, we can always just keep it compressed */ 21657ec681f3Smrg if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS)) 21667ec681f3Smrg return true; 21677ec681f3Smrg 21687ec681f3Smrg /* Don't compress compute transfer dst when image stores are not supported. */ 21697ec681f3Smrg if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) && 21707ec681f3Smrg (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image)) 21717ec681f3Smrg return false; 21727ec681f3Smrg 21737ec681f3Smrg return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL; 21747ec681f3Smrg} 217501e04c3fSmrg 21767ec681f3Smrgbool 21777ec681f3Smrgradv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image, 21787ec681f3Smrg VkImageLayout layout, unsigned queue_mask) 217901e04c3fSmrg{ 21807ec681f3Smrg if (!radv_image_has_fmask(image)) 21817ec681f3Smrg return false; 21827ec681f3Smrg 21837ec681f3Smrg /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be 21847ec681f3Smrg * expanded before. 
21857ec681f3Smrg */ 21867ec681f3Smrg if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) && 21877ec681f3Smrg (queue_mask & (1u << RADV_QUEUE_COMPUTE))) 21887ec681f3Smrg return false; 21897ec681f3Smrg 21907ec681f3Smrg /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */ 21917ec681f3Smrg return layout != VK_IMAGE_LAYOUT_GENERAL && 21927ec681f3Smrg (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image)); 21937ec681f3Smrg} 21947ec681f3Smrg 21957ec681f3Smrgunsigned 21967ec681f3Smrgradv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family) 21977ec681f3Smrg{ 21987ec681f3Smrg if (!image->exclusive) 21997ec681f3Smrg return image->queue_family_mask; 22007ec681f3Smrg if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT) 22017ec681f3Smrg return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN); 22027ec681f3Smrg if (family == VK_QUEUE_FAMILY_IGNORED) 22037ec681f3Smrg return 1u << queue_family; 22047ec681f3Smrg return 1u << family; 220501e04c3fSmrg} 220601e04c3fSmrg 220701e04c3fSmrgVkResult 22087ec681f3Smrgradv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo, 22097ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkImage *pImage) 221001e04c3fSmrg{ 221101e04c3fSmrg#ifdef ANDROID 22127ec681f3Smrg const VkNativeBufferANDROID *gralloc_info = 22137ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); 221401e04c3fSmrg 22157ec681f3Smrg if (gralloc_info) 22167ec681f3Smrg return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage); 221701e04c3fSmrg#endif 221801e04c3fSmrg 22197ec681f3Smrg const struct wsi_image_create_info *wsi_info = 22207ec681f3Smrg vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); 22217ec681f3Smrg bool scanout = wsi_info && wsi_info->scanout; 222201e04c3fSmrg 22237ec681f3Smrg return 
radv_image_create(device, 22247ec681f3Smrg &(struct radv_image_create_info){ 22257ec681f3Smrg .vk_info = pCreateInfo, 22267ec681f3Smrg .scanout = scanout, 22277ec681f3Smrg }, 22287ec681f3Smrg pAllocator, pImage); 222901e04c3fSmrg} 223001e04c3fSmrg 223101e04c3fSmrgvoid 22327ec681f3Smrgradv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator) 223301e04c3fSmrg{ 22347ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 22357ec681f3Smrg RADV_FROM_HANDLE(radv_image, image, _image); 223601e04c3fSmrg 22377ec681f3Smrg if (!image) 22387ec681f3Smrg return; 223901e04c3fSmrg 22407ec681f3Smrg radv_destroy_image(device, pAllocator, image); 224101e04c3fSmrg} 224201e04c3fSmrg 22437ec681f3Smrgvoid 22447ec681f3Smrgradv_GetImageSubresourceLayout(VkDevice _device, VkImage _image, 22457ec681f3Smrg const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout) 224601e04c3fSmrg{ 22477ec681f3Smrg RADV_FROM_HANDLE(radv_image, image, _image); 22487ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 22497ec681f3Smrg int level = pSubresource->mipLevel; 22507ec681f3Smrg int layer = pSubresource->arrayLayer; 22517ec681f3Smrg 22527ec681f3Smrg unsigned plane_id = 0; 22537ec681f3Smrg if (vk_format_get_plane_count(image->vk_format) > 1) 22547ec681f3Smrg plane_id = radv_plane_from_aspect(pSubresource->aspectMask); 22557ec681f3Smrg 22567ec681f3Smrg struct radv_image_plane *plane = &image->planes[plane_id]; 22577ec681f3Smrg struct radeon_surf *surface = &plane->surface; 22587ec681f3Smrg 22597ec681f3Smrg if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { 22607ec681f3Smrg unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask); 22617ec681f3Smrg 22627ec681f3Smrg assert(level == 0); 22637ec681f3Smrg assert(layer == 0); 22647ec681f3Smrg 22657ec681f3Smrg pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class, 22667ec681f3Smrg surface, mem_plane_id, 0); 22677ec681f3Smrg pLayout->rowPitch 
= ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class, 22687ec681f3Smrg surface, mem_plane_id); 22697ec681f3Smrg pLayout->arrayPitch = 0; 22707ec681f3Smrg pLayout->depthPitch = 0; 22717ec681f3Smrg pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id); 22727ec681f3Smrg } else if (device->physical_device->rad_info.chip_class >= GFX9) { 22737ec681f3Smrg uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0; 22747ec681f3Smrg 22757ec681f3Smrg pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class, 22767ec681f3Smrg &plane->surface, 0, layer) + 22777ec681f3Smrg level_offset; 22787ec681f3Smrg if (image->vk_format == VK_FORMAT_R32G32B32_UINT || 22797ec681f3Smrg image->vk_format == VK_FORMAT_R32G32B32_SINT || 22807ec681f3Smrg image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) { 22817ec681f3Smrg /* Adjust the number of bytes between each row because 22827ec681f3Smrg * the pitch is actually the number of components per 22837ec681f3Smrg * row. 22847ec681f3Smrg */ 22857ec681f3Smrg pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3; 22867ec681f3Smrg } else { 22877ec681f3Smrg uint32_t pitch = 22887ec681f3Smrg surface->is_linear ? 
surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch; 22897ec681f3Smrg 22907ec681f3Smrg assert(util_is_power_of_two_nonzero(surface->bpe)); 22917ec681f3Smrg pLayout->rowPitch = pitch * surface->bpe; 22927ec681f3Smrg } 22937ec681f3Smrg 22947ec681f3Smrg pLayout->arrayPitch = surface->u.gfx9.surf_slice_size; 22957ec681f3Smrg pLayout->depthPitch = surface->u.gfx9.surf_slice_size; 22967ec681f3Smrg pLayout->size = surface->u.gfx9.surf_slice_size; 22977ec681f3Smrg if (image->type == VK_IMAGE_TYPE_3D) 22987ec681f3Smrg pLayout->size *= u_minify(image->info.depth, level); 22997ec681f3Smrg } else { 23007ec681f3Smrg pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 + 23017ec681f3Smrg (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer; 23027ec681f3Smrg pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe; 23037ec681f3Smrg pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 23047ec681f3Smrg pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 23057ec681f3Smrg pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 23067ec681f3Smrg if (image->type == VK_IMAGE_TYPE_3D) 23077ec681f3Smrg pLayout->size *= u_minify(image->info.depth, level); 23087ec681f3Smrg } 23097ec681f3Smrg} 2310ed98bd31Smaya 23117ec681f3SmrgVkResult 23127ec681f3Smrgradv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image, 23137ec681f3Smrg VkImageDrmFormatModifierPropertiesEXT *pProperties) 23147ec681f3Smrg{ 23157ec681f3Smrg RADV_FROM_HANDLE(radv_image, image, _image); 231601e04c3fSmrg 23177ec681f3Smrg pProperties->drmFormatModifier = image->planes[0].surface.modifier; 23187ec681f3Smrg return VK_SUCCESS; 231901e04c3fSmrg} 232001e04c3fSmrg 232101e04c3fSmrgVkResult 23227ec681f3Smrgradv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, 23237ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkImageView *pView) 
232401e04c3fSmrg{ 23257ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 23267ec681f3Smrg struct radv_image_view *view; 232701e04c3fSmrg 23287ec681f3Smrg view = 23297ec681f3Smrg vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 23307ec681f3Smrg if (view == NULL) 23317ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 233201e04c3fSmrg 23337ec681f3Smrg radv_image_view_init(view, device, pCreateInfo, NULL); 233401e04c3fSmrg 23357ec681f3Smrg *pView = radv_image_view_to_handle(view); 233601e04c3fSmrg 23377ec681f3Smrg return VK_SUCCESS; 233801e04c3fSmrg} 233901e04c3fSmrg 234001e04c3fSmrgvoid 23417ec681f3Smrgradv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator) 234201e04c3fSmrg{ 23437ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 23447ec681f3Smrg RADV_FROM_HANDLE(radv_image_view, iview, _iview); 23457ec681f3Smrg 23467ec681f3Smrg if (!iview) 23477ec681f3Smrg return; 234801e04c3fSmrg 23497ec681f3Smrg radv_image_view_finish(iview); 23507ec681f3Smrg vk_free2(&device->vk.alloc, pAllocator, iview); 235101e04c3fSmrg} 235201e04c3fSmrg 23537ec681f3Smrgvoid 23547ec681f3Smrgradv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device, 23557ec681f3Smrg const VkBufferViewCreateInfo *pCreateInfo) 235601e04c3fSmrg{ 23577ec681f3Smrg RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer); 23587ec681f3Smrg 23597ec681f3Smrg vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW); 23607ec681f3Smrg 23617ec681f3Smrg view->bo = buffer->bo; 23627ec681f3Smrg view->range = 23637ec681f3Smrg pCreateInfo->range == VK_WHOLE_SIZE ? 
buffer->size - pCreateInfo->offset : pCreateInfo->range; 23647ec681f3Smrg view->vk_format = pCreateInfo->format; 236501e04c3fSmrg 23667ec681f3Smrg radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range, 23677ec681f3Smrg view->state); 23687ec681f3Smrg} 236901e04c3fSmrg 23707ec681f3Smrgvoid 23717ec681f3Smrgradv_buffer_view_finish(struct radv_buffer_view *view) 23727ec681f3Smrg{ 23737ec681f3Smrg vk_object_base_finish(&view->base); 237401e04c3fSmrg} 237501e04c3fSmrg 237601e04c3fSmrgVkResult 23777ec681f3Smrgradv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo, 23787ec681f3Smrg const VkAllocationCallbacks *pAllocator, VkBufferView *pView) 237901e04c3fSmrg{ 23807ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 23817ec681f3Smrg struct radv_buffer_view *view; 238201e04c3fSmrg 23837ec681f3Smrg view = 23847ec681f3Smrg vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 23857ec681f3Smrg if (!view) 23867ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 238701e04c3fSmrg 23887ec681f3Smrg radv_buffer_view_init(view, device, pCreateInfo); 238901e04c3fSmrg 23907ec681f3Smrg *pView = radv_buffer_view_to_handle(view); 239101e04c3fSmrg 23927ec681f3Smrg return VK_SUCCESS; 239301e04c3fSmrg} 239401e04c3fSmrg 239501e04c3fSmrgvoid 239601e04c3fSmrgradv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, 23977ec681f3Smrg const VkAllocationCallbacks *pAllocator) 239801e04c3fSmrg{ 23997ec681f3Smrg RADV_FROM_HANDLE(radv_device, device, _device); 24007ec681f3Smrg RADV_FROM_HANDLE(radv_buffer_view, view, bufferView); 240101e04c3fSmrg 24027ec681f3Smrg if (!view) 24037ec681f3Smrg return; 240401e04c3fSmrg 24057ec681f3Smrg radv_buffer_view_finish(view); 24067ec681f3Smrg vk_free2(&device->vk.alloc, pAllocator, view); 240701e04c3fSmrg} 2408