/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_drm_fourcc.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

/* Usage flags under which the GPU may write to an image. */
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;

/* Pick the radeon surface mode (linear-aligned vs. 2D tiled) for a new
 * image from its create info.
 */
static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.chip_class <= GFX8) {
      /* this causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}

/* Decide at image-create time whether TC-compatible HTILE can be used for
 * this depth/stencil image.
 */
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.chip_class < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. Though,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z planes compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
          format != VK_FORMAT_D16_UNORM)
         return false;
   }

   return true;
}

/* Whether the surface is a display/scanout surface; prefer the imported
 * BO metadata when present over the create-info flag.
 */
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.chip_class >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

/* Early heuristic (before surface layout is computed) for enabling fast
 * clears (CMASK/DCC) on a color image.
 */
static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
                                          const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
}

/* Final fast-clear decision: additionally requires exclusive queue
 * ownership unless DCC image stores keep the image compressed on all
 * layouts/queues.
 */
static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   return radv_image_use_fast_clear_for_image_early(device, image) &&
          (image->exclusive ||
           /* Enable DCC for concurrent images if stores are
            * supported because that means we can keep DCC compressed on
            * all layouts/queues.
            */
           radv_image_use_dcc_image_stores(device, image));
}

/* Check that the base format and every format in the image's format list
 * (for mutable images) are DCC-compatible with each other. A mutable image
 * without a complete format list is rejected, since views could use any
 * compatible format.
 */
bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
{
   bool blendable;

   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i],
                                             sign_reinterpret))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

/* Whether shader image atomics are allowed on this single format. */
static bool
radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
{
   if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
      return false;

   return radv_is_atomic_format_supported(format);
}

/* Whether atomics are allowed on the base format or on any view format of
 * a mutable-format image.
 */
static bool
radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
                               VkImageCreateFlags flags)
{
   if (radv_format_is_atomic_allowed(device, format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the
       * list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

/* Early (create-time) decision whether DCC may be enabled for this image.
 * Sets *sign_reinterpret via radv_are_formats_dcc_compatible().
 */
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
                             const VkImageCreateInfo *pCreateInfo, VkFormat format,
                             bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   /* Shared images without an explicit DRM modifier cannot use DCC. */
   if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.chip_class < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* No DCC for subsampled or multi-plane (e.g. YCbCr) formats. */
   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse.
    */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.chip_class < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.chip_class == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags, sign_reinterpret);
}

/* Late decision (after the surface layout is known) whether to keep DCC
 * enabled on an image that already has DCC metadata.
 */
static bool
radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   /* Modifier-based images committed to DCC keep it. */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return true;

   if (!radv_image_use_fast_clear_for_image(device, image))
      return false;

   /* TODO: Fix storage images with DCC without DCC image stores.
    * Disabling it for now. */
   if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return true;
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   /* Delegated to the common surface code; depends only on the chip class
    * and the plane-0 surface layout.
    */
   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
                                               &image->planes[0].surface);
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

/* FMASK is only useful for multisampled color attachments (or when
 * compression is forced for debugging).
 */
static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

/* Whether to allocate HTILE (hierarchical Z) metadata for this image. */
static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
   /* TODO:
    * - Investigate about mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips =
      image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (device->physical_device->rad_info.chip_class == GFX10 &&
       image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less performant but make sure it's
    * allowed with VRS attachments because we need HTILE.
    */
   if (image->info.width * image->info.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !device->attachment_vrs_enabled)
      return false;

   if (device->instance->disable_htile_layers && image->info.array_size > 1)
      return false;

   /* Mipmapped HTILE is only taken when use_htile_for_mips allows it, and
    * shared images never get HTILE.
    */
   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}

/* Whether TC-compatible CMASK can be used, i.e. the texture unit can read
 * the MSAA color surface without an FMASK decompress.
 */
static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   /* Do not enable TC-compatible if the image isn't readable by a shader
    * because no texture fetches will happen.
    */
   if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable.
    */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

/* Second word of the opaque BO metadata: vendor and PCI device id. */
static uint32_t
si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

/* Validate the opaque metadata attached to an imported BO (magic word,
 * matching device id, and minimum size).
 */
static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

/* Overwrite the surface's tiling mode with the tiling information carried
 * in the imported BO metadata.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

/* Reconcile the image dimensions with the (possibly larger) dimensions
 * encoded in imported BO metadata, rejecting imports we cannot handle.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode width/height from the image descriptor words stored in the
       * metadata (layout differs between GFX10+ and older chips).
       */
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

/* Apply import-time overrides (dimensions, tiling metadata, scanout flags)
 * to every plane of the image.
 */
static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }
   }
   return VK_SUCCESS;
}

/* Compute the RADEON_SURF_* flags used to initialize one plane's surface
 * (tiling mode, surface type, and which metadata kinds to allocate).
 */
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8.
    */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   /* 128-bit compressed 3D images cannot be render targets on GFX9+. */
   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
                                     &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   /* Sparse (PRT) images get no compression metadata at all. */
   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}

/* Legacy (pre-GFX9) tile mode index for a mip level of the given plane. */
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

/* Map a pipe_swizzle to the hardware SQ_SEL_* destination-select value. */
static unsigned
radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case PIPE_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case PIPE_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case PIPE_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case PIPE_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* PIPE_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

/* Compose the format's intrinsic swizzle with an optional view component
 * mapping into the final 4-component swizzle for a descriptor.
 */
static void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can be easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
                                             PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

/* Fill the four descriptor dwords (state[0..3]) for a typed buffer view
 * covering [offset, offset + range) of the buffer.
 */
static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

   /* NUM_RECORDS is counted in elements (not bytes) everywhere except
    * GFX8, so convert the byte range to an element count there.
    * NOTE(review): the GFX8 exception matches the != GFX8 check here —
    * confirm against the chip's buffer-descriptor semantics.
    */
   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}

/* Patch the "mutable" fields of an image descriptor: base address, tiling
 * mode, and compression-metadata (DCC/HTILE) address. The rest of state[]
 * is expected to be pre-filled by the descriptor-building code.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   /* image->bo may be NULL for images that are not bound to memory yet. */
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

         /* Encode the tile swizzle into the low bits of the DCC address. */
         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}

/* Translate an image/view type pair to the hardware SQ_RSRC_IMG_* resource
 * dimension for a texture descriptor.
 */
static unsigned
radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
             unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   /* Storage views cannot use the CUBE resource type; use 2D_ARRAY. */
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D.
    */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;
   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      /* 3D images viewed as 2D arrays use the 2D_ARRAY resource type. */
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}

/* Derive the BC_SWIZZLE descriptor field from the format's component
 * swizzle so border colors land in the right channels.
 */
static unsigned
gfx9_border_color_swizzle(const struct util_format_description *desc)
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

/* Whether the format stores its alpha component in the most significant
 * bits (used for the DCC ALPHA_IS_ON_MSB descriptor field).
 */
bool
vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   /* On GFX10+, single-channel formats count as "alpha on MSB" only when
    * the single channel is replicated into alpha.
    */
   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}
/**
 * Build the sampler view descriptor for a texture (GFX10).
907 */ 908static void 909gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image, 910 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format, 911 const VkComponentMapping *mapping, unsigned first_level, 912 unsigned last_level, unsigned first_layer, unsigned last_layer, 913 unsigned width, unsigned height, unsigned depth, uint32_t *state, 914 uint32_t *fmask_state) 915{ 916 const struct util_format_description *desc; 917 enum pipe_swizzle swizzle[4]; 918 unsigned img_format; 919 unsigned type; 920 921 desc = vk_format_description(vk_format); 922 img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format; 923 924 radv_compose_swizzle(desc, mapping, swizzle); 925 926 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples, 927 is_storage_image, device->physical_device->rad_info.chip_class == GFX9); 928 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 929 height = 1; 930 depth = image->info.array_size; 931 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 932 if (view_type != VK_IMAGE_VIEW_TYPE_3D) 933 depth = image->info.array_size; 934 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 935 depth = image->info.array_size / 6; 936 937 state[0] = 0; 938 state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 939 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 940 S_00A008_RESOURCE_LEVEL(1); 941 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 942 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 943 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 944 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | 945 S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) | 946 S_00A00C_LAST_LEVEL(image->info.samples > 1 ? 
util_logbase2(image->info.samples) 947 : last_level) | 948 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type); 949 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need 950 * to know the total number of layers. 951 */ 952 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | 953 S_00A010_BASE_ARRAY(first_layer); 954 state[5] = S_00A014_ARRAY_PITCH(0) | 955 S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples) 956 : image->info.levels - 1) | 957 S_00A014_PERF_MOD(4); 958 state[6] = 0; 959 state[7] = 0; 960 961 if (radv_dcc_enabled(image, first_level)) { 962 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 963 S_00A018_MAX_COMPRESSED_BLOCK_SIZE( 964 image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) | 965 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); 966 } 967 968 if (radv_image_get_iterate256(device, image)) { 969 state[6] |= S_00A018_ITERATE_256(1); 970 } 971 972 /* Initialize the sampler view for FMASK. 
*/ 973 if (fmask_state) { 974 if (radv_image_has_fmask(image)) { 975 uint64_t gpu_address = radv_buffer_get_va(image->bo); 976 uint32_t format; 977 uint64_t va; 978 979 assert(image->plane_count == 1); 980 981 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset; 982 983 switch (image->info.samples) { 984 case 2: 985 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 986 break; 987 case 4: 988 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 989 break; 990 case 8: 991 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 992 break; 993 default: 994 unreachable("invalid nr_samples"); 995 } 996 997 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle; 998 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 999 S_00A004_WIDTH_LO(width - 1); 1000 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 1001 S_00A008_RESOURCE_LEVEL(1); 1002 fmask_state[3] = 1003 S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 1004 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 1005 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) | 1006 S_00A00C_TYPE( 1007 radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false)); 1008 fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 1009 fmask_state[5] = 0; 1010 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 1011 fmask_state[7] = 0; 1012 1013 if (radv_image_is_tc_compat_cmask(image)) { 1014 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset; 1015 1016 fmask_state[6] |= S_00A018_COMPRESSION_EN(1); 1017 fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8); 1018 fmask_state[7] |= va >> 16; 1019 } 1020 } else 1021 memset(fmask_state, 0, 8 * 4); 1022 } 1023} 1024 1025/** 1026 * Build the sampler view descriptor for a texture (SI-GFX9) 1027 */ 1028static void 1029si_make_texture_descriptor(struct radv_device 
                           *device, struct radv_image *image,
                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                           const VkComponentMapping *mapping, unsigned first_level,
                           unsigned last_level, unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth, uint32_t *state,
                           uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   radv_compose_swizzle(desc, mapping, swizzle);

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   /* ~0 indicates the translation failed; fall back to 0. */
   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE need a special format. */
   if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }
   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   /* Same width/height/depth fixups as the GFX10 path: 1D arrays use
    * height == 1, array layers go in depth, cubes count whole cubes.
    */
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.chip_class == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }
   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
       image->planes[0].surface.meta_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK.
 */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint32_t fmask_format;
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         /* GFX9 uses a single FMASK data format with per-sample-count num
          * formats; older chips encode the sample count in the data format
          * itself and use a UINT num format.
          */
         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
            switch (image->info.samples) {
            case 2:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
               break;
            case 4:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
               break;
            case 8:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
               break;
            default:
               unreachable("invalid nr_samples");
            }
         } else {
            switch (image->info.samples) {
            case 2:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
               break;
            case 4:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
               break;
            case 8:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
               break;
            default:
               assert(0);
               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
            }
            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
         }

         fmask_state[0] = va >> 8;
         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
                          S_008F14_NUM_FORMAT(num_format);
         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
         fmask_state[3] =
            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_008F1C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = 0;
         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
         fmask_state[6] = 0;
         fmask_state[7] = 0;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         } else {
            fmask_state[3] |=
               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
            fmask_state[4] |=
               S_008F20_DEPTH(depth - 1) |
               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

/* Dispatch descriptor building to the generation-specific implementation. */
static void
radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                             const VkComponentMapping *mapping, unsigned first_level,
                             unsigned last_level, unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth, uint32_t *state,
                             uint32_t *fmask_state)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                    first_level, last_level, first_layer, last_layer, width, height,
                                    depth, state, fmask_state);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                 first_level, last_level, first_layer, last_layer, width, height,
                                 depth, state, fmask_state);
   }
}

/* Build a descriptor covering the whole image and serialize it into the
 * opaque UMD metadata blob via ac_surface_get_umd_metadata().
 */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, desc, NULL);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}

/* Fill the winsys buffer metadata (tiling layout plus the opaque blob)
 * from plane 0's surface.
 */
void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Prefer the displayable DCC offset when one exists. */
      uint64_t dcc_offset =
         image->offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}

/* Apply an externally provided offset/stride to plane 0's surface layout. */
void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

/* Reserve space for a CMASK after the main surface when a single-sample
 * image qualifies for CMASK-based fast clears.
 */
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image
                                     *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

/* Reserve space at the end of the image for per-level metadata values:
 * FCE/DCC predicates, fast-clear values and the TC-compat ZRANGE workaround.
 */
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* images with modifiers can be potentially imported */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug that
       * has to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
1397 */ 1398 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) && 1399 (samples_overlap > log2_samples_frag_diff)) { 1400 return true; 1401 } 1402 } 1403 } 1404 1405 return false; 1406} 1407 1408static bool 1409radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) 1410{ 1411 if (device->physical_device->rad_info.chip_class >= GFX10) { 1412 return !device->physical_device->rad_info.tcc_rb_non_coherent && 1413 !radv_image_is_pipe_misaligned(device, image); 1414 } else if (device->physical_device->rad_info.chip_class == GFX9) { 1415 if (image->info.samples == 1 && 1416 (image->usage & 1417 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && 1418 !vk_format_has_stencil(image->vk_format)) { 1419 /* Single-sample color and single-sample depth 1420 * (not stencil) are coherent with shaders on 1421 * GFX9. 1422 */ 1423 return true; 1424 } 1425 } 1426 1427 return false; 1428} 1429 1430/** 1431 * Determine if the given image can be fast cleared. 1432 */ 1433static bool 1434radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image) 1435{ 1436 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) 1437 return false; 1438 1439 if (vk_format_is_color(image->vk_format)) { 1440 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image)) 1441 return false; 1442 1443 /* RB+ doesn't work with CMASK fast clear on Stoney. */ 1444 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY) 1445 return false; 1446 } else { 1447 if (!radv_image_has_htile(image)) 1448 return false; 1449 } 1450 1451 /* Do not fast clears 3D images. */ 1452 if (image->type == VK_IMAGE_TYPE_3D) 1453 return false; 1454 1455 return true; 1456} 1457 1458/** 1459 * Determine if the given image can be fast cleared using comp-to-single. 
1460 */ 1461static bool 1462radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image) 1463{ 1464 /* comp-to-single is only available for GFX10+. */ 1465 if (device->physical_device->rad_info.chip_class < GFX10) 1466 return false; 1467 1468 /* If the image can't be fast cleared, comp-to-single can't be used. */ 1469 if (!radv_image_can_fast_clear(device, image)) 1470 return false; 1471 1472 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */ 1473 if (!radv_image_has_dcc(image)) 1474 return false; 1475 1476 /* It seems 8bpp and 16bpp require RB+ to work. */ 1477 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format); 1478 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed) 1479 return false; 1480 1481 return true; 1482} 1483 1484static void 1485radv_image_reset_layout(struct radv_image *image) 1486{ 1487 image->size = 0; 1488 image->alignment = 1; 1489 1490 image->tc_compatible_cmask = 0; 1491 image->fce_pred_offset = image->dcc_pred_offset = 0; 1492 image->clear_value_offset = image->tc_compat_zrange_offset = 0; 1493 1494 for (unsigned i = 0; i < image->plane_count; ++i) { 1495 VkFormat format = vk_format_get_plane_format(image->vk_format, i); 1496 if (vk_format_has_depth(format)) 1497 format = vk_format_depth_only(format); 1498 1499 uint64_t flags = image->planes[i].surface.flags; 1500 uint64_t modifier = image->planes[i].surface.modifier; 1501 memset(image->planes + i, 0, sizeof(image->planes[i])); 1502 1503 image->planes[i].surface.flags = flags; 1504 image->planes[i].surface.modifier = modifier; 1505 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format); 1506 image->planes[i].surface.blk_h = vk_format_get_blockheight(format); 1507 image->planes[i].surface.bpe = vk_format_get_blocksize(format); 1508 1509 /* align byte per element on dword */ 1510 if (image->planes[i].surface.bpe == 3) { 1511 image->planes[i].surface.bpe = 4; 1512 } 1513 } 
}

/* Compute the full layout of the image: initialize every plane's surface,
 * assign plane offsets/strides (from the explicit modifier layout when one
 * is given), validate modifier metadata planes, and derive the total
 * size/alignment plus the metadata value allocations.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(image);

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (image->plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}

/* Free the image and any resources it owns (sparse virtual BO, owned
 * Android/AHB memory) and release the handle.
 */
static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_image *image)
{
   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
      device->ws->buffer_destroy(device->ws, image->bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

/* Dump image and per-plane surface info to stderr (used with RADV_DEBUG_IMG). */
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", "
           "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
           image->size, image->alignment, image->info.width, image->info.height, image->offset,
           image->info.array_size);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, 0);

      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}

/* Pick the first modifier (in driver preference order) that also appears in
 * the application-provided list.
 */
static uint64_t
radv_select_modifier(const struct
                     radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = dev->physical_device;
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the application
    * is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   /* First query the number of supported modifiers, then fetch the list. */
   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocations fail, fall back to a dumber solution. */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, mods);

   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   unreachable("App specified an invalid modifier");
}

/* Create a radv_image from VkImageCreateInfo: allocate the image struct with
 * its per-plane tail, record the creation parameters, pick a DRM modifier if
 * requested, compute the layout (unless delayed for Android external memory)
 * and create the virtual BO for sparse images.
 */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign queue families widen the mask to all families. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   /* Android hardware buffers delay layout computation until bind time. */
   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}

/* Fill the (storage) image view descriptor for one plane of a view.
 * NOTE(review): continues past the end of this chunk.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id || is_storage_image ?
NULL : descriptor->fmask_descriptor); 1848 1849 const struct legacy_surf_level *base_level_info = NULL; 1850 if (device->physical_device->rad_info.chip_class <= GFX9) { 1851 if (is_stencil) 1852 base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip]; 1853 else 1854 base_level_info = &plane->surface.u.legacy.level[iview->base_mip]; 1855 } 1856 1857 bool enable_write_compression = radv_image_use_dcc_image_stores(device, image); 1858 if (is_storage_image && !(enable_write_compression || enable_compression)) 1859 disable_compression = true; 1860 si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip, 1861 iview->base_mip, blk_w, is_stencil, is_storage_image, 1862 disable_compression, enable_write_compression, 1863 descriptor->plane_descriptors[descriptor_plane_id]); 1864} 1865 1866static unsigned 1867radv_plane_from_aspect(VkImageAspectFlags mask) 1868{ 1869 switch (mask) { 1870 case VK_IMAGE_ASPECT_PLANE_1_BIT: 1871 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT: 1872 return 1; 1873 case VK_IMAGE_ASPECT_PLANE_2_BIT: 1874 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT: 1875 return 2; 1876 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT: 1877 return 3; 1878 default: 1879 return 0; 1880 } 1881} 1882 1883VkFormat 1884radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask) 1885{ 1886 switch (mask) { 1887 case VK_IMAGE_ASPECT_PLANE_0_BIT: 1888 return image->planes[0].format; 1889 case VK_IMAGE_ASPECT_PLANE_1_BIT: 1890 return image->planes[1].format; 1891 case VK_IMAGE_ASPECT_PLANE_2_BIT: 1892 return image->planes[2].format; 1893 case VK_IMAGE_ASPECT_STENCIL_BIT: 1894 return vk_format_stencil_only(image->vk_format); 1895 case VK_IMAGE_ASPECT_DEPTH_BIT: 1896 return vk_format_depth_only(image->vk_format); 1897 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: 1898 return vk_format_depth_only(image->vk_format); 1899 default: 1900 return image->vk_format; 1901 } 1902} 1903 1904/** 1905 * Determine if the 
given image view can be fast cleared.
 */
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
                               const struct radv_image_view *iview)
{
   struct radv_image *image;

   if (!iview)
      return false;
   image = iview->image;

   /* Only fast clear if the image itself can be fast cleared. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* Only fast clear if all layers are bound. */
   if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
      return false;

   /* Only fast clear if the view covers the whole image. */
   if (!radv_image_extent_compare(image, &iview->extent))
      return false;

   return true;
}

/**
 * Initialize an image view: resolve plane/aspect/format, compute the view
 * extent (including the GFX9+ block-compressed/uncompressed-view fixups),
 * and build the sampled and storage descriptors for every view plane.
 */
void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
   uint32_t plane_count = 1;

   vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW);

   /* Validate the subresource range against the image dimensions. */
   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   /* Split out the right aspect. Note that for internal meta code we sometimes
    * use an equivalent color format for the aspect so we first have to check
    * if we actually got depth/stencil formats. */
   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      if (vk_format_has_stencil(iview->vk_format))
         iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      if (vk_format_has_depth(iview->vk_format))
         iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

   /* GFX9+ descriptors use base-level dimensions; older chips use the
    * minified dimensions of the base mip of the view. */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D){
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D){
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   if (iview->vk_format != image->planes[iview->plane_id].format) {
      /* The view format differs from the plane format (e.g. an uncompressed
       * view of a block-compressed image): rescale the extent by the ratio
       * of the block sizes. */
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
      unsigned img_bh = vk_format_get_blockheight(image->vk_format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       *              Uncompressed pixels   Compressed block sizes (4x4)
       *      mip0:       22 x 22                   6 x 6
       *      mip1:       11 x 11                   3 x 3
       *      mip2:        5 x  5                   2 x 2
       *      mip3:        2 x  2                   1 x 1
       *      mip4:        1 x  1                   1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
       * the HW is calculating the degradation of the block sizes down the mip-chain as follows
       * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by calculating the base mip's width and height, then convert
       * that, and round it back up to get the level 0 size. Clamp the
       * converted size between the original values, and the physical extent
       * of the base mipmap.
       *
       * On GFX10 we have to take care to not go over the physical extent
       * of the base mipmap as otherwise the GPU computes a different layout.
       * Note that the GPU does use the same base-mip dimensions for both a
       * block compatible format and the compressed format, so even if we take
       * the plain converted dimensions the physical layout is correct.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
         /* If we have multiple levels in the view we should ideally take the last level,
          * but the mip calculation has a max(..., 1) so walking back to the base mip in an
          * useful way is hard. */
         if (iview->level_count > 1) {
            iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
            iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
         } else {
            /* Reconstruct a level-0 extent from the converted base-mip size,
             * then clamp it between the plain converted extent and the
             * physical base-mip extent. */
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.base_mip_width);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.base_mip_height);
         }
      }
   }

   iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);

   /* A COLOR view of a multi-planar image covers all planes of the view format. */
   if (vk_format_get_plane_count(image->vk_format) > 1 &&
       iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
      plane_count = vk_format_get_plane_count(iview->vk_format);
   }

   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
   /* Build both the sampled (false) and storage (true) descriptors per plane. */
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
   }
}

/* Tear down an image view initialized with radv_image_view_init(). */
void
radv_image_view_finish(struct radv_image_view *iview)
{
   vk_object_base_finish(&iview->base);
}

/**
 * Return whether HTILE stays compressed for the given layout/queue combination.
 */
bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
                                VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return radv_image_is_tc_compat_htile(image) ||
             (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because this allows compression and this reduces
       * the number of decompressions from/to GENERAL.
       */
      /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
       * queue is likely broken for eg. depth/stencil copies.
       */
      if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
          !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
         return true;
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
      break;
   default:
      return radv_image_is_tc_compat_htile(image);
   }
}

/**
 * Return whether a color image at the given mip level and layout can be
 * fast-cleared on the queues in queue_mask.
 */
bool
radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   /* DCC-enabled levels must also stay DCC-compressed in this layout. */
   if (radv_dcc_enabled(image, level) &&
       !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
      return false;

   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return false;

   if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
      return false;

   /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
    * images can only be fast-cleared if comp-to-single is supported because we don't yet support
    * FCE on the compute queue.
    */
   return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
}

/**
 * Return whether DCC stays compressed for the given mip level, layout and
 * queue mask.
 */
bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   if (!radv_dcc_enabled(image, level))
      return false;

   /* Modifier-based sharing with foreign queues keeps the modifier's DCC state. */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
      return true;

   /* If the image is read-only, we can always just keep it compressed */
   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return true;

   /* Don't compress compute transfer dst when image stores are not supported. */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   /* Before GFX10, GENERAL requires decompression. */
   return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
}

/**
 * Return whether FMASK stays compressed for the given layout and queue mask.
 */
bool
radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
                             VkImageLayout layout, unsigned queue_mask)
{
   if (!radv_image_has_fmask(image))
      return false;

   /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
    * expanded before.
    */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
      return false;

   /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
   return layout != VK_IMAGE_LAYOUT_GENERAL &&
          (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
}

/**
 * Compute the queue-family access mask for an image given the src/dst family
 * of a barrier (family) and the queue family of the command buffer
 * (queue_family). Concurrent images return their precomputed mask.
 */
unsigned
radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
   if (!image->exclusive)
      return image->queue_family_mask;
   if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
      /* External/foreign transfers grant all internal families plus the foreign bit. */
      return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
   if (family == VK_QUEUE_FAMILY_IGNORED)
      return 1u << queue_family;
   return 1u << family;
}

/* vkCreateImage entry point: routes gralloc (Android) and WSI scanout images
 * to the generic radv_image_create(). */
VkResult
radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
{
#ifdef ANDROID
   const VkNativeBufferANDROID *gralloc_info =
      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

   const struct wsi_image_create_info *wsi_info =
      vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;

   return radv_image_create(device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                            },
                            pAllocator, pImage);
}

/* vkDestroyImage entry point; destroying VK_NULL_HANDLE is a no-op. */
void
radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, _image);

   if (!image)
      return;

   radv_destroy_image(device, pAllocator, image);
}

/* vkGetImageSubresourceLayout: report offset/pitches/size of one subresource,
 * handling modifier-based, GFX9+ and legacy surface layouts separately. */
void
radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
                               const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   unsigned plane_id = 0;
   if (vk_format_get_plane_count(image->vk_format) > 1)
      plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      /* Modifier images expose memory planes; only one level/layer is valid. */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

      assert(level == 0);
      assert(layer == 0);

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    surface, mem_plane_id, 0);
      pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
                                                      surface, mem_plane_id);
      pLayout->arrayPitch = 0;
      pLayout->depthPitch = 0;
      pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Linear surfaces carry per-level offsets; tiled ones start at the plane offset. */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, layer) +
                        level_offset;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch =
            surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      /* Legacy (pre-GFX9) layout: per-level offsets in 256B units and
       * slice sizes in dwords. */
      pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                        (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}

/* vkGetImageDrmFormatModifierPropertiesEXT: the modifier is stored on the
 * first plane's surface at creation/import time. */
VkResult
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
                                            VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
   RADV_FROM_HANDLE(radv_image, image, _image);

   pProperties->drmFormatModifier = image->planes[0].surface.modifier;
   return VK_SUCCESS;
}

/* vkCreateImageView entry point; delegates to radv_image_view_init(). */
VkResult
radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_image_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (view == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_image_view_init(view, device, pCreateInfo, NULL);

   *pView = radv_image_view_to_handle(view);

   return VK_SUCCESS;
}

/* vkDestroyImageView entry point; destroying VK_NULL_HANDLE is a no-op. */
void
radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image_view, iview, _iview);

   if (!iview)
      return;

   radv_image_view_finish(iview);
   vk_free2(&device->vk.alloc, pAllocator, iview);
}

/**
 * Initialize a buffer view: resolve VK_WHOLE_SIZE against the buffer size and
 * build the texel-buffer descriptor.
 */
void
radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
                      const VkBufferViewCreateInfo *pCreateInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);

   view->bo = buffer->bo;
   view->range =
      pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
   view->vk_format = pCreateInfo->format;

   radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
                               view->state);
}

/* Tear down a buffer view initialized with radv_buffer_view_init(). */
void
radv_buffer_view_finish(struct radv_buffer_view *view)
{
   vk_object_base_finish(&view->base);
}

/* vkCreateBufferView entry point; delegates to radv_buffer_view_init(). */
VkResult
radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_buffer_view_init(view, device, pCreateInfo);

   *pView = radv_buffer_view_to_handle(view);

   return VK_SUCCESS;
}

/* vkDestroyBufferView entry point; destroying VK_NULL_HANDLE is a no-op. */
void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

   if (!view)
      return;

   radv_buffer_view_finish(view);
   vk_free2(&device->vk.alloc, pAllocator, view);
}