1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#include "ac_drm_fourcc.h"
29#include "util/debug.h"
30#include "util/u_atomic.h"
31#include "vulkan/util/vk_format.h"
32#include "radv_debug.h"
33#include "radv_private.h"
34#include "radv_radeon_winsys.h"
35#include "sid.h"
36#include "vk_format.h"
37#include "vk_util.h"
38
39#include "gfx10_format_table.h"
40
/* The set of image usage bits through which image contents can be written
 * (copies, rendering, and shader stores).
 */
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
44
45static unsigned
46radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
47                   VkFormat format)
48{
49   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
50      assert(pCreateInfo->samples <= 1);
51      return RADEON_SURF_MODE_LINEAR_ALIGNED;
52   }
53
54   /* MSAA resources must be 2D tiled. */
55   if (pCreateInfo->samples > 1)
56      return RADEON_SURF_MODE_2D;
57
58   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
59       device->physical_device->rad_info.chip_class <= GFX8) {
60      /* this causes hangs in some VK CTS tests on GFX9. */
61      /* Textures with a very small height are recommended to be linear. */
62      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
63          /* Only very thin and long 2D textures should benefit from
64           * linear_aligned. */
65          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
66         return RADEON_SURF_MODE_LINEAR_ALIGNED;
67   }
68
69   return RADEON_SURF_MODE_2D;
70}
71
72static bool
73radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
74                                   VkFormat format)
75{
76   /* TC-compat HTILE is only available for GFX8+. */
77   if (device->physical_device->rad_info.chip_class < GFX8)
78      return false;
79
80   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
81      return false;
82
83   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
84      return false;
85
86   /* Do not enable TC-compatible HTILE if the image isn't readable by a
87    * shader because no texture fetches will happen.
88    */
89   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
90                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
91      return false;
92
93   if (device->physical_device->rad_info.chip_class < GFX9) {
94      /* TC-compat HTILE for MSAA depth/stencil images is broken
95       * on GFX8 because the tiling doesn't match.
96       */
97      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
98         return false;
99
100      /* GFX9+ supports compression for both 32-bit and 16-bit depth
101       * surfaces, while GFX8 only supports 32-bit natively. Though,
102       * the driver allows TC-compat HTILE for 16-bit depth surfaces
103       * with no Z planes compression.
104       */
105      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
106          format != VK_FORMAT_D16_UNORM)
107         return false;
108   }
109
110   return true;
111}
112
113static bool
114radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
115{
116   if (info->bo_metadata) {
117      if (device->physical_device->rad_info.chip_class >= GFX9)
118         return info->bo_metadata->u.gfx9.scanout;
119      else
120         return info->bo_metadata->u.legacy.scanout;
121   }
122
123   return info->scanout;
124}
125
126static bool
127radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
128                                          const struct radv_image *image)
129{
130   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
131      return true;
132
133   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
134      /* Do not enable CMASK or DCC for small surfaces where the cost
135       * of the eliminate pass can be higher than the benefit of fast
136       * clear. RadeonSI does this, but the image threshold is
137       * different.
138       */
139      return false;
140   }
141
142   return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
143}
144
145static bool
146radv_image_use_fast_clear_for_image(const struct radv_device *device,
147                                    const struct radv_image *image)
148{
149   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
150      return true;
151
152   return radv_image_use_fast_clear_for_image_early(device, image) &&
153          (image->exclusive ||
154           /* Enable DCC for concurrent images if stores are
155            * supported because that means we can keep DCC compressed on
156            * all layouts/queues.
157            */
158           radv_image_use_dcc_image_stores(device, image));
159}
160
161bool
162radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
163                                VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
164{
165   bool blendable;
166
167   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
168      return false;
169
170   if (sign_reinterpret != NULL)
171      *sign_reinterpret = false;
172
173   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
174      const struct VkImageFormatListCreateInfo *format_list =
175         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
176            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
177
178      /* We have to ignore the existence of the list if viewFormatCount = 0 */
179      if (format_list && format_list->viewFormatCount) {
180         /* compatibility is transitive, so we only need to check
181          * one format with everything else. */
182         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
183            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
184               continue;
185
186            if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i],
187                                             sign_reinterpret))
188               return false;
189         }
190      } else {
191         return false;
192      }
193   }
194
195   return true;
196}
197
198static bool
199radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
200{
201   if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
202      return false;
203
204   return radv_is_atomic_format_supported(format);
205}
206
207static bool
208radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
209                               VkImageCreateFlags flags)
210{
211   if (radv_format_is_atomic_allowed(device, format))
212      return true;
213
214   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
215      const struct VkImageFormatListCreateInfo *format_list =
216         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
217            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
218
219      /* We have to ignore the existence of the list if viewFormatCount = 0 */
220      if (format_list && format_list->viewFormatCount) {
221         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
222            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
223               return true;
224         }
225      }
226   }
227
228   return false;
229}
230
/* Early (image-creation-time) decision for enabling DCC. A later pass
 * (radv_use_dcc_for_image_late) can still disable DCC once the surface
 * layout is known. On success, *sign_reinterpret reports whether compatible
 * view formats reinterpret the sign (filled by
 * radv_are_formats_dcc_compatible).
 */
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
                             const VkImageCreateInfo *pCreateInfo, VkFormat format,
                             bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   /* Shareable images only keep DCC when a DRM format modifier describes the
    * layout to the other side (presumably; confirm against import/export paths).
    */
   if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.chip_class < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* No DCC for subsampled or multi-plane (e.g. YCbCr) formats. */
   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   /* DCC without fast clears is not worthwhile, except when the layout is
    * dictated by a DRM format modifier.
    */
   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.chip_class < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.chip_class == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags, sign_reinterpret);
}
290
291static bool
292radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
293{
294   if (!radv_image_has_dcc(image))
295      return false;
296
297   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
298      return true;
299
300   if (!radv_image_use_fast_clear_for_image(device, image))
301      return false;
302
303   /* TODO: Fix storage images with DCC without DCC image stores.
304    * Disabling it for now. */
305   if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
306      return false;
307
308   return true;
309}
310
311/*
312 * Whether to enable image stores with DCC compression for this image. If
313 * this function returns false the image subresource should be decompressed
314 * before using it with image stores.
315 *
316 * Note that this can have mixed performance implications, see
317 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
318 *
319 * This function assumes the image uses DCC compression.
320 */
321bool
322radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
323{
324   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
325                                               &image->planes[0].surface);
326}
327
328/*
329 * Whether to use a predicate to determine whether DCC is in a compressed
330 * state. This can be used to avoid decompressing an image multiple times.
331 */
332bool
333radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
334{
335   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
336}
337
338static inline bool
339radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
340{
341   return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
342                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
343}
344
345static inline bool
346radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
347{
348   /* TODO:
349    * - Investigate about mips+layers.
350    * - Enable on other gens.
351    */
352   bool use_htile_for_mips =
353      image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
354
355   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
356   if (device->physical_device->rad_info.chip_class == GFX10 &&
357       image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
358      return false;
359
360   /* Do not enable HTILE for very small images because it seems less performant but make sure it's
361    * allowed with VRS attachments because we need HTILE.
362    */
363   if (image->info.width * image->info.height < 8 * 8 &&
364       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
365       !device->attachment_vrs_enabled)
366      return false;
367
368   if (device->instance->disable_htile_layers && image->info.array_size > 1)
369      return false;
370
371   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
372}
373
374static bool
375radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
376{
377   /* TC-compat CMASK is only available for GFX8+. */
378   if (device->physical_device->rad_info.chip_class < GFX8)
379      return false;
380
381   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
382      return false;
383
384   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
385      return false;
386
387   /* Do not enable TC-compatible if the image isn't readable by a shader
388    * because no texture fetches will happen.
389    */
390   if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
391                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
392      return false;
393
394   /* If the image doesn't have FMASK, it can't be fetchable. */
395   if (!radv_image_has_fmask(image))
396      return false;
397
398   return true;
399}
400
401static uint32_t
402si_get_bo_metadata_word1(const struct radv_device *device)
403{
404   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
405}
406
407static bool
408radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
409{
410   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
411      return false;
412
413   if (md->size_metadata < 40)
414      return false;
415
416   return true;
417}
418
/* Overwrite the tiling-related fields of 'surface' with the values recorded
 * in imported BO metadata, so the layout matches what the exporter used.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   /* Clear the current surface mode before applying the imported one. */
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* GFX9+: a single swizzle mode encodes the tiling; 0 means linear. */
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      /* Pre-GFX9: copy the individual legacy tiling parameters. */
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      /* Derive the surface mode from the macro/micro tile layout. */
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}
448
/* Reconcile the image's dimensions with those recorded in imported BO
 * metadata (if any), updating 'image_info' when a larger external image is
 * acceptable, or rejecting the import when it cannot work.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode width/height from the image-descriptor words stored in the
       * opaque metadata; descriptor fields store dimension-1.
       */
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   /* Dimensions agree: nothing to patch. */
   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      /* A smaller external image can never back the internal one. */
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      /* Pre-GFX10: tolerated, but warn since this would fail on GFX10. */
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   /* Adopt the (larger) external dimensions for surface computation. */
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}
509
510static VkResult
511radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
512                                 const struct radv_image_create_info *create_info,
513                                 struct ac_surf_info *image_info)
514{
515   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
516   if (result != VK_SUCCESS)
517      return result;
518
519   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
520      if (create_info->bo_metadata) {
521         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
522                                          create_info->bo_metadata);
523      }
524
525      if (radv_surface_has_scanout(device, create_info)) {
526         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
527         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
528            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
529
530         image->info.surf_index = NULL;
531      }
532   }
533   return VK_SUCCESS;
534}
535
/* Build the RADEON_SURF_* flag word handed to the surface-layout code for
 * one plane of the image, encoding tiling mode, surface type, and which
 * metadata (HTILE/DCC/FMASK) to allocate.
 */
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   /* Encode the surface type from the Vulkan image type / layer count. */
   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      /* Either request (possibly TC-compatible) HTILE or forbid it. */
      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   /* GFX9+: 128-bit compressed 3D images cannot be render targets. */
   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
                                     &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   /* Sparse (PRT) images get no metadata surfaces at all. */
   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}
608
609static inline unsigned
610si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
611{
612   if (stencil)
613      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
614   else
615      return plane->surface.u.legacy.tiling_index[level];
616}
617
618static unsigned
619radv_map_swizzle(unsigned swizzle)
620{
621   switch (swizzle) {
622   case PIPE_SWIZZLE_Y:
623      return V_008F0C_SQ_SEL_Y;
624   case PIPE_SWIZZLE_Z:
625      return V_008F0C_SQ_SEL_Z;
626   case PIPE_SWIZZLE_W:
627      return V_008F0C_SQ_SEL_W;
628   case PIPE_SWIZZLE_0:
629      return V_008F0C_SQ_SEL_0;
630   case PIPE_SWIZZLE_1:
631      return V_008F0C_SQ_SEL_1;
632   default: /* PIPE_SWIZZLE_X */
633      return V_008F0C_SQ_SEL_X;
634   }
635}
636
637static void
638radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
639                     enum pipe_swizzle swizzle[4])
640{
641   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
642      /* 64-bit formats only support storage images and storage images
643       * require identity component mappings. We use 32-bit
644       * instructions to access 64-bit images, so we need a special
645       * case here.
646       *
647       * The zw components are 1,0 so that they can be easily be used
648       * by loads to create the w component, which has to be 0 for
649       * NULL descriptors.
650       */
651      swizzle[0] = PIPE_SWIZZLE_X;
652      swizzle[1] = PIPE_SWIZZLE_Y;
653      swizzle[2] = PIPE_SWIZZLE_1;
654      swizzle[3] = PIPE_SWIZZLE_0;
655   } else if (!mapping) {
656      for (unsigned i = 0; i < 4; i++)
657         swizzle[i] = desc->swizzle[i];
658   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
659      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
660                                             PIPE_SWIZZLE_1};
661      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
662   } else {
663      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
664   }
665}
666
/* Fill the 4-dword typed buffer descriptor for [offset, offset+range) of
 * 'buffer' viewed as 'vk_format', writing the result into 'state'.
 */
static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   /* Element stride in bytes, taken from the format's block size. */
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

   /* Dwords 0-1: 48-bit base address plus the element stride. */
   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

   /* NUM_RECORDS is in elements except on GFX8, where it stays in bytes. */
   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      /* GFX10+ uses a unified format field from the gfx10 format table. */
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       *  - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       *  - 1: index >= NUM_RECORDS
       *  - 2: NUM_RECORDS == 0
       *  - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *       else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      /* Pre-GFX10 splits the format into separate num/data format fields. */
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}
721
/* Patch the "mutable" fields of an image descriptor in 'state': base
 * address, tiling/swizzle mode, pitch (pre-GFX10), and the DCC / TC-compat
 * HTILE metadata address and compression enables. The remaining descriptor
 * dwords must already be initialized; this only clears and rewrites the
 * affected bitfields for the generation in use.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   /* Compute the plane's base VA; pre-GFX9 addresses are per mip level. */
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

   /* Dwords 0-1: base address in 256-byte units (hence the >> 8). */
   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      /* Compute the metadata VA for DCC or TC-compatible HTILE. */
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

         /* Encode the tile swizzle in the low (alignment) bits of the VA. */
         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         /* Color surfaces carry their alignment flags in the DCC info. */
         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         /* Color surfaces carry their alignment flags in the DCC info. */
         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}
836
837static unsigned
838radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
839             unsigned nr_samples, bool is_storage_image, bool gfx9)
840{
841   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
842      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
843
844   /* GFX9 allocates 1D textures as 2D. */
845   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
846      image_type = VK_IMAGE_TYPE_2D;
847   switch (image_type) {
848   case VK_IMAGE_TYPE_1D:
849      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
850   case VK_IMAGE_TYPE_2D:
851      if (nr_samples > 1)
852         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
853      else
854         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
855   case VK_IMAGE_TYPE_3D:
856      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
857         return V_008F1C_SQ_RSRC_IMG_3D;
858      else
859         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
860   default:
861      unreachable("illegal image type");
862   }
863}
864
865static unsigned
866gfx9_border_color_swizzle(const struct util_format_description *desc)
867{
868   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
869
870   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
871      /* For the pre-defined border color values (white, opaque
872       * black, transparent black), the only thing that matters is
873       * that the alpha channel winds up in the correct place
874       * (because the RGB channels are all the same) so either of
875       * these enumerations will work.
876       */
877      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
878         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
879      else
880         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
881   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
882      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
883         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
884      else
885         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
886   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
887      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
888   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
889      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
890   }
891
892   return bc_swizzle;
893}
894
895bool
896vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
897{
898   const struct util_format_description *desc = vk_format_description(format);
899
900   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
901      return desc->swizzle[3] == PIPE_SWIZZLE_X;
902
903   return radv_translate_colorswap(format, false) <= 1;
904}
905/**
906 * Build the sampler view descriptor for a texture (GFX10).
907 */
908static void
909gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
910                              bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
911                              const VkComponentMapping *mapping, unsigned first_level,
912                              unsigned last_level, unsigned first_layer, unsigned last_layer,
913                              unsigned width, unsigned height, unsigned depth, uint32_t *state,
914                              uint32_t *fmask_state)
915{
916   const struct util_format_description *desc;
917   enum pipe_swizzle swizzle[4];
918   unsigned img_format;
919   unsigned type;
920
921   desc = vk_format_description(vk_format);
922   img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
923
924   radv_compose_swizzle(desc, mapping, swizzle);
925
926   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
927                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
928   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
929      height = 1;
930      depth = image->info.array_size;
931   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
932      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
933         depth = image->info.array_size;
934   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
935      depth = image->info.array_size / 6;
936
937   state[0] = 0;
938   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
939   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
940              S_00A008_RESOURCE_LEVEL(1);
941   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
942              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
943              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
944              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
945              S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
946              S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
947                                                          : last_level) |
948              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
949   /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
950    * to know the total number of layers.
951    */
952   state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
953              S_00A010_BASE_ARRAY(first_layer);
954   state[5] = S_00A014_ARRAY_PITCH(0) |
955              S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
956                                                       : image->info.levels - 1) |
957              S_00A014_PERF_MOD(4);
958   state[6] = 0;
959   state[7] = 0;
960
961   if (radv_dcc_enabled(image, first_level)) {
962      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
963                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
964                     image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
965                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
966   }
967
968   if (radv_image_get_iterate256(device, image)) {
969      state[6] |= S_00A018_ITERATE_256(1);
970   }
971
972   /* Initialize the sampler view for FMASK. */
973   if (fmask_state) {
974      if (radv_image_has_fmask(image)) {
975         uint64_t gpu_address = radv_buffer_get_va(image->bo);
976         uint32_t format;
977         uint64_t va;
978
979         assert(image->plane_count == 1);
980
981         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
982
983         switch (image->info.samples) {
984         case 2:
985            format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
986            break;
987         case 4:
988            format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
989            break;
990         case 8:
991            format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
992            break;
993         default:
994            unreachable("invalid nr_samples");
995         }
996
997         fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
998         fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
999                          S_00A004_WIDTH_LO(width - 1);
1000         fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
1001                          S_00A008_RESOURCE_LEVEL(1);
1002         fmask_state[3] =
1003            S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1004            S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1005            S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
1006            S_00A00C_TYPE(
1007               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1008         fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
1009         fmask_state[5] = 0;
1010         fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
1011         fmask_state[7] = 0;
1012
1013         if (radv_image_is_tc_compat_cmask(image)) {
1014            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1015
1016            fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
1017            fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
1018            fmask_state[7] |= va >> 16;
1019         }
1020      } else
1021         memset(fmask_state, 0, 8 * 4);
1022   }
1023}
1024
1025/**
1026 * Build the sampler view descriptor for a texture (SI-GFX9)
1027 */
1028static void
1029si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1030                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1031                           const VkComponentMapping *mapping, unsigned first_level,
1032                           unsigned last_level, unsigned first_layer, unsigned last_layer,
1033                           unsigned width, unsigned height, unsigned depth, uint32_t *state,
1034                           uint32_t *fmask_state)
1035{
1036   const struct util_format_description *desc;
1037   enum pipe_swizzle swizzle[4];
1038   int first_non_void;
1039   unsigned num_format, data_format, type;
1040
1041   desc = vk_format_description(vk_format);
1042
1043   radv_compose_swizzle(desc, mapping, swizzle);
1044
1045   first_non_void = vk_format_get_first_non_void_channel(vk_format);
1046
1047   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
1048   if (num_format == ~0) {
1049      num_format = 0;
1050   }
1051
1052   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
1053   if (data_format == ~0) {
1054      data_format = 0;
1055   }
1056
1057   /* S8 with either Z16 or Z32 HTILE need a special format. */
1058   if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
1059       radv_image_is_tc_compat_htile(image)) {
1060      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
1061         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
1062      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
1063         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
1064   }
1065   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
1066                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
1067   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
1068      height = 1;
1069      depth = image->info.array_size;
1070   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
1071      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
1072         depth = image->info.array_size;
1073   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
1074      depth = image->info.array_size / 6;
1075
1076   state[0] = 0;
1077   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
1078   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
1079   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
1080               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
1081               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
1082               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1083               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
1084               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
1085                                                           : last_level) |
1086               S_008F1C_TYPE(type));
1087   state[4] = 0;
1088   state[5] = S_008F24_BASE_ARRAY(first_layer);
1089   state[6] = 0;
1090   state[7] = 0;
1091
1092   if (device->physical_device->rad_info.chip_class == GFX9) {
1093      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
1094
1095      /* Depth is the last accessible layer on Gfx9.
1096       * The hw doesn't need to know the total number of layers.
1097       */
1098      if (type == V_008F1C_SQ_RSRC_IMG_3D)
1099         state[4] |= S_008F20_DEPTH(depth - 1);
1100      else
1101         state[4] |= S_008F20_DEPTH(last_layer);
1102
1103      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1104      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
1105                                                           : image->info.levels - 1);
1106   } else {
1107      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1108      state[4] |= S_008F20_DEPTH(depth - 1);
1109      state[5] |= S_008F24_LAST_ARRAY(last_layer);
1110   }
1111   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
1112       image->planes[0].surface.meta_offset) {
1113      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1114   } else {
1115      /* The last dword is unused by hw. The shader uses it to clear
1116       * bits in the first dword of sampler state.
1117       */
1118      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1119         if (first_level == last_level)
1120            state[7] = C_008F30_MAX_ANISO_RATIO;
1121         else
1122            state[7] = 0xffffffff;
1123      }
1124   }
1125
1126   /* Initialize the sampler view for FMASK. */
1127   if (fmask_state) {
1128      if (radv_image_has_fmask(image)) {
1129         uint32_t fmask_format;
1130         uint64_t gpu_address = radv_buffer_get_va(image->bo);
1131         uint64_t va;
1132
1133         assert(image->plane_count == 1);
1134
1135         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1136
1137         if (device->physical_device->rad_info.chip_class == GFX9) {
1138            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1139            switch (image->info.samples) {
1140            case 2:
1141               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
1142               break;
1143            case 4:
1144               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
1145               break;
1146            case 8:
1147               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
1148               break;
1149            default:
1150               unreachable("invalid nr_samples");
1151            }
1152         } else {
1153            switch (image->info.samples) {
1154            case 2:
1155               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1156               break;
1157            case 4:
1158               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1159               break;
1160            case 8:
1161               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1162               break;
1163            default:
1164               assert(0);
1165               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1166            }
1167            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1168         }
1169
1170         fmask_state[0] = va >> 8;
1171         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1172         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
1173                          S_008F14_NUM_FORMAT(num_format);
1174         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
1175         fmask_state[3] =
1176            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1177            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1178            S_008F1C_TYPE(
1179               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1180         fmask_state[4] = 0;
1181         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1182         fmask_state[6] = 0;
1183         fmask_state[7] = 0;
1184
1185         if (device->physical_device->rad_info.chip_class == GFX9) {
1186            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
1187            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1188                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
1189            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
1190
1191            if (radv_image_is_tc_compat_cmask(image)) {
1192               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1193
1194               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1195               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1196               fmask_state[7] |= va >> 8;
1197            }
1198         } else {
1199            fmask_state[3] |=
1200               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
1201            fmask_state[4] |=
1202               S_008F20_DEPTH(depth - 1) |
1203               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
1204            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1205
1206            if (radv_image_is_tc_compat_cmask(image)) {
1207               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1208
1209               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1210               fmask_state[7] |= va >> 8;
1211            }
1212         }
1213      } else
1214         memset(fmask_state, 0, 8 * 4);
1215   }
1216}
1217
1218static void
1219radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1220                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1221                             const VkComponentMapping *mapping, unsigned first_level,
1222                             unsigned last_level, unsigned first_layer, unsigned last_layer,
1223                             unsigned width, unsigned height, unsigned depth, uint32_t *state,
1224                             uint32_t *fmask_state)
1225{
1226   if (device->physical_device->rad_info.chip_class >= GFX10) {
1227      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1228                                    first_level, last_level, first_layer, last_layer, width, height,
1229                                    depth, state, fmask_state);
1230   } else {
1231      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1232                                 first_level, last_level, first_layer, last_layer, width, height,
1233                                 depth, state, fmask_state);
1234   }
1235}
1236
/* Serialize a full-image texture descriptor into the opaque UMD metadata
 * stored with the BO, so other processes can interpret the image layout.
 */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   /* Zero-initialized: all components use VK_COMPONENT_SWIZZLE_IDENTITY. */
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   /* Build a descriptor covering every mip level and layer of plane 0. */
   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, desc, NULL);

   /* Patch in the address/tiling fields for base level 0. */
   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}
1258
1259void
1260radv_init_metadata(struct radv_device *device, struct radv_image *image,
1261                   struct radeon_bo_metadata *metadata)
1262{
1263   struct radeon_surf *surface = &image->planes[0].surface;
1264
1265   memset(metadata, 0, sizeof(*metadata));
1266
1267   if (device->physical_device->rad_info.chip_class >= GFX9) {
1268      uint64_t dcc_offset =
1269         image->offset +
1270         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
1271      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
1272      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
1273      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
1274      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
1275      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
1276      metadata->u.gfx9.dcc_max_compressed_block_size =
1277         surface->u.gfx9.color.dcc.max_compressed_block_size;
1278      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1279   } else {
1280      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
1281                                        ? RADEON_LAYOUT_TILED
1282                                        : RADEON_LAYOUT_LINEAR;
1283      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
1284                                        ? RADEON_LAYOUT_TILED
1285                                        : RADEON_LAYOUT_LINEAR;
1286      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1287      metadata->u.legacy.bankw = surface->u.legacy.bankw;
1288      metadata->u.legacy.bankh = surface->u.legacy.bankh;
1289      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1290      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1291      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1292      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1293      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1294   }
1295   radv_query_opaque_metadata(device, image, metadata);
1296}
1297
/* Override the first plane's base offset and row stride with externally
 * supplied values (NOTE(review): presumably for imported/exported memory —
 * the actual layout rewriting happens in ac_surface).
 */
void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}
1305
1306static void
1307radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1308                                     const struct radv_image *image, struct radeon_surf *surf)
1309{
1310   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
1311       image->info.depth > 1 || radv_image_has_dcc(image) ||
1312       !radv_image_use_fast_clear_for_image(device, image) ||
1313       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
1314      return;
1315
1316   assert(image->info.storage_samples == 1);
1317
1318   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
1319   surf->total_size = surf->cmask_offset + surf->cmask_size;
1320   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
1321}
1322
1323static void
1324radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1325{
1326   /* images with modifiers can be potentially imported */
1327   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
1328      return;
1329
1330   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
1331      image->fce_pred_offset = image->size;
1332      image->size += 8 * image->info.levels;
1333   }
1334
1335   if (radv_image_use_dcc_predication(device, image)) {
1336      image->dcc_pred_offset = image->size;
1337      image->size += 8 * image->info.levels;
1338   }
1339
1340   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
1341       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
1342      image->clear_value_offset = image->size;
1343      image->size += 8 * image->info.levels;
1344   }
1345
1346   if (radv_image_is_tc_compat_htile(image) &&
1347       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1348      /* Metadata for the TC-compatible HTILE hardware bug which
1349       * have to be fixed by updating ZRANGE_PRECISION when doing
1350       * fast depth clears to 0.0f.
1351       */
1352      image->tc_compat_zrange_offset = image->size;
1353      image->size += image->info.levels * 4;
1354   }
1355}
1356
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 *
 * The heuristic compares the per-pixel data footprint (bpp * samples)
 * against the pipe configuration from GB_ADDR_CONFIG; only images with
 * shader-readable metadata (TC-compat HTILE, DCC, TC-compat CMASK) can be
 * affected. GFX10+ only.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   /* Check every plane of multi-planar formats. */
   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* Pre-GFX10.3: large depth arrays use a fixed effective bpp and
          * the sum is clamped to 6.
          */
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         /* Only TC-compatible HTILE is read through the shader path. */
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
1407
1408static bool
1409radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
1410{
1411   if (device->physical_device->rad_info.chip_class >= GFX10) {
1412      return !device->physical_device->rad_info.tcc_rb_non_coherent &&
1413             !radv_image_is_pipe_misaligned(device, image);
1414   } else if (device->physical_device->rad_info.chip_class == GFX9) {
1415      if (image->info.samples == 1 &&
1416          (image->usage &
1417           (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1418          !vk_format_has_stencil(image->vk_format)) {
1419         /* Single-sample color and single-sample depth
1420          * (not stencil) are coherent with shaders on
1421          * GFX9.
1422          */
1423         return true;
1424      }
1425   }
1426
1427   return false;
1428}
1429
1430/**
1431 * Determine if the given image can be fast cleared.
1432 */
1433static bool
1434radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
1435{
1436   if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1437      return false;
1438
1439   if (vk_format_is_color(image->vk_format)) {
1440      if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1441         return false;
1442
1443      /* RB+ doesn't work with CMASK fast clear on Stoney. */
1444      if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
1445         return false;
1446   } else {
1447      if (!radv_image_has_htile(image))
1448         return false;
1449   }
1450
1451   /* Do not fast clears 3D images. */
1452   if (image->type == VK_IMAGE_TYPE_3D)
1453      return false;
1454
1455   return true;
1456}
1457
1458/**
1459 * Determine if the given image can be fast cleared using comp-to-single.
1460 */
1461static bool
1462radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
1463{
1464   /* comp-to-single is only available for GFX10+. */
1465   if (device->physical_device->rad_info.chip_class < GFX10)
1466      return false;
1467
1468   /* If the image can't be fast cleared, comp-to-single can't be used. */
1469   if (!radv_image_can_fast_clear(device, image))
1470      return false;
1471
1472   /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
1473   if (!radv_image_has_dcc(image))
1474      return false;
1475
1476   /* It seems 8bpp and 16bpp require RB+ to work. */
1477   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format);
1478   if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
1479      return false;
1480
1481   return true;
1482}
1483
1484static void
1485radv_image_reset_layout(struct radv_image *image)
1486{
1487   image->size = 0;
1488   image->alignment = 1;
1489
1490   image->tc_compatible_cmask = 0;
1491   image->fce_pred_offset = image->dcc_pred_offset = 0;
1492   image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1493
1494   for (unsigned i = 0; i < image->plane_count; ++i) {
1495      VkFormat format = vk_format_get_plane_format(image->vk_format, i);
1496      if (vk_format_has_depth(format))
1497         format = vk_format_depth_only(format);
1498
1499      uint64_t flags = image->planes[i].surface.flags;
1500      uint64_t modifier = image->planes[i].surface.modifier;
1501      memset(image->planes + i, 0, sizeof(image->planes[i]));
1502
1503      image->planes[i].surface.flags = flags;
1504      image->planes[i].surface.modifier = modifier;
1505      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1506      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1507      image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1508
1509      /* align byte per element on dword */
1510      if (image->planes[i].surface.bpe == 3) {
1511         image->planes[i].surface.bpe = 4;
1512      }
1513   }
1514}
1515
/**
 * Compute the memory layout of every plane of \p image: initialize each
 * radeon_surf, apply external metadata or explicit DRM-modifier layouts,
 * and accumulate the image's total size and alignment.
 *
 * \param mod_info  explicit modifier plane layouts, or NULL when none.
 * \return VK_SUCCESS, or an error when external metadata / an explicit
 *         modifier layout is invalid.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   /* Wipe previous layout state so this function can run more than once
    * (e.g. delayed Android layout). */
   radv_image_reset_layout(image);

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      /* Subsampled planes (YCbCr) can have smaller dimensions than plane 0. */
      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      /* Multi-plane and explicitly metadata-less images never get
       * DCC/FMASK/HTILE. */
      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      /* The late DCC decision can only be made after surface_init filled in
       * the surface; only plane 0 can have DCC here. */
      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Imported BO metadata overrides the computed layout (non-modifier path). */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* Explicit modifier layout: the row pitch must be a non-zero
          * multiple of the element size. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         /* Pack planes back-to-back, honoring each plane's alignment. */
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (image->plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         /* Memory plane 0 was checked via the offset override above; the
          * remaining (metadata) planes must match what the app provided. */
         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   /* Reserve space for clear values / predication metadata at the end. */
   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
1616
1617static void
1618radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1619                   struct radv_image *image)
1620{
1621   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1622      device->ws->buffer_destroy(device->ws, image->bo);
1623
1624   if (image->owned_memory != VK_NULL_HANDLE) {
1625      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1626      radv_free_memory(device, pAllocator, mem);
1627   }
1628
1629   vk_object_base_finish(&image->base);
1630   vk_free2(&device->vk.alloc, pAllocator, image);
1631}
1632
1633static void
1634radv_image_print_info(struct radv_device *device, struct radv_image *image)
1635{
1636   fprintf(stderr, "Image:\n");
1637   fprintf(stderr,
1638           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
1639           "width=%" PRIu32 ", height=%" PRIu32 ", "
1640           "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
1641           image->size, image->alignment, image->info.width, image->info.height, image->offset,
1642           image->info.array_size);
1643   for (unsigned i = 0; i < image->plane_count; ++i) {
1644      const struct radv_image_plane *plane = &image->planes[i];
1645      const struct radeon_surf *surf = &plane->surface;
1646      const struct util_format_description *desc = vk_format_description(plane->format);
1647      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
1648                                                    &plane->surface, 0, 0);
1649
1650      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
1651
1652      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
1653   }
1654}
1655
1656static uint64_t
1657radv_select_modifier(const struct radv_device *dev, VkFormat format,
1658                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1659{
1660   const struct radv_physical_device *pdev = dev->physical_device;
1661   unsigned mod_count;
1662
1663   assert(mod_list->drmFormatModifierCount);
1664
1665   /* We can allow everything here as it does not affect order and the application
1666    * is only allowed to specify modifiers that we support. */
1667   const struct ac_modifier_options modifier_options = {
1668      .dcc = true,
1669      .dcc_retile = true,
1670   };
1671
1672   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1673                              &mod_count, NULL);
1674
1675   uint64_t *mods = calloc(mod_count, sizeof(*mods));
1676
1677   /* If allocations fail, fall back to a dumber solution. */
1678   if (!mods)
1679      return mod_list->pDrmFormatModifiers[0];
1680
1681   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1682                              &mod_count, mods);
1683
1684   for (unsigned i = 0; i < mod_count; ++i) {
1685      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1686         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1687            free(mods);
1688            return mod_list->pDrmFormatModifiers[j];
1689         }
1690      }
1691   }
1692   unreachable("App specified an invalid modifier");
1693}
1694
/**
 * Create a radv_image: allocate the object, fill in creation-time state,
 * select a DRM modifier if requested, and compute the layout (unless the
 * layout is delayed for Android hardware buffers).
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY, or a layout error from
 *         radv_image_create_layout().
 */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   /* The per-plane array is allocated inline after the image struct. */
   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign queue families make the image visible to every
       * internal queue family; otherwise record each listed family. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   /* NOTE: pointer-to-flag conversion; shareable means "has external memory info". */
   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      /* Per-device counter used to spread MRT surfaces across memory banks. */
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   /* Surface flags/modifier must be set before the layout is computed. */
   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   /* Android hardware buffers: the layout is computed later at bind time. */
   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      /* Sparse images get a virtual BO that pages are later bound into. */
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}
1812
/**
 * Fill one plane's texture (or storage-image) descriptor for an image view,
 * including the FMASK descriptor and the mutable (per-mip) fields.
 *
 * \param is_storage_image      fill iview->storage_descriptor instead of
 *                              iview->descriptor.
 * \param disable_compression   force-disable DCC/HTILE access in the descriptor.
 * \param enable_compression    force-enable compressed storage access.
 * \param plane_id              plane of the image being described.
 * \param descriptor_plane_id   slot inside the descriptor array to fill.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   /* Convert the surface block width from image-format units to
    * view-format units. */
   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   /* On GFX9+ the base mip is baked into the descriptor; older chips
    * handle it through the mutable fields below. */
   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);

   /* Legacy (pre-GFX9-style) surfaces need the base level's layout info. */
   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }

   /* Storage descriptors disable compression unless DCC image stores work
    * or the caller explicitly opted in. */
   bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
   if (is_storage_image && !(enable_write_compression || enable_compression))
      disable_compression = true;
   si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
                                  iview->base_mip, blk_w, is_stencil, is_storage_image,
                                  disable_compression, enable_write_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}
1865
1866static unsigned
1867radv_plane_from_aspect(VkImageAspectFlags mask)
1868{
1869   switch (mask) {
1870   case VK_IMAGE_ASPECT_PLANE_1_BIT:
1871   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1872      return 1;
1873   case VK_IMAGE_ASPECT_PLANE_2_BIT:
1874   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1875      return 2;
1876   case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1877      return 3;
1878   default:
1879      return 0;
1880   }
1881}
1882
1883VkFormat
1884radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1885{
1886   switch (mask) {
1887   case VK_IMAGE_ASPECT_PLANE_0_BIT:
1888      return image->planes[0].format;
1889   case VK_IMAGE_ASPECT_PLANE_1_BIT:
1890      return image->planes[1].format;
1891   case VK_IMAGE_ASPECT_PLANE_2_BIT:
1892      return image->planes[2].format;
1893   case VK_IMAGE_ASPECT_STENCIL_BIT:
1894      return vk_format_stencil_only(image->vk_format);
1895   case VK_IMAGE_ASPECT_DEPTH_BIT:
1896      return vk_format_depth_only(image->vk_format);
1897   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1898      return vk_format_depth_only(image->vk_format);
1899   default:
1900      return image->vk_format;
1901   }
1902}
1903
1904/**
1905 * Determine if the given image view can be fast cleared.
1906 */
1907static bool
1908radv_image_view_can_fast_clear(const struct radv_device *device,
1909                               const struct radv_image_view *iview)
1910{
1911   struct radv_image *image;
1912
1913   if (!iview)
1914      return false;
1915   image = iview->image;
1916
1917   /* Only fast clear if the image itself can be fast cleared. */
1918   if (!radv_image_can_fast_clear(device, image))
1919      return false;
1920
1921   /* Only fast clear if all layers are bound. */
1922   if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
1923      return false;
1924
1925   /* Only fast clear if the view covers the whole image. */
1926   if (!radv_image_extent_compare(image, &iview->extent))
1927      return false;
1928
1929   return true;
1930}
1931
/**
 * Initialize an image view: record the subresource range, compute the view
 * extent (including the tricky compressed-to-uncompressed mip-extent fixup
 * on GFX9+), and build the sampled + storage descriptors for each plane.
 */
void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
   uint32_t plane_count = 1;

   vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW);

   /* Sanity-check that the requested layers fit in the image. For 3D
    * images the "layers" are the depth slices of the base mip. */
   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   /* Split out the right aspect. Note that for internal meta code we sometimes
    * use an equivalent color format for the aspect so we first have to check
    * if we actually got depth/stencil formats. */
   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      if (vk_format_has_stencil(iview->vk_format))
         iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      if (vk_format_has_depth(iview->vk_format))
         iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

   /* GFX9+ descriptors take the base-level extent (the HW minifies);
    * older chips take the extent of the selected base mip. */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D){
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D){
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   if (iview->vk_format != image->planes[iview->plane_id].format) {
      /* Block-incompatible view format: rescale the extent from image
       * blocks to view blocks. */
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
      unsigned img_bh = vk_format_get_blockheight(image->vk_format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       *              Uncompressed pixels   Compressed block sizes (4x4)
       *      mip0:       22 x 22                   6 x 6
       *      mip1:       11 x 11                   3 x 3
       *      mip2:        5 x  5                   2 x 2
       *      mip3:        2 x  2                   1 x 1
       *      mip4:        1 x  1                   1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
       * the HW is calculating the degradation of the block sizes down the mip-chain as follows
       * (straight-up divide-by-two integer math): mip0:  6x6 mip1:  3x3 mip2:  1x1 mip3:  1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by calculating the base mip's width and height, then convert
       * that, and round it back up to get the level 0 size. Clamp the
       * converted size between the original values, and the physical extent
       * of the base mipmap.
       *
       * On GFX10 we have to take care to not go over the physical extent
       * of the base mipmap as otherwise the GPU computes a different layout.
       * Note that the GPU does use the same base-mip dimensions for both a
       * block compatible format and the compressed format, so even if we take
       * the plain converted dimensions the physical layout is correct.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
         /* If we have multiple levels in the view we should ideally take the last level,
          * but the mip calculation has a max(..., 1) so walking back to the base mip in an
          * useful way is hard. */
         if (iview->level_count > 1) {
            iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
            iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
         } else {
            /* Single-level view: compute the selected mip's size in view
             * blocks, scale it back up to a level-0 size, then clamp as
             * described above. */
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.base_mip_width);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.base_mip_height);
         }
      }
   }

   iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);

   /* A COLOR view of a multi-planar image needs one descriptor per plane. */
   if (vk_format_get_plane_count(image->vk_format) > 1 &&
       iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
      plane_count = vk_format_get_plane_count(iview->vk_format);
   }

   /* Build both a sampled-image and a storage-image descriptor per plane. */
   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
   }
}
2077
/* Tear down an image view; views hold no resources beyond the base object. */
void
radv_image_view_finish(struct radv_image_view *iview)
{
   vk_object_base_finish(&iview->base);
}
2083
/**
 * Return whether HTILE stays compressed for the given depth/stencil layout.
 *
 * \param queue_mask  mask of RADV queue families that may access the image.
 */
bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
                                VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
      /* Attachment layouts always render with HTILE when it exists. */
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      /* Transfer dst stays compressed for TC-compat HTILE, or when only the
       * gfx queue (which can handle compressed writes) touches the image. */
      return radv_image_is_tc_compat_htile(image) ||
             (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because this allows compression and this reduces
       * the number of decompressions from/to GENERAL.
       */
      /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
       * queue is likely broken for eg. depth/stencil copies.
       */
      if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
          !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
         return true;
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
      break;
   default:
      /* Any other layout (shader read, transfer src, ...) only keeps HTILE
       * when the texture unit can decode it (TC-compat). */
      return radv_image_is_tc_compat_htile(image);
   }
}
2130
2131bool
2132radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2133                           unsigned level, VkImageLayout layout, bool in_render_loop,
2134                           unsigned queue_mask)
2135{
2136   if (radv_dcc_enabled(image, level) &&
2137       !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
2138      return false;
2139
2140   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2141      return false;
2142
2143   if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
2144      return false;
2145
2146   /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
2147    * images can only be fast-cleared if comp-to-single is supported because we don't yet support
2148    * FCE on the compute queue.
2149    */
2150   return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
2151}
2152
2153bool
2154radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2155                           unsigned level, VkImageLayout layout, bool in_render_loop,
2156                           unsigned queue_mask)
2157{
2158   if (!radv_dcc_enabled(image, level))
2159      return false;
2160
2161   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
2162      return true;
2163
2164   /* If the image is read-only, we can always just keep it compressed */
2165   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2166      return true;
2167
2168   /* Don't compress compute transfer dst when image stores are not supported. */
2169   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2170       (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
2171      return false;
2172
2173   return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
2174}
2175
2176bool
2177radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2178                             VkImageLayout layout, unsigned queue_mask)
2179{
2180   if (!radv_image_has_fmask(image))
2181      return false;
2182
2183   /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
2184    * expanded before.
2185    */
2186   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2187       (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
2188      return false;
2189
2190   /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
2191   return layout != VK_IMAGE_LAYOUT_GENERAL &&
2192          (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
2193}
2194
2195unsigned
2196radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
2197{
2198   if (!image->exclusive)
2199      return image->queue_family_mask;
2200   if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
2201      return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
2202   if (family == VK_QUEUE_FAMILY_IGNORED)
2203      return 1u << queue_family;
2204   return 1u << family;
2205}
2206
2207VkResult
2208radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
2209                 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
2210{
2211#ifdef ANDROID
2212   const VkNativeBufferANDROID *gralloc_info =
2213      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
2214
2215   if (gralloc_info)
2216      return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
2217#endif
2218
2219   const struct wsi_image_create_info *wsi_info =
2220      vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
2221   bool scanout = wsi_info && wsi_info->scanout;
2222
2223   return radv_image_create(device,
2224                            &(struct radv_image_create_info){
2225                               .vk_info = pCreateInfo,
2226                               .scanout = scanout,
2227                            },
2228                            pAllocator, pImage);
2229}
2230
2231void
2232radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
2233{
2234   RADV_FROM_HANDLE(radv_device, device, _device);
2235   RADV_FROM_HANDLE(radv_image, image, _image);
2236
2237   if (!image)
2238      return;
2239
2240   radv_destroy_image(device, pAllocator, image);
2241}
2242
void
radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
                               const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   /* Multi-planar formats (e.g. YCbCr) select the plane from the aspect mask;
    * single-plane images always use plane 0.
    */
   unsigned plane_id = 0;
   if (vk_format_get_plane_count(image->vk_format) > 1)
      plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      /* For modifier images the aspect selects a *memory* plane, which may be
       * a metadata plane of the format plane rather than a format plane itself.
       */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

      /* Modifier images are limited to a single mip level and array layer. */
      assert(level == 0);
      assert(layer == 0);

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    surface, mem_plane_id, 0);
      pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
                                                      surface, mem_plane_id);
      /* Single level/layer, so array and depth pitches are meaningless. */
      pLayout->arrayPitch = 0;
      pLayout->depthPitch = 0;
      pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* GFX9+ only tracks per-level offsets for linear surfaces; tiled
       * surfaces use 0 here (offsets are handled by the addressing HW).
       */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, layer) +
                        level_offset;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         /* Linear surfaces can have a distinct pitch per mip level. */
         uint32_t pitch =
            surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      /* 3D images report the size of the whole mip level (all depth slices). */
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      /* Pre-GFX9 stores per-level layout directly: offsets in 256-byte units,
       * slice sizes in dwords — hence the *256 / *4 scaling below.
       */
      pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                        (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}
2310
2311VkResult
2312radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
2313                                            VkImageDrmFormatModifierPropertiesEXT *pProperties)
2314{
2315   RADV_FROM_HANDLE(radv_image, image, _image);
2316
2317   pProperties->drmFormatModifier = image->planes[0].surface.modifier;
2318   return VK_SUCCESS;
2319}
2320
2321VkResult
2322radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
2323                     const VkAllocationCallbacks *pAllocator, VkImageView *pView)
2324{
2325   RADV_FROM_HANDLE(radv_device, device, _device);
2326   struct radv_image_view *view;
2327
2328   view =
2329      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2330   if (view == NULL)
2331      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2332
2333   radv_image_view_init(view, device, pCreateInfo, NULL);
2334
2335   *pView = radv_image_view_to_handle(view);
2336
2337   return VK_SUCCESS;
2338}
2339
2340void
2341radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
2342{
2343   RADV_FROM_HANDLE(radv_device, device, _device);
2344   RADV_FROM_HANDLE(radv_image_view, iview, _iview);
2345
2346   if (!iview)
2347      return;
2348
2349   radv_image_view_finish(iview);
2350   vk_free2(&device->vk.alloc, pAllocator, iview);
2351}
2352
2353void
2354radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2355                      const VkBufferViewCreateInfo *pCreateInfo)
2356{
2357   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
2358
2359   vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
2360
2361   view->bo = buffer->bo;
2362   view->range =
2363      pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
2364   view->vk_format = pCreateInfo->format;
2365
2366   radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
2367                               view->state);
2368}
2369
2370void
2371radv_buffer_view_finish(struct radv_buffer_view *view)
2372{
2373   vk_object_base_finish(&view->base);
2374}
2375
2376VkResult
2377radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
2378                      const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
2379{
2380   RADV_FROM_HANDLE(radv_device, device, _device);
2381   struct radv_buffer_view *view;
2382
2383   view =
2384      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2385   if (!view)
2386      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2387
2388   radv_buffer_view_init(view, device, pCreateInfo);
2389
2390   *pView = radv_buffer_view_to_handle(view);
2391
2392   return VK_SUCCESS;
2393}
2394
2395void
2396radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
2397                       const VkAllocationCallbacks *pAllocator)
2398{
2399   RADV_FROM_HANDLE(radv_device, device, _device);
2400   RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
2401
2402   if (!view)
2403      return;
2404
2405   radv_buffer_view_finish(view);
2406   vk_free2(&device->vk.alloc, pAllocator, view);
2407}
2408