1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28#include "tu_private.h"
29#include "fdl/fd6_format_table.h"
30
31#include "util/debug.h"
32#include "util/u_atomic.h"
33#include "util/format/u_format.h"
34#include "vk_format.h"
35#include "vk_util.h"
36#include "drm-uapi/drm_fourcc.h"
37
38#include "tu_cs.h"
39
40static uint32_t
41tu6_plane_count(VkFormat format)
42{
43   switch (format) {
44   default:
45      return 1;
46   case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
47   case VK_FORMAT_D32_SFLOAT_S8_UINT:
48      return 2;
49   case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
50      return 3;
51   }
52}
53
54static VkFormat
55tu6_plane_format(VkFormat format, uint32_t plane)
56{
57   switch (format) {
58   case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
59      /* note: with UBWC, and Y plane UBWC is different from R8_UNORM */
60      return plane ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
61   case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
62      return VK_FORMAT_R8_UNORM;
63   case VK_FORMAT_D32_SFLOAT_S8_UINT:
64      return plane ? VK_FORMAT_S8_UINT : VK_FORMAT_D32_SFLOAT;
65   default:
66      return format;
67   }
68}
69
70static uint32_t
71tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask)
72{
73   switch (aspect_mask) {
74   default:
75      return 0;
76   case VK_IMAGE_ASPECT_PLANE_1_BIT:
77      return 1;
78   case VK_IMAGE_ASPECT_PLANE_2_BIT:
79      return 2;
80   case VK_IMAGE_ASPECT_STENCIL_BIT:
81      return format == VK_FORMAT_D32_SFLOAT_S8_UINT;
82   }
83}
84
85static void
86compose_swizzle(unsigned char *swiz, const VkComponentMapping *mapping)
87{
88   unsigned char src_swiz[4] = { swiz[0], swiz[1], swiz[2], swiz[3] };
89   VkComponentSwizzle vk_swiz[4] = {
90      mapping->r, mapping->g, mapping->b, mapping->a
91   };
92   for (int i = 0; i < 4; i++) {
93      switch (vk_swiz[i]) {
94      case VK_COMPONENT_SWIZZLE_IDENTITY:
95         swiz[i] = src_swiz[i];
96         break;
97      case VK_COMPONENT_SWIZZLE_R...VK_COMPONENT_SWIZZLE_A:
98         swiz[i] = src_swiz[vk_swiz[i] - VK_COMPONENT_SWIZZLE_R];
99         break;
100      case VK_COMPONENT_SWIZZLE_ZERO:
101         swiz[i] = A6XX_TEX_ZERO;
102         break;
103      case VK_COMPONENT_SWIZZLE_ONE:
104         swiz[i] = A6XX_TEX_ONE;
105         break;
106      default:
107         unreachable("unexpected swizzle");
108      }
109   }
110}
111
112static uint32_t
113tu6_texswiz(const VkComponentMapping *comps,
114            const struct tu_sampler_ycbcr_conversion *conversion,
115            VkFormat format,
116            VkImageAspectFlagBits aspect_mask,
117            bool has_z24uint_s8uint)
118{
119   unsigned char swiz[4] = {
120      A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W,
121   };
122
123   switch (format) {
124   case VK_FORMAT_G8B8G8R8_422_UNORM:
125   case VK_FORMAT_B8G8R8G8_422_UNORM:
126   case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
127   case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
128      swiz[0] = A6XX_TEX_Z;
129      swiz[1] = A6XX_TEX_X;
130      swiz[2] = A6XX_TEX_Y;
131      break;
132   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
133   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
134      /* same hardware format is used for BC1_RGB / BC1_RGBA */
135      swiz[3] = A6XX_TEX_ONE;
136      break;
137   case VK_FORMAT_D24_UNORM_S8_UINT:
138      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
139         if (!has_z24uint_s8uint) {
140            /* using FMT6_8_8_8_8_UINT, so need to pick out the W channel and
141             * swizzle (0,0,1) in the rest (see "Conversion to RGBA").
142             */
143            swiz[0] = A6XX_TEX_W;
144            swiz[1] = A6XX_TEX_ZERO;
145            swiz[2] = A6XX_TEX_ZERO;
146            swiz[3] = A6XX_TEX_ONE;
147         } else {
148            /* using FMT6_Z24_UINT_S8_UINT, which is (d, s, 0, 1), so need to
149             * swizzle away the d.
150             */
151            swiz[0] = A6XX_TEX_Y;
152            swiz[1] = A6XX_TEX_ZERO;
153         }
154      }
155      break;
156   default:
157      break;
158   }
159
160   compose_swizzle(swiz, comps);
161   if (conversion)
162      compose_swizzle(swiz, &conversion->components);
163
164   return A6XX_TEX_CONST_0_SWIZ_X(swiz[0]) |
165          A6XX_TEX_CONST_0_SWIZ_Y(swiz[1]) |
166          A6XX_TEX_CONST_0_SWIZ_Z(swiz[2]) |
167          A6XX_TEX_CONST_0_SWIZ_W(swiz[3]);
168}
169
170void
171tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
172{
173   tu_cs_emit(cs, iview->PITCH);
174   tu_cs_emit(cs, iview->layer_size >> 6);
175   tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
176}
177
178void
179tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
180{
181   tu_cs_emit(cs, iview->stencil_PITCH);
182   tu_cs_emit(cs, iview->stencil_layer_size >> 6);
183   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
184}
185
186void
187tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src)
188{
189   tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
190   /* SP_PS_2D_SRC_PITCH has shifted pitch field */
191   tu_cs_emit(cs, iview->PITCH << (src ? 9 : 0));
192}
193
194void
195tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
196{
197   tu_cs_emit_qw(cs, iview->ubwc_addr + iview->ubwc_layer_size * layer);
198   tu_cs_emit(cs, iview->FLAG_BUFFER_PITCH);
199}
200
201void
202tu_image_view_init(struct tu_image_view *iview,
203                   const VkImageViewCreateInfo *pCreateInfo,
204                   bool has_z24uint_s8uint)
205{
206   TU_FROM_HANDLE(tu_image, image, pCreateInfo->image);
207   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
208   VkFormat format = pCreateInfo->format;
209   VkImageAspectFlagBits aspect_mask = pCreateInfo->subresourceRange.aspectMask;
210
211   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
212      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
213   const struct tu_sampler_ycbcr_conversion *conversion = ycbcr_conversion ?
214      tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
215
216   iview->image = image;
217
218   memset(iview->descriptor, 0, sizeof(iview->descriptor));
219
220   struct fdl_layout *layout =
221      &image->layout[tu6_plane_index(image->vk_format, aspect_mask)];
222
223   uint32_t width = u_minify(layout->width0, range->baseMipLevel);
224   uint32_t height = u_minify(layout->height0, range->baseMipLevel);
225   uint32_t storage_depth = tu_get_layerCount(image, range);
226   if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) {
227      storage_depth = u_minify(image->layout[0].depth0, range->baseMipLevel);
228   }
229
230   uint32_t depth = storage_depth;
231   if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
232       pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
233      /* Cubes are treated as 2D arrays for storage images, so only divide the
234       * depth by 6 for the texture descriptor.
235       */
236      depth /= 6;
237   }
238
239   uint64_t base_addr = image->bo->iova + image->bo_offset +
240      fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
241   uint64_t ubwc_addr = image->bo->iova + image->bo_offset +
242      fdl_ubwc_offset(layout, range->baseMipLevel, range->baseArrayLayer);
243
244   uint32_t pitch = fdl_pitch(layout, range->baseMipLevel);
245   uint32_t ubwc_pitch = fdl_ubwc_pitch(layout, range->baseMipLevel);
246   uint32_t layer_size = fdl_layer_stride(layout, range->baseMipLevel);
247
248   if (aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
249      format = tu6_plane_format(format, tu6_plane_index(format, aspect_mask));
250
251   struct tu_native_format fmt = tu6_format_texture(format, layout->tile_mode);
252   /* note: freedreno layout assumes no TILE_ALL bit for non-UBWC color formats
253    * this means smaller mipmap levels have a linear tile mode.
254    * Depth/stencil formats have non-linear tile mode.
255    */
256   fmt.tile_mode = fdl_tile_mode(layout, range->baseMipLevel);
257
258   bool ubwc_enabled = fdl_ubwc_enabled(layout, range->baseMipLevel);
259
260   bool is_d24s8 = (format == VK_FORMAT_D24_UNORM_S8_UINT ||
261                    format == VK_FORMAT_X8_D24_UNORM_PACK32);
262
263   if (is_d24s8 && ubwc_enabled)
264      fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
265
266   unsigned fmt_tex = fmt.fmt;
267   if (is_d24s8) {
268      if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
269         fmt_tex = FMT6_Z24_UNORM_S8_UINT;
270      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
271         fmt_tex = has_z24uint_s8uint ? FMT6_Z24_UINT_S8_UINT : FMT6_8_8_8_8_UINT;
272      /* TODO: also use this format with storage descriptor ? */
273   }
274
275   iview->descriptor[0] =
276      A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) |
277      COND(vk_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
278      A6XX_TEX_CONST_0_FMT(fmt_tex) |
279      A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(layout->nr_samples)) |
280      A6XX_TEX_CONST_0_SWAP(fmt.swap) |
281      tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask, has_z24uint_s8uint) |
282      A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1);
283   iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
284   iview->descriptor[2] =
285      A6XX_TEX_CONST_2_PITCHALIGN(layout->pitchalign - 6) |
286      A6XX_TEX_CONST_2_PITCH(pitch) |
287      A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType, false));
288   iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(layer_size);
289   iview->descriptor[4] = base_addr;
290   iview->descriptor[5] = (base_addr >> 32) | A6XX_TEX_CONST_5_DEPTH(depth);
291
292   if (layout->tile_all)
293      iview->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL;
294
295   if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM ||
296       format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM) {
297      /* chroma offset re-uses MIPLVLS bits */
298      assert(tu_get_levelCount(image, range) == 1);
299      if (conversion) {
300         if (conversion->chroma_offsets[0] == VK_CHROMA_LOCATION_MIDPOINT)
301            iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_X;
302         if (conversion->chroma_offsets[1] == VK_CHROMA_LOCATION_MIDPOINT)
303            iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_Y;
304      }
305
306      uint64_t base_addr[3];
307
308      iview->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL;
309      if (ubwc_enabled) {
310         iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG;
311         /* no separate ubwc base, image must have the expected layout */
312         for (uint32_t i = 0; i < 3; i++) {
313            base_addr[i] = image->bo->iova + image->bo_offset +
314               fdl_ubwc_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer);
315         }
316      } else {
317         for (uint32_t i = 0; i < 3; i++) {
318            base_addr[i] = image->bo->iova + image->bo_offset +
319               fdl_surface_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer);
320         }
321      }
322
323      iview->descriptor[4] = base_addr[0];
324      iview->descriptor[5] |= base_addr[0] >> 32;
325      iview->descriptor[6] =
326         A6XX_TEX_CONST_6_PLANE_PITCH(fdl_pitch(&image->layout[1], range->baseMipLevel));
327      iview->descriptor[7] = base_addr[1];
328      iview->descriptor[8] = base_addr[1] >> 32;
329      iview->descriptor[9] = base_addr[2];
330      iview->descriptor[10] = base_addr[2] >> 32;
331
332      assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D);
333      return;
334   }
335
336   if (ubwc_enabled) {
337      uint32_t block_width, block_height;
338      fdl6_get_ubwc_blockwidth(layout, &block_width, &block_height);
339
340      iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG;
341      iview->descriptor[7] = ubwc_addr;
342      iview->descriptor[8] = ubwc_addr >> 32;
343      iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2);
344      iview->descriptor[10] |=
345         A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_pitch) |
346         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) |
347         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height)));
348   }
349
350   if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) {
351      iview->descriptor[3] |=
352         A6XX_TEX_CONST_3_MIN_LAYERSZ(layout->slices[image->level_count - 1].size0);
353   }
354
355   iview->SP_PS_2D_SRC_INFO = A6XX_SP_PS_2D_SRC_INFO(
356      .color_format = fmt.fmt,
357      .tile_mode = fmt.tile_mode,
358      .color_swap = fmt.swap,
359      .flags = ubwc_enabled,
360      .srgb = vk_format_is_srgb(format),
361      .samples = tu_msaa_samples(layout->nr_samples),
362      .samples_average = layout->nr_samples > 1 &&
363                           !vk_format_is_int(format) &&
364                           !vk_format_is_depth_or_stencil(format),
365      .unk20 = 1,
366      .unk22 = 1).value;
367   iview->SP_PS_2D_SRC_SIZE =
368      A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height).value;
369
370   /* note: these have same encoding for MRT and 2D (except 2D PITCH src) */
371   iview->PITCH = A6XX_RB_DEPTH_BUFFER_PITCH(pitch).value;
372   iview->FLAG_BUFFER_PITCH = A6XX_RB_DEPTH_FLAG_BUFFER_PITCH(
373      .pitch = ubwc_pitch, .array_pitch = layout->ubwc_layer_size >> 2).value;
374
375   iview->base_addr = base_addr;
376   iview->ubwc_addr = ubwc_addr;
377   iview->layer_size = layer_size;
378   iview->ubwc_layer_size = layout->ubwc_layer_size;
379
380   /* Don't set fields that are only used for attachments/blit dest if COLOR
381    * is unsupported.
382    */
383   if (!tu6_format_color_supported(format))
384      return;
385
386   struct tu_native_format cfmt = tu6_format_color(format, layout->tile_mode);
387   cfmt.tile_mode = fmt.tile_mode;
388
389   if (is_d24s8 && ubwc_enabled)
390      cfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
391
392   memset(iview->storage_descriptor, 0, sizeof(iview->storage_descriptor));
393
394   iview->storage_descriptor[0] =
395      A6XX_IBO_0_FMT(fmt.fmt) |
396      A6XX_IBO_0_TILE_MODE(fmt.tile_mode);
397   iview->storage_descriptor[1] =
398      A6XX_IBO_1_WIDTH(width) |
399      A6XX_IBO_1_HEIGHT(height);
400   iview->storage_descriptor[2] =
401      A6XX_IBO_2_PITCH(pitch) |
402      A6XX_IBO_2_TYPE(tu6_tex_type(pCreateInfo->viewType, true));
403   iview->storage_descriptor[3] = A6XX_IBO_3_ARRAY_PITCH(layer_size);
404
405   iview->storage_descriptor[4] = base_addr;
406   iview->storage_descriptor[5] = (base_addr >> 32) | A6XX_IBO_5_DEPTH(storage_depth);
407
408   if (ubwc_enabled) {
409      iview->storage_descriptor[3] |= A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27;
410      iview->storage_descriptor[7] |= ubwc_addr;
411      iview->storage_descriptor[8] |= ubwc_addr >> 32;
412      iview->storage_descriptor[9] = A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2);
413      iview->storage_descriptor[10] =
414         A6XX_IBO_10_FLAG_BUFFER_PITCH(ubwc_pitch);
415   }
416
417   iview->extent.width = width;
418   iview->extent.height = height;
419   iview->need_y2_align =
420      (fmt.tile_mode == TILE6_LINEAR && range->baseMipLevel != image->level_count - 1);
421
422   iview->ubwc_enabled = ubwc_enabled;
423
424   iview->RB_MRT_BUF_INFO = A6XX_RB_MRT_BUF_INFO(0,
425                              .color_tile_mode = cfmt.tile_mode,
426                              .color_format = cfmt.fmt,
427                              .color_swap = cfmt.swap).value;
428
429   iview->SP_FS_MRT_REG = A6XX_SP_FS_MRT_REG(0,
430                              .color_format = cfmt.fmt,
431                              .color_sint = vk_format_is_sint(format),
432                              .color_uint = vk_format_is_uint(format)).value;
433
434   iview->RB_2D_DST_INFO = A6XX_RB_2D_DST_INFO(
435      .color_format = cfmt.fmt,
436      .tile_mode = cfmt.tile_mode,
437      .color_swap = cfmt.swap,
438      .flags = ubwc_enabled,
439      .srgb = vk_format_is_srgb(format)).value;
440
441   iview->RB_BLIT_DST_INFO = A6XX_RB_BLIT_DST_INFO(
442      .tile_mode = cfmt.tile_mode,
443      .samples = tu_msaa_samples(layout->nr_samples),
444      .color_format = cfmt.fmt,
445      .color_swap = cfmt.swap,
446      .flags = ubwc_enabled).value;
447
448   if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
449      layout = &image->layout[1];
450      iview->stencil_base_addr = image->bo->iova + image->bo_offset +
451         fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
452      iview->stencil_layer_size = fdl_layer_stride(layout, range->baseMipLevel);
453      iview->stencil_PITCH = A6XX_RB_STENCIL_BUFFER_PITCH(fdl_pitch(layout, range->baseMipLevel)).value;
454   }
455}
456
457bool
458ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage,
459              VkImageUsageFlags stencil_usage, const struct fd_dev_info *info,
460              VkSampleCountFlagBits samples)
461{
462   /* no UBWC with compressed formats, E5B9G9R9, S8_UINT
463    * (S8_UINT because separate stencil doesn't have UBWC-enable bit)
464    */
465   if (vk_format_is_compressed(format) ||
466       format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 ||
467       format == VK_FORMAT_S8_UINT)
468      return false;
469
470   if (!info->a6xx.has_8bpp_ubwc &&
471       (format == VK_FORMAT_R8_UNORM ||
472        format == VK_FORMAT_R8_SNORM ||
473        format == VK_FORMAT_R8_UINT ||
474        format == VK_FORMAT_R8_SINT ||
475        format == VK_FORMAT_R8_SRGB))
476      return false;
477
478   if (type == VK_IMAGE_TYPE_3D) {
479      tu_finishme("UBWC with 3D textures");
480      return false;
481   }
482
483   /* Disable UBWC for storage images.
484    *
485    * The closed GL driver skips UBWC for storage images (and additionally
486    * uses linear for writeonly images).  We seem to have image tiling working
487    * in freedreno in general, so turnip matches that.  freedreno also enables
488    * UBWC on images, but it's not really tested due to the lack of
489    * UBWC-enabled mipmaps in freedreno currently.  Just match the closed GL
490    * behavior of no UBWC.
491   */
492   if ((usage | stencil_usage) & VK_IMAGE_USAGE_STORAGE_BIT)
493      return false;
494
495   /* Disable UBWC for D24S8 on A630 in some cases
496    *
497    * VK_IMAGE_ASPECT_STENCIL_BIT image view requires to be able to sample
498    * from the stencil component as UINT, however no format allows this
499    * on a630 (the special FMT6_Z24_UINT_S8_UINT format is missing)
500    *
501    * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible
502    *
503    * Additionally, the special AS_R8G8B8A8 format is broken without UBWC,
504    * so we have to fallback to 8_8_8_8_UNORM when UBWC is disabled
505    */
506   if (!info->a6xx.has_z24uint_s8uint &&
507       format == VK_FORMAT_D24_UNORM_S8_UINT &&
508       (stencil_usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))
509      return false;
510
511   if (!info->a6xx.has_z24uint_s8uint && samples > VK_SAMPLE_COUNT_1_BIT)
512      return false;
513
514   return true;
515}
516
517VKAPI_ATTR VkResult VKAPI_CALL
518tu_CreateImage(VkDevice _device,
519               const VkImageCreateInfo *pCreateInfo,
520               const VkAllocationCallbacks *alloc,
521               VkImage *pImage)
522{
523   TU_FROM_HANDLE(tu_device, device, _device);
524   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
525   const VkSubresourceLayout *plane_layouts = NULL;
526   struct tu_image *image;
527
528   if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
529      const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
530         vk_find_struct_const(pCreateInfo->pNext,
531                              IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
532      const VkImageDrmFormatModifierExplicitCreateInfoEXT *drm_explicit_info =
533         vk_find_struct_const(pCreateInfo->pNext,
534                              IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
535
536      assert(mod_info || drm_explicit_info);
537
538      if (mod_info) {
539         modifier = DRM_FORMAT_MOD_LINEAR;
540         for (unsigned i = 0; i < mod_info->drmFormatModifierCount; i++) {
541            if (mod_info->pDrmFormatModifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
542               modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
543         }
544      } else {
545         modifier = drm_explicit_info->drmFormatModifier;
546         assert(modifier == DRM_FORMAT_MOD_LINEAR ||
547                modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED);
548         plane_layouts = drm_explicit_info->pPlaneLayouts;
549      }
550   } else {
551      const struct wsi_image_create_info *wsi_info =
552         vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
553      if (wsi_info && wsi_info->scanout)
554         modifier = DRM_FORMAT_MOD_LINEAR;
555   }
556
557#ifdef ANDROID
558   const VkNativeBufferANDROID *gralloc_info =
559      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
560   int dma_buf;
561   if (gralloc_info) {
562      VkResult result = tu_gralloc_info(device, gralloc_info, &dma_buf, &modifier);
563      if (result != VK_SUCCESS)
564         return result;
565   }
566#endif
567
568   image = vk_object_zalloc(&device->vk, alloc, sizeof(*image),
569                            VK_OBJECT_TYPE_IMAGE);
570   if (!image)
571      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
572
573   const VkExternalMemoryImageCreateInfo *external_info =
574      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
575   image->shareable = external_info != NULL;
576
577   image->vk_format = pCreateInfo->format;
578   image->level_count = pCreateInfo->mipLevels;
579   image->layer_count = pCreateInfo->arrayLayers;
580
581   enum a6xx_tile_mode tile_mode = TILE6_3;
582   bool ubwc_enabled =
583      !(device->physical_device->instance->debug_flags & TU_DEBUG_NOUBWC);
584
585   /* use linear tiling if requested */
586   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR || modifier == DRM_FORMAT_MOD_LINEAR) {
587      tile_mode = TILE6_LINEAR;
588      ubwc_enabled = false;
589   }
590
591   /* Mutable images can be reinterpreted as any other compatible format.
592    * This is a problem with UBWC (compression for different formats is different),
593    * but also tiling ("swap" affects how tiled formats are stored in memory)
594    * Depth and stencil formats cannot be reintepreted as another format, and
595    * cannot be linear with sysmem rendering, so don't fall back for those.
596    *
597    * TODO:
598    * - if the fmt_list contains only formats which are swapped, but compatible
599    *   with each other (B8G8R8A8_UNORM and B8G8R8A8_UINT for example), then
600    *   tiling is still possible
601    * - figure out which UBWC compressions are compatible to keep it enabled
602    */
603   if ((pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
604       !vk_format_is_depth_or_stencil(image->vk_format)) {
605      const VkImageFormatListCreateInfo *fmt_list =
606         vk_find_struct_const(pCreateInfo->pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
607      bool may_be_swapped = true;
608      if (fmt_list) {
609         may_be_swapped = false;
610         for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) {
611            if (tu6_format_texture(fmt_list->pViewFormats[i], TILE6_LINEAR).swap) {
612               may_be_swapped = true;
613               break;
614            }
615         }
616      }
617      if (may_be_swapped)
618         tile_mode = TILE6_LINEAR;
619      ubwc_enabled = false;
620   }
621
622   const VkImageStencilUsageCreateInfo *stencil_usage_info =
623      vk_find_struct_const(pCreateInfo->pNext, IMAGE_STENCIL_USAGE_CREATE_INFO);
624
625   if (!ubwc_possible(image->vk_format, pCreateInfo->imageType, pCreateInfo->usage,
626                      stencil_usage_info ? stencil_usage_info->stencilUsage : pCreateInfo->usage,
627                      device->physical_device->info, pCreateInfo->samples))
628      ubwc_enabled = false;
629
630   /* expect UBWC enabled if we asked for it */
631   assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
632
633   for (uint32_t i = 0; i < tu6_plane_count(image->vk_format); i++) {
634      struct fdl_layout *layout = &image->layout[i];
635      VkFormat format = tu6_plane_format(image->vk_format, i);
636      uint32_t width0 = pCreateInfo->extent.width;
637      uint32_t height0 = pCreateInfo->extent.height;
638
639      if (i > 0) {
640         switch (image->vk_format) {
641         case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
642         case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
643            /* half width/height on chroma planes */
644            width0 = (width0 + 1) >> 1;
645            height0 = (height0 + 1) >> 1;
646            break;
647         case VK_FORMAT_D32_SFLOAT_S8_UINT:
648            /* no UBWC for separate stencil */
649            ubwc_enabled = false;
650            break;
651         default:
652            break;
653         }
654      }
655
656      struct fdl_explicit_layout plane_layout;
657
658      if (plane_layouts) {
659         /* only expect simple 2D images for now */
660         if (pCreateInfo->mipLevels != 1 ||
661            pCreateInfo->arrayLayers != 1 ||
662            pCreateInfo->extent.depth != 1)
663            goto invalid_layout;
664
665         plane_layout.offset = plane_layouts[i].offset;
666         plane_layout.pitch = plane_layouts[i].rowPitch;
667         /* note: use plane_layouts[0].arrayPitch to support array formats */
668      }
669
670      layout->tile_mode = tile_mode;
671      layout->ubwc = ubwc_enabled;
672
673      if (!fdl6_layout(layout, vk_format_to_pipe_format(format),
674                       pCreateInfo->samples,
675                       width0, height0,
676                       pCreateInfo->extent.depth,
677                       pCreateInfo->mipLevels,
678                       pCreateInfo->arrayLayers,
679                       pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
680                       plane_layouts ? &plane_layout : NULL)) {
681         assert(plane_layouts); /* can only fail with explicit layout */
682         goto invalid_layout;
683      }
684
685      /* fdl6_layout can't take explicit offset without explicit pitch
686       * add offset manually for extra layouts for planes
687       */
688      if (!plane_layouts && i > 0) {
689         uint32_t offset = ALIGN_POT(image->total_size, 4096);
690         for (int i = 0; i < pCreateInfo->mipLevels; i++) {
691            layout->slices[i].offset += offset;
692            layout->ubwc_slices[i].offset += offset;
693         }
694         layout->size += offset;
695      }
696
697      image->total_size = MAX2(image->total_size, layout->size);
698   }
699
700   const struct util_format_description *desc = util_format_description(image->layout[0].format);
701   if (util_format_has_depth(desc) && !(device->instance->debug_flags & TU_DEBUG_NOLRZ))
702   {
703      /* Depth plane is the first one */
704      struct fdl_layout *layout = &image->layout[0];
705      unsigned width = layout->width0;
706      unsigned height = layout->height0;
707
708      /* LRZ buffer is super-sampled */
709      switch (layout->nr_samples) {
710      case 4:
711         width *= 2;
712         FALLTHROUGH;
713      case 2:
714         height *= 2;
715         break;
716      default:
717         break;
718      }
719
720      unsigned lrz_pitch  = align(DIV_ROUND_UP(width, 8), 32);
721      unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 16);
722
723      image->lrz_height = lrz_height;
724      image->lrz_pitch = lrz_pitch;
725      image->lrz_offset = image->total_size;
726      unsigned lrz_size = lrz_pitch * lrz_height * 2;
727      image->total_size += lrz_size;
728   }
729
730   *pImage = tu_image_to_handle(image);
731
732#ifdef ANDROID
733   if (gralloc_info)
734      return tu_import_memory_from_gralloc_handle(_device, dma_buf, alloc, *pImage);
735#endif
736   return VK_SUCCESS;
737
738invalid_layout:
739   vk_object_free(&device->vk, alloc, image);
740   return vk_error(device, VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
741}
742
743VKAPI_ATTR void VKAPI_CALL
744tu_DestroyImage(VkDevice _device,
745                VkImage _image,
746                const VkAllocationCallbacks *pAllocator)
747{
748   TU_FROM_HANDLE(tu_device, device, _device);
749   TU_FROM_HANDLE(tu_image, image, _image);
750
751   if (!image)
752      return;
753
754#ifdef ANDROID
755   if (image->owned_memory != VK_NULL_HANDLE)
756      tu_FreeMemory(_device, image->owned_memory, pAllocator);
757#endif
758
759   vk_object_free(&device->vk, pAllocator, image);
760}
761
762VKAPI_ATTR void VKAPI_CALL
763tu_GetImageSubresourceLayout(VkDevice _device,
764                             VkImage _image,
765                             const VkImageSubresource *pSubresource,
766                             VkSubresourceLayout *pLayout)
767{
768   TU_FROM_HANDLE(tu_image, image, _image);
769
770   struct fdl_layout *layout =
771      &image->layout[tu6_plane_index(image->vk_format, pSubresource->aspectMask)];
772   const struct fdl_slice *slice = layout->slices + pSubresource->mipLevel;
773
774   pLayout->offset =
775      fdl_surface_offset(layout, pSubresource->mipLevel, pSubresource->arrayLayer);
776   pLayout->rowPitch = fdl_pitch(layout, pSubresource->mipLevel);
777   pLayout->arrayPitch = fdl_layer_stride(layout, pSubresource->mipLevel);
778   pLayout->depthPitch = slice->size0;
779   pLayout->size = pLayout->depthPitch * layout->depth0;
780
781   if (fdl_ubwc_enabled(layout, pSubresource->mipLevel)) {
782      /* UBWC starts at offset 0 */
783      pLayout->offset = 0;
784      /* UBWC scanout won't match what the kernel wants if we have levels/layers */
785      assert(image->level_count == 1 && image->layer_count == 1);
786   }
787}
788
789VKAPI_ATTR VkResult VKAPI_CALL
790tu_GetImageDrmFormatModifierPropertiesEXT(
791    VkDevice                                    device,
792    VkImage                                     _image,
793    VkImageDrmFormatModifierPropertiesEXT*      pProperties)
794{
795   TU_FROM_HANDLE(tu_image, image, _image);
796
797   /* TODO invent a modifier for tiled but not UBWC buffers */
798
799   if (!image->layout[0].tile_mode)
800      pProperties->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
801   else if (image->layout[0].ubwc_layer_size)
802      pProperties->drmFormatModifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
803   else
804      pProperties->drmFormatModifier = DRM_FORMAT_MOD_INVALID;
805
806   return VK_SUCCESS;
807}
808
809
810VKAPI_ATTR VkResult VKAPI_CALL
811tu_CreateImageView(VkDevice _device,
812                   const VkImageViewCreateInfo *pCreateInfo,
813                   const VkAllocationCallbacks *pAllocator,
814                   VkImageView *pView)
815{
816   TU_FROM_HANDLE(tu_device, device, _device);
817   struct tu_image_view *view;
818
819   view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view),
820                          VK_OBJECT_TYPE_IMAGE_VIEW);
821   if (view == NULL)
822      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
823
824   tu_image_view_init(view, pCreateInfo, device->physical_device->info->a6xx.has_z24uint_s8uint);
825
826   *pView = tu_image_view_to_handle(view);
827
828   return VK_SUCCESS;
829}
830
831VKAPI_ATTR void VKAPI_CALL
832tu_DestroyImageView(VkDevice _device,
833                    VkImageView _iview,
834                    const VkAllocationCallbacks *pAllocator)
835{
836   TU_FROM_HANDLE(tu_device, device, _device);
837   TU_FROM_HANDLE(tu_image_view, iview, _iview);
838
839   if (!iview)
840      return;
841
842   vk_object_free(&device->vk, pAllocator, iview);
843}
844
845void
846tu_buffer_view_init(struct tu_buffer_view *view,
847                    struct tu_device *device,
848                    const VkBufferViewCreateInfo *pCreateInfo)
849{
850   TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer);
851
852   view->buffer = buffer;
853
854   enum VkFormat vfmt = pCreateInfo->format;
855   enum pipe_format pfmt = vk_format_to_pipe_format(vfmt);
856   const struct tu_native_format fmt = tu6_format_texture(vfmt, TILE6_LINEAR);
857
858   uint32_t range;
859   if (pCreateInfo->range == VK_WHOLE_SIZE)
860      range = buffer->size - pCreateInfo->offset;
861   else
862      range = pCreateInfo->range;
863   uint32_t elements = range / util_format_get_blocksize(pfmt);
864
865   static const VkComponentMapping components = {
866      .r = VK_COMPONENT_SWIZZLE_R,
867      .g = VK_COMPONENT_SWIZZLE_G,
868      .b = VK_COMPONENT_SWIZZLE_B,
869      .a = VK_COMPONENT_SWIZZLE_A,
870   };
871
872   uint64_t iova = tu_buffer_iova(buffer) + pCreateInfo->offset;
873
874   memset(&view->descriptor, 0, sizeof(view->descriptor));
875
876   view->descriptor[0] =
877      A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) |
878      A6XX_TEX_CONST_0_SWAP(fmt.swap) |
879      A6XX_TEX_CONST_0_FMT(fmt.fmt) |
880      A6XX_TEX_CONST_0_MIPLVLS(0) |
881      tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT, false);
882      COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB);
883   view->descriptor[1] =
884      A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
885      A6XX_TEX_CONST_1_HEIGHT(elements >> 15);
886   view->descriptor[2] =
887      A6XX_TEX_CONST_2_UNK4 |
888      A6XX_TEX_CONST_2_UNK31;
889   view->descriptor[4] = iova;
890   view->descriptor[5] = iova >> 32;
891}
892
893VKAPI_ATTR VkResult VKAPI_CALL
894tu_CreateBufferView(VkDevice _device,
895                    const VkBufferViewCreateInfo *pCreateInfo,
896                    const VkAllocationCallbacks *pAllocator,
897                    VkBufferView *pView)
898{
899   TU_FROM_HANDLE(tu_device, device, _device);
900   struct tu_buffer_view *view;
901
902   view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view),
903                          VK_OBJECT_TYPE_BUFFER_VIEW);
904   if (!view)
905      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
906
907   tu_buffer_view_init(view, device, pCreateInfo);
908
909   *pView = tu_buffer_view_to_handle(view);
910
911   return VK_SUCCESS;
912}
913
914VKAPI_ATTR void VKAPI_CALL
915tu_DestroyBufferView(VkDevice _device,
916                     VkBufferView bufferView,
917                     const VkAllocationCallbacks *pAllocator)
918{
919   TU_FROM_HANDLE(tu_device, device, _device);
920   TU_FROM_HANDLE(tu_buffer_view, view, bufferView);
921
922   if (!view)
923      return;
924
925   vk_object_free(&device->vk, pAllocator, view);
926}
927