/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
2701e04c3fSmrg
287ec681f3Smrg#define AC_SURFACE_INCLUDE_NIR
2901e04c3fSmrg#include "ac_surface.h"
307ec681f3Smrg
317ec681f3Smrg#include "ac_drm_fourcc.h"
3201e04c3fSmrg#include "ac_gpu_info.h"
337ec681f3Smrg#include "addrlib/inc/addrinterface.h"
347ec681f3Smrg#include "addrlib/src/amdgpu_asic_addr.h"
357ec681f3Smrg#include "amd_family.h"
367ec681f3Smrg#include "sid.h"
377ec681f3Smrg#include "util/hash_table.h"
3801e04c3fSmrg#include "util/macros.h"
397ec681f3Smrg#include "util/simple_mtx.h"
4001e04c3fSmrg#include "util/u_atomic.h"
417ec681f3Smrg#include "util/format/u_format.h"
4201e04c3fSmrg#include "util/u_math.h"
437ec681f3Smrg#include "util/u_memory.h"
4401e04c3fSmrg
4501e04c3fSmrg#include <errno.h>
4601e04c3fSmrg#include <stdio.h>
4701e04c3fSmrg#include <stdlib.h>
4801e04c3fSmrg
497ec681f3Smrg#ifdef _WIN32
507ec681f3Smrg#define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
517ec681f3Smrg#define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
527ec681f3Smrg#define AMDGPU_TILING_PIPE_CONFIG_SHIFT			4
537ec681f3Smrg#define AMDGPU_TILING_PIPE_CONFIG_MASK			0x1f
547ec681f3Smrg#define AMDGPU_TILING_TILE_SPLIT_SHIFT			9
557ec681f3Smrg#define AMDGPU_TILING_TILE_SPLIT_MASK			0x7
567ec681f3Smrg#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT		12
577ec681f3Smrg#define AMDGPU_TILING_MICRO_TILE_MODE_MASK		0x7
587ec681f3Smrg#define AMDGPU_TILING_BANK_WIDTH_SHIFT			15
597ec681f3Smrg#define AMDGPU_TILING_BANK_WIDTH_MASK			0x3
607ec681f3Smrg#define AMDGPU_TILING_BANK_HEIGHT_SHIFT			17
617ec681f3Smrg#define AMDGPU_TILING_BANK_HEIGHT_MASK			0x3
627ec681f3Smrg#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT		19
637ec681f3Smrg#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK		0x3
647ec681f3Smrg#define AMDGPU_TILING_NUM_BANKS_SHIFT			21
657ec681f3Smrg#define AMDGPU_TILING_NUM_BANKS_MASK			0x3
667ec681f3Smrg#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
677ec681f3Smrg#define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
687ec681f3Smrg#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT		5
697ec681f3Smrg#define AMDGPU_TILING_DCC_OFFSET_256B_MASK		0xFFFFFF
707ec681f3Smrg#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT		29
717ec681f3Smrg#define AMDGPU_TILING_DCC_PITCH_MAX_MASK		0x3FFF
727ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT		43
737ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK		0x1
747ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
757ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
767ec681f3Smrg#define AMDGPU_TILING_SCANOUT_SHIFT			63
777ec681f3Smrg#define AMDGPU_TILING_SCANOUT_MASK			0x1
787ec681f3Smrg#define AMDGPU_TILING_SET(field, value) \
797ec681f3Smrg	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
807ec681f3Smrg#define AMDGPU_TILING_GET(value, field) \
817ec681f3Smrg	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
827ec681f3Smrg#else
837ec681f3Smrg#include "drm-uapi/amdgpu_drm.h"
847ec681f3Smrg#endif
8501e04c3fSmrg
/* Fallback values for the addrlib chip-engine identifiers, used only when no
 * earlier header has already defined them.
 */
#if !defined(CIASICIDGFXENGINE_SOUTHERNISLAND)
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif

#if !defined(CIASICIDGFXENGINE_ARCTICISLAND)
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif
9301e04c3fSmrg
947ec681f3Smrgstruct ac_addrlib {
957ec681f3Smrg   ADDR_HANDLE handle;
967ec681f3Smrg};
977ec681f3Smrg
987ec681f3Smrgbool ac_modifier_has_dcc(uint64_t modifier)
997ec681f3Smrg{
1007ec681f3Smrg   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
1017ec681f3Smrg}
1027ec681f3Smrg
1037ec681f3Smrgbool ac_modifier_has_dcc_retile(uint64_t modifier)
1047ec681f3Smrg{
1057ec681f3Smrg   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
1067ec681f3Smrg}
1077ec681f3Smrg
1087ec681f3Smrgbool ac_modifier_supports_dcc_image_stores(uint64_t modifier)
1097ec681f3Smrg{
1107ec681f3Smrg   if (!ac_modifier_has_dcc(modifier))
1117ec681f3Smrg      return false;
1127ec681f3Smrg
1137ec681f3Smrg   return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
1147ec681f3Smrg            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
1157ec681f3Smrg            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) ||
1167ec681f3Smrg           (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */
1177ec681f3Smrg            AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
1187ec681f3Smrg            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
1197ec681f3Smrg            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B);
1207ec681f3Smrg
1217ec681f3Smrg}
1227ec681f3Smrg
1237ec681f3Smrg
1247ec681f3Smrgbool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
1257ec681f3Smrg                                          const struct radeon_surf *surf)
1267ec681f3Smrg{
1277ec681f3Smrg   /* DCC image stores is only available for GFX10+. */
1287ec681f3Smrg   if (chip_class < GFX10)
1297ec681f3Smrg      return false;
1307ec681f3Smrg
1317ec681f3Smrg   /* DCC image stores support the following settings:
1327ec681f3Smrg    * - INDEPENDENT_64B_BLOCKS = 0
1337ec681f3Smrg    * - INDEPENDENT_128B_BLOCKS = 1
1347ec681f3Smrg    * - MAX_COMPRESSED_BLOCK_SIZE = 128B
1357ec681f3Smrg    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
1367ec681f3Smrg    *
1377ec681f3Smrg    * gfx10.3 also supports the following setting:
1387ec681f3Smrg    * - INDEPENDENT_64B_BLOCKS = 1
1397ec681f3Smrg    * - INDEPENDENT_128B_BLOCKS = 1
1407ec681f3Smrg    * - MAX_COMPRESSED_BLOCK_SIZE = 64B
1417ec681f3Smrg    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
1427ec681f3Smrg    *
1437ec681f3Smrg    * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine
1447ec681f3Smrg    * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B
1457ec681f3Smrg    * implies INDEP_64B && INDEP_128B.
1467ec681f3Smrg    *
1477ec681f3Smrg    * The same limitations apply to SDMA compressed stores because
1487ec681f3Smrg    * SDMA uses the same DCC codec.
1497ec681f3Smrg    */
1507ec681f3Smrg   return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
1517ec681f3Smrg            surf->u.gfx9.color.dcc.independent_128B_blocks &&
1527ec681f3Smrg            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
1537ec681f3Smrg           (chip_class >= GFX10_3 && /* gfx10.3 */
1547ec681f3Smrg            surf->u.gfx9.color.dcc.independent_64B_blocks &&
1557ec681f3Smrg            surf->u.gfx9.color.dcc.independent_128B_blocks &&
1567ec681f3Smrg            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1577ec681f3Smrg}
1587ec681f3Smrg
1597ec681f3Smrgstatic
1607ec681f3SmrgAddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)
16101e04c3fSmrg{
1627ec681f3Smrg   if (modifier == DRM_FORMAT_MOD_LINEAR)
1637ec681f3Smrg      return ADDR_SW_LINEAR;
1647ec681f3Smrg
1657ec681f3Smrg   return AMD_FMT_MOD_GET(TILE, modifier);
1667ec681f3Smrg}
1677ec681f3Smrgstatic void
1687ec681f3Smrgac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
1697ec681f3Smrg             ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
1707ec681f3Smrg{
1717ec681f3Smrg   assert(ac_modifier_has_dcc(modifier));
1727ec681f3Smrg
1737ec681f3Smrg   if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
1747ec681f3Smrg      surf_info->flags.metaPipeUnaligned = 0;
1757ec681f3Smrg   } else {
1767ec681f3Smrg      surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
1777ec681f3Smrg   }
1787ec681f3Smrg
1797ec681f3Smrg   /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
1807ec681f3Smrg    * non-displayable DCC surfaces just because num_render_backends = 1 */
1817ec681f3Smrg   surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
1827ec681f3Smrg                                      AMD_FMT_MOD_GET(RB, modifier) == 0 &&
1837ec681f3Smrg                                      surf_info->flags.metaPipeUnaligned;
1847ec681f3Smrg
1857ec681f3Smrg   surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
1867ec681f3Smrg   surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
1877ec681f3Smrg   surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
18801e04c3fSmrg}
18901e04c3fSmrg
1907ec681f3Smrgbool ac_is_modifier_supported(const struct radeon_info *info,
1917ec681f3Smrg                              const struct ac_modifier_options *options,
1927ec681f3Smrg                              enum pipe_format format,
1937ec681f3Smrg                              uint64_t modifier)
19401e04c3fSmrg{
1957ec681f3Smrg
1967ec681f3Smrg   if (util_format_is_compressed(format) ||
1977ec681f3Smrg       util_format_is_depth_or_stencil(format) ||
1987ec681f3Smrg       util_format_get_blocksizebits(format) > 64)
1997ec681f3Smrg      return false;
2007ec681f3Smrg
2017ec681f3Smrg   if (info->chip_class < GFX9)
2027ec681f3Smrg      return false;
2037ec681f3Smrg
2047ec681f3Smrg   if(modifier == DRM_FORMAT_MOD_LINEAR)
2057ec681f3Smrg      return true;
2067ec681f3Smrg
2077ec681f3Smrg   /* GFX8 may need a different modifier for each plane */
2087ec681f3Smrg   if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1)
2097ec681f3Smrg      return false;
2107ec681f3Smrg
2117ec681f3Smrg   uint32_t allowed_swizzles = 0xFFFFFFFF;
2127ec681f3Smrg   switch(info->chip_class) {
2137ec681f3Smrg   case GFX9:
2147ec681f3Smrg      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
2157ec681f3Smrg      break;
2167ec681f3Smrg   case GFX10:
2177ec681f3Smrg   case GFX10_3:
2187ec681f3Smrg      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
2197ec681f3Smrg      break;
2207ec681f3Smrg   default:
2217ec681f3Smrg      return false;
2227ec681f3Smrg   }
2237ec681f3Smrg
2247ec681f3Smrg   if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))
2257ec681f3Smrg      return false;
2267ec681f3Smrg
2277ec681f3Smrg   if (ac_modifier_has_dcc(modifier)) {
2287ec681f3Smrg      /* TODO: support multi-planar formats with DCC */
2297ec681f3Smrg      if (util_format_get_num_planes(format) > 1)
2307ec681f3Smrg         return false;
2317ec681f3Smrg
2327ec681f3Smrg      if (!info->has_graphics)
2337ec681f3Smrg         return false;
2347ec681f3Smrg
2357ec681f3Smrg      if (!options->dcc)
2367ec681f3Smrg         return false;
2377ec681f3Smrg
2387ec681f3Smrg      if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)
2397ec681f3Smrg         return false;
2407ec681f3Smrg   }
2417ec681f3Smrg
2427ec681f3Smrg   return true;
24301e04c3fSmrg}
24401e04c3fSmrg
2457ec681f3Smrgbool ac_get_supported_modifiers(const struct radeon_info *info,
2467ec681f3Smrg                                const struct ac_modifier_options *options,
2477ec681f3Smrg                                enum pipe_format format,
2487ec681f3Smrg                                unsigned *mod_count,
2497ec681f3Smrg                                uint64_t *mods)
25001e04c3fSmrg{
2517ec681f3Smrg   unsigned current_mod = 0;
2527ec681f3Smrg
2537ec681f3Smrg#define ADD_MOD(name)                                                   \
2547ec681f3Smrg   if (ac_is_modifier_supported(info, options, format, (name))) {  \
2557ec681f3Smrg      if (mods && current_mod < *mod_count)                  \
2567ec681f3Smrg         mods[current_mod] = (name);                    \
2577ec681f3Smrg      ++current_mod;                                         \
2587ec681f3Smrg   }
2597ec681f3Smrg
2607ec681f3Smrg   /* The modifiers have to be added in descending order of estimated
2617ec681f3Smrg    * performance. The drivers will prefer modifiers that come earlier
2627ec681f3Smrg    * in the list. */
2637ec681f3Smrg   switch (info->chip_class) {
2647ec681f3Smrg   case GFX9: {
2657ec681f3Smrg      unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
2667ec681f3Smrg                                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
2677ec681f3Smrg      unsigned bank_xor_bits =  MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
2687ec681f3Smrg      unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
2697ec681f3Smrg      unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
2707ec681f3Smrg                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);
2717ec681f3Smrg
2727ec681f3Smrg      uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
2737ec681f3Smrg                            AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
2747ec681f3Smrg                            AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
2757ec681f3Smrg                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
2767ec681f3Smrg                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
2777ec681f3Smrg                            AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);
2787ec681f3Smrg
2797ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
2807ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
2817ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
2827ec681f3Smrg              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
2837ec681f3Smrg              common_dcc |
2847ec681f3Smrg              AMD_FMT_MOD_SET(PIPE, pipes) |
2857ec681f3Smrg              AMD_FMT_MOD_SET(RB, rb))
2867ec681f3Smrg
2877ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
2887ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
2897ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
2907ec681f3Smrg              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
2917ec681f3Smrg              common_dcc |
2927ec681f3Smrg              AMD_FMT_MOD_SET(PIPE, pipes) |
2937ec681f3Smrg              AMD_FMT_MOD_SET(RB, rb))
2947ec681f3Smrg
2957ec681f3Smrg      if (util_format_get_blocksizebits(format) == 32) {
2967ec681f3Smrg         if (info->max_render_backends == 1) {
2977ec681f3Smrg            ADD_MOD(AMD_FMT_MOD |
2987ec681f3Smrg                    AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
2997ec681f3Smrg                    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
3007ec681f3Smrg                    common_dcc);
3017ec681f3Smrg         }
3027ec681f3Smrg
3037ec681f3Smrg
3047ec681f3Smrg         ADD_MOD(AMD_FMT_MOD |
3057ec681f3Smrg                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
3067ec681f3Smrg                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
3077ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
3087ec681f3Smrg                 common_dcc |
3097ec681f3Smrg                 AMD_FMT_MOD_SET(PIPE, pipes) |
3107ec681f3Smrg                 AMD_FMT_MOD_SET(RB, rb))
3117ec681f3Smrg      }
3127ec681f3Smrg
3137ec681f3Smrg
3147ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3157ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
3167ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
3177ec681f3Smrg              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
3187ec681f3Smrg              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
3197ec681f3Smrg
3207ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3217ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
3227ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
3237ec681f3Smrg              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
3247ec681f3Smrg              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
3257ec681f3Smrg
3267ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3277ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
3287ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
3297ec681f3Smrg
3307ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3317ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
3327ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
3337ec681f3Smrg
3347ec681f3Smrg      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
3357ec681f3Smrg      break;
3367ec681f3Smrg   }
3377ec681f3Smrg   case GFX10:
3387ec681f3Smrg   case GFX10_3: {
3397ec681f3Smrg      bool rbplus = info->chip_class >= GFX10_3;
3407ec681f3Smrg      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
3417ec681f3Smrg      unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;
3427ec681f3Smrg
3437ec681f3Smrg      unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
3447ec681f3Smrg      uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
3457ec681f3Smrg                            AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
3467ec681f3Smrg                            AMD_FMT_MOD_SET(DCC, 1) |
3477ec681f3Smrg                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
3487ec681f3Smrg                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
3497ec681f3Smrg                            AMD_FMT_MOD_SET(PACKERS, pkrs);
3507ec681f3Smrg
3517ec681f3Smrg      ADD_MOD(AMD_FMT_MOD | common_dcc |
3527ec681f3Smrg              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
3537ec681f3Smrg              AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
3547ec681f3Smrg              AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
3557ec681f3Smrg
3567ec681f3Smrg      if (info->chip_class >= GFX10_3) {
3577ec681f3Smrg         if (info->max_render_backends == 1) {
3587ec681f3Smrg            ADD_MOD(AMD_FMT_MOD | common_dcc |
3597ec681f3Smrg                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
3607ec681f3Smrg                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
3617ec681f3Smrg         }
3627ec681f3Smrg
3637ec681f3Smrg         ADD_MOD(AMD_FMT_MOD | common_dcc |
3647ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
3657ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
3667ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
3677ec681f3Smrg      }
3687ec681f3Smrg
3697ec681f3Smrg      if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) {
3707ec681f3Smrg         bool independent_128b = info->chip_class >= GFX10_3;
3717ec681f3Smrg
3727ec681f3Smrg         if (info->max_render_backends == 1) {
3737ec681f3Smrg            ADD_MOD(AMD_FMT_MOD | common_dcc |
3747ec681f3Smrg                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
3757ec681f3Smrg                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
3767ec681f3Smrg                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
3777ec681f3Smrg         }
3787ec681f3Smrg
3797ec681f3Smrg         ADD_MOD(AMD_FMT_MOD | common_dcc |
3807ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
3817ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
3827ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
3837ec681f3Smrg                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
3847ec681f3Smrg      }
3857ec681f3Smrg
3867ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3877ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, version) |
3887ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
3897ec681f3Smrg              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
3907ec681f3Smrg              AMD_FMT_MOD_SET(PACKERS, pkrs))
3917ec681f3Smrg
3927ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
3937ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
3947ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
3957ec681f3Smrg              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))
3967ec681f3Smrg
3977ec681f3Smrg      if (util_format_get_blocksizebits(format) != 32) {
3987ec681f3Smrg         ADD_MOD(AMD_FMT_MOD |
3997ec681f3Smrg                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
4007ec681f3Smrg                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
4017ec681f3Smrg      }
4027ec681f3Smrg
4037ec681f3Smrg      ADD_MOD(AMD_FMT_MOD |
4047ec681f3Smrg              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
4057ec681f3Smrg              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
4067ec681f3Smrg
4077ec681f3Smrg      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
4087ec681f3Smrg      break;
4097ec681f3Smrg   }
4107ec681f3Smrg   default:
4117ec681f3Smrg      break;
4127ec681f3Smrg   }
4137ec681f3Smrg
4147ec681f3Smrg#undef ADD_MOD
4157ec681f3Smrg
4167ec681f3Smrg   if (!mods) {
4177ec681f3Smrg      *mod_count = current_mod;
4187ec681f3Smrg      return true;
4197ec681f3Smrg   }
4207ec681f3Smrg
4217ec681f3Smrg   bool complete = current_mod <= *mod_count;
4227ec681f3Smrg   *mod_count = MIN2(*mod_count, current_mod);
4237ec681f3Smrg   return complete;
42401e04c3fSmrg}
42501e04c3fSmrg
4267ec681f3Smrgstatic void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
42701e04c3fSmrg{
4287ec681f3Smrg   return malloc(pInput->sizeInBytes);
42901e04c3fSmrg}
43001e04c3fSmrg
4317ec681f3Smrgstatic ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
43201e04c3fSmrg{
4337ec681f3Smrg   free(pInput->pVirtAddr);
4347ec681f3Smrg   return ADDR_OK;
43501e04c3fSmrg}
43601e04c3fSmrg
4377ec681f3Smrgstruct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
4387ec681f3Smrg                                     uint64_t *max_alignment)
43901e04c3fSmrg{
4407ec681f3Smrg   ADDR_CREATE_INPUT addrCreateInput = {0};
4417ec681f3Smrg   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
4427ec681f3Smrg   ADDR_REGISTER_VALUE regValue = {0};
4437ec681f3Smrg   ADDR_CREATE_FLAGS createFlags = {{0}};
4447ec681f3Smrg   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
4457ec681f3Smrg   ADDR_E_RETURNCODE addrRet;
4467ec681f3Smrg
4477ec681f3Smrg   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
4487ec681f3Smrg   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
4497ec681f3Smrg
4507ec681f3Smrg   regValue.gbAddrConfig = info->gb_addr_config;
4517ec681f3Smrg   createFlags.value = 0;
4527ec681f3Smrg
4537ec681f3Smrg   addrCreateInput.chipFamily = info->family_id;
4547ec681f3Smrg   addrCreateInput.chipRevision = info->chip_external_rev;
4557ec681f3Smrg
4567ec681f3Smrg   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
4577ec681f3Smrg      return NULL;
4587ec681f3Smrg
4597ec681f3Smrg   if (addrCreateInput.chipFamily >= FAMILY_AI) {
4607ec681f3Smrg      addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
4617ec681f3Smrg   } else {
4627ec681f3Smrg      regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
4637ec681f3Smrg      regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;
4647ec681f3Smrg
4657ec681f3Smrg      regValue.backendDisables = info->enabled_rb_mask;
4667ec681f3Smrg      regValue.pTileConfig = info->si_tile_mode_array;
4677ec681f3Smrg      regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
4687ec681f3Smrg      if (addrCreateInput.chipFamily == FAMILY_SI) {
4697ec681f3Smrg         regValue.pMacroTileConfig = NULL;
4707ec681f3Smrg         regValue.noOfMacroEntries = 0;
4717ec681f3Smrg      } else {
4727ec681f3Smrg         regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
4737ec681f3Smrg         regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
4747ec681f3Smrg      }
4757ec681f3Smrg
4767ec681f3Smrg      createFlags.useTileIndex = 1;
4777ec681f3Smrg      createFlags.useHtileSliceAlign = 1;
4787ec681f3Smrg
4797ec681f3Smrg      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
4807ec681f3Smrg   }
4817ec681f3Smrg
4827ec681f3Smrg   addrCreateInput.callbacks.allocSysMem = allocSysMem;
4837ec681f3Smrg   addrCreateInput.callbacks.freeSysMem = freeSysMem;
4847ec681f3Smrg   addrCreateInput.callbacks.debugPrint = 0;
4857ec681f3Smrg   addrCreateInput.createFlags = createFlags;
4867ec681f3Smrg   addrCreateInput.regValue = regValue;
4877ec681f3Smrg
4887ec681f3Smrg   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
4897ec681f3Smrg   if (addrRet != ADDR_OK)
4907ec681f3Smrg      return NULL;
4917ec681f3Smrg
4927ec681f3Smrg   if (max_alignment) {
4937ec681f3Smrg      addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
4947ec681f3Smrg      if (addrRet == ADDR_OK) {
4957ec681f3Smrg         *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
4967ec681f3Smrg      }
4977ec681f3Smrg   }
4987ec681f3Smrg
4997ec681f3Smrg   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
5007ec681f3Smrg   if (!addrlib) {
5017ec681f3Smrg      AddrDestroy(addrCreateOutput.hLib);
5027ec681f3Smrg      return NULL;
5037ec681f3Smrg   }
5047ec681f3Smrg
5057ec681f3Smrg   addrlib->handle = addrCreateOutput.hLib;
5067ec681f3Smrg   return addrlib;
50701e04c3fSmrg}
50801e04c3fSmrg
5097ec681f3Smrgvoid ac_addrlib_destroy(struct ac_addrlib *addrlib)
51001e04c3fSmrg{
5117ec681f3Smrg   AddrDestroy(addrlib->handle);
5127ec681f3Smrg   free(addrlib);
51301e04c3fSmrg}
51401e04c3fSmrg
5157ec681f3Smrgvoid *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
5167ec681f3Smrg{
5177ec681f3Smrg   return addrlib->handle;
5187ec681f3Smrg}
51901e04c3fSmrg
5207ec681f3Smrgstatic int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
52101e04c3fSmrg{
5227ec681f3Smrg   /* FMASK is allocated together with the color surface and can't be
5237ec681f3Smrg    * allocated separately.
5247ec681f3Smrg    */
5257ec681f3Smrg   assert(!(flags & RADEON_SURF_FMASK));
5267ec681f3Smrg   if (flags & RADEON_SURF_FMASK)
5277ec681f3Smrg      return -EINVAL;
5287ec681f3Smrg
5297ec681f3Smrg   /* all dimension must be at least 1 ! */
5307ec681f3Smrg   if (!config->info.width || !config->info.height || !config->info.depth ||
5317ec681f3Smrg       !config->info.array_size || !config->info.levels)
5327ec681f3Smrg      return -EINVAL;
5337ec681f3Smrg
5347ec681f3Smrg   switch (config->info.samples) {
5357ec681f3Smrg   case 0:
5367ec681f3Smrg   case 1:
5377ec681f3Smrg   case 2:
5387ec681f3Smrg   case 4:
5397ec681f3Smrg   case 8:
5407ec681f3Smrg      break;
5417ec681f3Smrg   case 16:
5427ec681f3Smrg      if (flags & RADEON_SURF_Z_OR_SBUFFER)
5437ec681f3Smrg         return -EINVAL;
5447ec681f3Smrg      break;
5457ec681f3Smrg   default:
5467ec681f3Smrg      return -EINVAL;
5477ec681f3Smrg   }
5487ec681f3Smrg
5497ec681f3Smrg   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
5507ec681f3Smrg      switch (config->info.storage_samples) {
5517ec681f3Smrg      case 0:
5527ec681f3Smrg      case 1:
5537ec681f3Smrg      case 2:
5547ec681f3Smrg      case 4:
5557ec681f3Smrg      case 8:
5567ec681f3Smrg         break;
5577ec681f3Smrg      default:
5587ec681f3Smrg         return -EINVAL;
5597ec681f3Smrg      }
5607ec681f3Smrg   }
5617ec681f3Smrg
5627ec681f3Smrg   if (config->is_3d && config->info.array_size > 1)
5637ec681f3Smrg      return -EINVAL;
5647ec681f3Smrg   if (config->is_cube && config->info.depth > 1)
5657ec681f3Smrg      return -EINVAL;
5667ec681f3Smrg
5677ec681f3Smrg   return 0;
5687ec681f3Smrg}
56901e04c3fSmrg
5707ec681f3Smrgstatic int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
5717ec681f3Smrg                              struct radeon_surf *surf, bool is_stencil, unsigned level,
5727ec681f3Smrg                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
5737ec681f3Smrg                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
5747ec681f3Smrg                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
5757ec681f3Smrg                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
5767ec681f3Smrg                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
5777ec681f3Smrg                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
5787ec681f3Smrg{
5797ec681f3Smrg   struct legacy_surf_level *surf_level;
5807ec681f3Smrg   struct legacy_surf_dcc_level *dcc_level;
5817ec681f3Smrg   ADDR_E_RETURNCODE ret;
5827ec681f3Smrg
5837ec681f3Smrg   AddrSurfInfoIn->mipLevel = level;
5847ec681f3Smrg   AddrSurfInfoIn->width = u_minify(config->info.width, level);
5857ec681f3Smrg   AddrSurfInfoIn->height = u_minify(config->info.height, level);
5867ec681f3Smrg
5877ec681f3Smrg   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
5887ec681f3Smrg    * because GFX9 needs linear alignment of 256 bytes.
5897ec681f3Smrg    */
5907ec681f3Smrg   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
5917ec681f3Smrg       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
5927ec681f3Smrg      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
5937ec681f3Smrg
5947ec681f3Smrg      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
5957ec681f3Smrg   }
5967ec681f3Smrg
5977ec681f3Smrg   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
5987ec681f3Smrg    * true for r32g32b32 formats. */
5997ec681f3Smrg   if (AddrSurfInfoIn->bpp == 96) {
6007ec681f3Smrg      assert(config->info.levels == 1);
6017ec681f3Smrg      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
6027ec681f3Smrg
6037ec681f3Smrg      /* The least common multiple of 64 bytes and 12 bytes/pixel is
6047ec681f3Smrg       * 192 bytes, or 16 pixels. */
6057ec681f3Smrg      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
6067ec681f3Smrg   }
6077ec681f3Smrg
6087ec681f3Smrg   if (config->is_3d)
6097ec681f3Smrg      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
6107ec681f3Smrg   else if (config->is_cube)
6117ec681f3Smrg      AddrSurfInfoIn->numSlices = 6;
6127ec681f3Smrg   else
6137ec681f3Smrg      AddrSurfInfoIn->numSlices = config->info.array_size;
6147ec681f3Smrg
6157ec681f3Smrg   if (level > 0) {
6167ec681f3Smrg      /* Set the base level pitch. This is needed for calculation
6177ec681f3Smrg       * of non-zero levels. */
6187ec681f3Smrg      if (is_stencil)
6197ec681f3Smrg         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
6207ec681f3Smrg      else
6217ec681f3Smrg         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;
6227ec681f3Smrg
6237ec681f3Smrg      /* Convert blocks to pixels for compressed formats. */
6247ec681f3Smrg      if (compressed)
6257ec681f3Smrg         AddrSurfInfoIn->basePitch *= surf->blk_w;
6267ec681f3Smrg   }
6277ec681f3Smrg
6287ec681f3Smrg   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
6297ec681f3Smrg   if (ret != ADDR_OK) {
6307ec681f3Smrg      return ret;
6317ec681f3Smrg   }
6327ec681f3Smrg
6337ec681f3Smrg   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
6347ec681f3Smrg   dcc_level = &surf->u.legacy.color.dcc_level[level];
6357ec681f3Smrg   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
6367ec681f3Smrg   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
6377ec681f3Smrg   surf_level->nblk_x = AddrSurfInfoOut->pitch;
6387ec681f3Smrg   surf_level->nblk_y = AddrSurfInfoOut->height;
6397ec681f3Smrg
6407ec681f3Smrg   switch (AddrSurfInfoOut->tileMode) {
6417ec681f3Smrg   case ADDR_TM_LINEAR_ALIGNED:
6427ec681f3Smrg      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
6437ec681f3Smrg      break;
6447ec681f3Smrg   case ADDR_TM_1D_TILED_THIN1:
6457ec681f3Smrg   case ADDR_TM_PRT_TILED_THIN1:
6467ec681f3Smrg      surf_level->mode = RADEON_SURF_MODE_1D;
6477ec681f3Smrg      break;
6487ec681f3Smrg   case ADDR_TM_2D_TILED_THIN1:
6497ec681f3Smrg   case ADDR_TM_PRT_2D_TILED_THIN1:
6507ec681f3Smrg      surf_level->mode = RADEON_SURF_MODE_2D;
6517ec681f3Smrg      break;
6527ec681f3Smrg   default:
6537ec681f3Smrg      assert(0);
6547ec681f3Smrg   }
6557ec681f3Smrg
6567ec681f3Smrg   if (is_stencil)
6577ec681f3Smrg      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
6587ec681f3Smrg   else
6597ec681f3Smrg      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;
6607ec681f3Smrg
6617ec681f3Smrg   if (AddrSurfInfoIn->flags.prt) {
6627ec681f3Smrg      if (level == 0) {
6637ec681f3Smrg         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
6647ec681f3Smrg         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
6657ec681f3Smrg      }
6667ec681f3Smrg      if (surf_level->nblk_x >= surf->prt_tile_width &&
6677ec681f3Smrg          surf_level->nblk_y >= surf->prt_tile_height) {
6687ec681f3Smrg         /* +1 because the current level is not in the miptail */
6697ec681f3Smrg         surf->first_mip_tail_level = level + 1;
6707ec681f3Smrg      }
6717ec681f3Smrg   }
6727ec681f3Smrg
6737ec681f3Smrg   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;
6747ec681f3Smrg
6757ec681f3Smrg   /* Clear DCC fields at the beginning. */
6767ec681f3Smrg   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
6777ec681f3Smrg      dcc_level->dcc_offset = 0;
6787ec681f3Smrg
6797ec681f3Smrg   /* The previous level's flag tells us if we can use DCC for this level. */
6807ec681f3Smrg   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
6817ec681f3Smrg      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;
6827ec681f3Smrg
6837ec681f3Smrg      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
6847ec681f3Smrg      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
6857ec681f3Smrg      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
6867ec681f3Smrg      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
6877ec681f3Smrg      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
6887ec681f3Smrg
6897ec681f3Smrg      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
6907ec681f3Smrg
6917ec681f3Smrg      if (ret == ADDR_OK) {
6927ec681f3Smrg         dcc_level->dcc_offset = surf->meta_size;
6937ec681f3Smrg         surf->num_meta_levels = level + 1;
6947ec681f3Smrg         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
6957ec681f3Smrg         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));
6967ec681f3Smrg
6977ec681f3Smrg         /* If the DCC size of a subresource (1 mip level or 1 slice)
6987ec681f3Smrg          * is not aligned, the DCC memory layout is not contiguous for
6997ec681f3Smrg          * that subresource, which means we can't use fast clear.
7007ec681f3Smrg          *
7017ec681f3Smrg          * We only do fast clears for whole mipmap levels. If we did
7027ec681f3Smrg          * per-slice fast clears, the same restriction would apply.
7037ec681f3Smrg          * (i.e. only compute the slice size and see if it's aligned)
7047ec681f3Smrg          *
7057ec681f3Smrg          * The last level can be non-contiguous and still be clearable
7067ec681f3Smrg          * if it's interleaved with the next level that doesn't exist.
7077ec681f3Smrg          */
7087ec681f3Smrg         if (AddrDccOut->dccRamSizeAligned ||
7097ec681f3Smrg             (prev_level_clearable && level == config->info.levels - 1))
7107ec681f3Smrg            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
7117ec681f3Smrg         else
7127ec681f3Smrg            dcc_level->dcc_fast_clear_size = 0;
7137ec681f3Smrg
7147ec681f3Smrg         /* Compute the DCC slice size because addrlib doesn't
7157ec681f3Smrg          * provide this info. As DCC memory is linear (each
7167ec681f3Smrg          * slice is the same size) it's easy to compute.
7177ec681f3Smrg          */
7187ec681f3Smrg         surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;
7197ec681f3Smrg
7207ec681f3Smrg         /* For arrays, we have to compute the DCC info again
7217ec681f3Smrg          * with one slice size to get a correct fast clear
7227ec681f3Smrg          * size.
7237ec681f3Smrg          */
7247ec681f3Smrg         if (config->info.array_size > 1) {
7257ec681f3Smrg            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
7267ec681f3Smrg            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
7277ec681f3Smrg            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
7287ec681f3Smrg            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
7297ec681f3Smrg            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
7307ec681f3Smrg
7317ec681f3Smrg            ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
7327ec681f3Smrg            if (ret == ADDR_OK) {
7337ec681f3Smrg               /* If the DCC memory isn't properly
7347ec681f3Smrg                * aligned, the data are interleaved
7357ec681f3Smrg                * across slices.
7367ec681f3Smrg                */
7377ec681f3Smrg               if (AddrDccOut->dccRamSizeAligned)
7387ec681f3Smrg                  dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
7397ec681f3Smrg               else
7407ec681f3Smrg                  dcc_level->dcc_slice_fast_clear_size = 0;
7417ec681f3Smrg            }
7427ec681f3Smrg
7437ec681f3Smrg            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
7447ec681f3Smrg                surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {
7457ec681f3Smrg               surf->meta_size = 0;
7467ec681f3Smrg               surf->num_meta_levels = 0;
7477ec681f3Smrg               AddrDccOut->subLvlCompressible = false;
7487ec681f3Smrg            }
7497ec681f3Smrg         } else {
7507ec681f3Smrg            dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;
7517ec681f3Smrg         }
7527ec681f3Smrg      }
7537ec681f3Smrg   }
7547ec681f3Smrg
7557ec681f3Smrg   /* HTILE. */
7567ec681f3Smrg   if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&
7577ec681f3Smrg       level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
7587ec681f3Smrg      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
7597ec681f3Smrg      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
7607ec681f3Smrg      AddrHtileIn->height = AddrSurfInfoOut->height;
7617ec681f3Smrg      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
7627ec681f3Smrg      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
7637ec681f3Smrg      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
7647ec681f3Smrg      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
7657ec681f3Smrg      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
7667ec681f3Smrg      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
7677ec681f3Smrg
7687ec681f3Smrg      ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);
7697ec681f3Smrg
7707ec681f3Smrg      if (ret == ADDR_OK) {
7717ec681f3Smrg         surf->meta_size = AddrHtileOut->htileBytes;
7727ec681f3Smrg         surf->meta_slice_size = AddrHtileOut->sliceSize;
7737ec681f3Smrg         surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);
7747ec681f3Smrg         surf->meta_pitch = AddrHtileOut->pitch;
7757ec681f3Smrg         surf->num_meta_levels = level + 1;
7767ec681f3Smrg      }
7777ec681f3Smrg   }
7787ec681f3Smrg
7797ec681f3Smrg   return 0;
7807ec681f3Smrg}
7817ec681f3Smrg
7827ec681f3Smrgstatic void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
7837ec681f3Smrg{
7847ec681f3Smrg   uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
7857ec681f3Smrg
7867ec681f3Smrg   if (info->chip_class >= GFX7)
7877ec681f3Smrg      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
7887ec681f3Smrg   else
7897ec681f3Smrg      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
79001e04c3fSmrg}
79101e04c3fSmrg
79201e04c3fSmrgstatic unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
79301e04c3fSmrg{
7947ec681f3Smrg   unsigned index, tileb;
79501e04c3fSmrg
7967ec681f3Smrg   tileb = 8 * 8 * surf->bpe;
7977ec681f3Smrg   tileb = MIN2(surf->u.legacy.tile_split, tileb);
79801e04c3fSmrg
7997ec681f3Smrg   for (index = 0; tileb > 64; index++)
8007ec681f3Smrg      tileb >>= 1;
80101e04c3fSmrg
8027ec681f3Smrg   assert(index < 16);
8037ec681f3Smrg   return index;
80401e04c3fSmrg}
80501e04c3fSmrg
8067ec681f3Smrgstatic bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
80701e04c3fSmrg{
8087ec681f3Smrg   unsigned num_channels = config->info.num_channels;
8097ec681f3Smrg   unsigned bpe = surf->bpe;
8107ec681f3Smrg
8117ec681f3Smrg   /* With modifiers the kernel is in charge of whether it is displayable.
8127ec681f3Smrg    * We need to ensure at least 32 pixels pitch alignment, but this is
8137ec681f3Smrg    * always the case when the blocksize >= 4K.
8147ec681f3Smrg    */
8157ec681f3Smrg   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
8167ec681f3Smrg      return false;
8177ec681f3Smrg
8187ec681f3Smrg   if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
8197ec681f3Smrg       surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
8207ec681f3Smrg       surf->blk_h == 1) {
8217ec681f3Smrg      /* subsampled */
8227ec681f3Smrg      if (surf->blk_w == 2 && surf->blk_h == 1)
8237ec681f3Smrg         return true;
8247ec681f3Smrg
8257ec681f3Smrg      if (/* RGBA8 or RGBA16F */
8267ec681f3Smrg          (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
8277ec681f3Smrg          /* R5G6B5 or R5G5B5A1 */
8287ec681f3Smrg          (bpe == 2 && num_channels >= 3) ||
8297ec681f3Smrg          /* C8 palette */
8307ec681f3Smrg          (bpe == 1 && num_channels == 1))
8317ec681f3Smrg         return true;
8327ec681f3Smrg   }
8337ec681f3Smrg   return false;
83401e04c3fSmrg}
83501e04c3fSmrg
83601e04c3fSmrg/**
83701e04c3fSmrg * This must be called after the first level is computed.
83801e04c3fSmrg *
83901e04c3fSmrg * Copy surface-global settings like pipe/bank config from level 0 surface
84001e04c3fSmrg * computation, and compute tile swizzle.
84101e04c3fSmrg */
8427ec681f3Smrgstatic int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
8437ec681f3Smrg                                 const struct ac_surf_config *config,
8447ec681f3Smrg                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
84501e04c3fSmrg{
8467ec681f3Smrg   surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
8477ec681f3Smrg   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
8487ec681f3Smrg   gfx6_set_micro_tile_mode(surf, info);
8497ec681f3Smrg
8507ec681f3Smrg   /* For 2D modes only. */
8517ec681f3Smrg   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
8527ec681f3Smrg      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
8537ec681f3Smrg      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
8547ec681f3Smrg      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
8557ec681f3Smrg      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
8567ec681f3Smrg      surf->u.legacy.num_banks = csio->pTileInfo->banks;
8577ec681f3Smrg      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
8587ec681f3Smrg   } else {
8597ec681f3Smrg      surf->u.legacy.macro_tile_index = 0;
8607ec681f3Smrg   }
8617ec681f3Smrg
8627ec681f3Smrg   /* Compute tile swizzle. */
8637ec681f3Smrg   /* TODO: fix tile swizzle with mipmapping for GFX6 */
8647ec681f3Smrg   if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
8657ec681f3Smrg       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
8667ec681f3Smrg       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
8677ec681f3Smrg       !get_display_flag(config, surf)) {
8687ec681f3Smrg      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
8697ec681f3Smrg      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
8707ec681f3Smrg
8717ec681f3Smrg      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
8727ec681f3Smrg      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
8737ec681f3Smrg
8747ec681f3Smrg      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
8757ec681f3Smrg      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
8767ec681f3Smrg      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
8777ec681f3Smrg      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
8787ec681f3Smrg      AddrBaseSwizzleIn.tileMode = csio->tileMode;
8797ec681f3Smrg
8807ec681f3Smrg      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
8817ec681f3Smrg      if (r != ADDR_OK)
8827ec681f3Smrg         return r;
8837ec681f3Smrg
8847ec681f3Smrg      assert(AddrBaseSwizzleOut.tileSwizzle <=
8857ec681f3Smrg             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
8867ec681f3Smrg      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
8877ec681f3Smrg   }
8887ec681f3Smrg   return 0;
88901e04c3fSmrg}
89001e04c3fSmrg
8917ec681f3Smrgstatic void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
8927ec681f3Smrg                             struct radeon_surf *surf)
89301e04c3fSmrg{
8947ec681f3Smrg   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
8957ec681f3Smrg   unsigned num_pipes = info->num_tile_pipes;
8967ec681f3Smrg   unsigned cl_width, cl_height;
8977ec681f3Smrg
8987ec681f3Smrg   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
8997ec681f3Smrg       (config->info.samples >= 2 && !surf->fmask_size))
9007ec681f3Smrg      return;
9017ec681f3Smrg
9027ec681f3Smrg   assert(info->chip_class <= GFX8);
9037ec681f3Smrg
9047ec681f3Smrg   switch (num_pipes) {
9057ec681f3Smrg   case 2:
9067ec681f3Smrg      cl_width = 32;
9077ec681f3Smrg      cl_height = 16;
9087ec681f3Smrg      break;
9097ec681f3Smrg   case 4:
9107ec681f3Smrg      cl_width = 32;
9117ec681f3Smrg      cl_height = 32;
9127ec681f3Smrg      break;
9137ec681f3Smrg   case 8:
9147ec681f3Smrg      cl_width = 64;
9157ec681f3Smrg      cl_height = 32;
9167ec681f3Smrg      break;
9177ec681f3Smrg   case 16: /* Hawaii */
9187ec681f3Smrg      cl_width = 64;
9197ec681f3Smrg      cl_height = 64;
9207ec681f3Smrg      break;
9217ec681f3Smrg   default:
9227ec681f3Smrg      assert(0);
9237ec681f3Smrg      return;
9247ec681f3Smrg   }
9257ec681f3Smrg
9267ec681f3Smrg   unsigned base_align = num_pipes * pipe_interleave_bytes;
9277ec681f3Smrg
9287ec681f3Smrg   unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
9297ec681f3Smrg   unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
9307ec681f3Smrg   unsigned slice_elements = (width * height) / (8 * 8);
9317ec681f3Smrg
9327ec681f3Smrg   /* Each element of CMASK is a nibble. */
9337ec681f3Smrg   unsigned slice_bytes = slice_elements / 2;
9347ec681f3Smrg
9357ec681f3Smrg   surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);
9367ec681f3Smrg   if (surf->u.legacy.color.cmask_slice_tile_max)
9377ec681f3Smrg      surf->u.legacy.color.cmask_slice_tile_max -= 1;
9387ec681f3Smrg
9397ec681f3Smrg   unsigned num_layers;
9407ec681f3Smrg   if (config->is_3d)
9417ec681f3Smrg      num_layers = config->info.depth;
9427ec681f3Smrg   else if (config->is_cube)
9437ec681f3Smrg      num_layers = 6;
9447ec681f3Smrg   else
9457ec681f3Smrg      num_layers = config->info.array_size;
9467ec681f3Smrg
9477ec681f3Smrg   surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));
9487ec681f3Smrg   surf->cmask_slice_size = align(slice_bytes, base_align);
9497ec681f3Smrg   surf->cmask_size = surf->cmask_slice_size * num_layers;
95001e04c3fSmrg}
95101e04c3fSmrg
95201e04c3fSmrg/**
95301e04c3fSmrg * Fill in the tiling information in \p surf based on the given surface config.
95401e04c3fSmrg *
95501e04c3fSmrg * The following fields of \p surf must be initialized by the caller:
95601e04c3fSmrg * blk_w, blk_h, bpe, flags.
95701e04c3fSmrg */
9587ec681f3Smrgstatic int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
9597ec681f3Smrg                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
9607ec681f3Smrg                                struct radeon_surf *surf)
96101e04c3fSmrg{
9627ec681f3Smrg   unsigned level;
9637ec681f3Smrg   bool compressed;
9647ec681f3Smrg   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
9657ec681f3Smrg   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
9667ec681f3Smrg   ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
9677ec681f3Smrg   ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
9687ec681f3Smrg   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
9697ec681f3Smrg   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
9707ec681f3Smrg   ADDR_TILEINFO AddrTileInfoIn = {0};
9717ec681f3Smrg   ADDR_TILEINFO AddrTileInfoOut = {0};
9727ec681f3Smrg   int r;
9737ec681f3Smrg
9747ec681f3Smrg   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
9757ec681f3Smrg   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
9767ec681f3Smrg   AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
9777ec681f3Smrg   AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
9787ec681f3Smrg   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
9797ec681f3Smrg   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
9807ec681f3Smrg   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
9817ec681f3Smrg
9827ec681f3Smrg   compressed = surf->blk_w == 4 && surf->blk_h == 4;
9837ec681f3Smrg
9847ec681f3Smrg   /* MSAA requires 2D tiling. */
9857ec681f3Smrg   if (config->info.samples > 1)
9867ec681f3Smrg      mode = RADEON_SURF_MODE_2D;
9877ec681f3Smrg
9887ec681f3Smrg   /* DB doesn't support linear layouts. */
9897ec681f3Smrg   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
9907ec681f3Smrg      mode = RADEON_SURF_MODE_1D;
9917ec681f3Smrg
9927ec681f3Smrg   /* Set the requested tiling mode. */
9937ec681f3Smrg   switch (mode) {
9947ec681f3Smrg   case RADEON_SURF_MODE_LINEAR_ALIGNED:
9957ec681f3Smrg      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
9967ec681f3Smrg      break;
9977ec681f3Smrg   case RADEON_SURF_MODE_1D:
9987ec681f3Smrg      if (surf->flags & RADEON_SURF_PRT)
9997ec681f3Smrg         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;
10007ec681f3Smrg      else
10017ec681f3Smrg         AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
10027ec681f3Smrg      break;
10037ec681f3Smrg   case RADEON_SURF_MODE_2D:
10047ec681f3Smrg      if (surf->flags & RADEON_SURF_PRT)
10057ec681f3Smrg         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
10067ec681f3Smrg      else
10077ec681f3Smrg         AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
10087ec681f3Smrg      break;
10097ec681f3Smrg   default:
10107ec681f3Smrg      assert(0);
10117ec681f3Smrg   }
10127ec681f3Smrg
10137ec681f3Smrg   /* The format must be set correctly for the allocation of compressed
10147ec681f3Smrg    * textures to work. In other cases, setting the bpp is sufficient.
10157ec681f3Smrg    */
10167ec681f3Smrg   if (compressed) {
10177ec681f3Smrg      switch (surf->bpe) {
10187ec681f3Smrg      case 8:
10197ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_BC1;
10207ec681f3Smrg         break;
10217ec681f3Smrg      case 16:
10227ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_BC3;
10237ec681f3Smrg         break;
10247ec681f3Smrg      default:
10257ec681f3Smrg         assert(0);
10267ec681f3Smrg      }
10277ec681f3Smrg   } else {
10287ec681f3Smrg      AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
10297ec681f3Smrg   }
10307ec681f3Smrg
10317ec681f3Smrg   AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
10327ec681f3Smrg   AddrSurfInfoIn.tileIndex = -1;
10337ec681f3Smrg
10347ec681f3Smrg   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
10357ec681f3Smrg      AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
10367ec681f3Smrg   }
10377ec681f3Smrg
10387ec681f3Smrg   /* Set the micro tile type. */
10397ec681f3Smrg   if (surf->flags & RADEON_SURF_SCANOUT)
10407ec681f3Smrg      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
10417ec681f3Smrg   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
10427ec681f3Smrg      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
10437ec681f3Smrg   else
10447ec681f3Smrg      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
10457ec681f3Smrg
10467ec681f3Smrg   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
10477ec681f3Smrg   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
10487ec681f3Smrg   AddrSurfInfoIn.flags.cube = config->is_cube;
10497ec681f3Smrg   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
10507ec681f3Smrg   AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
10517ec681f3Smrg   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
10527ec681f3Smrg   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
10537ec681f3Smrg
10547ec681f3Smrg   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
10557ec681f3Smrg    * requested, because TC-compatible HTILE requires 2D tiling.
10567ec681f3Smrg    */
10577ec681f3Smrg   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
10587ec681f3Smrg                                    !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
10597ec681f3Smrg                                    !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
10607ec681f3Smrg
10617ec681f3Smrg   /* DCC notes:
10627ec681f3Smrg    * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
10637ec681f3Smrg    *   with samples >= 4.
10647ec681f3Smrg    * - Mipmapped array textures have low performance (discovered by a closed
10657ec681f3Smrg    *   driver team).
10667ec681f3Smrg    */
10677ec681f3Smrg   AddrSurfInfoIn.flags.dccCompatible =
10687ec681f3Smrg      info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
10697ec681f3Smrg      !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
10707ec681f3Smrg      !compressed &&
10717ec681f3Smrg      ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);
10727ec681f3Smrg
10737ec681f3Smrg   AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
10747ec681f3Smrg   AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
10757ec681f3Smrg
10767ec681f3Smrg   /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
10777ec681f3Smrg    * for Z and stencil. This can cause a number of problems which we work
10787ec681f3Smrg    * around here:
10797ec681f3Smrg    *
10807ec681f3Smrg    * - a depth part that is incompatible with mipmapped texturing
10817ec681f3Smrg    * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
10827ec681f3Smrg    *   incorrect tiling applied to the stencil part, stencil buffer
10837ec681f3Smrg    *   memory accesses that go out of bounds) even without mipmapping
10847ec681f3Smrg    *
10857ec681f3Smrg    * Some piglit tests that are prone to different types of related
10867ec681f3Smrg    * failures:
10877ec681f3Smrg    *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
10887ec681f3Smrg    *  ./bin/framebuffer-blit-levels {draw,read} stencil
10897ec681f3Smrg    *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
10907ec681f3Smrg    *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
10917ec681f3Smrg    *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
10927ec681f3Smrg    */
10937ec681f3Smrg   int stencil_tile_idx = -1;
10947ec681f3Smrg
10957ec681f3Smrg   if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
10967ec681f3Smrg       (config->info.levels > 1 || info->family == CHIP_STONEY)) {
10977ec681f3Smrg      /* Compute stencilTileIdx that is compatible with the (depth)
10987ec681f3Smrg       * tileIdx. This degrades the depth surface if necessary to
10997ec681f3Smrg       * ensure that a matching stencilTileIdx exists. */
11007ec681f3Smrg      AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
11017ec681f3Smrg
11027ec681f3Smrg      /* Keep the depth mip-tail compatible with texturing. */
11037ec681f3Smrg      AddrSurfInfoIn.flags.noStencil = 1;
11047ec681f3Smrg   }
11057ec681f3Smrg
11067ec681f3Smrg   /* Set preferred macrotile parameters. This is usually required
11077ec681f3Smrg    * for shared resources. This is for 2D tiling only. */
11087ec681f3Smrg   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
11097ec681f3Smrg       AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
11107ec681f3Smrg       surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
11117ec681f3Smrg      /* If any of these parameters are incorrect, the calculation
11127ec681f3Smrg       * will fail. */
11137ec681f3Smrg      AddrTileInfoIn.banks = surf->u.legacy.num_banks;
11147ec681f3Smrg      AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
11157ec681f3Smrg      AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
11167ec681f3Smrg      AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
11177ec681f3Smrg      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
11187ec681f3Smrg      AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
11197ec681f3Smrg      AddrSurfInfoIn.flags.opt4Space = 0;
11207ec681f3Smrg      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
11217ec681f3Smrg
11227ec681f3Smrg      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
11237ec681f3Smrg       * the tile index, because we are expected to know it if
11247ec681f3Smrg       * we know the other parameters.
11257ec681f3Smrg       *
11267ec681f3Smrg       * This is something that can easily be fixed in Addrlib.
11277ec681f3Smrg       * For now, just figure it out here.
11287ec681f3Smrg       * Note that only 2D_TILE_THIN1 is handled here.
11297ec681f3Smrg       */
11307ec681f3Smrg      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
11317ec681f3Smrg      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
11327ec681f3Smrg
11337ec681f3Smrg      if (info->chip_class == GFX6) {
11347ec681f3Smrg         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
11357ec681f3Smrg            if (surf->bpe == 2)
11367ec681f3Smrg               AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
11377ec681f3Smrg            else
11387ec681f3Smrg               AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
11397ec681f3Smrg         } else {
11407ec681f3Smrg            if (surf->bpe == 1)
11417ec681f3Smrg               AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
11427ec681f3Smrg            else if (surf->bpe == 2)
11437ec681f3Smrg               AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
11447ec681f3Smrg            else if (surf->bpe == 4)
11457ec681f3Smrg               AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
11467ec681f3Smrg            else
11477ec681f3Smrg               AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
11487ec681f3Smrg         }
11497ec681f3Smrg      } else {
11507ec681f3Smrg         /* GFX7 - GFX8 */
11517ec681f3Smrg         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
11527ec681f3Smrg            AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
11537ec681f3Smrg         else
11547ec681f3Smrg            AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
11557ec681f3Smrg
11567ec681f3Smrg         /* Addrlib doesn't set this if tileIndex is forced like above. */
11577ec681f3Smrg         AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
11587ec681f3Smrg      }
11597ec681f3Smrg   }
11607ec681f3Smrg
11617ec681f3Smrg   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
11627ec681f3Smrg   surf->num_meta_levels = 0;
11637ec681f3Smrg   surf->surf_size = 0;
11647ec681f3Smrg   surf->meta_size = 0;
11657ec681f3Smrg   surf->meta_slice_size = 0;
11667ec681f3Smrg   surf->meta_alignment_log2 = 0;
11677ec681f3Smrg
11687ec681f3Smrg   const bool only_stencil =
11697ec681f3Smrg      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
11707ec681f3Smrg
11717ec681f3Smrg   /* Calculate texture layout information. */
11727ec681f3Smrg   if (!only_stencil) {
11737ec681f3Smrg      for (level = 0; level < config->info.levels; level++) {
11747ec681f3Smrg         r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
11757ec681f3Smrg                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
11767ec681f3Smrg                                &AddrHtileOut);
11777ec681f3Smrg         if (r)
11787ec681f3Smrg            return r;
11797ec681f3Smrg
11807ec681f3Smrg         if (level > 0)
11817ec681f3Smrg            continue;
11827ec681f3Smrg
11837ec681f3Smrg         if (!AddrSurfInfoOut.tcCompatible) {
11847ec681f3Smrg            AddrSurfInfoIn.flags.tcCompatible = 0;
11857ec681f3Smrg            surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
11867ec681f3Smrg         }
11877ec681f3Smrg
11887ec681f3Smrg         if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
11897ec681f3Smrg            AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
11907ec681f3Smrg            AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
11917ec681f3Smrg            stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
11927ec681f3Smrg
11937ec681f3Smrg            assert(stencil_tile_idx >= 0);
11947ec681f3Smrg         }
11957ec681f3Smrg
11967ec681f3Smrg         r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
11977ec681f3Smrg         if (r)
11987ec681f3Smrg            return r;
11997ec681f3Smrg      }
12007ec681f3Smrg   }
12017ec681f3Smrg
12027ec681f3Smrg   /* Calculate texture layout information for stencil. */
12037ec681f3Smrg   if (surf->flags & RADEON_SURF_SBUFFER) {
12047ec681f3Smrg      AddrSurfInfoIn.tileIndex = stencil_tile_idx;
12057ec681f3Smrg      AddrSurfInfoIn.bpp = 8;
12067ec681f3Smrg      AddrSurfInfoIn.flags.depth = 0;
12077ec681f3Smrg      AddrSurfInfoIn.flags.stencil = 1;
12087ec681f3Smrg      AddrSurfInfoIn.flags.tcCompatible = 0;
12097ec681f3Smrg      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
12107ec681f3Smrg      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
12117ec681f3Smrg
12127ec681f3Smrg      for (level = 0; level < config->info.levels; level++) {
12137ec681f3Smrg         r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
12147ec681f3Smrg                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
12157ec681f3Smrg         if (r)
12167ec681f3Smrg            return r;
12177ec681f3Smrg
12187ec681f3Smrg         /* DB uses the depth pitch for both stencil and depth. */
12197ec681f3Smrg         if (!only_stencil) {
12207ec681f3Smrg            if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
12217ec681f3Smrg               surf->u.legacy.stencil_adjusted = true;
12227ec681f3Smrg         } else {
12237ec681f3Smrg            surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;
12247ec681f3Smrg         }
12257ec681f3Smrg
12267ec681f3Smrg         if (level == 0) {
12277ec681f3Smrg            if (only_stencil) {
12287ec681f3Smrg               r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
12297ec681f3Smrg               if (r)
12307ec681f3Smrg                  return r;
12317ec681f3Smrg            }
12327ec681f3Smrg
12337ec681f3Smrg            /* For 2D modes only. */
12347ec681f3Smrg            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
12357ec681f3Smrg               surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
12367ec681f3Smrg            }
12377ec681f3Smrg         }
12387ec681f3Smrg      }
12397ec681f3Smrg   }
12407ec681f3Smrg
12417ec681f3Smrg   /* Compute FMASK. */
12427ec681f3Smrg   if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
12437ec681f3Smrg       !(surf->flags & RADEON_SURF_NO_FMASK)) {
12447ec681f3Smrg      ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
12457ec681f3Smrg      ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
12467ec681f3Smrg      ADDR_TILEINFO fmask_tile_info = {0};
12477ec681f3Smrg
12487ec681f3Smrg      fin.size = sizeof(fin);
12497ec681f3Smrg      fout.size = sizeof(fout);
12507ec681f3Smrg
12517ec681f3Smrg      fin.tileMode = AddrSurfInfoOut.tileMode;
12527ec681f3Smrg      fin.pitch = AddrSurfInfoOut.pitch;
12537ec681f3Smrg      fin.height = config->info.height;
12547ec681f3Smrg      fin.numSlices = AddrSurfInfoIn.numSlices;
12557ec681f3Smrg      fin.numSamples = AddrSurfInfoIn.numSamples;
12567ec681f3Smrg      fin.numFrags = AddrSurfInfoIn.numFrags;
12577ec681f3Smrg      fin.tileIndex = -1;
12587ec681f3Smrg      fout.pTileInfo = &fmask_tile_info;
12597ec681f3Smrg
12607ec681f3Smrg      r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
12617ec681f3Smrg      if (r)
12627ec681f3Smrg         return r;
12637ec681f3Smrg
12647ec681f3Smrg      surf->fmask_size = fout.fmaskBytes;
12657ec681f3Smrg      surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
12667ec681f3Smrg      surf->fmask_slice_size = fout.sliceSize;
12677ec681f3Smrg      surf->fmask_tile_swizzle = 0;
12687ec681f3Smrg
12697ec681f3Smrg      surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
12707ec681f3Smrg      if (surf->u.legacy.color.fmask.slice_tile_max)
12717ec681f3Smrg         surf->u.legacy.color.fmask.slice_tile_max -= 1;
12727ec681f3Smrg
12737ec681f3Smrg      surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;
12747ec681f3Smrg      surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;
12757ec681f3Smrg      surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;
12767ec681f3Smrg
12777ec681f3Smrg      /* Compute tile swizzle for FMASK. */
12787ec681f3Smrg      if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
12797ec681f3Smrg         ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
12807ec681f3Smrg         ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
12817ec681f3Smrg
12827ec681f3Smrg         xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
12837ec681f3Smrg         xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
12847ec681f3Smrg
12857ec681f3Smrg         /* This counter starts from 1 instead of 0. */
12867ec681f3Smrg         xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
12877ec681f3Smrg         xin.tileIndex = fout.tileIndex;
12887ec681f3Smrg         xin.macroModeIndex = fout.macroModeIndex;
12897ec681f3Smrg         xin.pTileInfo = fout.pTileInfo;
12907ec681f3Smrg         xin.tileMode = fin.tileMode;
12917ec681f3Smrg
12927ec681f3Smrg         int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
12937ec681f3Smrg         if (r != ADDR_OK)
12947ec681f3Smrg            return r;
12957ec681f3Smrg
12967ec681f3Smrg         assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
12977ec681f3Smrg         surf->fmask_tile_swizzle = xout.tileSwizzle;
12987ec681f3Smrg      }
12997ec681f3Smrg   }
13007ec681f3Smrg
13017ec681f3Smrg   /* Recalculate the whole DCC miptree size including disabled levels.
13027ec681f3Smrg    * This is what addrlib does, but calling addrlib would be a lot more
13037ec681f3Smrg    * complicated.
13047ec681f3Smrg    */
13057ec681f3Smrg   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
13067ec681f3Smrg      /* The smallest miplevels that are never compressed by DCC
13077ec681f3Smrg       * still read the DCC buffer via TC if the base level uses DCC,
13087ec681f3Smrg       * and for some reason the DCC buffer needs to be larger if
13097ec681f3Smrg       * the miptree uses non-zero tile_swizzle. Otherwise there are
13107ec681f3Smrg       * VM faults.
13117ec681f3Smrg       *
13127ec681f3Smrg       * "dcc_alignment * 4" was determined by trial and error.
13137ec681f3Smrg       */
13147ec681f3Smrg      surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4);
13157ec681f3Smrg   }
13167ec681f3Smrg
13177ec681f3Smrg   /* Make sure HTILE covers the whole miptree, because the shader reads
13187ec681f3Smrg    * TC-compatible HTILE even for levels where it's disabled by DB.
13197ec681f3Smrg    */
13207ec681f3Smrg   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&
13217ec681f3Smrg       surf->meta_size && config->info.levels > 1) {
13227ec681f3Smrg      /* MSAA can't occur with levels > 1, so ignore the sample count. */
13237ec681f3Smrg      const unsigned total_pixels = surf->surf_size / surf->bpe;
13247ec681f3Smrg      const unsigned htile_block_size = 8 * 8;
13257ec681f3Smrg      const unsigned htile_element_size = 4;
13267ec681f3Smrg
13277ec681f3Smrg      surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;
13287ec681f3Smrg      surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);
13297ec681f3Smrg   } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {
13307ec681f3Smrg      /* Unset this if HTILE is not present. */
13317ec681f3Smrg      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
13327ec681f3Smrg   }
13337ec681f3Smrg
13347ec681f3Smrg   surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
13357ec681f3Smrg   surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
13367ec681f3Smrg                          surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
13377ec681f3Smrg
13387ec681f3Smrg   /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
13397ec681f3Smrg    * used at the same time. This case is not currently expected to occur
13407ec681f3Smrg    * because we don't use rotated. Enforce this restriction on all chips
13417ec681f3Smrg    * to facilitate testing.
13427ec681f3Smrg    */
13437ec681f3Smrg   if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
13447ec681f3Smrg      assert(!"rotate micro tile mode is unsupported");
13457ec681f3Smrg      return ADDR_ERROR;
13467ec681f3Smrg   }
13477ec681f3Smrg
13487ec681f3Smrg   ac_compute_cmask(info, config, surf);
13497ec681f3Smrg   return 0;
135001e04c3fSmrg}
135101e04c3fSmrg
135201e04c3fSmrg/* This is only called when expecting a tiled layout. */
13537ec681f3Smrgstatic int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
13547ec681f3Smrg                                           struct radeon_surf *surf,
13557ec681f3Smrg                                           ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
13567ec681f3Smrg                                           AddrSwizzleMode *swizzle_mode)
13577ec681f3Smrg{
13587ec681f3Smrg   ADDR_E_RETURNCODE ret;
13597ec681f3Smrg   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
13607ec681f3Smrg   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
13617ec681f3Smrg
13627ec681f3Smrg   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
13637ec681f3Smrg   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
13647ec681f3Smrg
13657ec681f3Smrg   sin.flags = in->flags;
13667ec681f3Smrg   sin.resourceType = in->resourceType;
13677ec681f3Smrg   sin.format = in->format;
13687ec681f3Smrg   sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
13697ec681f3Smrg   /* TODO: We could allow some of these: */
13707ec681f3Smrg   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
13717ec681f3Smrg   sin.forbiddenBlock.var = 1;   /* don't allow the variable-sized swizzle modes */
13727ec681f3Smrg   sin.bpp = in->bpp;
13737ec681f3Smrg   sin.width = in->width;
13747ec681f3Smrg   sin.height = in->height;
13757ec681f3Smrg   sin.numSlices = in->numSlices;
13767ec681f3Smrg   sin.numMipLevels = in->numMipLevels;
13777ec681f3Smrg   sin.numSamples = in->numSamples;
13787ec681f3Smrg   sin.numFrags = in->numFrags;
13797ec681f3Smrg
13807ec681f3Smrg   if (is_fmask) {
13817ec681f3Smrg      sin.flags.display = 0;
13827ec681f3Smrg      sin.flags.color = 0;
13837ec681f3Smrg      sin.flags.fmask = 1;
13847ec681f3Smrg   }
13857ec681f3Smrg
13867ec681f3Smrg   /* With PRT images we want to force 64 KiB block size so that the image
13877ec681f3Smrg    * created is consistent with the format properties returned in Vulkan
13887ec681f3Smrg    * independent of the image. */
13897ec681f3Smrg   if (sin.flags.prt) {
13907ec681f3Smrg      sin.forbiddenBlock.macroThin4KB = 1;
13917ec681f3Smrg      sin.forbiddenBlock.macroThick4KB = 1;
13927ec681f3Smrg      sin.forbiddenBlock.linear = 1;
13937ec681f3Smrg   }
13947ec681f3Smrg
13957ec681f3Smrg   if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
13967ec681f3Smrg      sin.forbiddenBlock.linear = 1;
13977ec681f3Smrg
13987ec681f3Smrg      if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
13997ec681f3Smrg         sin.preferredSwSet.sw_D = 1;
14007ec681f3Smrg      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
14017ec681f3Smrg         sin.preferredSwSet.sw_S = 1;
14027ec681f3Smrg      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
14037ec681f3Smrg         sin.preferredSwSet.sw_Z = 1;
14047ec681f3Smrg      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
14057ec681f3Smrg         sin.preferredSwSet.sw_R = 1;
14067ec681f3Smrg   }
14077ec681f3Smrg
14087ec681f3Smrg   if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
14097ec681f3Smrg      /* 3D textures should use S swizzle modes for the best performance.
14107ec681f3Smrg       * THe only exception is 3D render targets, which prefer 64KB_D_X.
14117ec681f3Smrg       *
14127ec681f3Smrg       * 3D texture sampler performance with a very large 3D texture:
14137ec681f3Smrg       *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
14147ec681f3Smrg       *   ADDR_SW_64KB_Z_X = 25 FPS
14157ec681f3Smrg       *   ADDR_SW_64KB_D_X = 53 FPS
14167ec681f3Smrg       *   ADDR_SW_4KB_S    = 53 FPS
14177ec681f3Smrg       *   ADDR_SW_64KB_S   = 53 FPS
14187ec681f3Smrg       *   ADDR_SW_64KB_S_T = 61 FPS
14197ec681f3Smrg       *   ADDR_SW_4KB_S_X  = 63 FPS
14207ec681f3Smrg       *   ADDR_SW_64KB_S_X = 62 FPS
14217ec681f3Smrg       */
14227ec681f3Smrg      sin.preferredSwSet.sw_S = 1;
14237ec681f3Smrg   }
14247ec681f3Smrg
14257ec681f3Smrg   ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
14267ec681f3Smrg   if (ret != ADDR_OK)
14277ec681f3Smrg      return ret;
14287ec681f3Smrg
14297ec681f3Smrg   *swizzle_mode = sout.swizzleMode;
14307ec681f3Smrg   return 0;
14317ec681f3Smrg}
14327ec681f3Smrg
14337ec681f3Smrgstatic bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
14347ec681f3Smrg{
14357ec681f3Smrg   if (info->chip_class >= GFX10)
14367ec681f3Smrg      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
14377ec681f3Smrg
14387ec681f3Smrg   return sw_mode != ADDR_SW_LINEAR;
14397ec681f3Smrg}
14407ec681f3Smrg
14417ec681f3SmrgASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
14427ec681f3Smrg                                            const struct radeon_surf *surf)
144301e04c3fSmrg{
14447ec681f3Smrg   if (info->chip_class <= GFX9) {
14457ec681f3Smrg      /* Only independent 64B blocks are supported. */
14467ec681f3Smrg      return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&
14477ec681f3Smrg             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
14487ec681f3Smrg   }
14497ec681f3Smrg
14507ec681f3Smrg   if (info->family == CHIP_NAVI10) {
14517ec681f3Smrg      /* Only independent 128B blocks are supported. */
14527ec681f3Smrg      return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks &&
14537ec681f3Smrg             surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
14547ec681f3Smrg   }
14557ec681f3Smrg
14567ec681f3Smrg   if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
14577ec681f3Smrg      /* Either 64B or 128B can be used, but not both.
14587ec681f3Smrg       * If 64B is used, DCC image stores are unsupported.
14597ec681f3Smrg       */
14607ec681f3Smrg      return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&
14617ec681f3Smrg             (!surf->u.gfx9.color.dcc.independent_64B_blocks ||
14627ec681f3Smrg              surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
14637ec681f3Smrg             (!surf->u.gfx9.color.dcc.independent_128B_blocks ||
14647ec681f3Smrg              surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
14657ec681f3Smrg   }
14667ec681f3Smrg
14677ec681f3Smrg   /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
14687ec681f3Smrg    * Since there is no reason to ever disable 128B, require it.
14697ec681f3Smrg    * If 64B is used, DCC image stores are unsupported.
14707ec681f3Smrg    */
14717ec681f3Smrg   return surf->u.gfx9.color.dcc.independent_128B_blocks &&
14727ec681f3Smrg          surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
147301e04c3fSmrg}
147401e04c3fSmrg
14757ec681f3Smrgstatic bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info,
14767ec681f3Smrg                                                      const struct ac_surf_config *config)
147701e04c3fSmrg{
14787ec681f3Smrg   assert(info->chip_class >= GFX10);
14797ec681f3Smrg
14807ec681f3Smrg   /* Older kernels have buggy DAL. */
14817ec681f3Smrg   if (info->drm_minor <= 43)
14827ec681f3Smrg      return true;
14837ec681f3Smrg
14847ec681f3Smrg   /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
14857ec681f3Smrg   return config->info.width > 2560 || config->info.height > 2560;
148601e04c3fSmrg}
148701e04c3fSmrg
14887ec681f3Smrgvoid ac_modifier_max_extent(const struct radeon_info *info,
14897ec681f3Smrg                            uint64_t modifier, uint32_t *width, uint32_t *height)
149001e04c3fSmrg{
14917ec681f3Smrg   if (ac_modifier_has_dcc(modifier)) {
14927ec681f3Smrg      bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
14937ec681f3Smrg
14947ec681f3Smrg      if (info->chip_class >= GFX10 && !independent_64B_blocks) {
14957ec681f3Smrg         /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
14967ec681f3Smrg         *width = 2560;
14977ec681f3Smrg         *height = 2560;
14987ec681f3Smrg      } else {
14997ec681f3Smrg         /* DCC is not supported on surfaces above resolutions af 5760. */
15007ec681f3Smrg         *width = 5760;
15017ec681f3Smrg         *height = 5760;
15027ec681f3Smrg      }
15037ec681f3Smrg   } else {
15047ec681f3Smrg      /* Non-dcc modifiers */
15057ec681f3Smrg      *width = 16384;
15067ec681f3Smrg      *height = 16384;
15077ec681f3Smrg   }
150801e04c3fSmrg}
150901e04c3fSmrg
15107ec681f3Smrgstatic bool is_dcc_supported_by_DCN(const struct radeon_info *info,
15117ec681f3Smrg                                    const struct ac_surf_config *config,
15127ec681f3Smrg                                    const struct radeon_surf *surf, bool rb_aligned,
15137ec681f3Smrg                                    bool pipe_aligned)
151401e04c3fSmrg{
15157ec681f3Smrg   if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
15167ec681f3Smrg      return false;
15177ec681f3Smrg
15187ec681f3Smrg   /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
15197ec681f3Smrg   if (surf->bpe != 4)
15207ec681f3Smrg      return false;
15217ec681f3Smrg
15227ec681f3Smrg   /* Handle unaligned DCC. */
15237ec681f3Smrg   if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
15247ec681f3Smrg      return false;
15257ec681f3Smrg
15267ec681f3Smrg   /* Big resolutions don't support DCC. */
15277ec681f3Smrg   if (config->info.width > 5760 || config->info.height > 5760)
15287ec681f3Smrg      return false;
15297ec681f3Smrg
15307ec681f3Smrg   switch (info->chip_class) {
15317ec681f3Smrg   case GFX9:
15327ec681f3Smrg      /* There are more constraints, but we always set
15337ec681f3Smrg       * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
15347ec681f3Smrg       * which always works.
15357ec681f3Smrg       */
15367ec681f3Smrg      assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&
15377ec681f3Smrg             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
15387ec681f3Smrg      return true;
15397ec681f3Smrg   case GFX10:
15407ec681f3Smrg   case GFX10_3:
15417ec681f3Smrg      /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
15427ec681f3Smrg      if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
15437ec681f3Smrg         return false;
15447ec681f3Smrg
15457ec681f3Smrg      return (!gfx10_DCN_requires_independent_64B_blocks(info, config) ||
15467ec681f3Smrg              (surf->u.gfx9.color.dcc.independent_64B_blocks &&
15477ec681f3Smrg               surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
15487ec681f3Smrg   default:
15497ec681f3Smrg      unreachable("unhandled chip");
15507ec681f3Smrg      return false;
15517ec681f3Smrg   }
15527ec681f3Smrg}
155301e04c3fSmrg
15547ec681f3Smrgstatic void ac_copy_dcc_equation(const struct radeon_info *info,
15557ec681f3Smrg                                 ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,
15567ec681f3Smrg                                 struct gfx9_meta_equation *equation)
15577ec681f3Smrg{
15587ec681f3Smrg   equation->meta_block_width = dcc->metaBlkWidth;
15597ec681f3Smrg   equation->meta_block_height = dcc->metaBlkHeight;
15607ec681f3Smrg   equation->meta_block_depth = dcc->metaBlkDepth;
15617ec681f3Smrg
15627ec681f3Smrg   if (info->chip_class >= GFX10) {
15637ec681f3Smrg      /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
15647ec681f3Smrg      for (unsigned i = 0; i < 4; i++)
15657ec681f3Smrg         assert(dcc->equation.gfx10_bits[i] == 0);
15667ec681f3Smrg
15677ec681f3Smrg      for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++)
15687ec681f3Smrg         assert(dcc->equation.gfx10_bits[i] == 0);
15697ec681f3Smrg
15707ec681f3Smrg      memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,
15717ec681f3Smrg             sizeof(equation->u.gfx10_bits));
15727ec681f3Smrg   } else {
15737ec681f3Smrg      assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
15747ec681f3Smrg
15757ec681f3Smrg      equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;
15767ec681f3Smrg      equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;
15777ec681f3Smrg      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
15787ec681f3Smrg         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
15797ec681f3Smrg            equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim;
15807ec681f3Smrg            equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord;
15817ec681f3Smrg         }
15827ec681f3Smrg      }
15837ec681f3Smrg   }
15847ec681f3Smrg}
15857ec681f3Smrg
15867ec681f3Smrgstatic void ac_copy_cmask_equation(const struct radeon_info *info,
15877ec681f3Smrg                                   ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask,
15887ec681f3Smrg                                   struct gfx9_meta_equation *equation)
15897ec681f3Smrg{
15907ec681f3Smrg   equation->meta_block_width = cmask->metaBlkWidth;
15917ec681f3Smrg   equation->meta_block_height = cmask->metaBlkHeight;
15927ec681f3Smrg   equation->meta_block_depth = 1;
15937ec681f3Smrg
15947ec681f3Smrg   if (info->chip_class == GFX9) {
15957ec681f3Smrg      assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
15967ec681f3Smrg
15977ec681f3Smrg      equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits;
15987ec681f3Smrg      equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits;
15997ec681f3Smrg      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
16007ec681f3Smrg         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
16017ec681f3Smrg            equation->u.gfx9.bit[b].coord[c].dim = cmask->equation.gfx9.bit[b].coord[c].dim;
16027ec681f3Smrg            equation->u.gfx9.bit[b].coord[c].ord = cmask->equation.gfx9.bit[b].coord[c].ord;
16037ec681f3Smrg         }
16047ec681f3Smrg      }
16057ec681f3Smrg   }
16067ec681f3Smrg}
16077ec681f3Smrg
16087ec681f3Smrgstatic void ac_copy_htile_equation(const struct radeon_info *info,
16097ec681f3Smrg                                   ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,
16107ec681f3Smrg                                   struct gfx9_meta_equation *equation)
16117ec681f3Smrg{
16127ec681f3Smrg   equation->meta_block_width = htile->metaBlkWidth;
16137ec681f3Smrg   equation->meta_block_height = htile->metaBlkHeight;
16147ec681f3Smrg
16157ec681f3Smrg   /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */
16167ec681f3Smrg   for (unsigned i = 0; i < 8; i++)
16177ec681f3Smrg      assert(htile->equation.gfx10_bits[i] == 0);
16187ec681f3Smrg
16197ec681f3Smrg   for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++)
16207ec681f3Smrg      assert(htile->equation.gfx10_bits[i] == 0);
16217ec681f3Smrg
16227ec681f3Smrg   memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,
16237ec681f3Smrg          sizeof(equation->u.gfx10_bits));
16247ec681f3Smrg}
16257ec681f3Smrg
16267ec681f3Smrgstatic int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
16277ec681f3Smrg                                const struct ac_surf_config *config, struct radeon_surf *surf,
16287ec681f3Smrg                                bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
16297ec681f3Smrg{
16307ec681f3Smrg   ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
16317ec681f3Smrg   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
16327ec681f3Smrg   ADDR_E_RETURNCODE ret;
16337ec681f3Smrg
16347ec681f3Smrg   out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
16357ec681f3Smrg   out.pMipInfo = mip_info;
16367ec681f3Smrg
16377ec681f3Smrg   ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
16387ec681f3Smrg   if (ret != ADDR_OK)
16397ec681f3Smrg      return ret;
16407ec681f3Smrg
16417ec681f3Smrg   if (in->flags.prt) {
16427ec681f3Smrg      surf->prt_tile_width = out.blockWidth;
16437ec681f3Smrg      surf->prt_tile_height = out.blockHeight;
16447ec681f3Smrg
16457ec681f3Smrg      for (surf->first_mip_tail_level = 0; surf->first_mip_tail_level < in->numMipLevels;
16467ec681f3Smrg           ++surf->first_mip_tail_level) {
16477ec681f3Smrg         if(mip_info[surf->first_mip_tail_level].pitch < out.blockWidth ||
16487ec681f3Smrg            mip_info[surf->first_mip_tail_level].height < out.blockHeight)
16497ec681f3Smrg            break;
16507ec681f3Smrg      }
16517ec681f3Smrg
16527ec681f3Smrg      for (unsigned i = 0; i < in->numMipLevels; i++) {
16537ec681f3Smrg         surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
16547ec681f3Smrg
16557ec681f3Smrg         if (info->chip_class >= GFX10)
16567ec681f3Smrg            surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
16577ec681f3Smrg         else
16587ec681f3Smrg            surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
16597ec681f3Smrg      }
16607ec681f3Smrg   }
16617ec681f3Smrg
16627ec681f3Smrg   if (in->flags.stencil) {
16637ec681f3Smrg      surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
16647ec681f3Smrg      surf->u.gfx9.zs.stencil_epitch =
16657ec681f3Smrg         out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
16667ec681f3Smrg      surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
16677ec681f3Smrg      surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign);
16687ec681f3Smrg      surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
16697ec681f3Smrg      return 0;
16707ec681f3Smrg   }
16717ec681f3Smrg
16727ec681f3Smrg   surf->u.gfx9.swizzle_mode = in->swizzleMode;
16737ec681f3Smrg   surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
16747ec681f3Smrg
16757ec681f3Smrg   /* CMASK fast clear uses these even if FMASK isn't allocated.
16767ec681f3Smrg    * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
16777ec681f3Smrg    */
16787ec681f3Smrg   if (!in->flags.depth) {
16797ec681f3Smrg      surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3;
16807ec681f3Smrg      surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch;
16817ec681f3Smrg   }
16827ec681f3Smrg
16837ec681f3Smrg   surf->u.gfx9.surf_slice_size = out.sliceSize;
16847ec681f3Smrg   surf->u.gfx9.surf_pitch = out.pitch;
16857ec681f3Smrg   surf->u.gfx9.surf_height = out.height;
16867ec681f3Smrg   surf->surf_size = out.surfSize;
16877ec681f3Smrg   surf->surf_alignment_log2 = util_logbase2(out.baseAlign);
16887ec681f3Smrg
16897ec681f3Smrg   if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
16907ec681f3Smrg       surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
16917ec681f3Smrg      /* Adjust surf_pitch to be in elements units not in pixels */
16927ec681f3Smrg      surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
16937ec681f3Smrg      surf->u.gfx9.epitch =
16947ec681f3Smrg         MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
16957ec681f3Smrg      /* The surface is really a surf->bpe bytes per pixel surface even if we
16967ec681f3Smrg       * use it as a surf->bpe bytes per element one.
16977ec681f3Smrg       * Adjust surf_slice_size and surf_size to reflect the change
16987ec681f3Smrg       * made to surf_pitch.
16997ec681f3Smrg       */
17007ec681f3Smrg      surf->u.gfx9.surf_slice_size =
17017ec681f3Smrg         MAX2(surf->u.gfx9.surf_slice_size,
17027ec681f3Smrg              surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
17037ec681f3Smrg      surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
17047ec681f3Smrg   }
17057ec681f3Smrg
17067ec681f3Smrg   if (in->swizzleMode == ADDR_SW_LINEAR) {
17077ec681f3Smrg      for (unsigned i = 0; i < in->numMipLevels; i++) {
17087ec681f3Smrg         surf->u.gfx9.offset[i] = mip_info[i].offset;
17097ec681f3Smrg         surf->u.gfx9.pitch[i] = mip_info[i].pitch;
17107ec681f3Smrg      }
17117ec681f3Smrg   }
17127ec681f3Smrg
17137ec681f3Smrg   surf->u.gfx9.base_mip_width = mip_info[0].pitch;
17147ec681f3Smrg   surf->u.gfx9.base_mip_height = mip_info[0].height;
17157ec681f3Smrg
17167ec681f3Smrg   if (in->flags.depth) {
17177ec681f3Smrg      assert(in->swizzleMode != ADDR_SW_LINEAR);
17187ec681f3Smrg
17197ec681f3Smrg      if (surf->flags & RADEON_SURF_NO_HTILE)
17207ec681f3Smrg         return 0;
17217ec681f3Smrg
17227ec681f3Smrg      /* HTILE */
17237ec681f3Smrg      ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
17247ec681f3Smrg      ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
17257ec681f3Smrg      ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
17267ec681f3Smrg
17277ec681f3Smrg      hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
17287ec681f3Smrg      hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
17297ec681f3Smrg      hout.pMipInfo = meta_mip_info;
17307ec681f3Smrg
17317ec681f3Smrg      assert(in->flags.metaPipeUnaligned == 0);
17327ec681f3Smrg      assert(in->flags.metaRbUnaligned == 0);
17337ec681f3Smrg
17347ec681f3Smrg      hin.hTileFlags.pipeAligned = 1;
17357ec681f3Smrg      hin.hTileFlags.rbAligned = 1;
17367ec681f3Smrg      hin.depthFlags = in->flags;
17377ec681f3Smrg      hin.swizzleMode = in->swizzleMode;
17387ec681f3Smrg      hin.unalignedWidth = in->width;
17397ec681f3Smrg      hin.unalignedHeight = in->height;
17407ec681f3Smrg      hin.numSlices = in->numSlices;
17417ec681f3Smrg      hin.numMipLevels = in->numMipLevels;
17427ec681f3Smrg      hin.firstMipIdInTail = out.firstMipIdInTail;
17437ec681f3Smrg
17447ec681f3Smrg      ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
17457ec681f3Smrg      if (ret != ADDR_OK)
17467ec681f3Smrg         return ret;
17477ec681f3Smrg
17487ec681f3Smrg      surf->meta_size = hout.htileBytes;
17497ec681f3Smrg      surf->meta_slice_size = hout.sliceSize;
17507ec681f3Smrg      surf->meta_alignment_log2 = util_logbase2(hout.baseAlign);
17517ec681f3Smrg      surf->meta_pitch = hout.pitch;
17527ec681f3Smrg      surf->num_meta_levels = in->numMipLevels;
17537ec681f3Smrg
17547ec681f3Smrg      for (unsigned i = 0; i < in->numMipLevels; i++) {
17557ec681f3Smrg         surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
17567ec681f3Smrg         surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
17577ec681f3Smrg
17587ec681f3Smrg         if (meta_mip_info[i].inMiptail) {
17597ec681f3Smrg            /* GFX10 can only compress the first level
17607ec681f3Smrg             * in the mip tail.
17617ec681f3Smrg             */
17627ec681f3Smrg            surf->num_meta_levels = i + 1;
17637ec681f3Smrg            break;
17647ec681f3Smrg         }
17657ec681f3Smrg      }
17667ec681f3Smrg
17677ec681f3Smrg      if (!surf->num_meta_levels)
17687ec681f3Smrg         surf->meta_size = 0;
17697ec681f3Smrg
17707ec681f3Smrg      if (info->chip_class >= GFX10)
17717ec681f3Smrg         ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);
17727ec681f3Smrg      return 0;
17737ec681f3Smrg   }
17747ec681f3Smrg
17757ec681f3Smrg   {
17767ec681f3Smrg      /* Compute tile swizzle for the color surface.
17777ec681f3Smrg       * All *_X and *_T modes can use the swizzle.
17787ec681f3Smrg       */
17797ec681f3Smrg      if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&
17807ec681f3Smrg          !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {
17817ec681f3Smrg         ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
17827ec681f3Smrg         ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
17837ec681f3Smrg
17847ec681f3Smrg         xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
17857ec681f3Smrg         xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
17867ec681f3Smrg
17877ec681f3Smrg         xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
17887ec681f3Smrg         xin.flags = in->flags;
17897ec681f3Smrg         xin.swizzleMode = in->swizzleMode;
17907ec681f3Smrg         xin.resourceType = in->resourceType;
17917ec681f3Smrg         xin.format = in->format;
17927ec681f3Smrg         xin.numSamples = in->numSamples;
17937ec681f3Smrg         xin.numFrags = in->numFrags;
17947ec681f3Smrg
17957ec681f3Smrg         ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
17967ec681f3Smrg         if (ret != ADDR_OK)
17977ec681f3Smrg            return ret;
17987ec681f3Smrg
17997ec681f3Smrg         assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
18007ec681f3Smrg         surf->tile_swizzle = xout.pipeBankXor;
18017ec681f3Smrg      }
18027ec681f3Smrg
18037ec681f3Smrg      /* DCC */
18047ec681f3Smrg      if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&
18057ec681f3Smrg          is_dcc_supported_by_CB(info, in->swizzleMode) &&
18067ec681f3Smrg          (!in->flags.display ||
18077ec681f3Smrg           is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,
18087ec681f3Smrg                                   !in->flags.metaPipeUnaligned)) &&
18097ec681f3Smrg          (surf->modifier == DRM_FORMAT_MOD_INVALID ||
18107ec681f3Smrg           ac_modifier_has_dcc(surf->modifier))) {
18117ec681f3Smrg         ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
18127ec681f3Smrg         ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
18137ec681f3Smrg         ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
18147ec681f3Smrg
18157ec681f3Smrg         din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
18167ec681f3Smrg         dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
18177ec681f3Smrg         dout.pMipInfo = meta_mip_info;
18187ec681f3Smrg
18197ec681f3Smrg         din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
18207ec681f3Smrg         din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
18217ec681f3Smrg         din.resourceType = in->resourceType;
18227ec681f3Smrg         din.swizzleMode = in->swizzleMode;
18237ec681f3Smrg         din.bpp = in->bpp;
18247ec681f3Smrg         din.unalignedWidth = in->width;
18257ec681f3Smrg         din.unalignedHeight = in->height;
18267ec681f3Smrg         din.numSlices = in->numSlices;
18277ec681f3Smrg         din.numFrags = in->numFrags;
18287ec681f3Smrg         din.numMipLevels = in->numMipLevels;
18297ec681f3Smrg         din.dataSurfaceSize = out.surfSize;
18307ec681f3Smrg         din.firstMipIdInTail = out.firstMipIdInTail;
18317ec681f3Smrg
18327ec681f3Smrg         ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
18337ec681f3Smrg         if (ret != ADDR_OK)
18347ec681f3Smrg            return ret;
18357ec681f3Smrg
18367ec681f3Smrg         surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
18377ec681f3Smrg         surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
18387ec681f3Smrg         surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth;
18397ec681f3Smrg         surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight;
18407ec681f3Smrg         surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth;
18417ec681f3Smrg         surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1;
18427ec681f3Smrg         surf->u.gfx9.color.dcc_height = dout.height;
18437ec681f3Smrg         surf->meta_size = dout.dccRamSize;
18447ec681f3Smrg         surf->meta_slice_size = dout.dccRamSliceSize;
18457ec681f3Smrg         surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
18467ec681f3Smrg         surf->num_meta_levels = in->numMipLevels;
18477ec681f3Smrg
18487ec681f3Smrg         /* Disable DCC for levels that are in the mip tail.
18497ec681f3Smrg          *
18507ec681f3Smrg          * There are two issues that this is intended to
18517ec681f3Smrg          * address:
18527ec681f3Smrg          *
18537ec681f3Smrg          * 1. Multiple mip levels may share a cache line. This
18547ec681f3Smrg          *    can lead to corruption when switching between
18557ec681f3Smrg          *    rendering to different mip levels because the
18567ec681f3Smrg          *    RBs don't maintain coherency.
18577ec681f3Smrg          *
18587ec681f3Smrg          * 2. Texturing with metadata after rendering sometimes
18597ec681f3Smrg          *    fails with corruption, probably for a similar
18607ec681f3Smrg          *    reason.
18617ec681f3Smrg          *
18627ec681f3Smrg          * Working around these issues for all levels in the
18637ec681f3Smrg          * mip tail may be overly conservative, but it's what
18647ec681f3Smrg          * Vulkan does.
18657ec681f3Smrg          *
18667ec681f3Smrg          * Alternative solutions that also work but are worse:
18677ec681f3Smrg          * - Disable DCC entirely.
18687ec681f3Smrg          * - Flush TC L2 after rendering.
18697ec681f3Smrg          */
18707ec681f3Smrg         for (unsigned i = 0; i < in->numMipLevels; i++) {
18717ec681f3Smrg            surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
18727ec681f3Smrg            surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
18737ec681f3Smrg
18747ec681f3Smrg            if (meta_mip_info[i].inMiptail) {
18757ec681f3Smrg               /* GFX10 can only compress the first level
18767ec681f3Smrg                * in the mip tail.
18777ec681f3Smrg                *
18787ec681f3Smrg                * TODO: Try to do the same thing for gfx9
18797ec681f3Smrg                *       if there are no regressions.
18807ec681f3Smrg                */
18817ec681f3Smrg               if (info->chip_class >= GFX10)
18827ec681f3Smrg                  surf->num_meta_levels = i + 1;
18837ec681f3Smrg               else
18847ec681f3Smrg                  surf->num_meta_levels = i;
18857ec681f3Smrg               break;
18867ec681f3Smrg            }
18877ec681f3Smrg         }
18887ec681f3Smrg
18897ec681f3Smrg         if (!surf->num_meta_levels)
18907ec681f3Smrg            surf->meta_size = 0;
18917ec681f3Smrg
18927ec681f3Smrg         surf->u.gfx9.color.display_dcc_size = surf->meta_size;
18937ec681f3Smrg         surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2;
18947ec681f3Smrg         surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max;
18957ec681f3Smrg         surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height;
18967ec681f3Smrg
18977ec681f3Smrg         if (in->resourceType == ADDR_RSRC_TEX_2D)
18987ec681f3Smrg            ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation);
18997ec681f3Smrg
19007ec681f3Smrg         /* Compute displayable DCC. */
19017ec681f3Smrg         if (((in->flags.display && info->use_display_dcc_with_retile_blit) ||
19027ec681f3Smrg              ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) {
19037ec681f3Smrg            /* Compute displayable DCC info. */
19047ec681f3Smrg            din.dccKeyFlags.pipeAligned = 0;
19057ec681f3Smrg            din.dccKeyFlags.rbAligned = 0;
19067ec681f3Smrg
19077ec681f3Smrg            assert(din.numSlices == 1);
19087ec681f3Smrg            assert(din.numMipLevels == 1);
19097ec681f3Smrg            assert(din.numFrags == 1);
19107ec681f3Smrg            assert(surf->tile_swizzle == 0);
19117ec681f3Smrg            assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);
19127ec681f3Smrg
19137ec681f3Smrg            ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
19147ec681f3Smrg            if (ret != ADDR_OK)
19157ec681f3Smrg               return ret;
19167ec681f3Smrg
19177ec681f3Smrg            surf->u.gfx9.color.display_dcc_size = dout.dccRamSize;
19187ec681f3Smrg            surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
19197ec681f3Smrg            surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1;
19207ec681f3Smrg            surf->u.gfx9.color.display_dcc_height = dout.height;
19217ec681f3Smrg            assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size);
19227ec681f3Smrg
19237ec681f3Smrg            ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation);
19247ec681f3Smrg            surf->u.gfx9.color.dcc.display_equation_valid = true;
19257ec681f3Smrg         }
19267ec681f3Smrg      }
19277ec681f3Smrg
19287ec681f3Smrg      /* FMASK */
19297ec681f3Smrg      if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
19307ec681f3Smrg         ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
19317ec681f3Smrg         ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
19327ec681f3Smrg
19337ec681f3Smrg         fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
19347ec681f3Smrg         fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
19357ec681f3Smrg
19367ec681f3Smrg         ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode);
19377ec681f3Smrg         if (ret != ADDR_OK)
19387ec681f3Smrg            return ret;
19397ec681f3Smrg
19407ec681f3Smrg         fin.unalignedWidth = in->width;
19417ec681f3Smrg         fin.unalignedHeight = in->height;
19427ec681f3Smrg         fin.numSlices = in->numSlices;
19437ec681f3Smrg         fin.numSamples = in->numSamples;
19447ec681f3Smrg         fin.numFrags = in->numFrags;
19457ec681f3Smrg
19467ec681f3Smrg         ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
19477ec681f3Smrg         if (ret != ADDR_OK)
19487ec681f3Smrg            return ret;
19497ec681f3Smrg
19507ec681f3Smrg         surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode;
19517ec681f3Smrg         surf->u.gfx9.color.fmask_epitch = fout.pitch - 1;
19527ec681f3Smrg         surf->fmask_size = fout.fmaskBytes;
19537ec681f3Smrg         surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
19547ec681f3Smrg         surf->fmask_slice_size = fout.sliceSize;
19557ec681f3Smrg
19567ec681f3Smrg         /* Compute tile swizzle for the FMASK surface. */
19577ec681f3Smrg         if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
19587ec681f3Smrg             !(surf->flags & RADEON_SURF_SHAREABLE)) {
19597ec681f3Smrg            ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
19607ec681f3Smrg            ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
19617ec681f3Smrg
19627ec681f3Smrg            xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
19637ec681f3Smrg            xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
19647ec681f3Smrg
19657ec681f3Smrg            /* This counter starts from 1 instead of 0. */
19667ec681f3Smrg            xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
19677ec681f3Smrg            xin.flags = in->flags;
19687ec681f3Smrg            xin.swizzleMode = fin.swizzleMode;
19697ec681f3Smrg            xin.resourceType = in->resourceType;
19707ec681f3Smrg            xin.format = in->format;
19717ec681f3Smrg            xin.numSamples = in->numSamples;
19727ec681f3Smrg            xin.numFrags = in->numFrags;
19737ec681f3Smrg
19747ec681f3Smrg            ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
19757ec681f3Smrg            if (ret != ADDR_OK)
19767ec681f3Smrg               return ret;
19777ec681f3Smrg
19787ec681f3Smrg            assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
19797ec681f3Smrg            surf->fmask_tile_swizzle = xout.pipeBankXor;
19807ec681f3Smrg         }
19817ec681f3Smrg      }
19827ec681f3Smrg
19837ec681f3Smrg      /* CMASK -- on GFX10 only for FMASK */
19847ec681f3Smrg      if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
19857ec681f3Smrg          ((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
19867ec681f3Smrg            in->flags.metaRbUnaligned == 0) ||
19877ec681f3Smrg           (surf->fmask_size && in->numSamples >= 2))) {
19887ec681f3Smrg         ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
19897ec681f3Smrg         ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
19907ec681f3Smrg         ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
19917ec681f3Smrg
19927ec681f3Smrg         cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
19937ec681f3Smrg         cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
19947ec681f3Smrg         cout.pMipInfo = meta_mip_info;
19957ec681f3Smrg
19967ec681f3Smrg         assert(in->flags.metaPipeUnaligned == 0);
19977ec681f3Smrg         assert(in->flags.metaRbUnaligned == 0);
19987ec681f3Smrg
19997ec681f3Smrg         cin.cMaskFlags.pipeAligned = 1;
20007ec681f3Smrg         cin.cMaskFlags.rbAligned = 1;
20017ec681f3Smrg         cin.resourceType = in->resourceType;
20027ec681f3Smrg         cin.unalignedWidth = in->width;
20037ec681f3Smrg         cin.unalignedHeight = in->height;
20047ec681f3Smrg         cin.numSlices = in->numSlices;
20057ec681f3Smrg         cin.numMipLevels = in->numMipLevels;
20067ec681f3Smrg         cin.firstMipIdInTail = out.firstMipIdInTail;
20077ec681f3Smrg
20087ec681f3Smrg         if (in->numSamples > 1)
20097ec681f3Smrg            cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode;
20107ec681f3Smrg         else
20117ec681f3Smrg            cin.swizzleMode = in->swizzleMode;
20127ec681f3Smrg
20137ec681f3Smrg         ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
20147ec681f3Smrg         if (ret != ADDR_OK)
20157ec681f3Smrg            return ret;
20167ec681f3Smrg
20177ec681f3Smrg         surf->cmask_size = cout.cmaskBytes;
20187ec681f3Smrg         surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);
20197ec681f3Smrg         surf->cmask_slice_size = cout.sliceSize;
20207ec681f3Smrg         surf->cmask_pitch = cout.pitch;
20217ec681f3Smrg         surf->cmask_height = cout.height;
20227ec681f3Smrg         surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;
20237ec681f3Smrg         surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;
20247ec681f3Smrg
20257ec681f3Smrg         ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation);
20267ec681f3Smrg      }
20277ec681f3Smrg   }
20287ec681f3Smrg
20297ec681f3Smrg   return 0;
20307ec681f3Smrg}
20317ec681f3Smrg
/*
 * Compute the layout of a surface for GFX9 and newer chips through the ADDR2_*
 * addrlib interface and store the result in "surf".
 *
 * Steps, in order:
 *  - Fill an ADDR2_COMPUTE_SURFACE_INFO_INPUT from "config" and from
 *    surf->flags / bpe / blk_w / blk_h.
 *  - Pick the DCC parameters and the swizzle mode (from "mode", from the
 *    swizzle mode already stored in "surf", or from surf->modifier).
 *  - Clear the size fields of "surf" and call gfx9_compute_miptree() for the
 *    color/depth plane, then once more for stencil if RADEON_SURF_SBUFFER is
 *    set.
 *  - Derive is_linear, is_displayable and micro_tile_mode from the result.
 *
 * "mode" is only consulted when surf->modifier is DRM_FORMAT_MOD_INVALID.
 *
 * Returns 0 on success, or the non-zero value returned by the helper that
 * failed. Unexpected bpe / mode / swizzle mode values are only caught by
 * assert().
 */
20327ec681f3Smrgstatic int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
20337ec681f3Smrg                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
20347ec681f3Smrg                                struct radeon_surf *surf)
20357ec681f3Smrg{
20367ec681f3Smrg   bool compressed;
20377ec681f3Smrg   ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
20387ec681f3Smrg   int r;
20397ec681f3Smrg
20407ec681f3Smrg   AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
20417ec681f3Smrg
   /* A 4x4 block size is taken to mean a block-compressed format. */
20427ec681f3Smrg   compressed = surf->blk_w == 4 && surf->blk_h == 4;
20437ec681f3Smrg
20447ec681f3Smrg   /* The format must be set correctly for the allocation of compressed
20457ec681f3Smrg    * textures to work. In other cases, setting the bpp is sufficient. */
20467ec681f3Smrg   if (compressed) {
      /* AddrSurfInfoIn.bpp keeps its zero-initialized value on this path. */
20477ec681f3Smrg      switch (surf->bpe) {
20487ec681f3Smrg      case 8:
20497ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_BC1;
20507ec681f3Smrg         break;
20517ec681f3Smrg      case 16:
20527ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_BC3;
20537ec681f3Smrg         break;
20547ec681f3Smrg      default:
         /* Unexpected bpe: format stays zero-initialized if assert() is compiled out. */
20557ec681f3Smrg         assert(0);
20567ec681f3Smrg      }
20577ec681f3Smrg   } else {
      /* Pick a generic format of the same size; the asserts check that the
       * bpe is consistent with the Z/S flags of the surface. */
20587ec681f3Smrg      switch (surf->bpe) {
20597ec681f3Smrg      case 1:
20607ec681f3Smrg         assert(!(surf->flags & RADEON_SURF_ZBUFFER));
20617ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_8;
20627ec681f3Smrg         break;
20637ec681f3Smrg      case 2:
20647ec681f3Smrg         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
20657ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_16;
20667ec681f3Smrg         break;
20677ec681f3Smrg      case 4:
20687ec681f3Smrg         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
20697ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_32;
20707ec681f3Smrg         break;
20717ec681f3Smrg      case 8:
20727ec681f3Smrg         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
20737ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_32_32;
20747ec681f3Smrg         break;
20757ec681f3Smrg      case 12:
20767ec681f3Smrg         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
20777ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
20787ec681f3Smrg         break;
20797ec681f3Smrg      case 16:
20807ec681f3Smrg         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
20817ec681f3Smrg         AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
20827ec681f3Smrg         break;
20837ec681f3Smrg      default:
         /* Unexpected bpe: format stays zero-initialized if assert() is compiled out. */
20847ec681f3Smrg         assert(0);
20857ec681f3Smrg      }
20867ec681f3Smrg      AddrSurfInfoIn.bpp = surf->bpe * 8;
20877ec681f3Smrg   }
20887ec681f3Smrg
   /* Anything that is neither depth nor stencil is treated as a color surface. */
20897ec681f3Smrg   bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
20907ec681f3Smrg   AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
20917ec681f3Smrg   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
20927ec681f3Smrg   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
20937ec681f3Smrg   /* flags.texture currently refers to TC-compatible HTILE */
20947ec681f3Smrg   AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
20957ec681f3Smrg   AddrSurfInfoIn.flags.opt4space = 1;
20967ec681f3Smrg   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
20977ec681f3Smrg
   /* Sample counts are clamped to at least 1. Z/S surfaces use
    * numFrags == numSamples; color surfaces take numFrags from
    * storage_samples instead. */
20987ec681f3Smrg   AddrSurfInfoIn.numMipLevels = config->info.levels;
20997ec681f3Smrg   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
21007ec681f3Smrg   AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
21017ec681f3Smrg
21027ec681f3Smrg   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
21037ec681f3Smrg      AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
21047ec681f3Smrg
21057ec681f3Smrg   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
21067ec681f3Smrg    * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
21077ec681f3Smrg    * must sample 1D textures as 2D. */
21087ec681f3Smrg   if (config->is_3d)
21097ec681f3Smrg      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
21107ec681f3Smrg   else if (info->chip_class != GFX9 && config->is_1d)
21117ec681f3Smrg      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
21127ec681f3Smrg   else
21137ec681f3Smrg      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
21147ec681f3Smrg
21157ec681f3Smrg   AddrSurfInfoIn.width = config->info.width;
21167ec681f3Smrg   AddrSurfInfoIn.height = config->info.height;
21177ec681f3Smrg
   /* Slice count: depth for 3D, a fixed 6 for cubes (config->info.array_size
    * is not consulted for them), the array size otherwise. */
21187ec681f3Smrg   if (config->is_3d)
21197ec681f3Smrg      AddrSurfInfoIn.numSlices = config->info.depth;
21207ec681f3Smrg   else if (config->is_cube)
21217ec681f3Smrg      AddrSurfInfoIn.numSlices = 6;
21227ec681f3Smrg   else
21237ec681f3Smrg      AddrSurfInfoIn.numSlices = config->info.array_size;
21247ec681f3Smrg
21257ec681f3Smrg   /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
21267ec681f3Smrg   AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
21277ec681f3Smrg   AddrSurfInfoIn.flags.metaRbUnaligned = 0;
21287ec681f3Smrg
   /* Modifiers with DCC go through ac_modifier_fill_dcc_params(); otherwise
    * the DCC parameters of non-depth surfaces are chosen below.
    *
    * NOTE(review): flags.stencil is still at its zero-initialized value here
    * (this function only sets it right before the stencil miptree pass), so
    * the second half of the else-if condition is always true and stencil-only
    * surfaces also take this branch - confirm that this is intended.
    */
21297ec681f3Smrg   if (ac_modifier_has_dcc(surf->modifier)) {
21307ec681f3Smrg      ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
21317ec681f3Smrg   } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
21327ec681f3Smrg      /* Optimal values for the L2 cache. */
21337ec681f3Smrg      /* Don't change the DCC settings for imported buffers - they might differ. */
21347ec681f3Smrg      if (!(surf->flags & RADEON_SURF_IMPORTED)) {
21357ec681f3Smrg         if (info->chip_class == GFX9) {
21367ec681f3Smrg            surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
21377ec681f3Smrg            surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
21387ec681f3Smrg            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
21397ec681f3Smrg         } else if (info->chip_class >= GFX10) {
21407ec681f3Smrg            surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
21417ec681f3Smrg            surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
21427ec681f3Smrg            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
21437ec681f3Smrg         }
21447ec681f3Smrg      }
21457ec681f3Smrg
21467ec681f3Smrg      if (AddrSurfInfoIn.flags.display) {
21477ec681f3Smrg         /* The display hardware can only read DCC with RB_ALIGNED=0 and
21487ec681f3Smrg          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
21497ec681f3Smrg          *
21507ec681f3Smrg          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
21517ec681f3Smrg          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
21527ec681f3Smrg          * after rendering, so PIPE_ALIGNED=1 is recommended.
21537ec681f3Smrg          */
21547ec681f3Smrg         if (info->use_display_dcc_unaligned) {
21557ec681f3Smrg            AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
21567ec681f3Smrg            AddrSurfInfoIn.flags.metaRbUnaligned = 1;
21577ec681f3Smrg         }
21587ec681f3Smrg
21597ec681f3Smrg         /* Adjust DCC settings to meet DCN requirements. */
21607ec681f3Smrg         /* Don't change the DCC settings for imported buffers - they might differ. */
21617ec681f3Smrg         if (!(surf->flags & RADEON_SURF_IMPORTED) &&
21627ec681f3Smrg             (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) {
21637ec681f3Smrg            /* Only Navi12/14 support independent 64B blocks in L2,
21647ec681f3Smrg             * but without DCC image stores.
21657ec681f3Smrg             */
21667ec681f3Smrg            if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
21677ec681f3Smrg               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
21687ec681f3Smrg               surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
21697ec681f3Smrg               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
21707ec681f3Smrg            }
21717ec681f3Smrg
21727ec681f3Smrg            if ((info->chip_class >= GFX10_3 && info->family <= CHIP_YELLOW_CARP) ||
21737ec681f3Smrg                /* Newer chips will skip this when possible to get better performance.
21747ec681f3Smrg                 * This is also possible for other gfx10.3 chips, but is disabled for
21757ec681f3Smrg                 * interoperability between different Mesa versions.
21767ec681f3Smrg                 */
21777ec681f3Smrg                (info->family > CHIP_YELLOW_CARP &&
21787ec681f3Smrg                 gfx10_DCN_requires_independent_64B_blocks(info, config))) {
21797ec681f3Smrg               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
21807ec681f3Smrg               surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
21817ec681f3Smrg               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
21827ec681f3Smrg            }
21837ec681f3Smrg         }
21847ec681f3Smrg      }
21857ec681f3Smrg   }
21867ec681f3Smrg
   /* Without a modifier, the swizzle mode is derived from "mode". */
21877ec681f3Smrg   if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
21887ec681f3Smrg      switch (mode) {
21897ec681f3Smrg      case RADEON_SURF_MODE_LINEAR_ALIGNED:
21907ec681f3Smrg         assert(config->info.samples <= 1);
21917ec681f3Smrg         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
21927ec681f3Smrg         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
21937ec681f3Smrg         break;
21947ec681f3Smrg
21957ec681f3Smrg      case RADEON_SURF_MODE_1D:
21967ec681f3Smrg      case RADEON_SURF_MODE_2D:
         /* Imported surfaces, and GFX10+ surfaces with a forced swizzle mode,
          * keep the swizzle mode that is already stored in surf. */
21977ec681f3Smrg         if (surf->flags & RADEON_SURF_IMPORTED ||
21987ec681f3Smrg             (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
21997ec681f3Smrg            AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
22007ec681f3Smrg            break;
22017ec681f3Smrg         }
22027ec681f3Smrg
22037ec681f3Smrg         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
22047ec681f3Smrg                                             &AddrSurfInfoIn.swizzleMode);
22057ec681f3Smrg         if (r)
22067ec681f3Smrg            return r;
22077ec681f3Smrg         break;
22087ec681f3Smrg
22097ec681f3Smrg      default:
         /* Unexpected mode: swizzleMode stays zero-initialized if assert() is compiled out. */
22107ec681f3Smrg         assert(0);
22117ec681f3Smrg      }
22127ec681f3Smrg   } else {
22137ec681f3Smrg      /* We have a valid and required modifier here. */
22147ec681f3Smrg
22157ec681f3Smrg      assert(!compressed);
22167ec681f3Smrg      assert(!ac_modifier_has_dcc(surf->modifier) ||
22177ec681f3Smrg             !(surf->flags & RADEON_SURF_DISABLE_DCC));
22187ec681f3Smrg
22197ec681f3Smrg      AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier);
22207ec681f3Smrg   }
22217ec681f3Smrg
22227ec681f3Smrg   surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
22237ec681f3Smrg   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
22247ec681f3Smrg
   /* Clear the results that are computed below. */
22257ec681f3Smrg   surf->num_meta_levels = 0;
22267ec681f3Smrg   surf->surf_size = 0;
22277ec681f3Smrg   surf->fmask_size = 0;
22287ec681f3Smrg   surf->meta_size = 0;
22297ec681f3Smrg   surf->meta_slice_size = 0;
22307ec681f3Smrg   surf->u.gfx9.surf_offset = 0;
   /* NOTE(review): flags.stencil is only assigned further down in this
    * function, so unless a helper called above modifies AddrSurfInfoIn this
    * condition is false here - confirm whether RADEON_SURF_SBUFFER was meant. */
22317ec681f3Smrg   if (AddrSurfInfoIn.flags.stencil)
22327ec681f3Smrg      surf->u.gfx9.zs.stencil_offset = 0;
22337ec681f3Smrg   surf->cmask_size = 0;
22347ec681f3Smrg
22357ec681f3Smrg   const bool only_stencil =
22367ec681f3Smrg      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
22377ec681f3Smrg
22387ec681f3Smrg   /* Calculate texture layout information. */
22397ec681f3Smrg   if (!only_stencil) {
22407ec681f3Smrg      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
22417ec681f3Smrg      if (r)
22427ec681f3Smrg         return r;
22437ec681f3Smrg   }
22447ec681f3Smrg
22457ec681f3Smrg   /* Calculate texture layout information for stencil. */
22467ec681f3Smrg   if (surf->flags & RADEON_SURF_SBUFFER) {
      /* The same input struct is reused, switched to an 8-bit format. */
22477ec681f3Smrg      AddrSurfInfoIn.flags.stencil = 1;
22487ec681f3Smrg      AddrSurfInfoIn.bpp = 8;
22497ec681f3Smrg      AddrSurfInfoIn.format = ADDR_FMT_8;
22507ec681f3Smrg
      /* Stencil-only: query the preferred swizzle mode again with the stencil
       * settings. Depth+stencil: only the depth flag is dropped; swizzleMode
       * is not reassigned here. */
22517ec681f3Smrg      if (!AddrSurfInfoIn.flags.depth) {
22527ec681f3Smrg         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
22537ec681f3Smrg                                             &AddrSurfInfoIn.swizzleMode);
22547ec681f3Smrg         if (r)
22557ec681f3Smrg            return r;
22567ec681f3Smrg      } else
22577ec681f3Smrg         AddrSurfInfoIn.flags.depth = 0;
22587ec681f3Smrg
22597ec681f3Smrg      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
22607ec681f3Smrg      if (r)
22617ec681f3Smrg         return r;
22627ec681f3Smrg   }
22637ec681f3Smrg
22647ec681f3Smrg   surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR;
22657ec681f3Smrg
22667ec681f3Smrg   /* Query whether the surface is displayable. */
22677ec681f3Smrg   /* This is only useful for surfaces that are allocated without SCANOUT. */
   /* 3D and cube surfaces are never queried, so they stay non-displayable. */
22687ec681f3Smrg   BOOL_32 displayable = false;
22697ec681f3Smrg   if (!config->is_3d && !config->is_cube) {
22707ec681f3Smrg      r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
22717ec681f3Smrg                                         surf->bpe * 8, &displayable);
22727ec681f3Smrg      if (r)
22737ec681f3Smrg         return r;
22747ec681f3Smrg
22757ec681f3Smrg      /* Display needs unaligned DCC. */
22767ec681f3Smrg      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
22777ec681f3Smrg          surf->num_meta_levels &&
22787ec681f3Smrg          (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
22797ec681f3Smrg                                    surf->u.gfx9.color.dcc.pipe_aligned) ||
22807ec681f3Smrg           /* Don't set is_displayable if displayable DCC is missing. */
22817ec681f3Smrg           (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
22827ec681f3Smrg         displayable = false;
22837ec681f3Smrg   }
22847ec681f3Smrg   surf->is_displayable = displayable;
22857ec681f3Smrg
22867ec681f3Smrg   /* Validate that we allocated a displayable surface if requested. */
22877ec681f3Smrg   assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
22887ec681f3Smrg
22897ec681f3Smrg   /* Validate that DCC is set up correctly. */
   /* This block and the next one contain nothing but assert() calls. */
22907ec681f3Smrg   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
22917ec681f3Smrg      assert(is_dcc_supported_by_L2(info, surf));
22927ec681f3Smrg      if (AddrSurfInfoIn.flags.color)
22937ec681f3Smrg         assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
22947ec681f3Smrg      if (AddrSurfInfoIn.flags.display) {
22957ec681f3Smrg         assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
22967ec681f3Smrg                                        surf->u.gfx9.color.dcc.pipe_aligned));
22977ec681f3Smrg      }
22987ec681f3Smrg   }
22997ec681f3Smrg
23007ec681f3Smrg   if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
23017ec681f3Smrg       AddrSurfInfoIn.flags.color && !surf->is_linear &&
23027ec681f3Smrg       (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
23037ec681f3Smrg       !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
23047ec681f3Smrg                        RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
23057ec681f3Smrg       (surf->modifier == DRM_FORMAT_MOD_INVALID ||
23067ec681f3Smrg        ac_modifier_has_dcc(surf->modifier)) &&
23077ec681f3Smrg       is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
23087ec681f3Smrg                               surf->u.gfx9.color.dcc.pipe_aligned)) {
23097ec681f3Smrg      /* Validate that DCC is enabled if DCN can do it. */
23107ec681f3Smrg      if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
23117ec681f3Smrg          AddrSurfInfoIn.flags.display && surf->bpe == 4) {
23127ec681f3Smrg         assert(surf->num_meta_levels);
23137ec681f3Smrg      }
23147ec681f3Smrg
23157ec681f3Smrg      /* Validate that non-scanout DCC is always enabled. */
23167ec681f3Smrg      if (!AddrSurfInfoIn.flags.display)
23177ec681f3Smrg         assert(surf->num_meta_levels);
23187ec681f3Smrg   }
23197ec681f3Smrg
23207ec681f3Smrg   if (!surf->meta_size) {
23217ec681f3Smrg      /* Unset this if HTILE is not present. */
23227ec681f3Smrg      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
23237ec681f3Smrg   }
23247ec681f3Smrg
   /* Classify the final swizzle mode into a micro tile mode. */
23257ec681f3Smrg   switch (surf->u.gfx9.swizzle_mode) {
23267ec681f3Smrg   /* S = standard. */
23277ec681f3Smrg   case ADDR_SW_256B_S:
23287ec681f3Smrg   case ADDR_SW_4KB_S:
23297ec681f3Smrg   case ADDR_SW_64KB_S:
23307ec681f3Smrg   case ADDR_SW_64KB_S_T:
23317ec681f3Smrg   case ADDR_SW_4KB_S_X:
23327ec681f3Smrg   case ADDR_SW_64KB_S_X:
23337ec681f3Smrg      surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
23347ec681f3Smrg      break;
23357ec681f3Smrg
23367ec681f3Smrg   /* D = display. */
   /* ADDR_SW_LINEAR is grouped with the display modes. */
23377ec681f3Smrg   case ADDR_SW_LINEAR:
23387ec681f3Smrg   case ADDR_SW_256B_D:
23397ec681f3Smrg   case ADDR_SW_4KB_D:
23407ec681f3Smrg   case ADDR_SW_64KB_D:
23417ec681f3Smrg   case ADDR_SW_64KB_D_T:
23427ec681f3Smrg   case ADDR_SW_4KB_D_X:
23437ec681f3Smrg   case ADDR_SW_64KB_D_X:
23447ec681f3Smrg      surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
23457ec681f3Smrg      break;
23467ec681f3Smrg
23477ec681f3Smrg   /* R = rotated (gfx9), render target (gfx10). */
23487ec681f3Smrg   case ADDR_SW_256B_R:
23497ec681f3Smrg   case ADDR_SW_4KB_R:
23507ec681f3Smrg   case ADDR_SW_64KB_R:
23517ec681f3Smrg   case ADDR_SW_64KB_R_T:
23527ec681f3Smrg   case ADDR_SW_4KB_R_X:
23537ec681f3Smrg   case ADDR_SW_64KB_R_X:
23547ec681f3Smrg   case ADDR_SW_VAR_R_X:
23557ec681f3Smrg      /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
23567ec681f3Smrg       * used at the same time. We currently do not use rotated
23577ec681f3Smrg       * in gfx9.
23587ec681f3Smrg       */
23597ec681f3Smrg      assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
23607ec681f3Smrg      surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
23617ec681f3Smrg      break;
23627ec681f3Smrg
23637ec681f3Smrg   /* Z = depth. */
23647ec681f3Smrg   case ADDR_SW_4KB_Z:
23657ec681f3Smrg   case ADDR_SW_64KB_Z:
23667ec681f3Smrg   case ADDR_SW_64KB_Z_T:
23677ec681f3Smrg   case ADDR_SW_4KB_Z_X:
23687ec681f3Smrg   case ADDR_SW_64KB_Z_X:
23697ec681f3Smrg   case ADDR_SW_VAR_Z_X:
23707ec681f3Smrg      surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
23717ec681f3Smrg      break;
23727ec681f3Smrg
23737ec681f3Smrg   default:
      /* micro_tile_mode is not written for any other swizzle mode. */
23747ec681f3Smrg      assert(0);
23757ec681f3Smrg   }
23767ec681f3Smrg
23777ec681f3Smrg   return 0;
23787ec681f3Smrg}
237901e04c3fSmrg
23807ec681f3Smrgint ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
23817ec681f3Smrg                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
23827ec681f3Smrg                       struct radeon_surf *surf)
23837ec681f3Smrg{
23847ec681f3Smrg   int r;
23857ec681f3Smrg
23867ec681f3Smrg   r = surf_config_sanity(config, surf->flags);
23877ec681f3Smrg   if (r)
23887ec681f3Smrg      return r;
23897ec681f3Smrg
23907ec681f3Smrg   if (info->family_id >= FAMILY_AI)
23917ec681f3Smrg      r = gfx9_compute_surface(addrlib, info, config, mode, surf);
23927ec681f3Smrg   else
23937ec681f3Smrg      r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
23947ec681f3Smrg
23957ec681f3Smrg   if (r)
23967ec681f3Smrg      return r;
23977ec681f3Smrg
23987ec681f3Smrg   /* Determine the memory layout of multiple allocations in one buffer. */
23997ec681f3Smrg   surf->total_size = surf->surf_size;
24007ec681f3Smrg   surf->alignment_log2 = surf->surf_alignment_log2;
24017ec681f3Smrg
24027ec681f3Smrg   /* Ensure the offsets are always 0 if not available. */
24037ec681f3Smrg   surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0;
24047ec681f3Smrg
24057ec681f3Smrg   if (surf->fmask_size) {
24067ec681f3Smrg      assert(config->info.samples >= 2);
24077ec681f3Smrg      surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2);
24087ec681f3Smrg      surf->total_size = surf->fmask_offset + surf->fmask_size;
24097ec681f3Smrg      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);
24107ec681f3Smrg   }
24117ec681f3Smrg
24127ec681f3Smrg   /* Single-sample CMASK is in a separate buffer. */
24137ec681f3Smrg   if (surf->cmask_size && config->info.samples >= 2) {
24147ec681f3Smrg      surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
24157ec681f3Smrg      surf->total_size = surf->cmask_offset + surf->cmask_size;
24167ec681f3Smrg      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
24177ec681f3Smrg   }
24187ec681f3Smrg
24197ec681f3Smrg   if (surf->is_displayable)
24207ec681f3Smrg      surf->flags |= RADEON_SURF_SCANOUT;
24217ec681f3Smrg
24227ec681f3Smrg   if (surf->meta_size &&
24237ec681f3Smrg       /* dcc_size is computed on GFX9+ only if it's displayable. */
24247ec681f3Smrg       (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
24257ec681f3Smrg      /* It's better when displayable DCC is immediately after
24267ec681f3Smrg       * the image due to hw-specific reasons.
24277ec681f3Smrg       */
24287ec681f3Smrg      if (info->chip_class >= GFX9 &&
24297ec681f3Smrg          !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
24307ec681f3Smrg          surf->u.gfx9.color.dcc.display_equation_valid) {
24317ec681f3Smrg         /* Add space for the displayable DCC buffer. */
24327ec681f3Smrg         surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2);
24337ec681f3Smrg         surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;
24347ec681f3Smrg      }
24357ec681f3Smrg
24367ec681f3Smrg      surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2);
24377ec681f3Smrg      surf->total_size = surf->meta_offset + surf->meta_size;
24387ec681f3Smrg      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);
24397ec681f3Smrg   }
24407ec681f3Smrg
24417ec681f3Smrg   return 0;
24427ec681f3Smrg}
24437ec681f3Smrg
24447ec681f3Smrg/* This is meant to be used for disabling DCC. */
24457ec681f3Smrgvoid ac_surface_zero_dcc_fields(struct radeon_surf *surf)
24467ec681f3Smrg{
24477ec681f3Smrg   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
24487ec681f3Smrg      return;
24497ec681f3Smrg
24507ec681f3Smrg   surf->meta_offset = 0;
24517ec681f3Smrg   surf->display_dcc_offset = 0;
24527ec681f3Smrg   if (!surf->fmask_offset && !surf->cmask_offset) {
24537ec681f3Smrg      surf->total_size = surf->surf_size;
24547ec681f3Smrg      surf->alignment_log2 = surf->surf_alignment_log2;
24557ec681f3Smrg   }
24567ec681f3Smrg}
24577ec681f3Smrg
/* Decode a tile-split field value into the tile split it stands for.
 *
 * Codes 0..6 map to 64, 128, 256, 512, 1024, 2048 and 4096; any other code
 * falls back to 1024.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   static const unsigned decoded[] = {64, 128, 256, 512, 1024, 2048, 4096};
   const unsigned num_codes = sizeof(decoded) / sizeof(decoded[0]);

   if (tile_split >= num_codes)
      return 1024;

   return decoded[tile_split];
}
24867ec681f3Smrg
/* Encode a tile split back into its field value.
 *
 * 64, 128, 256, 512, 1024, 2048 and 4096 map to codes 0..6; any other value
 * yields 4, the code for 1024.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   for (unsigned code = 0; code <= 6; code++) {
      if (eg_tile_split == (64u << code))
         return code;
   }

   return 4;
}
25077ec681f3Smrg
25087ec681f3Smrg#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
25097ec681f3Smrg#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3
25107ec681f3Smrg
25117ec681f3Smrg/* This should be called before ac_compute_surface. */
25127ec681f3Smrgvoid ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
25137ec681f3Smrg                                uint64_t tiling_flags, enum radeon_surf_mode *mode)
25147ec681f3Smrg{
25157ec681f3Smrg   bool scanout;
25167ec681f3Smrg
25177ec681f3Smrg   if (info->chip_class >= GFX9) {
25187ec681f3Smrg      surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
25197ec681f3Smrg      surf->u.gfx9.color.dcc.independent_64B_blocks =
25207ec681f3Smrg         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
25217ec681f3Smrg      surf->u.gfx9.color.dcc.independent_128B_blocks =
25227ec681f3Smrg         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
25237ec681f3Smrg      surf->u.gfx9.color.dcc.max_compressed_block_size =
25247ec681f3Smrg         AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
25257ec681f3Smrg      surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
25267ec681f3Smrg      scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
25277ec681f3Smrg      *mode =
25287ec681f3Smrg         surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
25297ec681f3Smrg   } else {
25307ec681f3Smrg      surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
25317ec681f3Smrg      surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
25327ec681f3Smrg      surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
25337ec681f3Smrg      surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
25347ec681f3Smrg      surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
25357ec681f3Smrg      surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
25367ec681f3Smrg      scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
25377ec681f3Smrg
25387ec681f3Smrg      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
25397ec681f3Smrg         *mode = RADEON_SURF_MODE_2D;
25407ec681f3Smrg      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
25417ec681f3Smrg         *mode = RADEON_SURF_MODE_1D;
25427ec681f3Smrg      else
25437ec681f3Smrg         *mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
25447ec681f3Smrg   }
25457ec681f3Smrg
25467ec681f3Smrg   if (scanout)
25477ec681f3Smrg      surf->flags |= RADEON_SURF_SCANOUT;
25487ec681f3Smrg   else
25497ec681f3Smrg      surf->flags &= ~RADEON_SURF_SCANOUT;
25507ec681f3Smrg}
25517ec681f3Smrg
25527ec681f3Smrgvoid ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
25537ec681f3Smrg                                uint64_t *tiling_flags)
25547ec681f3Smrg{
25557ec681f3Smrg   *tiling_flags = 0;
25567ec681f3Smrg
25577ec681f3Smrg   if (info->chip_class >= GFX9) {
25587ec681f3Smrg      uint64_t dcc_offset = 0;
25597ec681f3Smrg
25607ec681f3Smrg      if (surf->meta_offset) {
25617ec681f3Smrg         dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset;
25627ec681f3Smrg         assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
25637ec681f3Smrg      }
25647ec681f3Smrg
25657ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);
25667ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
25677ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max);
25687ec681f3Smrg      *tiling_flags |=
25697ec681f3Smrg         AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks);
25707ec681f3Smrg      *tiling_flags |=
25717ec681f3Smrg         AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks);
25727ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,
25737ec681f3Smrg                                         surf->u.gfx9.color.dcc.max_compressed_block_size);
25747ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
25757ec681f3Smrg   } else {
25767ec681f3Smrg      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
25777ec681f3Smrg         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
25787ec681f3Smrg      else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
25797ec681f3Smrg         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
25807ec681f3Smrg      else
25817ec681f3Smrg         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
25827ec681f3Smrg
25837ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);
25847ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
25857ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
25867ec681f3Smrg      if (surf->u.legacy.tile_split)
25877ec681f3Smrg         *tiling_flags |=
25887ec681f3Smrg            AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
25897ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
25907ec681f3Smrg      *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);
25917ec681f3Smrg
25927ec681f3Smrg      if (surf->flags & RADEON_SURF_SCANOUT)
25937ec681f3Smrg         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
25947ec681f3Smrg      else
25957ec681f3Smrg         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
25967ec681f3Smrg   }
25977ec681f3Smrg}
25987ec681f3Smrg
25997ec681f3Smrgstatic uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
26007ec681f3Smrg{
26017ec681f3Smrg   return (ATI_VENDOR_ID << 16) | info->pci_id;
26027ec681f3Smrg}
26037ec681f3Smrg
26047ec681f3Smrg/* This should be called after ac_compute_surface. */
26057ec681f3Smrgbool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
26067ec681f3Smrg                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
26077ec681f3Smrg                                 unsigned size_metadata, const uint32_t metadata[64])
26087ec681f3Smrg{
26097ec681f3Smrg   const uint32_t *desc = &metadata[2];
26107ec681f3Smrg   uint64_t offset;
26117ec681f3Smrg
26127ec681f3Smrg   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
26137ec681f3Smrg      return true;
26147ec681f3Smrg
26157ec681f3Smrg   if (info->chip_class >= GFX9)
26167ec681f3Smrg      offset = surf->u.gfx9.surf_offset;
26177ec681f3Smrg   else
26187ec681f3Smrg      offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;
26197ec681f3Smrg
26207ec681f3Smrg   if (offset ||                 /* Non-zero planes ignore metadata. */
26217ec681f3Smrg       size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
26227ec681f3Smrg       metadata[0] == 0 ||       /* invalid version number */
26237ec681f3Smrg       metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
26247ec681f3Smrg      /* Disable DCC because it might not be enabled. */
26257ec681f3Smrg      ac_surface_zero_dcc_fields(surf);
26267ec681f3Smrg
26277ec681f3Smrg      /* Don't report an error if the texture comes from an incompatible driver,
26287ec681f3Smrg       * but this might not work.
26297ec681f3Smrg       */
26307ec681f3Smrg      return true;
26317ec681f3Smrg   }
26327ec681f3Smrg
26337ec681f3Smrg   /* Validate that sample counts and the number of mipmap levels match. */
26347ec681f3Smrg   unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
26357ec681f3Smrg   unsigned type = G_008F1C_TYPE(desc[3]);
26367ec681f3Smrg
26377ec681f3Smrg   if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
26387ec681f3Smrg      unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));
26397ec681f3Smrg
26407ec681f3Smrg      if (desc_last_level != log_samples) {
26417ec681f3Smrg         fprintf(stderr,
26427ec681f3Smrg                 "amdgpu: invalid MSAA texture import, "
26437ec681f3Smrg                 "metadata has log2(samples) = %u, the caller set %u\n",
26447ec681f3Smrg                 desc_last_level, log_samples);
26457ec681f3Smrg         return false;
26467ec681f3Smrg      }
26477ec681f3Smrg   } else {
26487ec681f3Smrg      if (desc_last_level != num_mipmap_levels - 1) {
26497ec681f3Smrg         fprintf(stderr,
26507ec681f3Smrg                 "amdgpu: invalid mipmapped texture import, "
26517ec681f3Smrg                 "metadata has last_level = %u, the caller set %u\n",
26527ec681f3Smrg                 desc_last_level, num_mipmap_levels - 1);
26537ec681f3Smrg         return false;
26547ec681f3Smrg      }
26557ec681f3Smrg   }
26567ec681f3Smrg
26577ec681f3Smrg   if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
26587ec681f3Smrg      /* Read DCC information. */
26597ec681f3Smrg      switch (info->chip_class) {
26607ec681f3Smrg      case GFX8:
26617ec681f3Smrg         surf->meta_offset = (uint64_t)desc[7] << 8;
26627ec681f3Smrg         break;
26637ec681f3Smrg
26647ec681f3Smrg      case GFX9:
26657ec681f3Smrg         surf->meta_offset =
26667ec681f3Smrg            ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
26677ec681f3Smrg         surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
26687ec681f3Smrg         surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);
26697ec681f3Smrg
26707ec681f3Smrg         /* If DCC is unaligned, this can only be a displayable image. */
26717ec681f3Smrg         if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
26727ec681f3Smrg            assert(surf->is_displayable);
26737ec681f3Smrg         break;
26747ec681f3Smrg
26757ec681f3Smrg      case GFX10:
26767ec681f3Smrg      case GFX10_3:
26777ec681f3Smrg         surf->meta_offset =
26787ec681f3Smrg            ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
26797ec681f3Smrg         surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
26807ec681f3Smrg         break;
26817ec681f3Smrg
26827ec681f3Smrg      default:
26837ec681f3Smrg         assert(0);
26847ec681f3Smrg         return false;
26857ec681f3Smrg      }
26867ec681f3Smrg   } else {
26877ec681f3Smrg      /* Disable DCC. dcc_offset is always set by texture_from_handle
26887ec681f3Smrg       * and must be cleared here.
26897ec681f3Smrg       */
26907ec681f3Smrg      ac_surface_zero_dcc_fields(surf);
26917ec681f3Smrg   }
26927ec681f3Smrg
26937ec681f3Smrg   return true;
26947ec681f3Smrg}
26957ec681f3Smrg
26967ec681f3Smrgvoid ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
26977ec681f3Smrg                                 unsigned num_mipmap_levels, uint32_t desc[8],
26987ec681f3Smrg                                 unsigned *size_metadata, uint32_t metadata[64])
26997ec681f3Smrg{
27007ec681f3Smrg   /* Clear the base address and set the relative DCC offset. */
27017ec681f3Smrg   desc[0] = 0;
27027ec681f3Smrg   desc[1] &= C_008F14_BASE_ADDRESS_HI;
27037ec681f3Smrg
27047ec681f3Smrg   switch (info->chip_class) {
27057ec681f3Smrg   case GFX6:
27067ec681f3Smrg   case GFX7:
27077ec681f3Smrg      break;
27087ec681f3Smrg   case GFX8:
27097ec681f3Smrg      desc[7] = surf->meta_offset >> 8;
27107ec681f3Smrg      break;
27117ec681f3Smrg   case GFX9:
27127ec681f3Smrg      desc[7] = surf->meta_offset >> 8;
27137ec681f3Smrg      desc[5] &= C_008F24_META_DATA_ADDRESS;
27147ec681f3Smrg      desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);
27157ec681f3Smrg      break;
27167ec681f3Smrg   case GFX10:
27177ec681f3Smrg   case GFX10_3:
27187ec681f3Smrg      desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
27197ec681f3Smrg      desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);
27207ec681f3Smrg      desc[7] = surf->meta_offset >> 16;
27217ec681f3Smrg      break;
27227ec681f3Smrg   default:
27237ec681f3Smrg      assert(0);
27247ec681f3Smrg   }
27257ec681f3Smrg
27267ec681f3Smrg   /* Metadata image format format version 1:
27277ec681f3Smrg    * [0] = 1 (metadata format identifier)
27287ec681f3Smrg    * [1] = (VENDOR_ID << 16) | PCI_ID
27297ec681f3Smrg    * [2:9] = image descriptor for the whole resource
27307ec681f3Smrg    *         [2] is always 0, because the base address is cleared
27317ec681f3Smrg    *         [9] is the DCC offset bits [39:8] from the beginning of
27327ec681f3Smrg    *             the buffer
27337ec681f3Smrg    * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
27347ec681f3Smrg    */
27357ec681f3Smrg
27367ec681f3Smrg   metadata[0] = 1; /* metadata image format version 1 */
27377ec681f3Smrg
27387ec681f3Smrg   /* Tiling modes are ambiguous without a PCI ID. */
27397ec681f3Smrg   metadata[1] = ac_get_umd_metadata_word1(info);
27407ec681f3Smrg
27417ec681f3Smrg   /* Dwords [2:9] contain the image descriptor. */
27427ec681f3Smrg   memcpy(&metadata[2], desc, 8 * 4);
27437ec681f3Smrg   *size_metadata = 10 * 4;
27447ec681f3Smrg
27457ec681f3Smrg   /* Dwords [10:..] contain the mipmap level offsets. */
27467ec681f3Smrg   if (info->chip_class <= GFX8) {
27477ec681f3Smrg      for (unsigned i = 0; i < num_mipmap_levels; i++)
27487ec681f3Smrg         metadata[10 + i] = surf->u.legacy.level[i].offset_256B;
27497ec681f3Smrg
27507ec681f3Smrg      *size_metadata += num_mipmap_levels * 4;
27517ec681f3Smrg   }
27527ec681f3Smrg}
27537ec681f3Smrg
27547ec681f3Smrgstatic uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf)
27557ec681f3Smrg{
27567ec681f3Smrg   if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR)
27577ec681f3Smrg      return 256 / surf->bpe;
27587ec681f3Smrg
27597ec681f3Smrg   if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
27607ec681f3Smrg      return 1; /* TODO */
27617ec681f3Smrg
27627ec681f3Smrg   unsigned bpe_shift = util_logbase2(surf->bpe) / 2;
27637ec681f3Smrg   switch(surf->u.gfx9.swizzle_mode & ~3) {
27647ec681f3Smrg   case ADDR_SW_LINEAR: /* 256B block. */
27657ec681f3Smrg      return 16 >> bpe_shift;
27667ec681f3Smrg   case ADDR_SW_4KB_Z:
27677ec681f3Smrg   case ADDR_SW_4KB_Z_X:
27687ec681f3Smrg      return 64 >> bpe_shift;
27697ec681f3Smrg   case ADDR_SW_64KB_Z:
27707ec681f3Smrg   case ADDR_SW_64KB_Z_T:
27717ec681f3Smrg   case ADDR_SW_64KB_Z_X:
27727ec681f3Smrg      return 256 >> bpe_shift;
27737ec681f3Smrg   case ADDR_SW_VAR_Z_X:
27747ec681f3Smrg   default:
27757ec681f3Smrg      return 1; /* TODO */
27767ec681f3Smrg   }
27777ec681f3Smrg}
27787ec681f3Smrg
27797ec681f3Smrgbool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
27807ec681f3Smrg                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)
27817ec681f3Smrg{
27827ec681f3Smrg   /*
27837ec681f3Smrg    * GFX10 and newer don't support custom strides. Furthermore, for
27847ec681f3Smrg    * multiple miplevels or compression data we'd really need to rerun
27857ec681f3Smrg    * addrlib to update all the fields in the surface. That, however, is a
27867ec681f3Smrg    * software limitation and could be relaxed later.
27877ec681f3Smrg    */
27887ec681f3Smrg   bool require_equal_pitch = surf->surf_size != surf->total_size ||
27897ec681f3Smrg                              num_mipmap_levels != 1 ||
27907ec681f3Smrg                              info->chip_class >= GFX10;
27917ec681f3Smrg
27927ec681f3Smrg   if (info->chip_class >= GFX9) {
27937ec681f3Smrg      if (pitch) {
27947ec681f3Smrg         if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
27957ec681f3Smrg            return false;
27967ec681f3Smrg
27977ec681f3Smrg         if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch)
27987ec681f3Smrg            return false;
27997ec681f3Smrg
28007ec681f3Smrg         if (pitch != surf->u.gfx9.surf_pitch) {
28017ec681f3Smrg            unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;
28027ec681f3Smrg
28037ec681f3Smrg            surf->u.gfx9.surf_pitch = pitch;
28047ec681f3Smrg            surf->u.gfx9.epitch = pitch - 1;
28057ec681f3Smrg            surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
28067ec681f3Smrg            surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;
28077ec681f3Smrg         }
28087ec681f3Smrg      }
28097ec681f3Smrg      surf->u.gfx9.surf_offset = offset;
28107ec681f3Smrg      if (surf->u.gfx9.zs.stencil_offset)
28117ec681f3Smrg         surf->u.gfx9.zs.stencil_offset += offset;
28127ec681f3Smrg   } else {
28137ec681f3Smrg      if (pitch) {
28147ec681f3Smrg         if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)
28157ec681f3Smrg            return false;
28167ec681f3Smrg
28177ec681f3Smrg         surf->u.legacy.level[0].nblk_x = pitch;
28187ec681f3Smrg         surf->u.legacy.level[0].slice_size_dw =
28197ec681f3Smrg            ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
28207ec681f3Smrg      }
28217ec681f3Smrg
28227ec681f3Smrg      if (offset) {
28237ec681f3Smrg         for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
28247ec681f3Smrg            surf->u.legacy.level[i].offset_256B += offset / 256;
28257ec681f3Smrg      }
28267ec681f3Smrg   }
28277ec681f3Smrg
28287ec681f3Smrg   if (offset & ((1 << surf->alignment_log2) - 1) ||
28297ec681f3Smrg       offset >= UINT64_MAX - surf->total_size)
28307ec681f3Smrg      return false;
28317ec681f3Smrg
28327ec681f3Smrg   if (surf->meta_offset)
28337ec681f3Smrg      surf->meta_offset += offset;
28347ec681f3Smrg   if (surf->fmask_offset)
28357ec681f3Smrg      surf->fmask_offset += offset;
28367ec681f3Smrg   if (surf->cmask_offset)
28377ec681f3Smrg      surf->cmask_offset += offset;
28387ec681f3Smrg   if (surf->display_dcc_offset)
28397ec681f3Smrg      surf->display_dcc_offset += offset;
28407ec681f3Smrg   return true;
28417ec681f3Smrg}
28427ec681f3Smrg
28437ec681f3Smrgunsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
28447ec681f3Smrg{
28457ec681f3Smrg   if (surf->modifier == DRM_FORMAT_MOD_INVALID)
28467ec681f3Smrg      return 1;
28477ec681f3Smrg   else if (surf->display_dcc_offset)
28487ec681f3Smrg      return 3;
28497ec681f3Smrg   else if (surf->meta_offset)
28507ec681f3Smrg      return 2;
28517ec681f3Smrg   else
28527ec681f3Smrg      return 1;
28537ec681f3Smrg}
28547ec681f3Smrg
28557ec681f3Smrguint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
28567ec681f3Smrg                                    const struct radeon_surf *surf,
28577ec681f3Smrg                                    unsigned plane, unsigned layer)
28587ec681f3Smrg{
28597ec681f3Smrg   switch (plane) {
28607ec681f3Smrg   case 0:
28617ec681f3Smrg      if (chip_class >= GFX9) {
28627ec681f3Smrg         return surf->u.gfx9.surf_offset +
28637ec681f3Smrg                layer * surf->u.gfx9.surf_slice_size;
28647ec681f3Smrg      } else {
28657ec681f3Smrg         return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +
28667ec681f3Smrg                layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;
28677ec681f3Smrg      }
28687ec681f3Smrg   case 1:
28697ec681f3Smrg      assert(!layer);
28707ec681f3Smrg      return surf->display_dcc_offset ?
28717ec681f3Smrg             surf->display_dcc_offset : surf->meta_offset;
28727ec681f3Smrg   case 2:
28737ec681f3Smrg      assert(!layer);
28747ec681f3Smrg      return surf->meta_offset;
28757ec681f3Smrg   default:
28767ec681f3Smrg      unreachable("Invalid plane index");
28777ec681f3Smrg   }
28787ec681f3Smrg}
28797ec681f3Smrg
28807ec681f3Smrguint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
28817ec681f3Smrg                                    const struct radeon_surf *surf,
28827ec681f3Smrg                                    unsigned plane)
28837ec681f3Smrg{
28847ec681f3Smrg   switch (plane) {
28857ec681f3Smrg   case 0:
28867ec681f3Smrg      if (chip_class >= GFX9) {
28877ec681f3Smrg         return surf->u.gfx9.surf_pitch * surf->bpe;
28887ec681f3Smrg      } else {
28897ec681f3Smrg         return surf->u.legacy.level[0].nblk_x * surf->bpe;
28907ec681f3Smrg      }
28917ec681f3Smrg   case 1:
28927ec681f3Smrg      return 1 + (surf->display_dcc_offset ?
28937ec681f3Smrg             surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);
28947ec681f3Smrg   case 2:
28957ec681f3Smrg      return surf->u.gfx9.color.dcc_pitch_max + 1;
28967ec681f3Smrg   default:
28977ec681f3Smrg      unreachable("Invalid plane index");
28987ec681f3Smrg   }
28997ec681f3Smrg}
29007ec681f3Smrg
29017ec681f3Smrguint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
29027ec681f3Smrg                                   unsigned plane)
29037ec681f3Smrg{
29047ec681f3Smrg   switch (plane) {
29057ec681f3Smrg   case 0:
29067ec681f3Smrg      return surf->surf_size;
29077ec681f3Smrg   case 1:
29087ec681f3Smrg      return surf->display_dcc_offset ?
29097ec681f3Smrg             surf->u.gfx9.color.display_dcc_size : surf->meta_size;
29107ec681f3Smrg   case 2:
29117ec681f3Smrg      return surf->meta_size;
29127ec681f3Smrg   default:
29137ec681f3Smrg      unreachable("Invalid plane index");
29147ec681f3Smrg   }
29157ec681f3Smrg}
29167ec681f3Smrg
29177ec681f3Smrgvoid ac_surface_print_info(FILE *out, const struct radeon_info *info,
29187ec681f3Smrg                           const struct radeon_surf *surf)
29197ec681f3Smrg{
29207ec681f3Smrg   if (info->chip_class >= GFX9) {
29217ec681f3Smrg      fprintf(out,
29227ec681f3Smrg              "    Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
29237ec681f3Smrg              "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
29247ec681f3Smrg              "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
29257ec681f3Smrg              surf->surf_size, surf->u.gfx9.surf_slice_size,
29267ec681f3Smrg              1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode,
29277ec681f3Smrg              surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
29287ec681f3Smrg              surf->blk_w, surf->blk_h, surf->bpe, surf->flags);
29297ec681f3Smrg
29307ec681f3Smrg      if (surf->fmask_offset)
29317ec681f3Smrg         fprintf(out,
29327ec681f3Smrg                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
29337ec681f3Smrg                 "alignment=%u, swmode=%u, epitch=%u\n",
29347ec681f3Smrg                 surf->fmask_offset, surf->fmask_size,
29357ec681f3Smrg                 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
29367ec681f3Smrg                 surf->u.gfx9.color.fmask_epitch);
29377ec681f3Smrg
29387ec681f3Smrg      if (surf->cmask_offset)
29397ec681f3Smrg         fprintf(out,
29407ec681f3Smrg                 "    CMask: offset=%" PRIu64 ", size=%u, "
29417ec681f3Smrg                 "alignment=%u\n",
29427ec681f3Smrg                 surf->cmask_offset, surf->cmask_size,
29437ec681f3Smrg                 1 << surf->cmask_alignment_log2);
29447ec681f3Smrg
29457ec681f3Smrg      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
29467ec681f3Smrg         fprintf(out,
29477ec681f3Smrg                 "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
29487ec681f3Smrg                 surf->meta_offset, surf->meta_size,
29497ec681f3Smrg                 1 << surf->meta_alignment_log2);
29507ec681f3Smrg
29517ec681f3Smrg      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
29527ec681f3Smrg         fprintf(out,
29537ec681f3Smrg                 "    DCC: offset=%" PRIu64 ", size=%u, "
29547ec681f3Smrg                 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
29557ec681f3Smrg                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
29567ec681f3Smrg                 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);
29577ec681f3Smrg
29587ec681f3Smrg      if (surf->has_stencil)
29597ec681f3Smrg         fprintf(out,
29607ec681f3Smrg                 "    Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
29617ec681f3Smrg                 surf->u.gfx9.zs.stencil_offset,
29627ec681f3Smrg                 surf->u.gfx9.zs.stencil_swizzle_mode,
29637ec681f3Smrg                 surf->u.gfx9.zs.stencil_epitch);
29647ec681f3Smrg   } else {
29657ec681f3Smrg      fprintf(out,
29667ec681f3Smrg              "    Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
29677ec681f3Smrg              "bpe=%u, flags=0x%"PRIx64"\n",
29687ec681f3Smrg              surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
29697ec681f3Smrg              surf->blk_h, surf->bpe, surf->flags);
29707ec681f3Smrg
29717ec681f3Smrg      fprintf(out,
29727ec681f3Smrg              "    Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
29737ec681f3Smrg              "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
29747ec681f3Smrg              surf->surf_size, 1 << surf->surf_alignment_log2,
29757ec681f3Smrg              surf->u.legacy.bankw, surf->u.legacy.bankh,
29767ec681f3Smrg              surf->u.legacy.num_banks, surf->u.legacy.mtilea,
29777ec681f3Smrg              surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
29787ec681f3Smrg              (surf->flags & RADEON_SURF_SCANOUT) != 0);
29797ec681f3Smrg
29807ec681f3Smrg      if (surf->fmask_offset)
29817ec681f3Smrg         fprintf(out,
29827ec681f3Smrg                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
29837ec681f3Smrg                 "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
29847ec681f3Smrg                 "slice_tile_max=%u, tile_mode_index=%u\n",
29857ec681f3Smrg                 surf->fmask_offset, surf->fmask_size,
29867ec681f3Smrg                 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
29877ec681f3Smrg                 surf->u.legacy.color.fmask.bankh,
29887ec681f3Smrg                 surf->u.legacy.color.fmask.slice_tile_max,
29897ec681f3Smrg                 surf->u.legacy.color.fmask.tiling_index);
29907ec681f3Smrg
29917ec681f3Smrg      if (surf->cmask_offset)
29927ec681f3Smrg         fprintf(out,
29937ec681f3Smrg                 "    CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
29947ec681f3Smrg                 "slice_tile_max=%u\n",
29957ec681f3Smrg                 surf->cmask_offset, surf->cmask_size,
29967ec681f3Smrg                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);
29977ec681f3Smrg
29987ec681f3Smrg      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
29997ec681f3Smrg         fprintf(out, "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
30007ec681f3Smrg                 surf->meta_offset, surf->meta_size,
30017ec681f3Smrg                 1 << surf->meta_alignment_log2);
30027ec681f3Smrg
30037ec681f3Smrg      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
30047ec681f3Smrg         fprintf(out, "    DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
30057ec681f3Smrg                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);
30067ec681f3Smrg
30077ec681f3Smrg      if (surf->has_stencil)
30087ec681f3Smrg         fprintf(out, "    StencilLayout: tilesplit=%u\n",
30097ec681f3Smrg                 surf->u.legacy.stencil_tile_split);
30107ec681f3Smrg   }
30117ec681f3Smrg}
30127ec681f3Smrg
30137ec681f3Smrgstatic nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
30147ec681f3Smrg                                                   struct gfx9_meta_equation *equation,
30157ec681f3Smrg                                                   int blkSizeBias, unsigned blkStart,
30167ec681f3Smrg                                                   nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
30177ec681f3Smrg                                                   nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
30187ec681f3Smrg                                                   nir_ssa_def *pipe_xor,
30197ec681f3Smrg                                                   nir_ssa_def **bit_position)
30207ec681f3Smrg{
30217ec681f3Smrg   nir_ssa_def *zero = nir_imm_int(b, 0);
30227ec681f3Smrg   nir_ssa_def *one = nir_imm_int(b, 1);
30237ec681f3Smrg
30247ec681f3Smrg   assert(info->chip_class >= GFX10);
30257ec681f3Smrg
30267ec681f3Smrg   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
30277ec681f3Smrg   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
30287ec681f3Smrg   unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;
30297ec681f3Smrg
30307ec681f3Smrg   nir_ssa_def *coord[] = {x, y, z, 0};
30317ec681f3Smrg   nir_ssa_def *address = zero;
30327ec681f3Smrg
30337ec681f3Smrg   for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
30347ec681f3Smrg      nir_ssa_def *v = zero;
30357ec681f3Smrg
30367ec681f3Smrg      for (unsigned c = 0; c < 4; c++) {
30377ec681f3Smrg         unsigned index = i * 4 + c - (blkStart * 4);
30387ec681f3Smrg         if (equation->u.gfx10_bits[index]) {
30397ec681f3Smrg            unsigned mask = equation->u.gfx10_bits[index];
30407ec681f3Smrg            nir_ssa_def *bits = coord[c];
30417ec681f3Smrg
30427ec681f3Smrg            while (mask)
30437ec681f3Smrg               v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
30447ec681f3Smrg         }
30457ec681f3Smrg      }
30467ec681f3Smrg
30477ec681f3Smrg      address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));
30487ec681f3Smrg   }
30497ec681f3Smrg
30507ec681f3Smrg   unsigned blkMask = (1 << blkSizeLog2) - 1;
30517ec681f3Smrg   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
30527ec681f3Smrg   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
30537ec681f3Smrg   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
30547ec681f3Smrg   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
30557ec681f3Smrg   nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
30567ec681f3Smrg   nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
30577ec681f3Smrg   nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask),
30587ec681f3Smrg                                                   nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);
30597ec681f3Smrg
30607ec681f3Smrg   if (bit_position)
30617ec681f3Smrg      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
30627ec681f3Smrg                                  nir_imm_int(b, 2));
30637ec681f3Smrg
30647ec681f3Smrg   return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),
30657ec681f3Smrg                               nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),
30667ec681f3Smrg                   nir_ixor(b, nir_ushr(b, address, one), pipeXor));
30677ec681f3Smrg}
30687ec681f3Smrg
30697ec681f3Smrgstatic nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
30707ec681f3Smrg                                                  struct gfx9_meta_equation *equation,
30717ec681f3Smrg                                                  nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
30727ec681f3Smrg                                                  nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
30737ec681f3Smrg                                                  nir_ssa_def *sample, nir_ssa_def *pipe_xor,
30747ec681f3Smrg                                                  nir_ssa_def **bit_position)
30757ec681f3Smrg{
30767ec681f3Smrg   nir_ssa_def *zero = nir_imm_int(b, 0);
30777ec681f3Smrg   nir_ssa_def *one = nir_imm_int(b, 1);
30787ec681f3Smrg
30797ec681f3Smrg   assert(info->chip_class >= GFX9);
30807ec681f3Smrg
30817ec681f3Smrg   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
30827ec681f3Smrg   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
30837ec681f3Smrg   unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);
30847ec681f3Smrg
30857ec681f3Smrg   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
30867ec681f3Smrg   unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
30877ec681f3Smrg   nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
30887ec681f3Smrg   nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
30897ec681f3Smrg                                            pitchInBlock);
30907ec681f3Smrg
30917ec681f3Smrg   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
30927ec681f3Smrg   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
30937ec681f3Smrg   nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);
30947ec681f3Smrg
30957ec681f3Smrg   nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
30967ec681f3Smrg                                                  nir_imul(b, yb, pitchInBlock)), xb);
30977ec681f3Smrg   nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};
30987ec681f3Smrg
30997ec681f3Smrg   nir_ssa_def *address = zero;
31007ec681f3Smrg   unsigned num_bits = equation->u.gfx9.num_bits;
31017ec681f3Smrg   assert(num_bits <= 32);
31027ec681f3Smrg
31037ec681f3Smrg   /* Compute the address up until the last bit that doesn't use the block index. */
31047ec681f3Smrg   for (unsigned i = 0; i < num_bits - 1; i++) {
31057ec681f3Smrg      nir_ssa_def *xor = zero;
31067ec681f3Smrg
31077ec681f3Smrg      for (unsigned c = 0; c < 5; c++) {
31087ec681f3Smrg         if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
31097ec681f3Smrg            continue;
31107ec681f3Smrg
31117ec681f3Smrg         assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
31127ec681f3Smrg         nir_ssa_def *ison =
31137ec681f3Smrg            nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
31147ec681f3Smrg                                     equation->u.gfx9.bit[i].coord[c].ord), one);
31157ec681f3Smrg
31167ec681f3Smrg         xor = nir_ixor(b, xor, ison);
31177ec681f3Smrg      }
31187ec681f3Smrg      address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));
31197ec681f3Smrg   }
31207ec681f3Smrg
31217ec681f3Smrg   /* Fill the remaining bits with the block index. */
31227ec681f3Smrg   unsigned last = num_bits - 1;
31237ec681f3Smrg   address = nir_ior(b, address,
31247ec681f3Smrg                     nir_ishl(b, nir_ushr_imm(b, blockIndex,
31257ec681f3Smrg                                              equation->u.gfx9.bit[last].coord[0].ord),
31267ec681f3Smrg                     nir_imm_int(b, last)));
31277ec681f3Smrg
31287ec681f3Smrg   if (bit_position)
31297ec681f3Smrg      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
31307ec681f3Smrg                                  nir_imm_int(b, 2));
31317ec681f3Smrg
31327ec681f3Smrg   nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
31337ec681f3Smrg   return nir_ixor(b, nir_ushr(b, address, one),
31347ec681f3Smrg                   nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));
31357ec681f3Smrg}
31367ec681f3Smrg
31377ec681f3Smrgnir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
31387ec681f3Smrg                                        unsigned bpe, struct gfx9_meta_equation *equation,
31397ec681f3Smrg                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
31407ec681f3Smrg                                        nir_ssa_def *dcc_slice_size,
31417ec681f3Smrg                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
31427ec681f3Smrg                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor)
31437ec681f3Smrg{
31447ec681f3Smrg   if (info->chip_class >= GFX10) {
31457ec681f3Smrg      unsigned bpp_log2 = util_logbase2(bpe);
31467ec681f3Smrg
31477ec681f3Smrg      return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
31487ec681f3Smrg                                            dcc_pitch, dcc_slice_size,
31497ec681f3Smrg                                            x, y, z, pipe_xor, NULL);
31507ec681f3Smrg   } else {
31517ec681f3Smrg      return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch,
31527ec681f3Smrg                                           dcc_height, x, y, z,
31537ec681f3Smrg                                           sample, pipe_xor, NULL);
31547ec681f3Smrg   }
31557ec681f3Smrg}
31567ec681f3Smrg
31577ec681f3Smrgnir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
31587ec681f3Smrg                                        struct gfx9_meta_equation *equation,
31597ec681f3Smrg                                        nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
31607ec681f3Smrg                                        nir_ssa_def *cmask_slice_size,
31617ec681f3Smrg                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
31627ec681f3Smrg                                        nir_ssa_def *pipe_xor,
31637ec681f3Smrg                                        nir_ssa_def **bit_position)
31647ec681f3Smrg{
31657ec681f3Smrg   nir_ssa_def *zero = nir_imm_int(b, 0);
31667ec681f3Smrg
31677ec681f3Smrg   if (info->chip_class >= GFX10) {
31687ec681f3Smrg      return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
31697ec681f3Smrg                                            cmask_pitch, cmask_slice_size,
31707ec681f3Smrg                                            x, y, z, pipe_xor, bit_position);
31717ec681f3Smrg   } else {
31727ec681f3Smrg      return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch,
31737ec681f3Smrg                                           cmask_height, x, y, z, zero,
31747ec681f3Smrg                                           pipe_xor, bit_position);
31757ec681f3Smrg   }
31767ec681f3Smrg}
31777ec681f3Smrg
31787ec681f3Smrgnir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
31797ec681f3Smrg                                          struct gfx9_meta_equation *equation,
31807ec681f3Smrg                                          nir_ssa_def *htile_pitch,
31817ec681f3Smrg                                          nir_ssa_def *htile_slice_size,
31827ec681f3Smrg                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
31837ec681f3Smrg                                          nir_ssa_def *pipe_xor)
31847ec681f3Smrg{
31857ec681f3Smrg   return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,
31867ec681f3Smrg                                            htile_pitch, htile_slice_size,
31877ec681f3Smrg                                            x, y, z, pipe_xor, NULL);
318801e04c3fSmrg}
3189