101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2011 Red Hat All Rights Reserved. 301e04c3fSmrg * Copyright © 2017 Advanced Micro Devices, Inc. 401e04c3fSmrg * All Rights Reserved. 501e04c3fSmrg * 601e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining 701e04c3fSmrg * a copy of this software and associated documentation files (the 801e04c3fSmrg * "Software"), to deal in the Software without restriction, including 901e04c3fSmrg * without limitation the rights to use, copy, modify, merge, publish, 1001e04c3fSmrg * distribute, sub license, and/or sell copies of the Software, and to 1101e04c3fSmrg * permit persons to whom the Software is furnished to do so, subject to 1201e04c3fSmrg * the following conditions: 1301e04c3fSmrg * 1401e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 1501e04c3fSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 1601e04c3fSmrg * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 1701e04c3fSmrg * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 1801e04c3fSmrg * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2001e04c3fSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 2101e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * The above copyright notice and this permission notice (including the 2401e04c3fSmrg * next paragraph) shall be included in all copies or substantial portions 2501e04c3fSmrg * of the Software. 2601e04c3fSmrg */ 2701e04c3fSmrg 287ec681f3Smrg#define AC_SURFACE_INCLUDE_NIR 2901e04c3fSmrg#include "ac_surface.h" 307ec681f3Smrg 317ec681f3Smrg#include "ac_drm_fourcc.h" 3201e04c3fSmrg#include "ac_gpu_info.h" 337ec681f3Smrg#include "addrlib/inc/addrinterface.h" 347ec681f3Smrg#include "addrlib/src/amdgpu_asic_addr.h" 357ec681f3Smrg#include "amd_family.h" 367ec681f3Smrg#include "sid.h" 377ec681f3Smrg#include "util/hash_table.h" 3801e04c3fSmrg#include "util/macros.h" 397ec681f3Smrg#include "util/simple_mtx.h" 4001e04c3fSmrg#include "util/u_atomic.h" 417ec681f3Smrg#include "util/format/u_format.h" 4201e04c3fSmrg#include "util/u_math.h" 437ec681f3Smrg#include "util/u_memory.h" 4401e04c3fSmrg 4501e04c3fSmrg#include <errno.h> 4601e04c3fSmrg#include <stdio.h> 4701e04c3fSmrg#include <stdlib.h> 4801e04c3fSmrg 497ec681f3Smrg#ifdef _WIN32 507ec681f3Smrg#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 517ec681f3Smrg#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf 527ec681f3Smrg#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 537ec681f3Smrg#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f 547ec681f3Smrg#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 557ec681f3Smrg#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 567ec681f3Smrg#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 577ec681f3Smrg#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 587ec681f3Smrg#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 597ec681f3Smrg#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 607ec681f3Smrg#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 617ec681f3Smrg#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 627ec681f3Smrg#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 637ec681f3Smrg#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 647ec681f3Smrg#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 657ec681f3Smrg#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 667ec681f3Smrg#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 677ec681f3Smrg#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f 687ec681f3Smrg#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 697ec681f3Smrg#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF 707ec681f3Smrg#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 717ec681f3Smrg#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF 727ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 737ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 747ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 757ec681f3Smrg#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 767ec681f3Smrg#define AMDGPU_TILING_SCANOUT_SHIFT 63 777ec681f3Smrg#define AMDGPU_TILING_SCANOUT_MASK 0x1 787ec681f3Smrg#define AMDGPU_TILING_SET(field, value) \ 797ec681f3Smrg (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) 807ec681f3Smrg#define AMDGPU_TILING_GET(value, field) \ 817ec681f3Smrg (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) 827ec681f3Smrg#else 837ec681f3Smrg#include "drm-uapi/amdgpu_drm.h" 847ec681f3Smrg#endif 8501e04c3fSmrg 8601e04c3fSmrg#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND 8701e04c3fSmrg#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A 8801e04c3fSmrg#endif 8901e04c3fSmrg 9001e04c3fSmrg#ifndef CIASICIDGFXENGINE_ARCTICISLAND 9101e04c3fSmrg#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D 9201e04c3fSmrg#endif 9301e04c3fSmrg 947ec681f3Smrgstruct ac_addrlib { 957ec681f3Smrg ADDR_HANDLE handle; 967ec681f3Smrg}; 977ec681f3Smrg 987ec681f3Smrgbool ac_modifier_has_dcc(uint64_t modifier) 997ec681f3Smrg{ 1007ec681f3Smrg return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); 1017ec681f3Smrg} 1027ec681f3Smrg 1037ec681f3Smrgbool ac_modifier_has_dcc_retile(uint64_t modifier) 1047ec681f3Smrg{ 1057ec681f3Smrg return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier); 1067ec681f3Smrg} 1077ec681f3Smrg 1087ec681f3Smrgbool ac_modifier_supports_dcc_image_stores(uint64_t modifier) 1097ec681f3Smrg{ 1107ec681f3Smrg if (!ac_modifier_has_dcc(modifier)) 1117ec681f3Smrg return false; 1127ec681f3Smrg 1137ec681f3Smrg return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) && 1147ec681f3Smrg AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) && 1157ec681f3Smrg AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) || 1167ec681f3Smrg (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */ 1177ec681f3Smrg AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) && 1187ec681f3Smrg AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) && 1197ec681f3Smrg AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B); 1207ec681f3Smrg 1217ec681f3Smrg} 1227ec681f3Smrg 1237ec681f3Smrg 1247ec681f3Smrgbool ac_surface_supports_dcc_image_stores(enum chip_class chip_class, 1257ec681f3Smrg const struct radeon_surf *surf) 1267ec681f3Smrg{ 1277ec681f3Smrg /* DCC image stores is only available for GFX10+. */ 1287ec681f3Smrg if (chip_class < GFX10) 1297ec681f3Smrg return false; 1307ec681f3Smrg 1317ec681f3Smrg /* DCC image stores support the following settings: 1327ec681f3Smrg * - INDEPENDENT_64B_BLOCKS = 0 1337ec681f3Smrg * - INDEPENDENT_128B_BLOCKS = 1 1347ec681f3Smrg * - MAX_COMPRESSED_BLOCK_SIZE = 128B 1357ec681f3Smrg * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used) 1367ec681f3Smrg * 1377ec681f3Smrg * gfx10.3 also supports the following setting: 1387ec681f3Smrg * - INDEPENDENT_64B_BLOCKS = 1 1397ec681f3Smrg * - INDEPENDENT_128B_BLOCKS = 1 1407ec681f3Smrg * - MAX_COMPRESSED_BLOCK_SIZE = 64B 1417ec681f3Smrg * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used) 1427ec681f3Smrg * 1437ec681f3Smrg * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine 1447ec681f3Smrg * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B 1457ec681f3Smrg * implies INDEP_64B && INDEP_128B. 1467ec681f3Smrg * 1477ec681f3Smrg * The same limitations apply to SDMA compressed stores because 1487ec681f3Smrg * SDMA uses the same DCC codec. 1497ec681f3Smrg */ 1507ec681f3Smrg return (!surf->u.gfx9.color.dcc.independent_64B_blocks && 1517ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks && 1527ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) || 1537ec681f3Smrg (chip_class >= GFX10_3 && /* gfx10.3 */ 1547ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks && 1557ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks && 1567ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B); 1577ec681f3Smrg} 1587ec681f3Smrg 1597ec681f3Smrgstatic 1607ec681f3SmrgAddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier) 16101e04c3fSmrg{ 1627ec681f3Smrg if (modifier == DRM_FORMAT_MOD_LINEAR) 1637ec681f3Smrg return ADDR_SW_LINEAR; 1647ec681f3Smrg 1657ec681f3Smrg return AMD_FMT_MOD_GET(TILE, modifier); 1667ec681f3Smrg} 1677ec681f3Smrgstatic void 1687ec681f3Smrgac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf, 1697ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info) 1707ec681f3Smrg{ 1717ec681f3Smrg assert(ac_modifier_has_dcc(modifier)); 1727ec681f3Smrg 1737ec681f3Smrg if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) { 1747ec681f3Smrg surf_info->flags.metaPipeUnaligned = 0; 1757ec681f3Smrg } else { 1767ec681f3Smrg surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier); 1777ec681f3Smrg } 1787ec681f3Smrg 1797ec681f3Smrg /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on 1807ec681f3Smrg * non-displayable DCC surfaces just because num_render_backends = 1 */ 1817ec681f3Smrg surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && 1827ec681f3Smrg AMD_FMT_MOD_GET(RB, modifier) == 0 && 1837ec681f3Smrg surf_info->flags.metaPipeUnaligned; 1847ec681f3Smrg 1857ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); 1867ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); 1877ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); 18801e04c3fSmrg} 18901e04c3fSmrg 1907ec681f3Smrgbool ac_is_modifier_supported(const struct radeon_info *info, 1917ec681f3Smrg const struct ac_modifier_options *options, 1927ec681f3Smrg enum pipe_format format, 1937ec681f3Smrg uint64_t modifier) 19401e04c3fSmrg{ 1957ec681f3Smrg 1967ec681f3Smrg if (util_format_is_compressed(format) || 1977ec681f3Smrg util_format_is_depth_or_stencil(format) || 1987ec681f3Smrg util_format_get_blocksizebits(format) > 64) 1997ec681f3Smrg return false; 2007ec681f3Smrg 2017ec681f3Smrg if (info->chip_class < GFX9) 2027ec681f3Smrg return false; 2037ec681f3Smrg 2047ec681f3Smrg if(modifier == DRM_FORMAT_MOD_LINEAR) 2057ec681f3Smrg return true; 2067ec681f3Smrg 2077ec681f3Smrg /* GFX8 may need a different modifier for each plane */ 2087ec681f3Smrg if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1) 2097ec681f3Smrg return false; 2107ec681f3Smrg 2117ec681f3Smrg uint32_t allowed_swizzles = 0xFFFFFFFF; 2127ec681f3Smrg switch(info->chip_class) { 2137ec681f3Smrg case GFX9: 2147ec681f3Smrg allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660; 2157ec681f3Smrg break; 2167ec681f3Smrg case GFX10: 2177ec681f3Smrg case GFX10_3: 2187ec681f3Smrg allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660; 2197ec681f3Smrg break; 2207ec681f3Smrg default: 2217ec681f3Smrg return false; 2227ec681f3Smrg } 2237ec681f3Smrg 2247ec681f3Smrg if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles)) 2257ec681f3Smrg return false; 2267ec681f3Smrg 2277ec681f3Smrg if (ac_modifier_has_dcc(modifier)) { 2287ec681f3Smrg /* TODO: support multi-planar formats with DCC */ 2297ec681f3Smrg if (util_format_get_num_planes(format) > 1) 2307ec681f3Smrg return false; 2317ec681f3Smrg 2327ec681f3Smrg if (!info->has_graphics) 2337ec681f3Smrg return false; 2347ec681f3Smrg 2357ec681f3Smrg if (!options->dcc) 2367ec681f3Smrg return false; 2377ec681f3Smrg 2387ec681f3Smrg if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile) 2397ec681f3Smrg return false; 2407ec681f3Smrg } 2417ec681f3Smrg 2427ec681f3Smrg return true; 24301e04c3fSmrg} 24401e04c3fSmrg 2457ec681f3Smrgbool ac_get_supported_modifiers(const struct radeon_info *info, 2467ec681f3Smrg const struct ac_modifier_options *options, 2477ec681f3Smrg enum pipe_format format, 2487ec681f3Smrg unsigned *mod_count, 2497ec681f3Smrg uint64_t *mods) 25001e04c3fSmrg{ 2517ec681f3Smrg unsigned current_mod = 0; 2527ec681f3Smrg 2537ec681f3Smrg#define ADD_MOD(name) \ 2547ec681f3Smrg if (ac_is_modifier_supported(info, options, format, (name))) { \ 2557ec681f3Smrg if (mods && current_mod < *mod_count) \ 2567ec681f3Smrg mods[current_mod] = (name); \ 2577ec681f3Smrg ++current_mod; \ 2587ec681f3Smrg } 2597ec681f3Smrg 2607ec681f3Smrg /* The modifiers have to be added in descending order of estimated 2617ec681f3Smrg * performance. The drivers will prefer modifiers that come earlier 2627ec681f3Smrg * in the list. */ 2637ec681f3Smrg switch (info->chip_class) { 2647ec681f3Smrg case GFX9: { 2657ec681f3Smrg unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) + 2667ec681f3Smrg G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8); 2677ec681f3Smrg unsigned bank_xor_bits = MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits); 2687ec681f3Smrg unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config); 2697ec681f3Smrg unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) + 2707ec681f3Smrg G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config); 2717ec681f3Smrg 2727ec681f3Smrg uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) | 2737ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | 2747ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | 2757ec681f3Smrg AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) | 2767ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | 2777ec681f3Smrg AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits); 2787ec681f3Smrg 2797ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 2807ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | 2817ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 2827ec681f3Smrg AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) | 2837ec681f3Smrg common_dcc | 2847ec681f3Smrg AMD_FMT_MOD_SET(PIPE, pipes) | 2857ec681f3Smrg AMD_FMT_MOD_SET(RB, rb)) 2867ec681f3Smrg 2877ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 2887ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | 2897ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 2907ec681f3Smrg AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) | 2917ec681f3Smrg common_dcc | 2927ec681f3Smrg AMD_FMT_MOD_SET(PIPE, pipes) | 2937ec681f3Smrg AMD_FMT_MOD_SET(RB, rb)) 2947ec681f3Smrg 2957ec681f3Smrg if (util_format_get_blocksizebits(format) == 32) { 2967ec681f3Smrg if (info->max_render_backends == 1) { 2977ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 2987ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | 2997ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 3007ec681f3Smrg common_dcc); 3017ec681f3Smrg } 3027ec681f3Smrg 3037ec681f3Smrg 3047ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3057ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | 3067ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 3077ec681f3Smrg AMD_FMT_MOD_SET(DCC_RETILE, 1) | 3087ec681f3Smrg common_dcc | 3097ec681f3Smrg AMD_FMT_MOD_SET(PIPE, pipes) | 3107ec681f3Smrg AMD_FMT_MOD_SET(RB, rb)) 3117ec681f3Smrg } 3127ec681f3Smrg 3137ec681f3Smrg 3147ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3157ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | 3167ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 3177ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | 3187ec681f3Smrg AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); 3197ec681f3Smrg 3207ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3217ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | 3227ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | 3237ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | 3247ec681f3Smrg AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); 3257ec681f3Smrg 3267ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3277ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | 3287ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); 3297ec681f3Smrg 3307ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3317ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | 3327ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); 3337ec681f3Smrg 3347ec681f3Smrg ADD_MOD(DRM_FORMAT_MOD_LINEAR) 3357ec681f3Smrg break; 3367ec681f3Smrg } 3377ec681f3Smrg case GFX10: 3387ec681f3Smrg case GFX10_3: { 3397ec681f3Smrg bool rbplus = info->chip_class >= GFX10_3; 3407ec681f3Smrg unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config); 3417ec681f3Smrg unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0; 3427ec681f3Smrg 3437ec681f3Smrg unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10; 3447ec681f3Smrg uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) | 3457ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | 3467ec681f3Smrg AMD_FMT_MOD_SET(DCC, 1) | 3477ec681f3Smrg AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | 3487ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | 3497ec681f3Smrg AMD_FMT_MOD_SET(PACKERS, pkrs); 3507ec681f3Smrg 3517ec681f3Smrg ADD_MOD(AMD_FMT_MOD | common_dcc | 3527ec681f3Smrg AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) | 3537ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | 3547ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)) 3557ec681f3Smrg 3567ec681f3Smrg if (info->chip_class >= GFX10_3) { 3577ec681f3Smrg if (info->max_render_backends == 1) { 3587ec681f3Smrg ADD_MOD(AMD_FMT_MOD | common_dcc | 3597ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | 3607ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)) 3617ec681f3Smrg } 3627ec681f3Smrg 3637ec681f3Smrg ADD_MOD(AMD_FMT_MOD | common_dcc | 3647ec681f3Smrg AMD_FMT_MOD_SET(DCC_RETILE, 1) | 3657ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | 3667ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)) 3677ec681f3Smrg } 3687ec681f3Smrg 3697ec681f3Smrg if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) { 3707ec681f3Smrg bool independent_128b = info->chip_class >= GFX10_3; 3717ec681f3Smrg 3727ec681f3Smrg if (info->max_render_backends == 1) { 3737ec681f3Smrg ADD_MOD(AMD_FMT_MOD | common_dcc | 3747ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | 3757ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) | 3767ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)) 3777ec681f3Smrg } 3787ec681f3Smrg 3797ec681f3Smrg ADD_MOD(AMD_FMT_MOD | common_dcc | 3807ec681f3Smrg AMD_FMT_MOD_SET(DCC_RETILE, 1) | 3817ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | 3827ec681f3Smrg AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) | 3837ec681f3Smrg AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)) 3847ec681f3Smrg } 3857ec681f3Smrg 3867ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3877ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, version) | 3887ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | 3897ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | 3907ec681f3Smrg AMD_FMT_MOD_SET(PACKERS, pkrs)) 3917ec681f3Smrg 3927ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3937ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | 3947ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | 3957ec681f3Smrg AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)) 3967ec681f3Smrg 3977ec681f3Smrg if (util_format_get_blocksizebits(format) != 32) { 3987ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 3997ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | 4007ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); 4017ec681f3Smrg } 4027ec681f3Smrg 4037ec681f3Smrg ADD_MOD(AMD_FMT_MOD | 4047ec681f3Smrg AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | 4057ec681f3Smrg AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); 4067ec681f3Smrg 4077ec681f3Smrg ADD_MOD(DRM_FORMAT_MOD_LINEAR) 4087ec681f3Smrg break; 4097ec681f3Smrg } 4107ec681f3Smrg default: 4117ec681f3Smrg break; 4127ec681f3Smrg } 4137ec681f3Smrg 4147ec681f3Smrg#undef ADD_MOD 4157ec681f3Smrg 4167ec681f3Smrg if (!mods) { 4177ec681f3Smrg *mod_count = current_mod; 4187ec681f3Smrg return true; 4197ec681f3Smrg } 4207ec681f3Smrg 4217ec681f3Smrg bool complete = current_mod <= *mod_count; 4227ec681f3Smrg *mod_count = MIN2(*mod_count, current_mod); 4237ec681f3Smrg return complete; 42401e04c3fSmrg} 42501e04c3fSmrg 4267ec681f3Smrgstatic void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput) 42701e04c3fSmrg{ 4287ec681f3Smrg return malloc(pInput->sizeInBytes); 42901e04c3fSmrg} 43001e04c3fSmrg 4317ec681f3Smrgstatic ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput) 43201e04c3fSmrg{ 4337ec681f3Smrg free(pInput->pVirtAddr); 4347ec681f3Smrg return ADDR_OK; 43501e04c3fSmrg} 43601e04c3fSmrg 4377ec681f3Smrgstruct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, 4387ec681f3Smrg uint64_t *max_alignment) 43901e04c3fSmrg{ 4407ec681f3Smrg ADDR_CREATE_INPUT addrCreateInput = {0}; 4417ec681f3Smrg ADDR_CREATE_OUTPUT addrCreateOutput = {0}; 4427ec681f3Smrg ADDR_REGISTER_VALUE regValue = {0}; 4437ec681f3Smrg ADDR_CREATE_FLAGS createFlags = {{0}}; 4447ec681f3Smrg ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; 4457ec681f3Smrg ADDR_E_RETURNCODE addrRet; 4467ec681f3Smrg 4477ec681f3Smrg addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); 4487ec681f3Smrg addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); 4497ec681f3Smrg 4507ec681f3Smrg regValue.gbAddrConfig = info->gb_addr_config; 4517ec681f3Smrg createFlags.value = 0; 4527ec681f3Smrg 4537ec681f3Smrg addrCreateInput.chipFamily = info->family_id; 4547ec681f3Smrg addrCreateInput.chipRevision = info->chip_external_rev; 4557ec681f3Smrg 4567ec681f3Smrg if (addrCreateInput.chipFamily == FAMILY_UNKNOWN) 4577ec681f3Smrg return NULL; 4587ec681f3Smrg 4597ec681f3Smrg if (addrCreateInput.chipFamily >= FAMILY_AI) { 4607ec681f3Smrg addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND; 4617ec681f3Smrg } else { 4627ec681f3Smrg regValue.noOfBanks = info->mc_arb_ramcfg & 0x3; 4637ec681f3Smrg regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2; 4647ec681f3Smrg 4657ec681f3Smrg regValue.backendDisables = info->enabled_rb_mask; 4667ec681f3Smrg regValue.pTileConfig = info->si_tile_mode_array; 4677ec681f3Smrg regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array); 4687ec681f3Smrg if (addrCreateInput.chipFamily == FAMILY_SI) { 4697ec681f3Smrg regValue.pMacroTileConfig = NULL; 4707ec681f3Smrg regValue.noOfMacroEntries = 0; 4717ec681f3Smrg } else { 4727ec681f3Smrg regValue.pMacroTileConfig = info->cik_macrotile_mode_array; 4737ec681f3Smrg regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array); 4747ec681f3Smrg } 4757ec681f3Smrg 4767ec681f3Smrg createFlags.useTileIndex = 1; 4777ec681f3Smrg createFlags.useHtileSliceAlign = 1; 4787ec681f3Smrg 4797ec681f3Smrg addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; 4807ec681f3Smrg } 4817ec681f3Smrg 4827ec681f3Smrg addrCreateInput.callbacks.allocSysMem = allocSysMem; 4837ec681f3Smrg addrCreateInput.callbacks.freeSysMem = freeSysMem; 4847ec681f3Smrg addrCreateInput.callbacks.debugPrint = 0; 4857ec681f3Smrg addrCreateInput.createFlags = createFlags; 4867ec681f3Smrg addrCreateInput.regValue = regValue; 4877ec681f3Smrg 4887ec681f3Smrg addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput); 4897ec681f3Smrg if (addrRet != ADDR_OK) 4907ec681f3Smrg return NULL; 4917ec681f3Smrg 4927ec681f3Smrg if (max_alignment) { 4937ec681f3Smrg addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput); 4947ec681f3Smrg if (addrRet == ADDR_OK) { 4957ec681f3Smrg *max_alignment = addrGetMaxAlignmentsOutput.baseAlign; 4967ec681f3Smrg } 4977ec681f3Smrg } 4987ec681f3Smrg 4997ec681f3Smrg struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib)); 5007ec681f3Smrg if (!addrlib) { 5017ec681f3Smrg AddrDestroy(addrCreateOutput.hLib); 5027ec681f3Smrg return NULL; 5037ec681f3Smrg } 5047ec681f3Smrg 5057ec681f3Smrg addrlib->handle = addrCreateOutput.hLib; 5067ec681f3Smrg return addrlib; 50701e04c3fSmrg} 50801e04c3fSmrg 5097ec681f3Smrgvoid ac_addrlib_destroy(struct ac_addrlib *addrlib) 51001e04c3fSmrg{ 5117ec681f3Smrg AddrDestroy(addrlib->handle); 5127ec681f3Smrg free(addrlib); 51301e04c3fSmrg} 51401e04c3fSmrg 5157ec681f3Smrgvoid *ac_addrlib_get_handle(struct ac_addrlib *addrlib) 5167ec681f3Smrg{ 5177ec681f3Smrg return addrlib->handle; 5187ec681f3Smrg} 51901e04c3fSmrg 5207ec681f3Smrgstatic int surf_config_sanity(const struct ac_surf_config *config, unsigned flags) 52101e04c3fSmrg{ 5227ec681f3Smrg /* FMASK is allocated together with the color surface and can't be 5237ec681f3Smrg * allocated separately. 5247ec681f3Smrg */ 5257ec681f3Smrg assert(!(flags & RADEON_SURF_FMASK)); 5267ec681f3Smrg if (flags & RADEON_SURF_FMASK) 5277ec681f3Smrg return -EINVAL; 5287ec681f3Smrg 5297ec681f3Smrg /* all dimension must be at least 1 ! */ 5307ec681f3Smrg if (!config->info.width || !config->info.height || !config->info.depth || 5317ec681f3Smrg !config->info.array_size || !config->info.levels) 5327ec681f3Smrg return -EINVAL; 5337ec681f3Smrg 5347ec681f3Smrg switch (config->info.samples) { 5357ec681f3Smrg case 0: 5367ec681f3Smrg case 1: 5377ec681f3Smrg case 2: 5387ec681f3Smrg case 4: 5397ec681f3Smrg case 8: 5407ec681f3Smrg break; 5417ec681f3Smrg case 16: 5427ec681f3Smrg if (flags & RADEON_SURF_Z_OR_SBUFFER) 5437ec681f3Smrg return -EINVAL; 5447ec681f3Smrg break; 5457ec681f3Smrg default: 5467ec681f3Smrg return -EINVAL; 5477ec681f3Smrg } 5487ec681f3Smrg 5497ec681f3Smrg if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) { 5507ec681f3Smrg switch (config->info.storage_samples) { 5517ec681f3Smrg case 0: 5527ec681f3Smrg case 1: 5537ec681f3Smrg case 2: 5547ec681f3Smrg case 4: 5557ec681f3Smrg case 8: 5567ec681f3Smrg break; 5577ec681f3Smrg default: 5587ec681f3Smrg return -EINVAL; 5597ec681f3Smrg } 5607ec681f3Smrg } 5617ec681f3Smrg 5627ec681f3Smrg if (config->is_3d && config->info.array_size > 1) 5637ec681f3Smrg return -EINVAL; 5647ec681f3Smrg if (config->is_cube && config->info.depth > 1) 5657ec681f3Smrg return -EINVAL; 5667ec681f3Smrg 5677ec681f3Smrg return 0; 5687ec681f3Smrg} 56901e04c3fSmrg 5707ec681f3Smrgstatic int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config, 5717ec681f3Smrg struct radeon_surf *surf, bool is_stencil, unsigned level, 5727ec681f3Smrg bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, 5737ec681f3Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, 5747ec681f3Smrg ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, 5757ec681f3Smrg ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut, 5767ec681f3Smrg ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn, 5777ec681f3Smrg ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut) 5787ec681f3Smrg{ 5797ec681f3Smrg struct legacy_surf_level *surf_level; 5807ec681f3Smrg struct legacy_surf_dcc_level *dcc_level; 5817ec681f3Smrg ADDR_E_RETURNCODE ret; 5827ec681f3Smrg 5837ec681f3Smrg AddrSurfInfoIn->mipLevel = level; 5847ec681f3Smrg AddrSurfInfoIn->width = u_minify(config->info.width, level); 5857ec681f3Smrg AddrSurfInfoIn->height = u_minify(config->info.height, level); 5867ec681f3Smrg 5877ec681f3Smrg /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics, 5887ec681f3Smrg * because GFX9 needs linear alignment of 256 bytes. 5897ec681f3Smrg */ 5907ec681f3Smrg if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED && 5917ec681f3Smrg AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) { 5927ec681f3Smrg unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8); 5937ec681f3Smrg 5947ec681f3Smrg AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment); 5957ec681f3Smrg } 5967ec681f3Smrg 5977ec681f3Smrg /* addrlib assumes the bytes/pixel is a divisor of 64, which is not 5987ec681f3Smrg * true for r32g32b32 formats. */ 5997ec681f3Smrg if (AddrSurfInfoIn->bpp == 96) { 6007ec681f3Smrg assert(config->info.levels == 1); 6017ec681f3Smrg assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED); 6027ec681f3Smrg 6037ec681f3Smrg /* The least common multiple of 64 bytes and 12 bytes/pixel is 6047ec681f3Smrg * 192 bytes, or 16 pixels. */ 6057ec681f3Smrg AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16); 6067ec681f3Smrg } 6077ec681f3Smrg 6087ec681f3Smrg if (config->is_3d) 6097ec681f3Smrg AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level); 6107ec681f3Smrg else if (config->is_cube) 6117ec681f3Smrg AddrSurfInfoIn->numSlices = 6; 6127ec681f3Smrg else 6137ec681f3Smrg AddrSurfInfoIn->numSlices = config->info.array_size; 6147ec681f3Smrg 6157ec681f3Smrg if (level > 0) { 6167ec681f3Smrg /* Set the base level pitch. This is needed for calculation 6177ec681f3Smrg * of non-zero levels. */ 6187ec681f3Smrg if (is_stencil) 6197ec681f3Smrg AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x; 6207ec681f3Smrg else 6217ec681f3Smrg AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x; 6227ec681f3Smrg 6237ec681f3Smrg /* Convert blocks to pixels for compressed formats. */ 6247ec681f3Smrg if (compressed) 6257ec681f3Smrg AddrSurfInfoIn->basePitch *= surf->blk_w; 6267ec681f3Smrg } 6277ec681f3Smrg 6287ec681f3Smrg ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut); 6297ec681f3Smrg if (ret != ADDR_OK) { 6307ec681f3Smrg return ret; 6317ec681f3Smrg } 6327ec681f3Smrg 6337ec681f3Smrg surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level]; 6347ec681f3Smrg dcc_level = &surf->u.legacy.color.dcc_level[level]; 6357ec681f3Smrg surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256; 6367ec681f3Smrg surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4; 6377ec681f3Smrg surf_level->nblk_x = AddrSurfInfoOut->pitch; 6387ec681f3Smrg surf_level->nblk_y = AddrSurfInfoOut->height; 6397ec681f3Smrg 6407ec681f3Smrg switch (AddrSurfInfoOut->tileMode) { 6417ec681f3Smrg case ADDR_TM_LINEAR_ALIGNED: 6427ec681f3Smrg surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 6437ec681f3Smrg break; 6447ec681f3Smrg case ADDR_TM_1D_TILED_THIN1: 6457ec681f3Smrg case ADDR_TM_PRT_TILED_THIN1: 6467ec681f3Smrg surf_level->mode = RADEON_SURF_MODE_1D; 6477ec681f3Smrg break; 6487ec681f3Smrg case ADDR_TM_2D_TILED_THIN1: 6497ec681f3Smrg case ADDR_TM_PRT_2D_TILED_THIN1: 6507ec681f3Smrg surf_level->mode = RADEON_SURF_MODE_2D; 6517ec681f3Smrg break; 6527ec681f3Smrg default: 6537ec681f3Smrg assert(0); 6547ec681f3Smrg } 6557ec681f3Smrg 6567ec681f3Smrg if (is_stencil) 6577ec681f3Smrg surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex; 6587ec681f3Smrg else 6597ec681f3Smrg surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex; 6607ec681f3Smrg 6617ec681f3Smrg if (AddrSurfInfoIn->flags.prt) { 6627ec681f3Smrg if (level == 0) { 6637ec681f3Smrg surf->prt_tile_width = AddrSurfInfoOut->pitchAlign; 6647ec681f3Smrg surf->prt_tile_height = AddrSurfInfoOut->heightAlign; 6657ec681f3Smrg } 6667ec681f3Smrg if (surf_level->nblk_x >= surf->prt_tile_width && 6677ec681f3Smrg surf_level->nblk_y >= surf->prt_tile_height) { 6687ec681f3Smrg /* +1 because the current level is not in the miptail */ 6697ec681f3Smrg surf->first_mip_tail_level = level + 1; 6707ec681f3Smrg } 6717ec681f3Smrg } 6727ec681f3Smrg 6737ec681f3Smrg surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize; 6747ec681f3Smrg 6757ec681f3Smrg /* Clear DCC fields at the beginning. */ 6767ec681f3Smrg if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil) 6777ec681f3Smrg dcc_level->dcc_offset = 0; 6787ec681f3Smrg 6797ec681f3Smrg /* The previous level's flag tells us if we can use DCC for this level. */ 6807ec681f3Smrg if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) { 6817ec681f3Smrg bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned; 6827ec681f3Smrg 6837ec681f3Smrg AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; 6847ec681f3Smrg AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; 6857ec681f3Smrg AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; 6867ec681f3Smrg AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; 6877ec681f3Smrg AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 6887ec681f3Smrg 6897ec681f3Smrg ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut); 6907ec681f3Smrg 6917ec681f3Smrg if (ret == ADDR_OK) { 6927ec681f3Smrg dcc_level->dcc_offset = surf->meta_size; 6937ec681f3Smrg surf->num_meta_levels = level + 1; 6947ec681f3Smrg surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize; 6957ec681f3Smrg surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign)); 6967ec681f3Smrg 6977ec681f3Smrg /* If the DCC size of a subresource (1 mip level or 1 slice) 6987ec681f3Smrg * is not aligned, the DCC memory layout is not contiguous for 6997ec681f3Smrg * that subresource, which means we can't use fast clear. 7007ec681f3Smrg * 7017ec681f3Smrg * We only do fast clears for whole mipmap levels. If we did 7027ec681f3Smrg * per-slice fast clears, the same restriction would apply. 7037ec681f3Smrg * (i.e. only compute the slice size and see if it's aligned) 7047ec681f3Smrg * 7057ec681f3Smrg * The last level can be non-contiguous and still be clearable 7067ec681f3Smrg * if it's interleaved with the next level that doesn't exist. 7077ec681f3Smrg */ 7087ec681f3Smrg if (AddrDccOut->dccRamSizeAligned || 7097ec681f3Smrg (prev_level_clearable && level == config->info.levels - 1)) 7107ec681f3Smrg dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize; 7117ec681f3Smrg else 7127ec681f3Smrg dcc_level->dcc_fast_clear_size = 0; 7137ec681f3Smrg 7147ec681f3Smrg /* Compute the DCC slice size because addrlib doesn't 7157ec681f3Smrg * provide this info. As DCC memory is linear (each 7167ec681f3Smrg * slice is the same size) it's easy to compute. 7177ec681f3Smrg */ 7187ec681f3Smrg surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size; 7197ec681f3Smrg 7207ec681f3Smrg /* For arrays, we have to compute the DCC info again 7217ec681f3Smrg * with one slice size to get a correct fast clear 7227ec681f3Smrg * size. 7237ec681f3Smrg */ 7247ec681f3Smrg if (config->info.array_size > 1) { 7257ec681f3Smrg AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize; 7267ec681f3Smrg AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; 7277ec681f3Smrg AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; 7287ec681f3Smrg AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; 7297ec681f3Smrg AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 7307ec681f3Smrg 7317ec681f3Smrg ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut); 7327ec681f3Smrg if (ret == ADDR_OK) { 7337ec681f3Smrg /* If the DCC memory isn't properly 7347ec681f3Smrg * aligned, the data are interleaved 7357ec681f3Smrg * accross slices. 7367ec681f3Smrg */ 7377ec681f3Smrg if (AddrDccOut->dccRamSizeAligned) 7387ec681f3Smrg dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize; 7397ec681f3Smrg else 7407ec681f3Smrg dcc_level->dcc_slice_fast_clear_size = 0; 7417ec681f3Smrg } 7427ec681f3Smrg 7437ec681f3Smrg if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS && 7447ec681f3Smrg surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) { 7457ec681f3Smrg surf->meta_size = 0; 7467ec681f3Smrg surf->num_meta_levels = 0; 7477ec681f3Smrg AddrDccOut->subLvlCompressible = false; 7487ec681f3Smrg } 7497ec681f3Smrg } else { 7507ec681f3Smrg dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size; 7517ec681f3Smrg } 7527ec681f3Smrg } 7537ec681f3Smrg } 7547ec681f3Smrg 7557ec681f3Smrg /* HTILE. */ 7567ec681f3Smrg if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D && 7577ec681f3Smrg level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) { 7587ec681f3Smrg AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible; 7597ec681f3Smrg AddrHtileIn->pitch = AddrSurfInfoOut->pitch; 7607ec681f3Smrg AddrHtileIn->height = AddrSurfInfoOut->height; 7617ec681f3Smrg AddrHtileIn->numSlices = AddrSurfInfoOut->depth; 7627ec681f3Smrg AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; 7637ec681f3Smrg AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; 7647ec681f3Smrg AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; 7657ec681f3Smrg AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; 7667ec681f3Smrg AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 7677ec681f3Smrg 7687ec681f3Smrg ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut); 7697ec681f3Smrg 7707ec681f3Smrg if (ret == ADDR_OK) { 7717ec681f3Smrg surf->meta_size = AddrHtileOut->htileBytes; 7727ec681f3Smrg surf->meta_slice_size = AddrHtileOut->sliceSize; 7737ec681f3Smrg surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign); 7747ec681f3Smrg surf->meta_pitch = AddrHtileOut->pitch; 7757ec681f3Smrg surf->num_meta_levels = level + 1; 7767ec681f3Smrg } 7777ec681f3Smrg } 7787ec681f3Smrg 7797ec681f3Smrg return 0; 7807ec681f3Smrg} 7817ec681f3Smrg 7827ec681f3Smrgstatic void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info) 7837ec681f3Smrg{ 7847ec681f3Smrg uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; 7857ec681f3Smrg 7867ec681f3Smrg if (info->chip_class >= GFX7) 7877ec681f3Smrg surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); 7887ec681f3Smrg else 7897ec681f3Smrg surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); 79001e04c3fSmrg} 79101e04c3fSmrg 79201e04c3fSmrgstatic unsigned cik_get_macro_tile_index(struct radeon_surf *surf) 79301e04c3fSmrg{ 7947ec681f3Smrg unsigned index, tileb; 79501e04c3fSmrg 7967ec681f3Smrg tileb = 8 * 8 * surf->bpe; 7977ec681f3Smrg tileb = MIN2(surf->u.legacy.tile_split, tileb); 79801e04c3fSmrg 7997ec681f3Smrg for (index = 0; tileb > 64; index++) 8007ec681f3Smrg tileb >>= 1; 80101e04c3fSmrg 8027ec681f3Smrg assert(index < 16); 8037ec681f3Smrg return index; 80401e04c3fSmrg} 80501e04c3fSmrg 8067ec681f3Smrgstatic bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf) 80701e04c3fSmrg{ 8087ec681f3Smrg unsigned num_channels = config->info.num_channels; 8097ec681f3Smrg unsigned bpe = surf->bpe; 8107ec681f3Smrg 8117ec681f3Smrg /* With modifiers the kernel is in charge of whether it is displayable. 8127ec681f3Smrg * We need to ensure at least 32 pixels pitch alignment, but this is 8137ec681f3Smrg * always the case when the blocksize >= 4K. 8147ec681f3Smrg */ 8157ec681f3Smrg if (surf->modifier != DRM_FORMAT_MOD_INVALID) 8167ec681f3Smrg return false; 8177ec681f3Smrg 8187ec681f3Smrg if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 8197ec681f3Smrg surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 && 8207ec681f3Smrg surf->blk_h == 1) { 8217ec681f3Smrg /* subsampled */ 8227ec681f3Smrg if (surf->blk_w == 2 && surf->blk_h == 1) 8237ec681f3Smrg return true; 8247ec681f3Smrg 8257ec681f3Smrg if (/* RGBA8 or RGBA16F */ 8267ec681f3Smrg (bpe >= 4 && bpe <= 8 && num_channels == 4) || 8277ec681f3Smrg /* R5G6B5 or R5G5B5A1 */ 8287ec681f3Smrg (bpe == 2 && num_channels >= 3) || 8297ec681f3Smrg /* C8 palette */ 8307ec681f3Smrg (bpe == 1 && num_channels == 1)) 8317ec681f3Smrg return true; 8327ec681f3Smrg } 8337ec681f3Smrg return false; 83401e04c3fSmrg} 83501e04c3fSmrg 83601e04c3fSmrg/** 83701e04c3fSmrg * This must be called after the first level is computed. 83801e04c3fSmrg * 83901e04c3fSmrg * Copy surface-global settings like pipe/bank config from level 0 surface 84001e04c3fSmrg * computation, and compute tile swizzle. 84101e04c3fSmrg */ 8427ec681f3Smrgstatic int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info, 8437ec681f3Smrg const struct ac_surf_config *config, 8447ec681f3Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf) 84501e04c3fSmrg{ 8467ec681f3Smrg surf->surf_alignment_log2 = util_logbase2(csio->baseAlign); 8477ec681f3Smrg surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1; 8487ec681f3Smrg gfx6_set_micro_tile_mode(surf, info); 8497ec681f3Smrg 8507ec681f3Smrg /* For 2D modes only. */ 8517ec681f3Smrg if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) { 8527ec681f3Smrg surf->u.legacy.bankw = csio->pTileInfo->bankWidth; 8537ec681f3Smrg surf->u.legacy.bankh = csio->pTileInfo->bankHeight; 8547ec681f3Smrg surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio; 8557ec681f3Smrg surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes; 8567ec681f3Smrg surf->u.legacy.num_banks = csio->pTileInfo->banks; 8577ec681f3Smrg surf->u.legacy.macro_tile_index = csio->macroModeIndex; 8587ec681f3Smrg } else { 8597ec681f3Smrg surf->u.legacy.macro_tile_index = 0; 8607ec681f3Smrg } 8617ec681f3Smrg 8627ec681f3Smrg /* Compute tile swizzle. */ 8637ec681f3Smrg /* TODO: fix tile swizzle with mipmapping for GFX6 */ 8647ec681f3Smrg if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index && 8657ec681f3Smrg surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && 8667ec681f3Smrg !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && 8677ec681f3Smrg !get_display_flag(config, surf)) { 8687ec681f3Smrg ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; 8697ec681f3Smrg ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0}; 8707ec681f3Smrg 8717ec681f3Smrg AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 8727ec681f3Smrg AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 8737ec681f3Smrg 8747ec681f3Smrg AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 8757ec681f3Smrg AddrBaseSwizzleIn.tileIndex = csio->tileIndex; 8767ec681f3Smrg AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex; 8777ec681f3Smrg AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo; 8787ec681f3Smrg AddrBaseSwizzleIn.tileMode = csio->tileMode; 8797ec681f3Smrg 8807ec681f3Smrg int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut); 8817ec681f3Smrg if (r != ADDR_OK) 8827ec681f3Smrg return r; 8837ec681f3Smrg 8847ec681f3Smrg assert(AddrBaseSwizzleOut.tileSwizzle <= 8857ec681f3Smrg u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 8867ec681f3Smrg surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle; 8877ec681f3Smrg } 8887ec681f3Smrg return 0; 88901e04c3fSmrg} 89001e04c3fSmrg 8917ec681f3Smrgstatic void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config, 8927ec681f3Smrg struct radeon_surf *surf) 89301e04c3fSmrg{ 8947ec681f3Smrg unsigned pipe_interleave_bytes = info->pipe_interleave_bytes; 8957ec681f3Smrg unsigned num_pipes = info->num_tile_pipes; 8967ec681f3Smrg unsigned cl_width, cl_height; 8977ec681f3Smrg 8987ec681f3Smrg if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear || 8997ec681f3Smrg (config->info.samples >= 2 && !surf->fmask_size)) 9007ec681f3Smrg return; 9017ec681f3Smrg 9027ec681f3Smrg assert(info->chip_class <= GFX8); 9037ec681f3Smrg 9047ec681f3Smrg switch (num_pipes) { 9057ec681f3Smrg case 2: 9067ec681f3Smrg cl_width = 32; 9077ec681f3Smrg cl_height = 16; 9087ec681f3Smrg break; 9097ec681f3Smrg case 4: 9107ec681f3Smrg cl_width = 32; 9117ec681f3Smrg cl_height = 32; 9127ec681f3Smrg break; 9137ec681f3Smrg case 8: 9147ec681f3Smrg cl_width = 64; 9157ec681f3Smrg cl_height = 32; 9167ec681f3Smrg break; 9177ec681f3Smrg case 16: /* Hawaii */ 9187ec681f3Smrg cl_width = 64; 9197ec681f3Smrg cl_height = 64; 9207ec681f3Smrg break; 9217ec681f3Smrg default: 9227ec681f3Smrg assert(0); 9237ec681f3Smrg return; 9247ec681f3Smrg } 9257ec681f3Smrg 9267ec681f3Smrg unsigned base_align = num_pipes * pipe_interleave_bytes; 9277ec681f3Smrg 9287ec681f3Smrg unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8); 9297ec681f3Smrg unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8); 9307ec681f3Smrg unsigned slice_elements = (width * height) / (8 * 8); 9317ec681f3Smrg 9327ec681f3Smrg /* Each element of CMASK is a nibble. */ 9337ec681f3Smrg unsigned slice_bytes = slice_elements / 2; 9347ec681f3Smrg 9357ec681f3Smrg surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128); 9367ec681f3Smrg if (surf->u.legacy.color.cmask_slice_tile_max) 9377ec681f3Smrg surf->u.legacy.color.cmask_slice_tile_max -= 1; 9387ec681f3Smrg 9397ec681f3Smrg unsigned num_layers; 9407ec681f3Smrg if (config->is_3d) 9417ec681f3Smrg num_layers = config->info.depth; 9427ec681f3Smrg else if (config->is_cube) 9437ec681f3Smrg num_layers = 6; 9447ec681f3Smrg else 9457ec681f3Smrg num_layers = config->info.array_size; 9467ec681f3Smrg 9477ec681f3Smrg surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align)); 9487ec681f3Smrg surf->cmask_slice_size = align(slice_bytes, base_align); 9497ec681f3Smrg surf->cmask_size = surf->cmask_slice_size * num_layers; 95001e04c3fSmrg} 95101e04c3fSmrg 95201e04c3fSmrg/** 95301e04c3fSmrg * Fill in the tiling information in \p surf based on the given surface config. 95401e04c3fSmrg * 95501e04c3fSmrg * The following fields of \p surf must be initialized by the caller: 95601e04c3fSmrg * blk_w, blk_h, bpe, flags. 95701e04c3fSmrg */ 9587ec681f3Smrgstatic int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, 9597ec681f3Smrg const struct ac_surf_config *config, enum radeon_surf_mode mode, 9607ec681f3Smrg struct radeon_surf *surf) 96101e04c3fSmrg{ 9627ec681f3Smrg unsigned level; 9637ec681f3Smrg bool compressed; 9647ec681f3Smrg ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 9657ec681f3Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; 9667ec681f3Smrg ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; 9677ec681f3Smrg ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; 9687ec681f3Smrg ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; 9697ec681f3Smrg ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; 9707ec681f3Smrg ADDR_TILEINFO AddrTileInfoIn = {0}; 9717ec681f3Smrg ADDR_TILEINFO AddrTileInfoOut = {0}; 9727ec681f3Smrg int r; 9737ec681f3Smrg 9747ec681f3Smrg AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); 9757ec681f3Smrg AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); 9767ec681f3Smrg AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); 9777ec681f3Smrg AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); 9787ec681f3Smrg AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); 9797ec681f3Smrg AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); 9807ec681f3Smrg AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; 9817ec681f3Smrg 9827ec681f3Smrg compressed = surf->blk_w == 4 && surf->blk_h == 4; 9837ec681f3Smrg 9847ec681f3Smrg /* MSAA requires 2D tiling. */ 9857ec681f3Smrg if (config->info.samples > 1) 9867ec681f3Smrg mode = RADEON_SURF_MODE_2D; 9877ec681f3Smrg 9887ec681f3Smrg /* DB doesn't support linear layouts. */ 9897ec681f3Smrg if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D) 9907ec681f3Smrg mode = RADEON_SURF_MODE_1D; 9917ec681f3Smrg 9927ec681f3Smrg /* Set the requested tiling mode. */ 9937ec681f3Smrg switch (mode) { 9947ec681f3Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 9957ec681f3Smrg AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; 9967ec681f3Smrg break; 9977ec681f3Smrg case RADEON_SURF_MODE_1D: 9987ec681f3Smrg if (surf->flags & RADEON_SURF_PRT) 9997ec681f3Smrg AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1; 10007ec681f3Smrg else 10017ec681f3Smrg AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; 10027ec681f3Smrg break; 10037ec681f3Smrg case RADEON_SURF_MODE_2D: 10047ec681f3Smrg if (surf->flags & RADEON_SURF_PRT) 10057ec681f3Smrg AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1; 10067ec681f3Smrg else 10077ec681f3Smrg AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; 10087ec681f3Smrg break; 10097ec681f3Smrg default: 10107ec681f3Smrg assert(0); 10117ec681f3Smrg } 10127ec681f3Smrg 10137ec681f3Smrg /* The format must be set correctly for the allocation of compressed 10147ec681f3Smrg * textures to work. In other cases, setting the bpp is sufficient. 10157ec681f3Smrg */ 10167ec681f3Smrg if (compressed) { 10177ec681f3Smrg switch (surf->bpe) { 10187ec681f3Smrg case 8: 10197ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_BC1; 10207ec681f3Smrg break; 10217ec681f3Smrg case 16: 10227ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_BC3; 10237ec681f3Smrg break; 10247ec681f3Smrg default: 10257ec681f3Smrg assert(0); 10267ec681f3Smrg } 10277ec681f3Smrg } else { 10287ec681f3Smrg AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; 10297ec681f3Smrg } 10307ec681f3Smrg 10317ec681f3Smrg AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 10327ec681f3Smrg AddrSurfInfoIn.tileIndex = -1; 10337ec681f3Smrg 10347ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { 10357ec681f3Smrg AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 10367ec681f3Smrg } 10377ec681f3Smrg 10387ec681f3Smrg /* Set the micro tile type. */ 10397ec681f3Smrg if (surf->flags & RADEON_SURF_SCANOUT) 10407ec681f3Smrg AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; 10417ec681f3Smrg else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 10427ec681f3Smrg AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; 10437ec681f3Smrg else 10447ec681f3Smrg AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; 10457ec681f3Smrg 10467ec681f3Smrg AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 10477ec681f3Smrg AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 10487ec681f3Smrg AddrSurfInfoIn.flags.cube = config->is_cube; 10497ec681f3Smrg AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 10507ec681f3Smrg AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1; 10517ec681f3Smrg AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; 10527ec681f3Smrg AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0; 10537ec681f3Smrg 10547ec681f3Smrg /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been 10557ec681f3Smrg * requested, because TC-compatible HTILE requires 2D tiling. 10567ec681f3Smrg */ 10577ec681f3Smrg AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && 10587ec681f3Smrg !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 && 10597ec681f3Smrg !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE); 10607ec681f3Smrg 10617ec681f3Smrg /* DCC notes: 10627ec681f3Smrg * - If we add MSAA support, keep in mind that CB can't decompress 8bpp 10637ec681f3Smrg * with samples >= 4. 10647ec681f3Smrg * - Mipmapped array textures have low performance (discovered by a closed 10657ec681f3Smrg * driver team). 10667ec681f3Smrg */ 10677ec681f3Smrg AddrSurfInfoIn.flags.dccCompatible = 10687ec681f3Smrg info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */ 10697ec681f3Smrg !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && 10707ec681f3Smrg !compressed && 10717ec681f3Smrg ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1); 10727ec681f3Smrg 10737ec681f3Smrg AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; 10747ec681f3Smrg AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 10757ec681f3Smrg 10767ec681f3Smrg /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit) 10777ec681f3Smrg * for Z and stencil. This can cause a number of problems which we work 10787ec681f3Smrg * around here: 10797ec681f3Smrg * 10807ec681f3Smrg * - a depth part that is incompatible with mipmapped texturing 10817ec681f3Smrg * - at least on Stoney, entirely incompatible Z/S aspects (e.g. 10827ec681f3Smrg * incorrect tiling applied to the stencil part, stencil buffer 10837ec681f3Smrg * memory accesses that go out of bounds) even without mipmapping 10847ec681f3Smrg * 10857ec681f3Smrg * Some piglit tests that are prone to different types of related 10867ec681f3Smrg * failures: 10877ec681f3Smrg * ./bin/ext_framebuffer_multisample-upsample 2 stencil 10887ec681f3Smrg * ./bin/framebuffer-blit-levels {draw,read} stencil 10897ec681f3Smrg * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample} 10907ec681f3Smrg * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw} 10917ec681f3Smrg * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8 10927ec681f3Smrg */ 10937ec681f3Smrg int stencil_tile_idx = -1; 10947ec681f3Smrg 10957ec681f3Smrg if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil && 10967ec681f3Smrg (config->info.levels > 1 || info->family == CHIP_STONEY)) { 10977ec681f3Smrg /* Compute stencilTileIdx that is compatible with the (depth) 10987ec681f3Smrg * tileIdx. This degrades the depth surface if necessary to 10997ec681f3Smrg * ensure that a matching stencilTileIdx exists. */ 11007ec681f3Smrg AddrSurfInfoIn.flags.matchStencilTileCfg = 1; 11017ec681f3Smrg 11027ec681f3Smrg /* Keep the depth mip-tail compatible with texturing. */ 11037ec681f3Smrg AddrSurfInfoIn.flags.noStencil = 1; 11047ec681f3Smrg } 11057ec681f3Smrg 11067ec681f3Smrg /* Set preferred macrotile parameters. This is usually required 11077ec681f3Smrg * for shared resources. This is for 2D tiling only. */ 11087ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 11097ec681f3Smrg AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw && 11107ec681f3Smrg surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) { 11117ec681f3Smrg /* If any of these parameters are incorrect, the calculation 11127ec681f3Smrg * will fail. */ 11137ec681f3Smrg AddrTileInfoIn.banks = surf->u.legacy.num_banks; 11147ec681f3Smrg AddrTileInfoIn.bankWidth = surf->u.legacy.bankw; 11157ec681f3Smrg AddrTileInfoIn.bankHeight = surf->u.legacy.bankh; 11167ec681f3Smrg AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea; 11177ec681f3Smrg AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split; 11187ec681f3Smrg AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */ 11197ec681f3Smrg AddrSurfInfoIn.flags.opt4Space = 0; 11207ec681f3Smrg AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; 11217ec681f3Smrg 11227ec681f3Smrg /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set 11237ec681f3Smrg * the tile index, because we are expected to know it if 11247ec681f3Smrg * we know the other parameters. 11257ec681f3Smrg * 11267ec681f3Smrg * This is something that can easily be fixed in Addrlib. 11277ec681f3Smrg * For now, just figure it out here. 11287ec681f3Smrg * Note that only 2D_TILE_THIN1 is handled here. 11297ec681f3Smrg */ 11307ec681f3Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 11317ec681f3Smrg assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); 11327ec681f3Smrg 11337ec681f3Smrg if (info->chip_class == GFX6) { 11347ec681f3Smrg if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { 11357ec681f3Smrg if (surf->bpe == 2) 11367ec681f3Smrg AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ 11377ec681f3Smrg else 11387ec681f3Smrg AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ 11397ec681f3Smrg } else { 11407ec681f3Smrg if (surf->bpe == 1) 11417ec681f3Smrg AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ 11427ec681f3Smrg else if (surf->bpe == 2) 11437ec681f3Smrg AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ 11447ec681f3Smrg else if (surf->bpe == 4) 11457ec681f3Smrg AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ 11467ec681f3Smrg else 11477ec681f3Smrg AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ 11487ec681f3Smrg } 11497ec681f3Smrg } else { 11507ec681f3Smrg /* GFX7 - GFX8 */ 11517ec681f3Smrg if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) 11527ec681f3Smrg AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ 11537ec681f3Smrg else 11547ec681f3Smrg AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ 11557ec681f3Smrg 11567ec681f3Smrg /* Addrlib doesn't set this if tileIndex is forced like above. */ 11577ec681f3Smrg AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf); 11587ec681f3Smrg } 11597ec681f3Smrg } 11607ec681f3Smrg 11617ec681f3Smrg surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 11627ec681f3Smrg surf->num_meta_levels = 0; 11637ec681f3Smrg surf->surf_size = 0; 11647ec681f3Smrg surf->meta_size = 0; 11657ec681f3Smrg surf->meta_slice_size = 0; 11667ec681f3Smrg surf->meta_alignment_log2 = 0; 11677ec681f3Smrg 11687ec681f3Smrg const bool only_stencil = 11697ec681f3Smrg (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER); 11707ec681f3Smrg 11717ec681f3Smrg /* Calculate texture layout information. */ 11727ec681f3Smrg if (!only_stencil) { 11737ec681f3Smrg for (level = 0; level < config->info.levels; level++) { 11747ec681f3Smrg r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn, 11757ec681f3Smrg &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn, 11767ec681f3Smrg &AddrHtileOut); 11777ec681f3Smrg if (r) 11787ec681f3Smrg return r; 11797ec681f3Smrg 11807ec681f3Smrg if (level > 0) 11817ec681f3Smrg continue; 11827ec681f3Smrg 11837ec681f3Smrg if (!AddrSurfInfoOut.tcCompatible) { 11847ec681f3Smrg AddrSurfInfoIn.flags.tcCompatible = 0; 11857ec681f3Smrg surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 11867ec681f3Smrg } 11877ec681f3Smrg 11887ec681f3Smrg if (AddrSurfInfoIn.flags.matchStencilTileCfg) { 11897ec681f3Smrg AddrSurfInfoIn.flags.matchStencilTileCfg = 0; 11907ec681f3Smrg AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex; 11917ec681f3Smrg stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx; 11927ec681f3Smrg 11937ec681f3Smrg assert(stencil_tile_idx >= 0); 11947ec681f3Smrg } 11957ec681f3Smrg 11967ec681f3Smrg r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf); 11977ec681f3Smrg if (r) 11987ec681f3Smrg return r; 11997ec681f3Smrg } 12007ec681f3Smrg } 12017ec681f3Smrg 12027ec681f3Smrg /* Calculate texture layout information for stencil. */ 12037ec681f3Smrg if (surf->flags & RADEON_SURF_SBUFFER) { 12047ec681f3Smrg AddrSurfInfoIn.tileIndex = stencil_tile_idx; 12057ec681f3Smrg AddrSurfInfoIn.bpp = 8; 12067ec681f3Smrg AddrSurfInfoIn.flags.depth = 0; 12077ec681f3Smrg AddrSurfInfoIn.flags.stencil = 1; 12087ec681f3Smrg AddrSurfInfoIn.flags.tcCompatible = 0; 12097ec681f3Smrg /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ 12107ec681f3Smrg AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split; 12117ec681f3Smrg 12127ec681f3Smrg for (level = 0; level < config->info.levels; level++) { 12137ec681f3Smrg r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn, 12147ec681f3Smrg &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL); 12157ec681f3Smrg if (r) 12167ec681f3Smrg return r; 12177ec681f3Smrg 12187ec681f3Smrg /* DB uses the depth pitch for both stencil and depth. */ 12197ec681f3Smrg if (!only_stencil) { 12207ec681f3Smrg if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x) 12217ec681f3Smrg surf->u.legacy.stencil_adjusted = true; 12227ec681f3Smrg } else { 12237ec681f3Smrg surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x; 12247ec681f3Smrg } 12257ec681f3Smrg 12267ec681f3Smrg if (level == 0) { 12277ec681f3Smrg if (only_stencil) { 12287ec681f3Smrg r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf); 12297ec681f3Smrg if (r) 12307ec681f3Smrg return r; 12317ec681f3Smrg } 12327ec681f3Smrg 12337ec681f3Smrg /* For 2D modes only. */ 12347ec681f3Smrg if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { 12357ec681f3Smrg surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes; 12367ec681f3Smrg } 12377ec681f3Smrg } 12387ec681f3Smrg } 12397ec681f3Smrg } 12407ec681f3Smrg 12417ec681f3Smrg /* Compute FMASK. */ 12427ec681f3Smrg if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics && 12437ec681f3Smrg !(surf->flags & RADEON_SURF_NO_FMASK)) { 12447ec681f3Smrg ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0}; 12457ec681f3Smrg ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 12467ec681f3Smrg ADDR_TILEINFO fmask_tile_info = {0}; 12477ec681f3Smrg 12487ec681f3Smrg fin.size = sizeof(fin); 12497ec681f3Smrg fout.size = sizeof(fout); 12507ec681f3Smrg 12517ec681f3Smrg fin.tileMode = AddrSurfInfoOut.tileMode; 12527ec681f3Smrg fin.pitch = AddrSurfInfoOut.pitch; 12537ec681f3Smrg fin.height = config->info.height; 12547ec681f3Smrg fin.numSlices = AddrSurfInfoIn.numSlices; 12557ec681f3Smrg fin.numSamples = AddrSurfInfoIn.numSamples; 12567ec681f3Smrg fin.numFrags = AddrSurfInfoIn.numFrags; 12577ec681f3Smrg fin.tileIndex = -1; 12587ec681f3Smrg fout.pTileInfo = &fmask_tile_info; 12597ec681f3Smrg 12607ec681f3Smrg r = AddrComputeFmaskInfo(addrlib, &fin, &fout); 12617ec681f3Smrg if (r) 12627ec681f3Smrg return r; 12637ec681f3Smrg 12647ec681f3Smrg surf->fmask_size = fout.fmaskBytes; 12657ec681f3Smrg surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign); 12667ec681f3Smrg surf->fmask_slice_size = fout.sliceSize; 12677ec681f3Smrg surf->fmask_tile_swizzle = 0; 12687ec681f3Smrg 12697ec681f3Smrg surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64; 12707ec681f3Smrg if (surf->u.legacy.color.fmask.slice_tile_max) 12717ec681f3Smrg surf->u.legacy.color.fmask.slice_tile_max -= 1; 12727ec681f3Smrg 12737ec681f3Smrg surf->u.legacy.color.fmask.tiling_index = fout.tileIndex; 12747ec681f3Smrg surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight; 12757ec681f3Smrg surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch; 12767ec681f3Smrg 12777ec681f3Smrg /* Compute tile swizzle for FMASK. */ 12787ec681f3Smrg if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) { 12797ec681f3Smrg ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0}; 12807ec681f3Smrg ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0}; 12817ec681f3Smrg 12827ec681f3Smrg xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 12837ec681f3Smrg xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 12847ec681f3Smrg 12857ec681f3Smrg /* This counter starts from 1 instead of 0. */ 12867ec681f3Smrg xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 12877ec681f3Smrg xin.tileIndex = fout.tileIndex; 12887ec681f3Smrg xin.macroModeIndex = fout.macroModeIndex; 12897ec681f3Smrg xin.pTileInfo = fout.pTileInfo; 12907ec681f3Smrg xin.tileMode = fin.tileMode; 12917ec681f3Smrg 12927ec681f3Smrg int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout); 12937ec681f3Smrg if (r != ADDR_OK) 12947ec681f3Smrg return r; 12957ec681f3Smrg 12967ec681f3Smrg assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 12977ec681f3Smrg surf->fmask_tile_swizzle = xout.tileSwizzle; 12987ec681f3Smrg } 12997ec681f3Smrg } 13007ec681f3Smrg 13017ec681f3Smrg /* Recalculate the whole DCC miptree size including disabled levels. 13027ec681f3Smrg * This is what addrlib does, but calling addrlib would be a lot more 13037ec681f3Smrg * complicated. 13047ec681f3Smrg */ 13057ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) { 13067ec681f3Smrg /* The smallest miplevels that are never compressed by DCC 13077ec681f3Smrg * still read the DCC buffer via TC if the base level uses DCC, 13087ec681f3Smrg * and for some reason the DCC buffer needs to be larger if 13097ec681f3Smrg * the miptree uses non-zero tile_swizzle. Otherwise there are 13107ec681f3Smrg * VM faults. 13117ec681f3Smrg * 13127ec681f3Smrg * "dcc_alignment * 4" was determined by trial and error. 13137ec681f3Smrg */ 13147ec681f3Smrg surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4); 13157ec681f3Smrg } 13167ec681f3Smrg 13177ec681f3Smrg /* Make sure HTILE covers the whole miptree, because the shader reads 13187ec681f3Smrg * TC-compatible HTILE even for levels where it's disabled by DB. 13197ec681f3Smrg */ 13207ec681f3Smrg if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) && 13217ec681f3Smrg surf->meta_size && config->info.levels > 1) { 13227ec681f3Smrg /* MSAA can't occur with levels > 1, so ignore the sample count. */ 13237ec681f3Smrg const unsigned total_pixels = surf->surf_size / surf->bpe; 13247ec681f3Smrg const unsigned htile_block_size = 8 * 8; 13257ec681f3Smrg const unsigned htile_element_size = 4; 13267ec681f3Smrg 13277ec681f3Smrg surf->meta_size = (total_pixels / htile_block_size) * htile_element_size; 13287ec681f3Smrg surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2); 13297ec681f3Smrg } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) { 13307ec681f3Smrg /* Unset this if HTILE is not present. */ 13317ec681f3Smrg surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 13327ec681f3Smrg } 13337ec681f3Smrg 13347ec681f3Smrg surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED; 13357ec681f3Smrg surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || 13367ec681f3Smrg surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER; 13377ec681f3Smrg 13387ec681f3Smrg /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 13397ec681f3Smrg * used at the same time. This case is not currently expected to occur 13407ec681f3Smrg * because we don't use rotated. Enforce this restriction on all chips 13417ec681f3Smrg * to facilitate testing. 13427ec681f3Smrg */ 13437ec681f3Smrg if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) { 13447ec681f3Smrg assert(!"rotate micro tile mode is unsupported"); 13457ec681f3Smrg return ADDR_ERROR; 13467ec681f3Smrg } 13477ec681f3Smrg 13487ec681f3Smrg ac_compute_cmask(info, config, surf); 13497ec681f3Smrg return 0; 135001e04c3fSmrg} 135101e04c3fSmrg 135201e04c3fSmrg/* This is only called when expecting a tiled layout. */ 13537ec681f3Smrgstatic int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info, 13547ec681f3Smrg struct radeon_surf *surf, 13557ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask, 13567ec681f3Smrg AddrSwizzleMode *swizzle_mode) 13577ec681f3Smrg{ 13587ec681f3Smrg ADDR_E_RETURNCODE ret; 13597ec681f3Smrg ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0}; 13607ec681f3Smrg ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0}; 13617ec681f3Smrg 13627ec681f3Smrg sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT); 13637ec681f3Smrg sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT); 13647ec681f3Smrg 13657ec681f3Smrg sin.flags = in->flags; 13667ec681f3Smrg sin.resourceType = in->resourceType; 13677ec681f3Smrg sin.format = in->format; 13687ec681f3Smrg sin.resourceLoction = ADDR_RSRC_LOC_INVIS; 13697ec681f3Smrg /* TODO: We could allow some of these: */ 13707ec681f3Smrg sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */ 13717ec681f3Smrg sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */ 13727ec681f3Smrg sin.bpp = in->bpp; 13737ec681f3Smrg sin.width = in->width; 13747ec681f3Smrg sin.height = in->height; 13757ec681f3Smrg sin.numSlices = in->numSlices; 13767ec681f3Smrg sin.numMipLevels = in->numMipLevels; 13777ec681f3Smrg sin.numSamples = in->numSamples; 13787ec681f3Smrg sin.numFrags = in->numFrags; 13797ec681f3Smrg 13807ec681f3Smrg if (is_fmask) { 13817ec681f3Smrg sin.flags.display = 0; 13827ec681f3Smrg sin.flags.color = 0; 13837ec681f3Smrg sin.flags.fmask = 1; 13847ec681f3Smrg } 13857ec681f3Smrg 13867ec681f3Smrg /* With PRT images we want to force 64 KiB block size so that the image 13877ec681f3Smrg * created is consistent with the format properties returned in Vulkan 13887ec681f3Smrg * independent of the image. */ 13897ec681f3Smrg if (sin.flags.prt) { 13907ec681f3Smrg sin.forbiddenBlock.macroThin4KB = 1; 13917ec681f3Smrg sin.forbiddenBlock.macroThick4KB = 1; 13927ec681f3Smrg sin.forbiddenBlock.linear = 1; 13937ec681f3Smrg } 13947ec681f3Smrg 13957ec681f3Smrg if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) { 13967ec681f3Smrg sin.forbiddenBlock.linear = 1; 13977ec681f3Smrg 13987ec681f3Smrg if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) 13997ec681f3Smrg sin.preferredSwSet.sw_D = 1; 14007ec681f3Smrg else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD) 14017ec681f3Smrg sin.preferredSwSet.sw_S = 1; 14027ec681f3Smrg else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH) 14037ec681f3Smrg sin.preferredSwSet.sw_Z = 1; 14047ec681f3Smrg else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) 14057ec681f3Smrg sin.preferredSwSet.sw_R = 1; 14067ec681f3Smrg } 14077ec681f3Smrg 14087ec681f3Smrg if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) { 14097ec681f3Smrg /* 3D textures should use S swizzle modes for the best performance. 14107ec681f3Smrg * THe only exception is 3D render targets, which prefer 64KB_D_X. 14117ec681f3Smrg * 14127ec681f3Smrg * 3D texture sampler performance with a very large 3D texture: 14137ec681f3Smrg * ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off) 14147ec681f3Smrg * ADDR_SW_64KB_Z_X = 25 FPS 14157ec681f3Smrg * ADDR_SW_64KB_D_X = 53 FPS 14167ec681f3Smrg * ADDR_SW_4KB_S = 53 FPS 14177ec681f3Smrg * ADDR_SW_64KB_S = 53 FPS 14187ec681f3Smrg * ADDR_SW_64KB_S_T = 61 FPS 14197ec681f3Smrg * ADDR_SW_4KB_S_X = 63 FPS 14207ec681f3Smrg * ADDR_SW_64KB_S_X = 62 FPS 14217ec681f3Smrg */ 14227ec681f3Smrg sin.preferredSwSet.sw_S = 1; 14237ec681f3Smrg } 14247ec681f3Smrg 14257ec681f3Smrg ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout); 14267ec681f3Smrg if (ret != ADDR_OK) 14277ec681f3Smrg return ret; 14287ec681f3Smrg 14297ec681f3Smrg *swizzle_mode = sout.swizzleMode; 14307ec681f3Smrg return 0; 14317ec681f3Smrg} 14327ec681f3Smrg 14337ec681f3Smrgstatic bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode) 14347ec681f3Smrg{ 14357ec681f3Smrg if (info->chip_class >= GFX10) 14367ec681f3Smrg return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X; 14377ec681f3Smrg 14387ec681f3Smrg return sw_mode != ADDR_SW_LINEAR; 14397ec681f3Smrg} 14407ec681f3Smrg 14417ec681f3SmrgASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info, 14427ec681f3Smrg const struct radeon_surf *surf) 144301e04c3fSmrg{ 14447ec681f3Smrg if (info->chip_class <= GFX9) { 14457ec681f3Smrg /* Only independent 64B blocks are supported. */ 14467ec681f3Smrg return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks && 14477ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B; 14487ec681f3Smrg } 14497ec681f3Smrg 14507ec681f3Smrg if (info->family == CHIP_NAVI10) { 14517ec681f3Smrg /* Only independent 128B blocks are supported. */ 14527ec681f3Smrg return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks && 14537ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B; 14547ec681f3Smrg } 14557ec681f3Smrg 14567ec681f3Smrg if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) { 14577ec681f3Smrg /* Either 64B or 128B can be used, but not both. 14587ec681f3Smrg * If 64B is used, DCC image stores are unsupported. 14597ec681f3Smrg */ 14607ec681f3Smrg return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks && 14617ec681f3Smrg (!surf->u.gfx9.color.dcc.independent_64B_blocks || 14627ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) && 14637ec681f3Smrg (!surf->u.gfx9.color.dcc.independent_128B_blocks || 14647ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B); 14657ec681f3Smrg } 14667ec681f3Smrg 14677ec681f3Smrg /* 128B is recommended, but 64B can be set too if needed for 4K by DCN. 14687ec681f3Smrg * Since there is no reason to ever disable 128B, require it. 14697ec681f3Smrg * If 64B is used, DCC image stores are unsupported. 14707ec681f3Smrg */ 14717ec681f3Smrg return surf->u.gfx9.color.dcc.independent_128B_blocks && 14727ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B; 147301e04c3fSmrg} 147401e04c3fSmrg 14757ec681f3Smrgstatic bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info, 14767ec681f3Smrg const struct ac_surf_config *config) 147701e04c3fSmrg{ 14787ec681f3Smrg assert(info->chip_class >= GFX10); 14797ec681f3Smrg 14807ec681f3Smrg /* Older kernels have buggy DAL. */ 14817ec681f3Smrg if (info->drm_minor <= 43) 14827ec681f3Smrg return true; 14837ec681f3Smrg 14847ec681f3Smrg /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */ 14857ec681f3Smrg return config->info.width > 2560 || config->info.height > 2560; 148601e04c3fSmrg} 148701e04c3fSmrg 14887ec681f3Smrgvoid ac_modifier_max_extent(const struct radeon_info *info, 14897ec681f3Smrg uint64_t modifier, uint32_t *width, uint32_t *height) 149001e04c3fSmrg{ 14917ec681f3Smrg if (ac_modifier_has_dcc(modifier)) { 14927ec681f3Smrg bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); 14937ec681f3Smrg 14947ec681f3Smrg if (info->chip_class >= GFX10 && !independent_64B_blocks) { 14957ec681f3Smrg /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */ 14967ec681f3Smrg *width = 2560; 14977ec681f3Smrg *height = 2560; 14987ec681f3Smrg } else { 14997ec681f3Smrg /* DCC is not supported on surfaces above resolutions af 5760. */ 15007ec681f3Smrg *width = 5760; 15017ec681f3Smrg *height = 5760; 15027ec681f3Smrg } 15037ec681f3Smrg } else { 15047ec681f3Smrg /* Non-dcc modifiers */ 15057ec681f3Smrg *width = 16384; 15067ec681f3Smrg *height = 16384; 15077ec681f3Smrg } 150801e04c3fSmrg} 150901e04c3fSmrg 15107ec681f3Smrgstatic bool is_dcc_supported_by_DCN(const struct radeon_info *info, 15117ec681f3Smrg const struct ac_surf_config *config, 15127ec681f3Smrg const struct radeon_surf *surf, bool rb_aligned, 15137ec681f3Smrg bool pipe_aligned) 151401e04c3fSmrg{ 15157ec681f3Smrg if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit) 15167ec681f3Smrg return false; 15177ec681f3Smrg 15187ec681f3Smrg /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */ 15197ec681f3Smrg if (surf->bpe != 4) 15207ec681f3Smrg return false; 15217ec681f3Smrg 15227ec681f3Smrg /* Handle unaligned DCC. */ 15237ec681f3Smrg if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned)) 15247ec681f3Smrg return false; 15257ec681f3Smrg 15267ec681f3Smrg /* Big resolutions don't support DCC. */ 15277ec681f3Smrg if (config->info.width > 5760 || config->info.height > 5760) 15287ec681f3Smrg return false; 15297ec681f3Smrg 15307ec681f3Smrg switch (info->chip_class) { 15317ec681f3Smrg case GFX9: 15327ec681f3Smrg /* There are more constraints, but we always set 15337ec681f3Smrg * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B, 15347ec681f3Smrg * which always works. 15357ec681f3Smrg */ 15367ec681f3Smrg assert(surf->u.gfx9.color.dcc.independent_64B_blocks && 15377ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B); 15387ec681f3Smrg return true; 15397ec681f3Smrg case GFX10: 15407ec681f3Smrg case GFX10_3: 15417ec681f3Smrg /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */ 15427ec681f3Smrg if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks) 15437ec681f3Smrg return false; 15447ec681f3Smrg 15457ec681f3Smrg return (!gfx10_DCN_requires_independent_64B_blocks(info, config) || 15467ec681f3Smrg (surf->u.gfx9.color.dcc.independent_64B_blocks && 15477ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B)); 15487ec681f3Smrg default: 15497ec681f3Smrg unreachable("unhandled chip"); 15507ec681f3Smrg return false; 15517ec681f3Smrg } 15527ec681f3Smrg} 155301e04c3fSmrg 15547ec681f3Smrgstatic void ac_copy_dcc_equation(const struct radeon_info *info, 15557ec681f3Smrg ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc, 15567ec681f3Smrg struct gfx9_meta_equation *equation) 15577ec681f3Smrg{ 15587ec681f3Smrg equation->meta_block_width = dcc->metaBlkWidth; 15597ec681f3Smrg equation->meta_block_height = dcc->metaBlkHeight; 15607ec681f3Smrg equation->meta_block_depth = dcc->metaBlkDepth; 15617ec681f3Smrg 15627ec681f3Smrg if (info->chip_class >= GFX10) { 15637ec681f3Smrg /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */ 15647ec681f3Smrg for (unsigned i = 0; i < 4; i++) 15657ec681f3Smrg assert(dcc->equation.gfx10_bits[i] == 0); 15667ec681f3Smrg 15677ec681f3Smrg for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++) 15687ec681f3Smrg assert(dcc->equation.gfx10_bits[i] == 0); 15697ec681f3Smrg 15707ec681f3Smrg memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4, 15717ec681f3Smrg sizeof(equation->u.gfx10_bits)); 15727ec681f3Smrg } else { 15737ec681f3Smrg assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit)); 15747ec681f3Smrg 15757ec681f3Smrg equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits; 15767ec681f3Smrg equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits; 15777ec681f3Smrg for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) { 15787ec681f3Smrg for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) { 15797ec681f3Smrg equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim; 15807ec681f3Smrg equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord; 15817ec681f3Smrg } 15827ec681f3Smrg } 15837ec681f3Smrg } 15847ec681f3Smrg} 15857ec681f3Smrg 15867ec681f3Smrgstatic void ac_copy_cmask_equation(const struct radeon_info *info, 15877ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask, 15887ec681f3Smrg struct gfx9_meta_equation *equation) 15897ec681f3Smrg{ 15907ec681f3Smrg equation->meta_block_width = cmask->metaBlkWidth; 15917ec681f3Smrg equation->meta_block_height = cmask->metaBlkHeight; 15927ec681f3Smrg equation->meta_block_depth = 1; 15937ec681f3Smrg 15947ec681f3Smrg if (info->chip_class == GFX9) { 15957ec681f3Smrg assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit)); 15967ec681f3Smrg 15977ec681f3Smrg equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits; 15987ec681f3Smrg equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits; 15997ec681f3Smrg for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) { 16007ec681f3Smrg for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) { 16017ec681f3Smrg equation->u.gfx9.bit[b].coord[c].dim = cmask->equation.gfx9.bit[b].coord[c].dim; 16027ec681f3Smrg equation->u.gfx9.bit[b].coord[c].ord = cmask->equation.gfx9.bit[b].coord[c].ord; 16037ec681f3Smrg } 16047ec681f3Smrg } 16057ec681f3Smrg } 16067ec681f3Smrg} 16077ec681f3Smrg 16087ec681f3Smrgstatic void ac_copy_htile_equation(const struct radeon_info *info, 16097ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile, 16107ec681f3Smrg struct gfx9_meta_equation *equation) 16117ec681f3Smrg{ 16127ec681f3Smrg equation->meta_block_width = htile->metaBlkWidth; 16137ec681f3Smrg equation->meta_block_height = htile->metaBlkHeight; 16147ec681f3Smrg 16157ec681f3Smrg /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */ 16167ec681f3Smrg for (unsigned i = 0; i < 8; i++) 16177ec681f3Smrg assert(htile->equation.gfx10_bits[i] == 0); 16187ec681f3Smrg 16197ec681f3Smrg for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++) 16207ec681f3Smrg assert(htile->equation.gfx10_bits[i] == 0); 16217ec681f3Smrg 16227ec681f3Smrg memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8, 16237ec681f3Smrg sizeof(equation->u.gfx10_bits)); 16247ec681f3Smrg} 16257ec681f3Smrg 16267ec681f3Smrgstatic int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info, 16277ec681f3Smrg const struct ac_surf_config *config, struct radeon_surf *surf, 16287ec681f3Smrg bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) 16297ec681f3Smrg{ 16307ec681f3Smrg ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 16317ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; 16327ec681f3Smrg ADDR_E_RETURNCODE ret; 16337ec681f3Smrg 16347ec681f3Smrg out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); 16357ec681f3Smrg out.pMipInfo = mip_info; 16367ec681f3Smrg 16377ec681f3Smrg ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out); 16387ec681f3Smrg if (ret != ADDR_OK) 16397ec681f3Smrg return ret; 16407ec681f3Smrg 16417ec681f3Smrg if (in->flags.prt) { 16427ec681f3Smrg surf->prt_tile_width = out.blockWidth; 16437ec681f3Smrg surf->prt_tile_height = out.blockHeight; 16447ec681f3Smrg 16457ec681f3Smrg for (surf->first_mip_tail_level = 0; surf->first_mip_tail_level < in->numMipLevels; 16467ec681f3Smrg ++surf->first_mip_tail_level) { 16477ec681f3Smrg if(mip_info[surf->first_mip_tail_level].pitch < out.blockWidth || 16487ec681f3Smrg mip_info[surf->first_mip_tail_level].height < out.blockHeight) 16497ec681f3Smrg break; 16507ec681f3Smrg } 16517ec681f3Smrg 16527ec681f3Smrg for (unsigned i = 0; i < in->numMipLevels; i++) { 16537ec681f3Smrg surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset; 16547ec681f3Smrg 16557ec681f3Smrg if (info->chip_class >= GFX10) 16567ec681f3Smrg surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch; 16577ec681f3Smrg else 16587ec681f3Smrg surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch; 16597ec681f3Smrg } 16607ec681f3Smrg } 16617ec681f3Smrg 16627ec681f3Smrg if (in->flags.stencil) { 16637ec681f3Smrg surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode; 16647ec681f3Smrg surf->u.gfx9.zs.stencil_epitch = 16657ec681f3Smrg out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1; 16667ec681f3Smrg surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign)); 16677ec681f3Smrg surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign); 16687ec681f3Smrg surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize; 16697ec681f3Smrg return 0; 16707ec681f3Smrg } 16717ec681f3Smrg 16727ec681f3Smrg surf->u.gfx9.swizzle_mode = in->swizzleMode; 16737ec681f3Smrg surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1; 16747ec681f3Smrg 16757ec681f3Smrg /* CMASK fast clear uses these even if FMASK isn't allocated. 16767ec681f3Smrg * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4. 16777ec681f3Smrg */ 16787ec681f3Smrg if (!in->flags.depth) { 16797ec681f3Smrg surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3; 16807ec681f3Smrg surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch; 16817ec681f3Smrg } 16827ec681f3Smrg 16837ec681f3Smrg surf->u.gfx9.surf_slice_size = out.sliceSize; 16847ec681f3Smrg surf->u.gfx9.surf_pitch = out.pitch; 16857ec681f3Smrg surf->u.gfx9.surf_height = out.height; 16867ec681f3Smrg surf->surf_size = out.surfSize; 16877ec681f3Smrg surf->surf_alignment_log2 = util_logbase2(out.baseAlign); 16887ec681f3Smrg 16897ec681f3Smrg if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch && 16907ec681f3Smrg surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) { 16917ec681f3Smrg /* Adjust surf_pitch to be in elements units not in pixels */ 16927ec681f3Smrg surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe); 16937ec681f3Smrg surf->u.gfx9.epitch = 16947ec681f3Smrg MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1); 16957ec681f3Smrg /* The surface is really a surf->bpe bytes per pixel surface even if we 16967ec681f3Smrg * use it as a surf->bpe bytes per element one. 16977ec681f3Smrg * Adjust surf_slice_size and surf_size to reflect the change 16987ec681f3Smrg * made to surf_pitch. 16997ec681f3Smrg */ 17007ec681f3Smrg surf->u.gfx9.surf_slice_size = 17017ec681f3Smrg MAX2(surf->u.gfx9.surf_slice_size, 17027ec681f3Smrg surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w); 17037ec681f3Smrg surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices; 17047ec681f3Smrg } 17057ec681f3Smrg 17067ec681f3Smrg if (in->swizzleMode == ADDR_SW_LINEAR) { 17077ec681f3Smrg for (unsigned i = 0; i < in->numMipLevels; i++) { 17087ec681f3Smrg surf->u.gfx9.offset[i] = mip_info[i].offset; 17097ec681f3Smrg surf->u.gfx9.pitch[i] = mip_info[i].pitch; 17107ec681f3Smrg } 17117ec681f3Smrg } 17127ec681f3Smrg 17137ec681f3Smrg surf->u.gfx9.base_mip_width = mip_info[0].pitch; 17147ec681f3Smrg surf->u.gfx9.base_mip_height = mip_info[0].height; 17157ec681f3Smrg 17167ec681f3Smrg if (in->flags.depth) { 17177ec681f3Smrg assert(in->swizzleMode != ADDR_SW_LINEAR); 17187ec681f3Smrg 17197ec681f3Smrg if (surf->flags & RADEON_SURF_NO_HTILE) 17207ec681f3Smrg return 0; 17217ec681f3Smrg 17227ec681f3Smrg /* HTILE */ 17237ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 17247ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 17257ec681f3Smrg ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 17267ec681f3Smrg 17277ec681f3Smrg hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); 17287ec681f3Smrg hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); 17297ec681f3Smrg hout.pMipInfo = meta_mip_info; 17307ec681f3Smrg 17317ec681f3Smrg assert(in->flags.metaPipeUnaligned == 0); 17327ec681f3Smrg assert(in->flags.metaRbUnaligned == 0); 17337ec681f3Smrg 17347ec681f3Smrg hin.hTileFlags.pipeAligned = 1; 17357ec681f3Smrg hin.hTileFlags.rbAligned = 1; 17367ec681f3Smrg hin.depthFlags = in->flags; 17377ec681f3Smrg hin.swizzleMode = in->swizzleMode; 17387ec681f3Smrg hin.unalignedWidth = in->width; 17397ec681f3Smrg hin.unalignedHeight = in->height; 17407ec681f3Smrg hin.numSlices = in->numSlices; 17417ec681f3Smrg hin.numMipLevels = in->numMipLevels; 17427ec681f3Smrg hin.firstMipIdInTail = out.firstMipIdInTail; 17437ec681f3Smrg 17447ec681f3Smrg ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout); 17457ec681f3Smrg if (ret != ADDR_OK) 17467ec681f3Smrg return ret; 17477ec681f3Smrg 17487ec681f3Smrg surf->meta_size = hout.htileBytes; 17497ec681f3Smrg surf->meta_slice_size = hout.sliceSize; 17507ec681f3Smrg surf->meta_alignment_log2 = util_logbase2(hout.baseAlign); 17517ec681f3Smrg surf->meta_pitch = hout.pitch; 17527ec681f3Smrg surf->num_meta_levels = in->numMipLevels; 17537ec681f3Smrg 17547ec681f3Smrg for (unsigned i = 0; i < in->numMipLevels; i++) { 17557ec681f3Smrg surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 17567ec681f3Smrg surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 17577ec681f3Smrg 17587ec681f3Smrg if (meta_mip_info[i].inMiptail) { 17597ec681f3Smrg /* GFX10 can only compress the first level 17607ec681f3Smrg * in the mip tail. 17617ec681f3Smrg */ 17627ec681f3Smrg surf->num_meta_levels = i + 1; 17637ec681f3Smrg break; 17647ec681f3Smrg } 17657ec681f3Smrg } 17667ec681f3Smrg 17677ec681f3Smrg if (!surf->num_meta_levels) 17687ec681f3Smrg surf->meta_size = 0; 17697ec681f3Smrg 17707ec681f3Smrg if (info->chip_class >= GFX10) 17717ec681f3Smrg ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation); 17727ec681f3Smrg return 0; 17737ec681f3Smrg } 17747ec681f3Smrg 17757ec681f3Smrg { 17767ec681f3Smrg /* Compute tile swizzle for the color surface. 17777ec681f3Smrg * All *_X and *_T modes can use the swizzle. 17787ec681f3Smrg */ 17797ec681f3Smrg if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail && 17807ec681f3Smrg !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) { 17817ec681f3Smrg ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 17827ec681f3Smrg ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 17837ec681f3Smrg 17847ec681f3Smrg xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 17857ec681f3Smrg xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 17867ec681f3Smrg 17877ec681f3Smrg xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 17887ec681f3Smrg xin.flags = in->flags; 17897ec681f3Smrg xin.swizzleMode = in->swizzleMode; 17907ec681f3Smrg xin.resourceType = in->resourceType; 17917ec681f3Smrg xin.format = in->format; 17927ec681f3Smrg xin.numSamples = in->numSamples; 17937ec681f3Smrg xin.numFrags = in->numFrags; 17947ec681f3Smrg 17957ec681f3Smrg ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 17967ec681f3Smrg if (ret != ADDR_OK) 17977ec681f3Smrg return ret; 17987ec681f3Smrg 17997ec681f3Smrg assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 18007ec681f3Smrg surf->tile_swizzle = xout.pipeBankXor; 18017ec681f3Smrg } 18027ec681f3Smrg 18037ec681f3Smrg /* DCC */ 18047ec681f3Smrg if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed && 18057ec681f3Smrg is_dcc_supported_by_CB(info, in->swizzleMode) && 18067ec681f3Smrg (!in->flags.display || 18077ec681f3Smrg is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned, 18087ec681f3Smrg !in->flags.metaPipeUnaligned)) && 18097ec681f3Smrg (surf->modifier == DRM_FORMAT_MOD_INVALID || 18107ec681f3Smrg ac_modifier_has_dcc(surf->modifier))) { 18117ec681f3Smrg ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; 18127ec681f3Smrg ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; 18137ec681f3Smrg ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 18147ec681f3Smrg 18157ec681f3Smrg din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); 18167ec681f3Smrg dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); 18177ec681f3Smrg dout.pMipInfo = meta_mip_info; 18187ec681f3Smrg 18197ec681f3Smrg din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; 18207ec681f3Smrg din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; 18217ec681f3Smrg din.resourceType = in->resourceType; 18227ec681f3Smrg din.swizzleMode = in->swizzleMode; 18237ec681f3Smrg din.bpp = in->bpp; 18247ec681f3Smrg din.unalignedWidth = in->width; 18257ec681f3Smrg din.unalignedHeight = in->height; 18267ec681f3Smrg din.numSlices = in->numSlices; 18277ec681f3Smrg din.numFrags = in->numFrags; 18287ec681f3Smrg din.numMipLevels = in->numMipLevels; 18297ec681f3Smrg din.dataSurfaceSize = out.surfSize; 18307ec681f3Smrg din.firstMipIdInTail = out.firstMipIdInTail; 18317ec681f3Smrg 18327ec681f3Smrg ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 18337ec681f3Smrg if (ret != ADDR_OK) 18347ec681f3Smrg return ret; 18357ec681f3Smrg 18367ec681f3Smrg surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned; 18377ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; 18387ec681f3Smrg surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth; 18397ec681f3Smrg surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight; 18407ec681f3Smrg surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth; 18417ec681f3Smrg surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1; 18427ec681f3Smrg surf->u.gfx9.color.dcc_height = dout.height; 18437ec681f3Smrg surf->meta_size = dout.dccRamSize; 18447ec681f3Smrg surf->meta_slice_size = dout.dccRamSliceSize; 18457ec681f3Smrg surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 18467ec681f3Smrg surf->num_meta_levels = in->numMipLevels; 18477ec681f3Smrg 18487ec681f3Smrg /* Disable DCC for levels that are in the mip tail. 18497ec681f3Smrg * 18507ec681f3Smrg * There are two issues that this is intended to 18517ec681f3Smrg * address: 18527ec681f3Smrg * 18537ec681f3Smrg * 1. Multiple mip levels may share a cache line. This 18547ec681f3Smrg * can lead to corruption when switching between 18557ec681f3Smrg * rendering to different mip levels because the 18567ec681f3Smrg * RBs don't maintain coherency. 18577ec681f3Smrg * 18587ec681f3Smrg * 2. Texturing with metadata after rendering sometimes 18597ec681f3Smrg * fails with corruption, probably for a similar 18607ec681f3Smrg * reason. 18617ec681f3Smrg * 18627ec681f3Smrg * Working around these issues for all levels in the 18637ec681f3Smrg * mip tail may be overly conservative, but it's what 18647ec681f3Smrg * Vulkan does. 18657ec681f3Smrg * 18667ec681f3Smrg * Alternative solutions that also work but are worse: 18677ec681f3Smrg * - Disable DCC entirely. 18687ec681f3Smrg * - Flush TC L2 after rendering. 18697ec681f3Smrg */ 18707ec681f3Smrg for (unsigned i = 0; i < in->numMipLevels; i++) { 18717ec681f3Smrg surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 18727ec681f3Smrg surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 18737ec681f3Smrg 18747ec681f3Smrg if (meta_mip_info[i].inMiptail) { 18757ec681f3Smrg /* GFX10 can only compress the first level 18767ec681f3Smrg * in the mip tail. 18777ec681f3Smrg * 18787ec681f3Smrg * TODO: Try to do the same thing for gfx9 18797ec681f3Smrg * if there are no regressions. 18807ec681f3Smrg */ 18817ec681f3Smrg if (info->chip_class >= GFX10) 18827ec681f3Smrg surf->num_meta_levels = i + 1; 18837ec681f3Smrg else 18847ec681f3Smrg surf->num_meta_levels = i; 18857ec681f3Smrg break; 18867ec681f3Smrg } 18877ec681f3Smrg } 18887ec681f3Smrg 18897ec681f3Smrg if (!surf->num_meta_levels) 18907ec681f3Smrg surf->meta_size = 0; 18917ec681f3Smrg 18927ec681f3Smrg surf->u.gfx9.color.display_dcc_size = surf->meta_size; 18937ec681f3Smrg surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2; 18947ec681f3Smrg surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max; 18957ec681f3Smrg surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height; 18967ec681f3Smrg 18977ec681f3Smrg if (in->resourceType == ADDR_RSRC_TEX_2D) 18987ec681f3Smrg ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation); 18997ec681f3Smrg 19007ec681f3Smrg /* Compute displayable DCC. */ 19017ec681f3Smrg if (((in->flags.display && info->use_display_dcc_with_retile_blit) || 19027ec681f3Smrg ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) { 19037ec681f3Smrg /* Compute displayable DCC info. */ 19047ec681f3Smrg din.dccKeyFlags.pipeAligned = 0; 19057ec681f3Smrg din.dccKeyFlags.rbAligned = 0; 19067ec681f3Smrg 19077ec681f3Smrg assert(din.numSlices == 1); 19087ec681f3Smrg assert(din.numMipLevels == 1); 19097ec681f3Smrg assert(din.numFrags == 1); 19107ec681f3Smrg assert(surf->tile_swizzle == 0); 19117ec681f3Smrg assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned); 19127ec681f3Smrg 19137ec681f3Smrg ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 19147ec681f3Smrg if (ret != ADDR_OK) 19157ec681f3Smrg return ret; 19167ec681f3Smrg 19177ec681f3Smrg surf->u.gfx9.color.display_dcc_size = dout.dccRamSize; 19187ec681f3Smrg surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 19197ec681f3Smrg surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1; 19207ec681f3Smrg surf->u.gfx9.color.display_dcc_height = dout.height; 19217ec681f3Smrg assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size); 19227ec681f3Smrg 19237ec681f3Smrg ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation); 19247ec681f3Smrg surf->u.gfx9.color.dcc.display_equation_valid = true; 19257ec681f3Smrg } 19267ec681f3Smrg } 19277ec681f3Smrg 19287ec681f3Smrg /* FMASK */ 19297ec681f3Smrg if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) { 19307ec681f3Smrg ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0}; 19317ec681f3Smrg ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 19327ec681f3Smrg 19337ec681f3Smrg fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT); 19347ec681f3Smrg fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); 19357ec681f3Smrg 19367ec681f3Smrg ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode); 19377ec681f3Smrg if (ret != ADDR_OK) 19387ec681f3Smrg return ret; 19397ec681f3Smrg 19407ec681f3Smrg fin.unalignedWidth = in->width; 19417ec681f3Smrg fin.unalignedHeight = in->height; 19427ec681f3Smrg fin.numSlices = in->numSlices; 19437ec681f3Smrg fin.numSamples = in->numSamples; 19447ec681f3Smrg fin.numFrags = in->numFrags; 19457ec681f3Smrg 19467ec681f3Smrg ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout); 19477ec681f3Smrg if (ret != ADDR_OK) 19487ec681f3Smrg return ret; 19497ec681f3Smrg 19507ec681f3Smrg surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode; 19517ec681f3Smrg surf->u.gfx9.color.fmask_epitch = fout.pitch - 1; 19527ec681f3Smrg surf->fmask_size = fout.fmaskBytes; 19537ec681f3Smrg surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign); 19547ec681f3Smrg surf->fmask_slice_size = fout.sliceSize; 19557ec681f3Smrg 19567ec681f3Smrg /* Compute tile swizzle for the FMASK surface. */ 19577ec681f3Smrg if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T && 19587ec681f3Smrg !(surf->flags & RADEON_SURF_SHAREABLE)) { 19597ec681f3Smrg ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 19607ec681f3Smrg ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 19617ec681f3Smrg 19627ec681f3Smrg xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 19637ec681f3Smrg xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 19647ec681f3Smrg 19657ec681f3Smrg /* This counter starts from 1 instead of 0. */ 19667ec681f3Smrg xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 19677ec681f3Smrg xin.flags = in->flags; 19687ec681f3Smrg xin.swizzleMode = fin.swizzleMode; 19697ec681f3Smrg xin.resourceType = in->resourceType; 19707ec681f3Smrg xin.format = in->format; 19717ec681f3Smrg xin.numSamples = in->numSamples; 19727ec681f3Smrg xin.numFrags = in->numFrags; 19737ec681f3Smrg 19747ec681f3Smrg ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 19757ec681f3Smrg if (ret != ADDR_OK) 19767ec681f3Smrg return ret; 19777ec681f3Smrg 19787ec681f3Smrg assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8)); 19797ec681f3Smrg surf->fmask_tile_swizzle = xout.pipeBankXor; 19807ec681f3Smrg } 19817ec681f3Smrg } 19827ec681f3Smrg 19837ec681f3Smrg /* CMASK -- on GFX10 only for FMASK */ 19847ec681f3Smrg if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D && 19857ec681f3Smrg ((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 && 19867ec681f3Smrg in->flags.metaRbUnaligned == 0) || 19877ec681f3Smrg (surf->fmask_size && in->numSamples >= 2))) { 19887ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; 19897ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; 19907ec681f3Smrg ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 19917ec681f3Smrg 19927ec681f3Smrg cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); 19937ec681f3Smrg cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); 19947ec681f3Smrg cout.pMipInfo = meta_mip_info; 19957ec681f3Smrg 19967ec681f3Smrg assert(in->flags.metaPipeUnaligned == 0); 19977ec681f3Smrg assert(in->flags.metaRbUnaligned == 0); 19987ec681f3Smrg 19997ec681f3Smrg cin.cMaskFlags.pipeAligned = 1; 20007ec681f3Smrg cin.cMaskFlags.rbAligned = 1; 20017ec681f3Smrg cin.resourceType = in->resourceType; 20027ec681f3Smrg cin.unalignedWidth = in->width; 20037ec681f3Smrg cin.unalignedHeight = in->height; 20047ec681f3Smrg cin.numSlices = in->numSlices; 20057ec681f3Smrg cin.numMipLevels = in->numMipLevels; 20067ec681f3Smrg cin.firstMipIdInTail = out.firstMipIdInTail; 20077ec681f3Smrg 20087ec681f3Smrg if (in->numSamples > 1) 20097ec681f3Smrg cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode; 20107ec681f3Smrg else 20117ec681f3Smrg cin.swizzleMode = in->swizzleMode; 20127ec681f3Smrg 20137ec681f3Smrg ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout); 20147ec681f3Smrg if (ret != ADDR_OK) 20157ec681f3Smrg return ret; 20167ec681f3Smrg 20177ec681f3Smrg surf->cmask_size = cout.cmaskBytes; 20187ec681f3Smrg surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign); 20197ec681f3Smrg surf->cmask_slice_size = cout.sliceSize; 20207ec681f3Smrg surf->cmask_pitch = cout.pitch; 20217ec681f3Smrg surf->cmask_height = cout.height; 20227ec681f3Smrg surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset; 20237ec681f3Smrg surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize; 20247ec681f3Smrg 20257ec681f3Smrg ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation); 20267ec681f3Smrg } 20277ec681f3Smrg } 20287ec681f3Smrg 20297ec681f3Smrg return 0; 20307ec681f3Smrg} 20317ec681f3Smrg 20327ec681f3Smrgstatic int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info, 20337ec681f3Smrg const struct ac_surf_config *config, enum radeon_surf_mode mode, 20347ec681f3Smrg struct radeon_surf *surf) 20357ec681f3Smrg{ 20367ec681f3Smrg bool compressed; 20377ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 20387ec681f3Smrg int r; 20397ec681f3Smrg 20407ec681f3Smrg AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); 20417ec681f3Smrg 20427ec681f3Smrg compressed = surf->blk_w == 4 && surf->blk_h == 4; 20437ec681f3Smrg 20447ec681f3Smrg /* The format must be set correctly for the allocation of compressed 20457ec681f3Smrg * textures to work. In other cases, setting the bpp is sufficient. */ 20467ec681f3Smrg if (compressed) { 20477ec681f3Smrg switch (surf->bpe) { 20487ec681f3Smrg case 8: 20497ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_BC1; 20507ec681f3Smrg break; 20517ec681f3Smrg case 16: 20527ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_BC3; 20537ec681f3Smrg break; 20547ec681f3Smrg default: 20557ec681f3Smrg assert(0); 20567ec681f3Smrg } 20577ec681f3Smrg } else { 20587ec681f3Smrg switch (surf->bpe) { 20597ec681f3Smrg case 1: 20607ec681f3Smrg assert(!(surf->flags & RADEON_SURF_ZBUFFER)); 20617ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_8; 20627ec681f3Smrg break; 20637ec681f3Smrg case 2: 20647ec681f3Smrg assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER)); 20657ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_16; 20667ec681f3Smrg break; 20677ec681f3Smrg case 4: 20687ec681f3Smrg assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER)); 20697ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_32; 20707ec681f3Smrg break; 20717ec681f3Smrg case 8: 20727ec681f3Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 20737ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32; 20747ec681f3Smrg break; 20757ec681f3Smrg case 12: 20767ec681f3Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 20777ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32_32; 20787ec681f3Smrg break; 20797ec681f3Smrg case 16: 20807ec681f3Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 20817ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32; 20827ec681f3Smrg break; 20837ec681f3Smrg default: 20847ec681f3Smrg assert(0); 20857ec681f3Smrg } 20867ec681f3Smrg AddrSurfInfoIn.bpp = surf->bpe * 8; 20877ec681f3Smrg } 20887ec681f3Smrg 20897ec681f3Smrg bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 20907ec681f3Smrg AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET); 20917ec681f3Smrg AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 20927ec681f3Smrg AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 20937ec681f3Smrg /* flags.texture currently refers to TC-compatible HTILE */ 20947ec681f3Smrg AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; 20957ec681f3Smrg AddrSurfInfoIn.flags.opt4space = 1; 20967ec681f3Smrg AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0; 20977ec681f3Smrg 20987ec681f3Smrg AddrSurfInfoIn.numMipLevels = config->info.levels; 20997ec681f3Smrg AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 21007ec681f3Smrg AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples; 21017ec681f3Smrg 21027ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) 21037ec681f3Smrg AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 21047ec681f3Smrg 21057ec681f3Smrg /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures 21067ec681f3Smrg * as 2D to avoid having shader variants for 1D vs 2D, so all shaders 21077ec681f3Smrg * must sample 1D textures as 2D. */ 21087ec681f3Smrg if (config->is_3d) 21097ec681f3Smrg AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D; 21107ec681f3Smrg else if (info->chip_class != GFX9 && config->is_1d) 21117ec681f3Smrg AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D; 21127ec681f3Smrg else 21137ec681f3Smrg AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D; 21147ec681f3Smrg 21157ec681f3Smrg AddrSurfInfoIn.width = config->info.width; 21167ec681f3Smrg AddrSurfInfoIn.height = config->info.height; 21177ec681f3Smrg 21187ec681f3Smrg if (config->is_3d) 21197ec681f3Smrg AddrSurfInfoIn.numSlices = config->info.depth; 21207ec681f3Smrg else if (config->is_cube) 21217ec681f3Smrg AddrSurfInfoIn.numSlices = 6; 21227ec681f3Smrg else 21237ec681f3Smrg AddrSurfInfoIn.numSlices = config->info.array_size; 21247ec681f3Smrg 21257ec681f3Smrg /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */ 21267ec681f3Smrg AddrSurfInfoIn.flags.metaPipeUnaligned = 0; 21277ec681f3Smrg AddrSurfInfoIn.flags.metaRbUnaligned = 0; 21287ec681f3Smrg 21297ec681f3Smrg if (ac_modifier_has_dcc(surf->modifier)) { 21307ec681f3Smrg ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn); 21317ec681f3Smrg } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) { 21327ec681f3Smrg /* Optimal values for the L2 cache. */ 21337ec681f3Smrg /* Don't change the DCC settings for imported buffers - they might differ. */ 21347ec681f3Smrg if (!(surf->flags & RADEON_SURF_IMPORTED)) { 21357ec681f3Smrg if (info->chip_class == GFX9) { 21367ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 21377ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = 0; 21387ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 21397ec681f3Smrg } else if (info->chip_class >= GFX10) { 21407ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = 0; 21417ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = 1; 21427ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 21437ec681f3Smrg } 21447ec681f3Smrg } 21457ec681f3Smrg 21467ec681f3Smrg if (AddrSurfInfoIn.flags.display) { 21477ec681f3Smrg /* The display hardware can only read DCC with RB_ALIGNED=0 and 21487ec681f3Smrg * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. 21497ec681f3Smrg * 21507ec681f3Smrg * The CB block requires RB_ALIGNED=1 except 1 RB chips. 21517ec681f3Smrg * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes 21527ec681f3Smrg * after rendering, so PIPE_ALIGNED=1 is recommended. 21537ec681f3Smrg */ 21547ec681f3Smrg if (info->use_display_dcc_unaligned) { 21557ec681f3Smrg AddrSurfInfoIn.flags.metaPipeUnaligned = 1; 21567ec681f3Smrg AddrSurfInfoIn.flags.metaRbUnaligned = 1; 21577ec681f3Smrg } 21587ec681f3Smrg 21597ec681f3Smrg /* Adjust DCC settings to meet DCN requirements. */ 21607ec681f3Smrg /* Don't change the DCC settings for imported buffers - they might differ. */ 21617ec681f3Smrg if (!(surf->flags & RADEON_SURF_IMPORTED) && 21627ec681f3Smrg (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) { 21637ec681f3Smrg /* Only Navi12/14 support independent 64B blocks in L2, 21647ec681f3Smrg * but without DCC image stores. 21657ec681f3Smrg */ 21667ec681f3Smrg if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) { 21677ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 21687ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = 0; 21697ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 21707ec681f3Smrg } 21717ec681f3Smrg 21727ec681f3Smrg if ((info->chip_class >= GFX10_3 && info->family <= CHIP_YELLOW_CARP) || 21737ec681f3Smrg /* Newer chips will skip this when possible to get better performance. 21747ec681f3Smrg * This is also possible for other gfx10.3 chips, but is disabled for 21757ec681f3Smrg * interoperability between different Mesa versions. 21767ec681f3Smrg */ 21777ec681f3Smrg (info->family > CHIP_YELLOW_CARP && 21787ec681f3Smrg gfx10_DCN_requires_independent_64B_blocks(info, config))) { 21797ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 21807ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = 1; 21817ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 21827ec681f3Smrg } 21837ec681f3Smrg } 21847ec681f3Smrg } 21857ec681f3Smrg } 21867ec681f3Smrg 21877ec681f3Smrg if (surf->modifier == DRM_FORMAT_MOD_INVALID) { 21887ec681f3Smrg switch (mode) { 21897ec681f3Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 21907ec681f3Smrg assert(config->info.samples <= 1); 21917ec681f3Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 21927ec681f3Smrg AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; 21937ec681f3Smrg break; 21947ec681f3Smrg 21957ec681f3Smrg case RADEON_SURF_MODE_1D: 21967ec681f3Smrg case RADEON_SURF_MODE_2D: 21977ec681f3Smrg if (surf->flags & RADEON_SURF_IMPORTED || 21987ec681f3Smrg (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) { 21997ec681f3Smrg AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode; 22007ec681f3Smrg break; 22017ec681f3Smrg } 22027ec681f3Smrg 22037ec681f3Smrg r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false, 22047ec681f3Smrg &AddrSurfInfoIn.swizzleMode); 22057ec681f3Smrg if (r) 22067ec681f3Smrg return r; 22077ec681f3Smrg break; 22087ec681f3Smrg 22097ec681f3Smrg default: 22107ec681f3Smrg assert(0); 22117ec681f3Smrg } 22127ec681f3Smrg } else { 22137ec681f3Smrg /* We have a valid and required modifier here. */ 22147ec681f3Smrg 22157ec681f3Smrg assert(!compressed); 22167ec681f3Smrg assert(!ac_modifier_has_dcc(surf->modifier) || 22177ec681f3Smrg !(surf->flags & RADEON_SURF_DISABLE_DCC)); 22187ec681f3Smrg 22197ec681f3Smrg AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier); 22207ec681f3Smrg } 22217ec681f3Smrg 22227ec681f3Smrg surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; 22237ec681f3Smrg surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 22247ec681f3Smrg 22257ec681f3Smrg surf->num_meta_levels = 0; 22267ec681f3Smrg surf->surf_size = 0; 22277ec681f3Smrg surf->fmask_size = 0; 22287ec681f3Smrg surf->meta_size = 0; 22297ec681f3Smrg surf->meta_slice_size = 0; 22307ec681f3Smrg surf->u.gfx9.surf_offset = 0; 22317ec681f3Smrg if (AddrSurfInfoIn.flags.stencil) 22327ec681f3Smrg surf->u.gfx9.zs.stencil_offset = 0; 22337ec681f3Smrg surf->cmask_size = 0; 22347ec681f3Smrg 22357ec681f3Smrg const bool only_stencil = 22367ec681f3Smrg (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER); 22377ec681f3Smrg 22387ec681f3Smrg /* Calculate texture layout information. */ 22397ec681f3Smrg if (!only_stencil) { 22407ec681f3Smrg r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn); 22417ec681f3Smrg if (r) 22427ec681f3Smrg return r; 22437ec681f3Smrg } 22447ec681f3Smrg 22457ec681f3Smrg /* Calculate texture layout information for stencil. */ 22467ec681f3Smrg if (surf->flags & RADEON_SURF_SBUFFER) { 22477ec681f3Smrg AddrSurfInfoIn.flags.stencil = 1; 22487ec681f3Smrg AddrSurfInfoIn.bpp = 8; 22497ec681f3Smrg AddrSurfInfoIn.format = ADDR_FMT_8; 22507ec681f3Smrg 22517ec681f3Smrg if (!AddrSurfInfoIn.flags.depth) { 22527ec681f3Smrg r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false, 22537ec681f3Smrg &AddrSurfInfoIn.swizzleMode); 22547ec681f3Smrg if (r) 22557ec681f3Smrg return r; 22567ec681f3Smrg } else 22577ec681f3Smrg AddrSurfInfoIn.flags.depth = 0; 22587ec681f3Smrg 22597ec681f3Smrg r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn); 22607ec681f3Smrg if (r) 22617ec681f3Smrg return r; 22627ec681f3Smrg } 22637ec681f3Smrg 22647ec681f3Smrg surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR; 22657ec681f3Smrg 22667ec681f3Smrg /* Query whether the surface is displayable. */ 22677ec681f3Smrg /* This is only useful for surfaces that are allocated without SCANOUT. */ 22687ec681f3Smrg BOOL_32 displayable = false; 22697ec681f3Smrg if (!config->is_3d && !config->is_cube) { 22707ec681f3Smrg r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode, 22717ec681f3Smrg surf->bpe * 8, &displayable); 22727ec681f3Smrg if (r) 22737ec681f3Smrg return r; 22747ec681f3Smrg 22757ec681f3Smrg /* Display needs unaligned DCC. */ 22767ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 22777ec681f3Smrg surf->num_meta_levels && 22787ec681f3Smrg (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 22797ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned) || 22807ec681f3Smrg /* Don't set is_displayable if displayable DCC is missing. */ 22817ec681f3Smrg (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid))) 22827ec681f3Smrg displayable = false; 22837ec681f3Smrg } 22847ec681f3Smrg surf->is_displayable = displayable; 22857ec681f3Smrg 22867ec681f3Smrg /* Validate that we allocated a displayable surface if requested. */ 22877ec681f3Smrg assert(!AddrSurfInfoIn.flags.display || surf->is_displayable); 22887ec681f3Smrg 22897ec681f3Smrg /* Validate that DCC is set up correctly. */ 22907ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) { 22917ec681f3Smrg assert(is_dcc_supported_by_L2(info, surf)); 22927ec681f3Smrg if (AddrSurfInfoIn.flags.color) 22937ec681f3Smrg assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode)); 22947ec681f3Smrg if (AddrSurfInfoIn.flags.display) { 22957ec681f3Smrg assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 22967ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned)); 22977ec681f3Smrg } 22987ec681f3Smrg } 22997ec681f3Smrg 23007ec681f3Smrg if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 && 23017ec681f3Smrg AddrSurfInfoIn.flags.color && !surf->is_linear && 23027ec681f3Smrg (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */ 23037ec681f3Smrg !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE | 23047ec681f3Smrg RADEON_SURF_FORCE_MICRO_TILE_MODE)) && 23057ec681f3Smrg (surf->modifier == DRM_FORMAT_MOD_INVALID || 23067ec681f3Smrg ac_modifier_has_dcc(surf->modifier)) && 23077ec681f3Smrg is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 23087ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned)) { 23097ec681f3Smrg /* Validate that DCC is enabled if DCN can do it. */ 23107ec681f3Smrg if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) && 23117ec681f3Smrg AddrSurfInfoIn.flags.display && surf->bpe == 4) { 23127ec681f3Smrg assert(surf->num_meta_levels); 23137ec681f3Smrg } 23147ec681f3Smrg 23157ec681f3Smrg /* Validate that non-scanout DCC is always enabled. */ 23167ec681f3Smrg if (!AddrSurfInfoIn.flags.display) 23177ec681f3Smrg assert(surf->num_meta_levels); 23187ec681f3Smrg } 23197ec681f3Smrg 23207ec681f3Smrg if (!surf->meta_size) { 23217ec681f3Smrg /* Unset this if HTILE is not present. */ 23227ec681f3Smrg surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 23237ec681f3Smrg } 23247ec681f3Smrg 23257ec681f3Smrg switch (surf->u.gfx9.swizzle_mode) { 23267ec681f3Smrg /* S = standard. */ 23277ec681f3Smrg case ADDR_SW_256B_S: 23287ec681f3Smrg case ADDR_SW_4KB_S: 23297ec681f3Smrg case ADDR_SW_64KB_S: 23307ec681f3Smrg case ADDR_SW_64KB_S_T: 23317ec681f3Smrg case ADDR_SW_4KB_S_X: 23327ec681f3Smrg case ADDR_SW_64KB_S_X: 23337ec681f3Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD; 23347ec681f3Smrg break; 23357ec681f3Smrg 23367ec681f3Smrg /* D = display. */ 23377ec681f3Smrg case ADDR_SW_LINEAR: 23387ec681f3Smrg case ADDR_SW_256B_D: 23397ec681f3Smrg case ADDR_SW_4KB_D: 23407ec681f3Smrg case ADDR_SW_64KB_D: 23417ec681f3Smrg case ADDR_SW_64KB_D_T: 23427ec681f3Smrg case ADDR_SW_4KB_D_X: 23437ec681f3Smrg case ADDR_SW_64KB_D_X: 23447ec681f3Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY; 23457ec681f3Smrg break; 23467ec681f3Smrg 23477ec681f3Smrg /* R = rotated (gfx9), render target (gfx10). */ 23487ec681f3Smrg case ADDR_SW_256B_R: 23497ec681f3Smrg case ADDR_SW_4KB_R: 23507ec681f3Smrg case ADDR_SW_64KB_R: 23517ec681f3Smrg case ADDR_SW_64KB_R_T: 23527ec681f3Smrg case ADDR_SW_4KB_R_X: 23537ec681f3Smrg case ADDR_SW_64KB_R_X: 23547ec681f3Smrg case ADDR_SW_VAR_R_X: 23557ec681f3Smrg /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 23567ec681f3Smrg * used at the same time. We currently do not use rotated 23577ec681f3Smrg * in gfx9. 23587ec681f3Smrg */ 23597ec681f3Smrg assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported"); 23607ec681f3Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER; 23617ec681f3Smrg break; 23627ec681f3Smrg 23637ec681f3Smrg /* Z = depth. */ 23647ec681f3Smrg case ADDR_SW_4KB_Z: 23657ec681f3Smrg case ADDR_SW_64KB_Z: 23667ec681f3Smrg case ADDR_SW_64KB_Z_T: 23677ec681f3Smrg case ADDR_SW_4KB_Z_X: 23687ec681f3Smrg case ADDR_SW_64KB_Z_X: 23697ec681f3Smrg case ADDR_SW_VAR_Z_X: 23707ec681f3Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH; 23717ec681f3Smrg break; 23727ec681f3Smrg 23737ec681f3Smrg default: 23747ec681f3Smrg assert(0); 23757ec681f3Smrg } 23767ec681f3Smrg 23777ec681f3Smrg return 0; 23787ec681f3Smrg} 237901e04c3fSmrg 23807ec681f3Smrgint ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info, 23817ec681f3Smrg const struct ac_surf_config *config, enum radeon_surf_mode mode, 23827ec681f3Smrg struct radeon_surf *surf) 23837ec681f3Smrg{ 23847ec681f3Smrg int r; 23857ec681f3Smrg 23867ec681f3Smrg r = surf_config_sanity(config, surf->flags); 23877ec681f3Smrg if (r) 23887ec681f3Smrg return r; 23897ec681f3Smrg 23907ec681f3Smrg if (info->family_id >= FAMILY_AI) 23917ec681f3Smrg r = gfx9_compute_surface(addrlib, info, config, mode, surf); 23927ec681f3Smrg else 23937ec681f3Smrg r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf); 23947ec681f3Smrg 23957ec681f3Smrg if (r) 23967ec681f3Smrg return r; 23977ec681f3Smrg 23987ec681f3Smrg /* Determine the memory layout of multiple allocations in one buffer. */ 23997ec681f3Smrg surf->total_size = surf->surf_size; 24007ec681f3Smrg surf->alignment_log2 = surf->surf_alignment_log2; 24017ec681f3Smrg 24027ec681f3Smrg /* Ensure the offsets are always 0 if not available. */ 24037ec681f3Smrg surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0; 24047ec681f3Smrg 24057ec681f3Smrg if (surf->fmask_size) { 24067ec681f3Smrg assert(config->info.samples >= 2); 24077ec681f3Smrg surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2); 24087ec681f3Smrg surf->total_size = surf->fmask_offset + surf->fmask_size; 24097ec681f3Smrg surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2); 24107ec681f3Smrg } 24117ec681f3Smrg 24127ec681f3Smrg /* Single-sample CMASK is in a separate buffer. */ 24137ec681f3Smrg if (surf->cmask_size && config->info.samples >= 2) { 24147ec681f3Smrg surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2); 24157ec681f3Smrg surf->total_size = surf->cmask_offset + surf->cmask_size; 24167ec681f3Smrg surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2); 24177ec681f3Smrg } 24187ec681f3Smrg 24197ec681f3Smrg if (surf->is_displayable) 24207ec681f3Smrg surf->flags |= RADEON_SURF_SCANOUT; 24217ec681f3Smrg 24227ec681f3Smrg if (surf->meta_size && 24237ec681f3Smrg /* dcc_size is computed on GFX9+ only if it's displayable. */ 24247ec681f3Smrg (info->chip_class >= GFX9 || !get_display_flag(config, surf))) { 24257ec681f3Smrg /* It's better when displayable DCC is immediately after 24267ec681f3Smrg * the image due to hw-specific reasons. 24277ec681f3Smrg */ 24287ec681f3Smrg if (info->chip_class >= GFX9 && 24297ec681f3Smrg !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 24307ec681f3Smrg surf->u.gfx9.color.dcc.display_equation_valid) { 24317ec681f3Smrg /* Add space for the displayable DCC buffer. */ 24327ec681f3Smrg surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2); 24337ec681f3Smrg surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size; 24347ec681f3Smrg } 24357ec681f3Smrg 24367ec681f3Smrg surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2); 24377ec681f3Smrg surf->total_size = surf->meta_offset + surf->meta_size; 24387ec681f3Smrg surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2); 24397ec681f3Smrg } 24407ec681f3Smrg 24417ec681f3Smrg return 0; 24427ec681f3Smrg} 24437ec681f3Smrg 24447ec681f3Smrg/* This is meant to be used for disabling DCC. */ 24457ec681f3Smrgvoid ac_surface_zero_dcc_fields(struct radeon_surf *surf) 24467ec681f3Smrg{ 24477ec681f3Smrg if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 24487ec681f3Smrg return; 24497ec681f3Smrg 24507ec681f3Smrg surf->meta_offset = 0; 24517ec681f3Smrg surf->display_dcc_offset = 0; 24527ec681f3Smrg if (!surf->fmask_offset && !surf->cmask_offset) { 24537ec681f3Smrg surf->total_size = surf->surf_size; 24547ec681f3Smrg surf->alignment_log2 = surf->surf_alignment_log2; 24557ec681f3Smrg } 24567ec681f3Smrg} 24577ec681f3Smrg 24587ec681f3Smrgstatic unsigned eg_tile_split(unsigned tile_split) 24597ec681f3Smrg{ 24607ec681f3Smrg switch (tile_split) { 24617ec681f3Smrg case 0: 24627ec681f3Smrg tile_split = 64; 24637ec681f3Smrg break; 24647ec681f3Smrg case 1: 24657ec681f3Smrg tile_split = 128; 24667ec681f3Smrg break; 24677ec681f3Smrg case 2: 24687ec681f3Smrg tile_split = 256; 24697ec681f3Smrg break; 24707ec681f3Smrg case 3: 24717ec681f3Smrg tile_split = 512; 24727ec681f3Smrg break; 24737ec681f3Smrg default: 24747ec681f3Smrg case 4: 24757ec681f3Smrg tile_split = 1024; 24767ec681f3Smrg break; 24777ec681f3Smrg case 5: 24787ec681f3Smrg tile_split = 2048; 24797ec681f3Smrg break; 24807ec681f3Smrg case 6: 24817ec681f3Smrg tile_split = 4096; 24827ec681f3Smrg break; 24837ec681f3Smrg } 24847ec681f3Smrg return tile_split; 24857ec681f3Smrg} 24867ec681f3Smrg 24877ec681f3Smrgstatic unsigned eg_tile_split_rev(unsigned eg_tile_split) 24887ec681f3Smrg{ 24897ec681f3Smrg switch (eg_tile_split) { 24907ec681f3Smrg case 64: 24917ec681f3Smrg return 0; 24927ec681f3Smrg case 128: 24937ec681f3Smrg return 1; 24947ec681f3Smrg case 256: 24957ec681f3Smrg return 2; 24967ec681f3Smrg case 512: 24977ec681f3Smrg return 3; 24987ec681f3Smrg default: 24997ec681f3Smrg case 1024: 25007ec681f3Smrg return 4; 25017ec681f3Smrg case 2048: 25027ec681f3Smrg return 5; 25037ec681f3Smrg case 4096: 25047ec681f3Smrg return 6; 25057ec681f3Smrg } 25067ec681f3Smrg} 25077ec681f3Smrg 25087ec681f3Smrg#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45 25097ec681f3Smrg#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3 25107ec681f3Smrg 25117ec681f3Smrg/* This should be called before ac_compute_surface. */ 25127ec681f3Smrgvoid ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 25137ec681f3Smrg uint64_t tiling_flags, enum radeon_surf_mode *mode) 25147ec681f3Smrg{ 25157ec681f3Smrg bool scanout; 25167ec681f3Smrg 25177ec681f3Smrg if (info->chip_class >= GFX9) { 25187ec681f3Smrg surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); 25197ec681f3Smrg surf->u.gfx9.color.dcc.independent_64B_blocks = 25207ec681f3Smrg AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); 25217ec681f3Smrg surf->u.gfx9.color.dcc.independent_128B_blocks = 25227ec681f3Smrg AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B); 25237ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size = 25247ec681f3Smrg AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE); 25257ec681f3Smrg surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); 25267ec681f3Smrg scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT); 25277ec681f3Smrg *mode = 25287ec681f3Smrg surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED; 25297ec681f3Smrg } else { 25307ec681f3Smrg surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); 25317ec681f3Smrg surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); 25327ec681f3Smrg surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); 25337ec681f3Smrg surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT)); 25347ec681f3Smrg surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); 25357ec681f3Smrg surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); 25367ec681f3Smrg scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */ 25377ec681f3Smrg 25387ec681f3Smrg if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ 25397ec681f3Smrg *mode = RADEON_SURF_MODE_2D; 25407ec681f3Smrg else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ 25417ec681f3Smrg *mode = RADEON_SURF_MODE_1D; 25427ec681f3Smrg else 25437ec681f3Smrg *mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 25447ec681f3Smrg } 25457ec681f3Smrg 25467ec681f3Smrg if (scanout) 25477ec681f3Smrg surf->flags |= RADEON_SURF_SCANOUT; 25487ec681f3Smrg else 25497ec681f3Smrg surf->flags &= ~RADEON_SURF_SCANOUT; 25507ec681f3Smrg} 25517ec681f3Smrg 25527ec681f3Smrgvoid ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 25537ec681f3Smrg uint64_t *tiling_flags) 25547ec681f3Smrg{ 25557ec681f3Smrg *tiling_flags = 0; 25567ec681f3Smrg 25577ec681f3Smrg if (info->chip_class >= GFX9) { 25587ec681f3Smrg uint64_t dcc_offset = 0; 25597ec681f3Smrg 25607ec681f3Smrg if (surf->meta_offset) { 25617ec681f3Smrg dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset; 25627ec681f3Smrg assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); 25637ec681f3Smrg } 25647ec681f3Smrg 25657ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode); 25667ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8); 25677ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max); 25687ec681f3Smrg *tiling_flags |= 25697ec681f3Smrg AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks); 25707ec681f3Smrg *tiling_flags |= 25717ec681f3Smrg AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks); 25727ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, 25737ec681f3Smrg surf->u.gfx9.color.dcc.max_compressed_block_size); 25747ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0); 25757ec681f3Smrg } else { 25767ec681f3Smrg if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D) 25777ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ 25787ec681f3Smrg else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D) 25797ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ 25807ec681f3Smrg else 25817ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ 25827ec681f3Smrg 25837ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config); 25847ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw)); 25857ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh)); 25867ec681f3Smrg if (surf->u.legacy.tile_split) 25877ec681f3Smrg *tiling_flags |= 25887ec681f3Smrg AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split)); 25897ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea)); 25907ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1); 25917ec681f3Smrg 25927ec681f3Smrg if (surf->flags & RADEON_SURF_SCANOUT) 25937ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */ 25947ec681f3Smrg else 25957ec681f3Smrg *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */ 25967ec681f3Smrg } 25977ec681f3Smrg} 25987ec681f3Smrg 25997ec681f3Smrgstatic uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info) 26007ec681f3Smrg{ 26017ec681f3Smrg return (ATI_VENDOR_ID << 16) | info->pci_id; 26027ec681f3Smrg} 26037ec681f3Smrg 26047ec681f3Smrg/* This should be called after ac_compute_surface. */ 26057ec681f3Smrgbool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf, 26067ec681f3Smrg unsigned num_storage_samples, unsigned num_mipmap_levels, 26077ec681f3Smrg unsigned size_metadata, const uint32_t metadata[64]) 26087ec681f3Smrg{ 26097ec681f3Smrg const uint32_t *desc = &metadata[2]; 26107ec681f3Smrg uint64_t offset; 26117ec681f3Smrg 26127ec681f3Smrg if (surf->modifier != DRM_FORMAT_MOD_INVALID) 26137ec681f3Smrg return true; 26147ec681f3Smrg 26157ec681f3Smrg if (info->chip_class >= GFX9) 26167ec681f3Smrg offset = surf->u.gfx9.surf_offset; 26177ec681f3Smrg else 26187ec681f3Smrg offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256; 26197ec681f3Smrg 26207ec681f3Smrg if (offset || /* Non-zero planes ignore metadata. */ 26217ec681f3Smrg size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */ 26227ec681f3Smrg metadata[0] == 0 || /* invalid version number */ 26237ec681f3Smrg metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ { 26247ec681f3Smrg /* Disable DCC because it might not be enabled. */ 26257ec681f3Smrg ac_surface_zero_dcc_fields(surf); 26267ec681f3Smrg 26277ec681f3Smrg /* Don't report an error if the texture comes from an incompatible driver, 26287ec681f3Smrg * but this might not work. 26297ec681f3Smrg */ 26307ec681f3Smrg return true; 26317ec681f3Smrg } 26327ec681f3Smrg 26337ec681f3Smrg /* Validate that sample counts and the number of mipmap levels match. */ 26347ec681f3Smrg unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]); 26357ec681f3Smrg unsigned type = G_008F1C_TYPE(desc[3]); 26367ec681f3Smrg 26377ec681f3Smrg if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 26387ec681f3Smrg unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples)); 26397ec681f3Smrg 26407ec681f3Smrg if (desc_last_level != log_samples) { 26417ec681f3Smrg fprintf(stderr, 26427ec681f3Smrg "amdgpu: invalid MSAA texture import, " 26437ec681f3Smrg "metadata has log2(samples) = %u, the caller set %u\n", 26447ec681f3Smrg desc_last_level, log_samples); 26457ec681f3Smrg return false; 26467ec681f3Smrg } 26477ec681f3Smrg } else { 26487ec681f3Smrg if (desc_last_level != num_mipmap_levels - 1) { 26497ec681f3Smrg fprintf(stderr, 26507ec681f3Smrg "amdgpu: invalid mipmapped texture import, " 26517ec681f3Smrg "metadata has last_level = %u, the caller set %u\n", 26527ec681f3Smrg desc_last_level, num_mipmap_levels - 1); 26537ec681f3Smrg return false; 26547ec681f3Smrg } 26557ec681f3Smrg } 26567ec681f3Smrg 26577ec681f3Smrg if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) { 26587ec681f3Smrg /* Read DCC information. */ 26597ec681f3Smrg switch (info->chip_class) { 26607ec681f3Smrg case GFX8: 26617ec681f3Smrg surf->meta_offset = (uint64_t)desc[7] << 8; 26627ec681f3Smrg break; 26637ec681f3Smrg 26647ec681f3Smrg case GFX9: 26657ec681f3Smrg surf->meta_offset = 26667ec681f3Smrg ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40); 26677ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]); 26687ec681f3Smrg surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]); 26697ec681f3Smrg 26707ec681f3Smrg /* If DCC is unaligned, this can only be a displayable image. */ 26717ec681f3Smrg if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned) 26727ec681f3Smrg assert(surf->is_displayable); 26737ec681f3Smrg break; 26747ec681f3Smrg 26757ec681f3Smrg case GFX10: 26767ec681f3Smrg case GFX10_3: 26777ec681f3Smrg surf->meta_offset = 26787ec681f3Smrg ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16); 26797ec681f3Smrg surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]); 26807ec681f3Smrg break; 26817ec681f3Smrg 26827ec681f3Smrg default: 26837ec681f3Smrg assert(0); 26847ec681f3Smrg return false; 26857ec681f3Smrg } 26867ec681f3Smrg } else { 26877ec681f3Smrg /* Disable DCC. dcc_offset is always set by texture_from_handle 26887ec681f3Smrg * and must be cleared here. 26897ec681f3Smrg */ 26907ec681f3Smrg ac_surface_zero_dcc_fields(surf); 26917ec681f3Smrg } 26927ec681f3Smrg 26937ec681f3Smrg return true; 26947ec681f3Smrg} 26957ec681f3Smrg 26967ec681f3Smrgvoid ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf, 26977ec681f3Smrg unsigned num_mipmap_levels, uint32_t desc[8], 26987ec681f3Smrg unsigned *size_metadata, uint32_t metadata[64]) 26997ec681f3Smrg{ 27007ec681f3Smrg /* Clear the base address and set the relative DCC offset. */ 27017ec681f3Smrg desc[0] = 0; 27027ec681f3Smrg desc[1] &= C_008F14_BASE_ADDRESS_HI; 27037ec681f3Smrg 27047ec681f3Smrg switch (info->chip_class) { 27057ec681f3Smrg case GFX6: 27067ec681f3Smrg case GFX7: 27077ec681f3Smrg break; 27087ec681f3Smrg case GFX8: 27097ec681f3Smrg desc[7] = surf->meta_offset >> 8; 27107ec681f3Smrg break; 27117ec681f3Smrg case GFX9: 27127ec681f3Smrg desc[7] = surf->meta_offset >> 8; 27137ec681f3Smrg desc[5] &= C_008F24_META_DATA_ADDRESS; 27147ec681f3Smrg desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40); 27157ec681f3Smrg break; 27167ec681f3Smrg case GFX10: 27177ec681f3Smrg case GFX10_3: 27187ec681f3Smrg desc[6] &= C_00A018_META_DATA_ADDRESS_LO; 27197ec681f3Smrg desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8); 27207ec681f3Smrg desc[7] = surf->meta_offset >> 16; 27217ec681f3Smrg break; 27227ec681f3Smrg default: 27237ec681f3Smrg assert(0); 27247ec681f3Smrg } 27257ec681f3Smrg 27267ec681f3Smrg /* Metadata image format format version 1: 27277ec681f3Smrg * [0] = 1 (metadata format identifier) 27287ec681f3Smrg * [1] = (VENDOR_ID << 16) | PCI_ID 27297ec681f3Smrg * [2:9] = image descriptor for the whole resource 27307ec681f3Smrg * [2] is always 0, because the base address is cleared 27317ec681f3Smrg * [9] is the DCC offset bits [39:8] from the beginning of 27327ec681f3Smrg * the buffer 27337ec681f3Smrg * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 27347ec681f3Smrg */ 27357ec681f3Smrg 27367ec681f3Smrg metadata[0] = 1; /* metadata image format version 1 */ 27377ec681f3Smrg 27387ec681f3Smrg /* Tiling modes are ambiguous without a PCI ID. */ 27397ec681f3Smrg metadata[1] = ac_get_umd_metadata_word1(info); 27407ec681f3Smrg 27417ec681f3Smrg /* Dwords [2:9] contain the image descriptor. */ 27427ec681f3Smrg memcpy(&metadata[2], desc, 8 * 4); 27437ec681f3Smrg *size_metadata = 10 * 4; 27447ec681f3Smrg 27457ec681f3Smrg /* Dwords [10:..] contain the mipmap level offsets. */ 27467ec681f3Smrg if (info->chip_class <= GFX8) { 27477ec681f3Smrg for (unsigned i = 0; i < num_mipmap_levels; i++) 27487ec681f3Smrg metadata[10 + i] = surf->u.legacy.level[i].offset_256B; 27497ec681f3Smrg 27507ec681f3Smrg *size_metadata += num_mipmap_levels * 4; 27517ec681f3Smrg } 27527ec681f3Smrg} 27537ec681f3Smrg 27547ec681f3Smrgstatic uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf) 27557ec681f3Smrg{ 27567ec681f3Smrg if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) 27577ec681f3Smrg return 256 / surf->bpe; 27587ec681f3Smrg 27597ec681f3Smrg if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) 27607ec681f3Smrg return 1; /* TODO */ 27617ec681f3Smrg 27627ec681f3Smrg unsigned bpe_shift = util_logbase2(surf->bpe) / 2; 27637ec681f3Smrg switch(surf->u.gfx9.swizzle_mode & ~3) { 27647ec681f3Smrg case ADDR_SW_LINEAR: /* 256B block. */ 27657ec681f3Smrg return 16 >> bpe_shift; 27667ec681f3Smrg case ADDR_SW_4KB_Z: 27677ec681f3Smrg case ADDR_SW_4KB_Z_X: 27687ec681f3Smrg return 64 >> bpe_shift; 27697ec681f3Smrg case ADDR_SW_64KB_Z: 27707ec681f3Smrg case ADDR_SW_64KB_Z_T: 27717ec681f3Smrg case ADDR_SW_64KB_Z_X: 27727ec681f3Smrg return 256 >> bpe_shift; 27737ec681f3Smrg case ADDR_SW_VAR_Z_X: 27747ec681f3Smrg default: 27757ec681f3Smrg return 1; /* TODO */ 27767ec681f3Smrg } 27777ec681f3Smrg} 27787ec681f3Smrg 27797ec681f3Smrgbool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf, 27807ec681f3Smrg unsigned num_mipmap_levels, uint64_t offset, unsigned pitch) 27817ec681f3Smrg{ 27827ec681f3Smrg /* 27837ec681f3Smrg * GFX10 and newer don't support custom strides. Furthermore, for 27847ec681f3Smrg * multiple miplevels or compression data we'd really need to rerun 27857ec681f3Smrg * addrlib to update all the fields in the surface. That, however, is a 27867ec681f3Smrg * software limitation and could be relaxed later. 27877ec681f3Smrg */ 27887ec681f3Smrg bool require_equal_pitch = surf->surf_size != surf->total_size || 27897ec681f3Smrg num_mipmap_levels != 1 || 27907ec681f3Smrg info->chip_class >= GFX10; 27917ec681f3Smrg 27927ec681f3Smrg if (info->chip_class >= GFX9) { 27937ec681f3Smrg if (pitch) { 27947ec681f3Smrg if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch) 27957ec681f3Smrg return false; 27967ec681f3Smrg 27977ec681f3Smrg if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch) 27987ec681f3Smrg return false; 27997ec681f3Smrg 28007ec681f3Smrg if (pitch != surf->u.gfx9.surf_pitch) { 28017ec681f3Smrg unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size; 28027ec681f3Smrg 28037ec681f3Smrg surf->u.gfx9.surf_pitch = pitch; 28047ec681f3Smrg surf->u.gfx9.epitch = pitch - 1; 28057ec681f3Smrg surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe; 28067ec681f3Smrg surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices; 28077ec681f3Smrg } 28087ec681f3Smrg } 28097ec681f3Smrg surf->u.gfx9.surf_offset = offset; 28107ec681f3Smrg if (surf->u.gfx9.zs.stencil_offset) 28117ec681f3Smrg surf->u.gfx9.zs.stencil_offset += offset; 28127ec681f3Smrg } else { 28137ec681f3Smrg if (pitch) { 28147ec681f3Smrg if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch) 28157ec681f3Smrg return false; 28167ec681f3Smrg 28177ec681f3Smrg surf->u.legacy.level[0].nblk_x = pitch; 28187ec681f3Smrg surf->u.legacy.level[0].slice_size_dw = 28197ec681f3Smrg ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4; 28207ec681f3Smrg } 28217ec681f3Smrg 28227ec681f3Smrg if (offset) { 28237ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i) 28247ec681f3Smrg surf->u.legacy.level[i].offset_256B += offset / 256; 28257ec681f3Smrg } 28267ec681f3Smrg } 28277ec681f3Smrg 28287ec681f3Smrg if (offset & ((1 << surf->alignment_log2) - 1) || 28297ec681f3Smrg offset >= UINT64_MAX - surf->total_size) 28307ec681f3Smrg return false; 28317ec681f3Smrg 28327ec681f3Smrg if (surf->meta_offset) 28337ec681f3Smrg surf->meta_offset += offset; 28347ec681f3Smrg if (surf->fmask_offset) 28357ec681f3Smrg surf->fmask_offset += offset; 28367ec681f3Smrg if (surf->cmask_offset) 28377ec681f3Smrg surf->cmask_offset += offset; 28387ec681f3Smrg if (surf->display_dcc_offset) 28397ec681f3Smrg surf->display_dcc_offset += offset; 28407ec681f3Smrg return true; 28417ec681f3Smrg} 28427ec681f3Smrg 28437ec681f3Smrgunsigned ac_surface_get_nplanes(const struct radeon_surf *surf) 28447ec681f3Smrg{ 28457ec681f3Smrg if (surf->modifier == DRM_FORMAT_MOD_INVALID) 28467ec681f3Smrg return 1; 28477ec681f3Smrg else if (surf->display_dcc_offset) 28487ec681f3Smrg return 3; 28497ec681f3Smrg else if (surf->meta_offset) 28507ec681f3Smrg return 2; 28517ec681f3Smrg else 28527ec681f3Smrg return 1; 28537ec681f3Smrg} 28547ec681f3Smrg 28557ec681f3Smrguint64_t ac_surface_get_plane_offset(enum chip_class chip_class, 28567ec681f3Smrg const struct radeon_surf *surf, 28577ec681f3Smrg unsigned plane, unsigned layer) 28587ec681f3Smrg{ 28597ec681f3Smrg switch (plane) { 28607ec681f3Smrg case 0: 28617ec681f3Smrg if (chip_class >= GFX9) { 28627ec681f3Smrg return surf->u.gfx9.surf_offset + 28637ec681f3Smrg layer * surf->u.gfx9.surf_slice_size; 28647ec681f3Smrg } else { 28657ec681f3Smrg return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 + 28667ec681f3Smrg layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4; 28677ec681f3Smrg } 28687ec681f3Smrg case 1: 28697ec681f3Smrg assert(!layer); 28707ec681f3Smrg return surf->display_dcc_offset ? 28717ec681f3Smrg surf->display_dcc_offset : surf->meta_offset; 28727ec681f3Smrg case 2: 28737ec681f3Smrg assert(!layer); 28747ec681f3Smrg return surf->meta_offset; 28757ec681f3Smrg default: 28767ec681f3Smrg unreachable("Invalid plane index"); 28777ec681f3Smrg } 28787ec681f3Smrg} 28797ec681f3Smrg 28807ec681f3Smrguint64_t ac_surface_get_plane_stride(enum chip_class chip_class, 28817ec681f3Smrg const struct radeon_surf *surf, 28827ec681f3Smrg unsigned plane) 28837ec681f3Smrg{ 28847ec681f3Smrg switch (plane) { 28857ec681f3Smrg case 0: 28867ec681f3Smrg if (chip_class >= GFX9) { 28877ec681f3Smrg return surf->u.gfx9.surf_pitch * surf->bpe; 28887ec681f3Smrg } else { 28897ec681f3Smrg return surf->u.legacy.level[0].nblk_x * surf->bpe; 28907ec681f3Smrg } 28917ec681f3Smrg case 1: 28927ec681f3Smrg return 1 + (surf->display_dcc_offset ? 28937ec681f3Smrg surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max); 28947ec681f3Smrg case 2: 28957ec681f3Smrg return surf->u.gfx9.color.dcc_pitch_max + 1; 28967ec681f3Smrg default: 28977ec681f3Smrg unreachable("Invalid plane index"); 28987ec681f3Smrg } 28997ec681f3Smrg} 29007ec681f3Smrg 29017ec681f3Smrguint64_t ac_surface_get_plane_size(const struct radeon_surf *surf, 29027ec681f3Smrg unsigned plane) 29037ec681f3Smrg{ 29047ec681f3Smrg switch (plane) { 29057ec681f3Smrg case 0: 29067ec681f3Smrg return surf->surf_size; 29077ec681f3Smrg case 1: 29087ec681f3Smrg return surf->display_dcc_offset ? 29097ec681f3Smrg surf->u.gfx9.color.display_dcc_size : surf->meta_size; 29107ec681f3Smrg case 2: 29117ec681f3Smrg return surf->meta_size; 29127ec681f3Smrg default: 29137ec681f3Smrg unreachable("Invalid plane index"); 29147ec681f3Smrg } 29157ec681f3Smrg} 29167ec681f3Smrg 29177ec681f3Smrgvoid ac_surface_print_info(FILE *out, const struct radeon_info *info, 29187ec681f3Smrg const struct radeon_surf *surf) 29197ec681f3Smrg{ 29207ec681f3Smrg if (info->chip_class >= GFX9) { 29217ec681f3Smrg fprintf(out, 29227ec681f3Smrg " Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", " 29237ec681f3Smrg "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, " 29247ec681f3Smrg "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n", 29257ec681f3Smrg surf->surf_size, surf->u.gfx9.surf_slice_size, 29267ec681f3Smrg 1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode, 29277ec681f3Smrg surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch, 29287ec681f3Smrg surf->blk_w, surf->blk_h, surf->bpe, surf->flags); 29297ec681f3Smrg 29307ec681f3Smrg if (surf->fmask_offset) 29317ec681f3Smrg fprintf(out, 29327ec681f3Smrg " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", " 29337ec681f3Smrg "alignment=%u, swmode=%u, epitch=%u\n", 29347ec681f3Smrg surf->fmask_offset, surf->fmask_size, 29357ec681f3Smrg 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode, 29367ec681f3Smrg surf->u.gfx9.color.fmask_epitch); 29377ec681f3Smrg 29387ec681f3Smrg if (surf->cmask_offset) 29397ec681f3Smrg fprintf(out, 29407ec681f3Smrg " CMask: offset=%" PRIu64 ", size=%u, " 29417ec681f3Smrg "alignment=%u\n", 29427ec681f3Smrg surf->cmask_offset, surf->cmask_size, 29437ec681f3Smrg 1 << surf->cmask_alignment_log2); 29447ec681f3Smrg 29457ec681f3Smrg if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset) 29467ec681f3Smrg fprintf(out, 29477ec681f3Smrg " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", 29487ec681f3Smrg surf->meta_offset, surf->meta_size, 29497ec681f3Smrg 1 << surf->meta_alignment_log2); 29507ec681f3Smrg 29517ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset) 29527ec681f3Smrg fprintf(out, 29537ec681f3Smrg " DCC: offset=%" PRIu64 ", size=%u, " 29547ec681f3Smrg "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", 29557ec681f3Smrg surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2, 29567ec681f3Smrg surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels); 29577ec681f3Smrg 29587ec681f3Smrg if (surf->has_stencil) 29597ec681f3Smrg fprintf(out, 29607ec681f3Smrg " Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n", 29617ec681f3Smrg surf->u.gfx9.zs.stencil_offset, 29627ec681f3Smrg surf->u.gfx9.zs.stencil_swizzle_mode, 29637ec681f3Smrg surf->u.gfx9.zs.stencil_epitch); 29647ec681f3Smrg } else { 29657ec681f3Smrg fprintf(out, 29667ec681f3Smrg " Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, " 29677ec681f3Smrg "bpe=%u, flags=0x%"PRIx64"\n", 29687ec681f3Smrg surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w, 29697ec681f3Smrg surf->blk_h, surf->bpe, surf->flags); 29707ec681f3Smrg 29717ec681f3Smrg fprintf(out, 29727ec681f3Smrg " Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, " 29737ec681f3Smrg "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", 29747ec681f3Smrg surf->surf_size, 1 << surf->surf_alignment_log2, 29757ec681f3Smrg surf->u.legacy.bankw, surf->u.legacy.bankh, 29767ec681f3Smrg surf->u.legacy.num_banks, surf->u.legacy.mtilea, 29777ec681f3Smrg surf->u.legacy.tile_split, surf->u.legacy.pipe_config, 29787ec681f3Smrg (surf->flags & RADEON_SURF_SCANOUT) != 0); 29797ec681f3Smrg 29807ec681f3Smrg if (surf->fmask_offset) 29817ec681f3Smrg fprintf(out, 29827ec681f3Smrg " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", " 29837ec681f3Smrg "alignment=%u, pitch_in_pixels=%u, bankh=%u, " 29847ec681f3Smrg "slice_tile_max=%u, tile_mode_index=%u\n", 29857ec681f3Smrg surf->fmask_offset, surf->fmask_size, 29867ec681f3Smrg 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels, 29877ec681f3Smrg surf->u.legacy.color.fmask.bankh, 29887ec681f3Smrg surf->u.legacy.color.fmask.slice_tile_max, 29897ec681f3Smrg surf->u.legacy.color.fmask.tiling_index); 29907ec681f3Smrg 29917ec681f3Smrg if (surf->cmask_offset) 29927ec681f3Smrg fprintf(out, 29937ec681f3Smrg " CMask: offset=%" PRIu64 ", size=%u, alignment=%u, " 29947ec681f3Smrg "slice_tile_max=%u\n", 29957ec681f3Smrg surf->cmask_offset, surf->cmask_size, 29967ec681f3Smrg 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max); 29977ec681f3Smrg 29987ec681f3Smrg if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset) 29997ec681f3Smrg fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", 30007ec681f3Smrg surf->meta_offset, surf->meta_size, 30017ec681f3Smrg 1 << surf->meta_alignment_log2); 30027ec681f3Smrg 30037ec681f3Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset) 30047ec681f3Smrg fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n", 30057ec681f3Smrg surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2); 30067ec681f3Smrg 30077ec681f3Smrg if (surf->has_stencil) 30087ec681f3Smrg fprintf(out, " StencilLayout: tilesplit=%u\n", 30097ec681f3Smrg surf->u.legacy.stencil_tile_split); 30107ec681f3Smrg } 30117ec681f3Smrg} 30127ec681f3Smrg 30137ec681f3Smrgstatic nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info, 30147ec681f3Smrg struct gfx9_meta_equation *equation, 30157ec681f3Smrg int blkSizeBias, unsigned blkStart, 30167ec681f3Smrg nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size, 30177ec681f3Smrg nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 30187ec681f3Smrg nir_ssa_def *pipe_xor, 30197ec681f3Smrg nir_ssa_def **bit_position) 30207ec681f3Smrg{ 30217ec681f3Smrg nir_ssa_def *zero = nir_imm_int(b, 0); 30227ec681f3Smrg nir_ssa_def *one = nir_imm_int(b, 1); 30237ec681f3Smrg 30247ec681f3Smrg assert(info->chip_class >= GFX10); 30257ec681f3Smrg 30267ec681f3Smrg unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width); 30277ec681f3Smrg unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height); 30287ec681f3Smrg unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias; 30297ec681f3Smrg 30307ec681f3Smrg nir_ssa_def *coord[] = {x, y, z, 0}; 30317ec681f3Smrg nir_ssa_def *address = zero; 30327ec681f3Smrg 30337ec681f3Smrg for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) { 30347ec681f3Smrg nir_ssa_def *v = zero; 30357ec681f3Smrg 30367ec681f3Smrg for (unsigned c = 0; c < 4; c++) { 30377ec681f3Smrg unsigned index = i * 4 + c - (blkStart * 4); 30387ec681f3Smrg if (equation->u.gfx10_bits[index]) { 30397ec681f3Smrg unsigned mask = equation->u.gfx10_bits[index]; 30407ec681f3Smrg nir_ssa_def *bits = coord[c]; 30417ec681f3Smrg 30427ec681f3Smrg while (mask) 30437ec681f3Smrg v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one)); 30447ec681f3Smrg } 30457ec681f3Smrg } 30467ec681f3Smrg 30477ec681f3Smrg address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i))); 30487ec681f3Smrg } 30497ec681f3Smrg 30507ec681f3Smrg unsigned blkMask = (1 << blkSizeLog2) - 1; 30517ec681f3Smrg unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1; 30527ec681f3Smrg unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config); 30537ec681f3Smrg nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2); 30547ec681f3Smrg nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2); 30557ec681f3Smrg nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2); 30567ec681f3Smrg nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb); 30577ec681f3Smrg nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask), 30587ec681f3Smrg nir_imm_int(b, m_pipeInterleaveLog2)), blkMask); 30597ec681f3Smrg 30607ec681f3Smrg if (bit_position) 30617ec681f3Smrg *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)), 30627ec681f3Smrg nir_imm_int(b, 2)); 30637ec681f3Smrg 30647ec681f3Smrg return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z), 30657ec681f3Smrg nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))), 30667ec681f3Smrg nir_ixor(b, nir_ushr(b, address, one), pipeXor)); 30677ec681f3Smrg} 30687ec681f3Smrg 30697ec681f3Smrgstatic nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info, 30707ec681f3Smrg struct gfx9_meta_equation *equation, 30717ec681f3Smrg nir_ssa_def *meta_pitch, nir_ssa_def *meta_height, 30727ec681f3Smrg nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 30737ec681f3Smrg nir_ssa_def *sample, nir_ssa_def *pipe_xor, 30747ec681f3Smrg nir_ssa_def **bit_position) 30757ec681f3Smrg{ 30767ec681f3Smrg nir_ssa_def *zero = nir_imm_int(b, 0); 30777ec681f3Smrg nir_ssa_def *one = nir_imm_int(b, 1); 30787ec681f3Smrg 30797ec681f3Smrg assert(info->chip_class >= GFX9); 30807ec681f3Smrg 30817ec681f3Smrg unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width); 30827ec681f3Smrg unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height); 30837ec681f3Smrg unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth); 30847ec681f3Smrg 30857ec681f3Smrg unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config); 30867ec681f3Smrg unsigned numPipeBits = equation->u.gfx9.num_pipe_bits; 30877ec681f3Smrg nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2); 30887ec681f3Smrg nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2), 30897ec681f3Smrg pitchInBlock); 30907ec681f3Smrg 30917ec681f3Smrg nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2); 30927ec681f3Smrg nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2); 30937ec681f3Smrg nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2); 30947ec681f3Smrg 30957ec681f3Smrg nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock), 30967ec681f3Smrg nir_imul(b, yb, pitchInBlock)), xb); 30977ec681f3Smrg nir_ssa_def *coords[] = {x, y, z, sample, blockIndex}; 30987ec681f3Smrg 30997ec681f3Smrg nir_ssa_def *address = zero; 31007ec681f3Smrg unsigned num_bits = equation->u.gfx9.num_bits; 31017ec681f3Smrg assert(num_bits <= 32); 31027ec681f3Smrg 31037ec681f3Smrg /* Compute the address up until the last bit that doesn't use the block index. */ 31047ec681f3Smrg for (unsigned i = 0; i < num_bits - 1; i++) { 31057ec681f3Smrg nir_ssa_def *xor = zero; 31067ec681f3Smrg 31077ec681f3Smrg for (unsigned c = 0; c < 5; c++) { 31087ec681f3Smrg if (equation->u.gfx9.bit[i].coord[c].dim >= 5) 31097ec681f3Smrg continue; 31107ec681f3Smrg 31117ec681f3Smrg assert(equation->u.gfx9.bit[i].coord[c].ord < 32); 31127ec681f3Smrg nir_ssa_def *ison = 31137ec681f3Smrg nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim], 31147ec681f3Smrg equation->u.gfx9.bit[i].coord[c].ord), one); 31157ec681f3Smrg 31167ec681f3Smrg xor = nir_ixor(b, xor, ison); 31177ec681f3Smrg } 31187ec681f3Smrg address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i))); 31197ec681f3Smrg } 31207ec681f3Smrg 31217ec681f3Smrg /* Fill the remaining bits with the block index. */ 31227ec681f3Smrg unsigned last = num_bits - 1; 31237ec681f3Smrg address = nir_ior(b, address, 31247ec681f3Smrg nir_ishl(b, nir_ushr_imm(b, blockIndex, 31257ec681f3Smrg equation->u.gfx9.bit[last].coord[0].ord), 31267ec681f3Smrg nir_imm_int(b, last))); 31277ec681f3Smrg 31287ec681f3Smrg if (bit_position) 31297ec681f3Smrg *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)), 31307ec681f3Smrg nir_imm_int(b, 2)); 31317ec681f3Smrg 31327ec681f3Smrg nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1); 31337ec681f3Smrg return nir_ixor(b, nir_ushr(b, address, one), 31347ec681f3Smrg nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2))); 31357ec681f3Smrg} 31367ec681f3Smrg 31377ec681f3Smrgnir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info, 31387ec681f3Smrg unsigned bpe, struct gfx9_meta_equation *equation, 31397ec681f3Smrg nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height, 31407ec681f3Smrg nir_ssa_def *dcc_slice_size, 31417ec681f3Smrg nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 31427ec681f3Smrg nir_ssa_def *sample, nir_ssa_def *pipe_xor) 31437ec681f3Smrg{ 31447ec681f3Smrg if (info->chip_class >= GFX10) { 31457ec681f3Smrg unsigned bpp_log2 = util_logbase2(bpe); 31467ec681f3Smrg 31477ec681f3Smrg return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1, 31487ec681f3Smrg dcc_pitch, dcc_slice_size, 31497ec681f3Smrg x, y, z, pipe_xor, NULL); 31507ec681f3Smrg } else { 31517ec681f3Smrg return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch, 31527ec681f3Smrg dcc_height, x, y, z, 31537ec681f3Smrg sample, pipe_xor, NULL); 31547ec681f3Smrg } 31557ec681f3Smrg} 31567ec681f3Smrg 31577ec681f3Smrgnir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info, 31587ec681f3Smrg struct gfx9_meta_equation *equation, 31597ec681f3Smrg nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height, 31607ec681f3Smrg nir_ssa_def *cmask_slice_size, 31617ec681f3Smrg nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 31627ec681f3Smrg nir_ssa_def *pipe_xor, 31637ec681f3Smrg nir_ssa_def **bit_position) 31647ec681f3Smrg{ 31657ec681f3Smrg nir_ssa_def *zero = nir_imm_int(b, 0); 31667ec681f3Smrg 31677ec681f3Smrg if (info->chip_class >= GFX10) { 31687ec681f3Smrg return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1, 31697ec681f3Smrg cmask_pitch, cmask_slice_size, 31707ec681f3Smrg x, y, z, pipe_xor, bit_position); 31717ec681f3Smrg } else { 31727ec681f3Smrg return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch, 31737ec681f3Smrg cmask_height, x, y, z, zero, 31747ec681f3Smrg pipe_xor, bit_position); 31757ec681f3Smrg } 31767ec681f3Smrg} 31777ec681f3Smrg 31787ec681f3Smrgnir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info, 31797ec681f3Smrg struct gfx9_meta_equation *equation, 31807ec681f3Smrg nir_ssa_def *htile_pitch, 31817ec681f3Smrg nir_ssa_def *htile_slice_size, 31827ec681f3Smrg nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z, 31837ec681f3Smrg nir_ssa_def *pipe_xor) 31847ec681f3Smrg{ 31857ec681f3Smrg return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2, 31867ec681f3Smrg htile_pitch, htile_slice_size, 31877ec681f3Smrg x, y, z, pipe_xor, NULL); 318801e04c3fSmrg} 3189