/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#include "ac_surface.h" 29b8e80941Smrg#include "amd_family.h" 30b8e80941Smrg#include "addrlib/src/amdgpu_asic_addr.h" 31b8e80941Smrg#include "ac_gpu_info.h" 32b8e80941Smrg#include "util/macros.h" 33b8e80941Smrg#include "util/u_atomic.h" 34b8e80941Smrg#include "util/u_math.h" 35b8e80941Smrg 36b8e80941Smrg#include <errno.h> 37b8e80941Smrg#include <stdio.h> 38b8e80941Smrg#include <stdlib.h> 39b8e80941Smrg#include <amdgpu.h> 40b8e80941Smrg#include <amdgpu_drm.h> 41b8e80941Smrg 42b8e80941Smrg#include "addrlib/inc/addrinterface.h" 43b8e80941Smrg 44b8e80941Smrg#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND 45b8e80941Smrg#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A 46b8e80941Smrg#endif 47b8e80941Smrg 48b8e80941Smrg#ifndef CIASICIDGFXENGINE_ARCTICISLAND 49b8e80941Smrg#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D 50b8e80941Smrg#endif 51b8e80941Smrg 52b8e80941Smrgstatic unsigned get_first(unsigned x, unsigned y) 53b8e80941Smrg{ 54b8e80941Smrg return x; 55b8e80941Smrg} 56b8e80941Smrg 57b8e80941Smrgstatic void addrlib_family_rev_id(enum radeon_family family, 58b8e80941Smrg unsigned *addrlib_family, 59b8e80941Smrg unsigned *addrlib_revid) 60b8e80941Smrg{ 61b8e80941Smrg switch (family) { 62b8e80941Smrg case CHIP_TAHITI: 63b8e80941Smrg *addrlib_family = FAMILY_SI; 64b8e80941Smrg *addrlib_revid = get_first(AMDGPU_TAHITI_RANGE); 65b8e80941Smrg break; 66b8e80941Smrg case CHIP_PITCAIRN: 67b8e80941Smrg *addrlib_family = FAMILY_SI; 68b8e80941Smrg *addrlib_revid = get_first(AMDGPU_PITCAIRN_RANGE); 69b8e80941Smrg break; 70b8e80941Smrg case CHIP_VERDE: 71b8e80941Smrg *addrlib_family = FAMILY_SI; 72b8e80941Smrg *addrlib_revid = get_first(AMDGPU_CAPEVERDE_RANGE); 73b8e80941Smrg break; 74b8e80941Smrg case CHIP_OLAND: 75b8e80941Smrg *addrlib_family = FAMILY_SI; 76b8e80941Smrg *addrlib_revid = get_first(AMDGPU_OLAND_RANGE); 77b8e80941Smrg break; 78b8e80941Smrg case CHIP_HAINAN: 79b8e80941Smrg *addrlib_family = FAMILY_SI; 80b8e80941Smrg 
*addrlib_revid = get_first(AMDGPU_HAINAN_RANGE); 81b8e80941Smrg break; 82b8e80941Smrg case CHIP_BONAIRE: 83b8e80941Smrg *addrlib_family = FAMILY_CI; 84b8e80941Smrg *addrlib_revid = get_first(AMDGPU_BONAIRE_RANGE); 85b8e80941Smrg break; 86b8e80941Smrg case CHIP_KAVERI: 87b8e80941Smrg *addrlib_family = FAMILY_KV; 88b8e80941Smrg *addrlib_revid = get_first(AMDGPU_SPECTRE_RANGE); 89b8e80941Smrg break; 90b8e80941Smrg case CHIP_KABINI: 91b8e80941Smrg *addrlib_family = FAMILY_KV; 92b8e80941Smrg *addrlib_revid = get_first(AMDGPU_KALINDI_RANGE); 93b8e80941Smrg break; 94b8e80941Smrg case CHIP_HAWAII: 95b8e80941Smrg *addrlib_family = FAMILY_CI; 96b8e80941Smrg *addrlib_revid = get_first(AMDGPU_HAWAII_RANGE); 97b8e80941Smrg break; 98b8e80941Smrg case CHIP_MULLINS: 99b8e80941Smrg *addrlib_family = FAMILY_KV; 100b8e80941Smrg *addrlib_revid = get_first(AMDGPU_GODAVARI_RANGE); 101b8e80941Smrg break; 102b8e80941Smrg case CHIP_TONGA: 103b8e80941Smrg *addrlib_family = FAMILY_VI; 104b8e80941Smrg *addrlib_revid = get_first(AMDGPU_TONGA_RANGE); 105b8e80941Smrg break; 106b8e80941Smrg case CHIP_ICELAND: 107b8e80941Smrg *addrlib_family = FAMILY_VI; 108b8e80941Smrg *addrlib_revid = get_first(AMDGPU_ICELAND_RANGE); 109b8e80941Smrg break; 110b8e80941Smrg case CHIP_CARRIZO: 111b8e80941Smrg *addrlib_family = FAMILY_CZ; 112b8e80941Smrg *addrlib_revid = get_first(AMDGPU_CARRIZO_RANGE); 113b8e80941Smrg break; 114b8e80941Smrg case CHIP_STONEY: 115b8e80941Smrg *addrlib_family = FAMILY_CZ; 116b8e80941Smrg *addrlib_revid = get_first(AMDGPU_STONEY_RANGE); 117b8e80941Smrg break; 118b8e80941Smrg case CHIP_FIJI: 119b8e80941Smrg *addrlib_family = FAMILY_VI; 120b8e80941Smrg *addrlib_revid = get_first(AMDGPU_FIJI_RANGE); 121b8e80941Smrg break; 122b8e80941Smrg case CHIP_POLARIS10: 123b8e80941Smrg *addrlib_family = FAMILY_VI; 124b8e80941Smrg *addrlib_revid = get_first(AMDGPU_POLARIS10_RANGE); 125b8e80941Smrg break; 126b8e80941Smrg case CHIP_POLARIS11: 127b8e80941Smrg *addrlib_family = FAMILY_VI; 128b8e80941Smrg 
*addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE); 129b8e80941Smrg break; 130b8e80941Smrg case CHIP_POLARIS12: 131b8e80941Smrg *addrlib_family = FAMILY_VI; 132b8e80941Smrg *addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE); 133b8e80941Smrg break; 134b8e80941Smrg case CHIP_VEGAM: 135b8e80941Smrg *addrlib_family = FAMILY_VI; 136b8e80941Smrg *addrlib_revid = get_first(AMDGPU_VEGAM_RANGE); 137b8e80941Smrg break; 138b8e80941Smrg case CHIP_VEGA10: 139b8e80941Smrg *addrlib_family = FAMILY_AI; 140b8e80941Smrg *addrlib_revid = get_first(AMDGPU_VEGA10_RANGE); 141b8e80941Smrg break; 142b8e80941Smrg case CHIP_VEGA12: 143b8e80941Smrg *addrlib_family = FAMILY_AI; 144b8e80941Smrg *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE); 145b8e80941Smrg break; 146b8e80941Smrg case CHIP_VEGA20: 147b8e80941Smrg *addrlib_family = FAMILY_AI; 148b8e80941Smrg *addrlib_revid = get_first(AMDGPU_VEGA20_RANGE); 149b8e80941Smrg break; 150b8e80941Smrg case CHIP_RAVEN: 151b8e80941Smrg *addrlib_family = FAMILY_RV; 152b8e80941Smrg *addrlib_revid = get_first(AMDGPU_RAVEN_RANGE); 153b8e80941Smrg break; 154b8e80941Smrg case CHIP_RAVEN2: 155b8e80941Smrg *addrlib_family = FAMILY_RV; 156b8e80941Smrg *addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE); 157b8e80941Smrg break; 158b8e80941Smrg default: 159b8e80941Smrg fprintf(stderr, "amdgpu: Unknown family.\n"); 160b8e80941Smrg } 161b8e80941Smrg} 162b8e80941Smrg 163b8e80941Smrgstatic void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput) 164b8e80941Smrg{ 165b8e80941Smrg return malloc(pInput->sizeInBytes); 166b8e80941Smrg} 167b8e80941Smrg 168b8e80941Smrgstatic ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput) 169b8e80941Smrg{ 170b8e80941Smrg free(pInput->pVirtAddr); 171b8e80941Smrg return ADDR_OK; 172b8e80941Smrg} 173b8e80941Smrg 174b8e80941SmrgADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, 175b8e80941Smrg const struct amdgpu_gpu_info *amdinfo, 176b8e80941Smrg uint64_t *max_alignment) 177b8e80941Smrg{ 
178b8e80941Smrg ADDR_CREATE_INPUT addrCreateInput = {0}; 179b8e80941Smrg ADDR_CREATE_OUTPUT addrCreateOutput = {0}; 180b8e80941Smrg ADDR_REGISTER_VALUE regValue = {0}; 181b8e80941Smrg ADDR_CREATE_FLAGS createFlags = {{0}}; 182b8e80941Smrg ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; 183b8e80941Smrg ADDR_E_RETURNCODE addrRet; 184b8e80941Smrg 185b8e80941Smrg addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); 186b8e80941Smrg addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); 187b8e80941Smrg 188b8e80941Smrg regValue.gbAddrConfig = amdinfo->gb_addr_cfg; 189b8e80941Smrg createFlags.value = 0; 190b8e80941Smrg 191b8e80941Smrg addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision); 192b8e80941Smrg if (addrCreateInput.chipFamily == FAMILY_UNKNOWN) 193b8e80941Smrg return NULL; 194b8e80941Smrg 195b8e80941Smrg if (addrCreateInput.chipFamily >= FAMILY_AI) { 196b8e80941Smrg addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND; 197b8e80941Smrg regValue.blockVarSizeLog2 = 0; 198b8e80941Smrg } else { 199b8e80941Smrg regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3; 200b8e80941Smrg regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2; 201b8e80941Smrg 202b8e80941Smrg regValue.backendDisables = amdinfo->enabled_rb_pipes_mask; 203b8e80941Smrg regValue.pTileConfig = amdinfo->gb_tile_mode; 204b8e80941Smrg regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode); 205b8e80941Smrg if (addrCreateInput.chipFamily == FAMILY_SI) { 206b8e80941Smrg regValue.pMacroTileConfig = NULL; 207b8e80941Smrg regValue.noOfMacroEntries = 0; 208b8e80941Smrg } else { 209b8e80941Smrg regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode; 210b8e80941Smrg regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode); 211b8e80941Smrg } 212b8e80941Smrg 213b8e80941Smrg createFlags.useTileIndex = 1; 214b8e80941Smrg createFlags.useHtileSliceAlign = 1; 215b8e80941Smrg 216b8e80941Smrg addrCreateInput.chipEngine = 
CIASICIDGFXENGINE_SOUTHERNISLAND; 217b8e80941Smrg } 218b8e80941Smrg 219b8e80941Smrg addrCreateInput.callbacks.allocSysMem = allocSysMem; 220b8e80941Smrg addrCreateInput.callbacks.freeSysMem = freeSysMem; 221b8e80941Smrg addrCreateInput.callbacks.debugPrint = 0; 222b8e80941Smrg addrCreateInput.createFlags = createFlags; 223b8e80941Smrg addrCreateInput.regValue = regValue; 224b8e80941Smrg 225b8e80941Smrg addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput); 226b8e80941Smrg if (addrRet != ADDR_OK) 227b8e80941Smrg return NULL; 228b8e80941Smrg 229b8e80941Smrg if (max_alignment) { 230b8e80941Smrg addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput); 231b8e80941Smrg if (addrRet == ADDR_OK){ 232b8e80941Smrg *max_alignment = addrGetMaxAlignmentsOutput.baseAlign; 233b8e80941Smrg } 234b8e80941Smrg } 235b8e80941Smrg return addrCreateOutput.hLib; 236b8e80941Smrg} 237b8e80941Smrg 238b8e80941Smrgstatic int surf_config_sanity(const struct ac_surf_config *config, 239b8e80941Smrg unsigned flags) 240b8e80941Smrg{ 241b8e80941Smrg /* FMASK is allocated together with the color surface and can't be 242b8e80941Smrg * allocated separately. 243b8e80941Smrg */ 244b8e80941Smrg assert(!(flags & RADEON_SURF_FMASK)); 245b8e80941Smrg if (flags & RADEON_SURF_FMASK) 246b8e80941Smrg return -EINVAL; 247b8e80941Smrg 248b8e80941Smrg /* all dimension must be at least 1 ! 
*/ 249b8e80941Smrg if (!config->info.width || !config->info.height || !config->info.depth || 250b8e80941Smrg !config->info.array_size || !config->info.levels) 251b8e80941Smrg return -EINVAL; 252b8e80941Smrg 253b8e80941Smrg switch (config->info.samples) { 254b8e80941Smrg case 0: 255b8e80941Smrg case 1: 256b8e80941Smrg case 2: 257b8e80941Smrg case 4: 258b8e80941Smrg case 8: 259b8e80941Smrg break; 260b8e80941Smrg case 16: 261b8e80941Smrg if (flags & RADEON_SURF_Z_OR_SBUFFER) 262b8e80941Smrg return -EINVAL; 263b8e80941Smrg break; 264b8e80941Smrg default: 265b8e80941Smrg return -EINVAL; 266b8e80941Smrg } 267b8e80941Smrg 268b8e80941Smrg if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) { 269b8e80941Smrg switch (config->info.storage_samples) { 270b8e80941Smrg case 0: 271b8e80941Smrg case 1: 272b8e80941Smrg case 2: 273b8e80941Smrg case 4: 274b8e80941Smrg case 8: 275b8e80941Smrg break; 276b8e80941Smrg default: 277b8e80941Smrg return -EINVAL; 278b8e80941Smrg } 279b8e80941Smrg } 280b8e80941Smrg 281b8e80941Smrg if (config->is_3d && config->info.array_size > 1) 282b8e80941Smrg return -EINVAL; 283b8e80941Smrg if (config->is_cube && config->info.depth > 1) 284b8e80941Smrg return -EINVAL; 285b8e80941Smrg 286b8e80941Smrg return 0; 287b8e80941Smrg} 288b8e80941Smrg 289b8e80941Smrgstatic int gfx6_compute_level(ADDR_HANDLE addrlib, 290b8e80941Smrg const struct ac_surf_config *config, 291b8e80941Smrg struct radeon_surf *surf, bool is_stencil, 292b8e80941Smrg unsigned level, bool compressed, 293b8e80941Smrg ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, 294b8e80941Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, 295b8e80941Smrg ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, 296b8e80941Smrg ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut, 297b8e80941Smrg ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn, 298b8e80941Smrg ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut) 299b8e80941Smrg{ 300b8e80941Smrg struct legacy_surf_level *surf_level; 301b8e80941Smrg ADDR_E_RETURNCODE ret; 302b8e80941Smrg 303b8e80941Smrg 
AddrSurfInfoIn->mipLevel = level; 304b8e80941Smrg AddrSurfInfoIn->width = u_minify(config->info.width, level); 305b8e80941Smrg AddrSurfInfoIn->height = u_minify(config->info.height, level); 306b8e80941Smrg 307b8e80941Smrg /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics, 308b8e80941Smrg * because GFX9 needs linear alignment of 256 bytes. 309b8e80941Smrg */ 310b8e80941Smrg if (config->info.levels == 1 && 311b8e80941Smrg AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED && 312b8e80941Smrg AddrSurfInfoIn->bpp && 313b8e80941Smrg util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) { 314b8e80941Smrg unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8); 315b8e80941Smrg 316b8e80941Smrg AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment); 317b8e80941Smrg } 318b8e80941Smrg 319b8e80941Smrg if (config->is_3d) 320b8e80941Smrg AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level); 321b8e80941Smrg else if (config->is_cube) 322b8e80941Smrg AddrSurfInfoIn->numSlices = 6; 323b8e80941Smrg else 324b8e80941Smrg AddrSurfInfoIn->numSlices = config->info.array_size; 325b8e80941Smrg 326b8e80941Smrg if (level > 0) { 327b8e80941Smrg /* Set the base level pitch. This is needed for calculation 328b8e80941Smrg * of non-zero levels. */ 329b8e80941Smrg if (is_stencil) 330b8e80941Smrg AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x; 331b8e80941Smrg else 332b8e80941Smrg AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x; 333b8e80941Smrg 334b8e80941Smrg /* Convert blocks to pixels for compressed formats. */ 335b8e80941Smrg if (compressed) 336b8e80941Smrg AddrSurfInfoIn->basePitch *= surf->blk_w; 337b8e80941Smrg } 338b8e80941Smrg 339b8e80941Smrg ret = AddrComputeSurfaceInfo(addrlib, 340b8e80941Smrg AddrSurfInfoIn, 341b8e80941Smrg AddrSurfInfoOut); 342b8e80941Smrg if (ret != ADDR_OK) { 343b8e80941Smrg return ret; 344b8e80941Smrg } 345b8e80941Smrg 346b8e80941Smrg surf_level = is_stencil ? 
&surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level]; 347b8e80941Smrg surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign); 348b8e80941Smrg surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4; 349b8e80941Smrg surf_level->nblk_x = AddrSurfInfoOut->pitch; 350b8e80941Smrg surf_level->nblk_y = AddrSurfInfoOut->height; 351b8e80941Smrg 352b8e80941Smrg switch (AddrSurfInfoOut->tileMode) { 353b8e80941Smrg case ADDR_TM_LINEAR_ALIGNED: 354b8e80941Smrg surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 355b8e80941Smrg break; 356b8e80941Smrg case ADDR_TM_1D_TILED_THIN1: 357b8e80941Smrg surf_level->mode = RADEON_SURF_MODE_1D; 358b8e80941Smrg break; 359b8e80941Smrg case ADDR_TM_2D_TILED_THIN1: 360b8e80941Smrg surf_level->mode = RADEON_SURF_MODE_2D; 361b8e80941Smrg break; 362b8e80941Smrg default: 363b8e80941Smrg assert(0); 364b8e80941Smrg } 365b8e80941Smrg 366b8e80941Smrg if (is_stencil) 367b8e80941Smrg surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex; 368b8e80941Smrg else 369b8e80941Smrg surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex; 370b8e80941Smrg 371b8e80941Smrg surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize; 372b8e80941Smrg 373b8e80941Smrg /* Clear DCC fields at the beginning. */ 374b8e80941Smrg surf_level->dcc_offset = 0; 375b8e80941Smrg 376b8e80941Smrg /* The previous level's flag tells us if we can use DCC for this level. 
*/ 377b8e80941Smrg if (AddrSurfInfoIn->flags.dccCompatible && 378b8e80941Smrg (level == 0 || AddrDccOut->subLvlCompressible)) { 379b8e80941Smrg bool prev_level_clearable = level == 0 || 380b8e80941Smrg AddrDccOut->dccRamSizeAligned; 381b8e80941Smrg 382b8e80941Smrg AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; 383b8e80941Smrg AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; 384b8e80941Smrg AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; 385b8e80941Smrg AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; 386b8e80941Smrg AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 387b8e80941Smrg 388b8e80941Smrg ret = AddrComputeDccInfo(addrlib, 389b8e80941Smrg AddrDccIn, 390b8e80941Smrg AddrDccOut); 391b8e80941Smrg 392b8e80941Smrg if (ret == ADDR_OK) { 393b8e80941Smrg surf_level->dcc_offset = surf->dcc_size; 394b8e80941Smrg surf->num_dcc_levels = level + 1; 395b8e80941Smrg surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; 396b8e80941Smrg surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); 397b8e80941Smrg 398b8e80941Smrg /* If the DCC size of a subresource (1 mip level or 1 slice) 399b8e80941Smrg * is not aligned, the DCC memory layout is not contiguous for 400b8e80941Smrg * that subresource, which means we can't use fast clear. 401b8e80941Smrg * 402b8e80941Smrg * We only do fast clears for whole mipmap levels. If we did 403b8e80941Smrg * per-slice fast clears, the same restriction would apply. 404b8e80941Smrg * (i.e. only compute the slice size and see if it's aligned) 405b8e80941Smrg * 406b8e80941Smrg * The last level can be non-contiguous and still be clearable 407b8e80941Smrg * if it's interleaved with the next level that doesn't exist. 
408b8e80941Smrg */ 409b8e80941Smrg if (AddrDccOut->dccRamSizeAligned || 410b8e80941Smrg (prev_level_clearable && level == config->info.levels - 1)) 411b8e80941Smrg surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize; 412b8e80941Smrg else 413b8e80941Smrg surf_level->dcc_fast_clear_size = 0; 414b8e80941Smrg } 415b8e80941Smrg } 416b8e80941Smrg 417b8e80941Smrg /* TC-compatible HTILE. */ 418b8e80941Smrg if (!is_stencil && 419b8e80941Smrg AddrSurfInfoIn->flags.depth && 420b8e80941Smrg surf_level->mode == RADEON_SURF_MODE_2D && 421b8e80941Smrg level == 0) { 422b8e80941Smrg AddrHtileIn->flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible; 423b8e80941Smrg AddrHtileIn->pitch = AddrSurfInfoOut->pitch; 424b8e80941Smrg AddrHtileIn->height = AddrSurfInfoOut->height; 425b8e80941Smrg AddrHtileIn->numSlices = AddrSurfInfoOut->depth; 426b8e80941Smrg AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; 427b8e80941Smrg AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; 428b8e80941Smrg AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; 429b8e80941Smrg AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; 430b8e80941Smrg AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 431b8e80941Smrg 432b8e80941Smrg ret = AddrComputeHtileInfo(addrlib, 433b8e80941Smrg AddrHtileIn, 434b8e80941Smrg AddrHtileOut); 435b8e80941Smrg 436b8e80941Smrg if (ret == ADDR_OK) { 437b8e80941Smrg surf->htile_size = AddrHtileOut->htileBytes; 438b8e80941Smrg surf->htile_slice_size = AddrHtileOut->sliceSize; 439b8e80941Smrg surf->htile_alignment = AddrHtileOut->baseAlign; 440b8e80941Smrg } 441b8e80941Smrg } 442b8e80941Smrg 443b8e80941Smrg return 0; 444b8e80941Smrg} 445b8e80941Smrg 446b8e80941Smrg#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03) 447b8e80941Smrg#define V_009910_ADDR_SURF_THICK_MICRO_TILING 0x03 448b8e80941Smrg#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07) 449b8e80941Smrg 450b8e80941Smrgstatic void gfx6_set_micro_tile_mode(struct radeon_surf *surf, 
451b8e80941Smrg const struct radeon_info *info) 452b8e80941Smrg{ 453b8e80941Smrg uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; 454b8e80941Smrg 455b8e80941Smrg if (info->chip_class >= CIK) 456b8e80941Smrg surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); 457b8e80941Smrg else 458b8e80941Smrg surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); 459b8e80941Smrg} 460b8e80941Smrg 461b8e80941Smrgstatic unsigned cik_get_macro_tile_index(struct radeon_surf *surf) 462b8e80941Smrg{ 463b8e80941Smrg unsigned index, tileb; 464b8e80941Smrg 465b8e80941Smrg tileb = 8 * 8 * surf->bpe; 466b8e80941Smrg tileb = MIN2(surf->u.legacy.tile_split, tileb); 467b8e80941Smrg 468b8e80941Smrg for (index = 0; tileb > 64; index++) 469b8e80941Smrg tileb >>= 1; 470b8e80941Smrg 471b8e80941Smrg assert(index < 16); 472b8e80941Smrg return index; 473b8e80941Smrg} 474b8e80941Smrg 475b8e80941Smrgstatic bool get_display_flag(const struct ac_surf_config *config, 476b8e80941Smrg const struct radeon_surf *surf) 477b8e80941Smrg{ 478b8e80941Smrg unsigned num_channels = config->info.num_channels; 479b8e80941Smrg unsigned bpe = surf->bpe; 480b8e80941Smrg 481b8e80941Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 482b8e80941Smrg surf->flags & RADEON_SURF_SCANOUT && 483b8e80941Smrg config->info.samples <= 1 && 484b8e80941Smrg surf->blk_w <= 2 && surf->blk_h == 1) { 485b8e80941Smrg /* subsampled */ 486b8e80941Smrg if (surf->blk_w == 2 && surf->blk_h == 1) 487b8e80941Smrg return true; 488b8e80941Smrg 489b8e80941Smrg if (/* RGBA8 or RGBA16F */ 490b8e80941Smrg (bpe >= 4 && bpe <= 8 && num_channels == 4) || 491b8e80941Smrg /* R5G6B5 or R5G5B5A1 */ 492b8e80941Smrg (bpe == 2 && num_channels >= 3) || 493b8e80941Smrg /* C8 palette */ 494b8e80941Smrg (bpe == 1 && num_channels == 1)) 495b8e80941Smrg return true; 496b8e80941Smrg } 497b8e80941Smrg return false; 498b8e80941Smrg} 499b8e80941Smrg 500b8e80941Smrg/** 501b8e80941Smrg * This must be called after the first 
level is computed. 502b8e80941Smrg * 503b8e80941Smrg * Copy surface-global settings like pipe/bank config from level 0 surface 504b8e80941Smrg * computation, and compute tile swizzle. 505b8e80941Smrg */ 506b8e80941Smrgstatic int gfx6_surface_settings(ADDR_HANDLE addrlib, 507b8e80941Smrg const struct radeon_info *info, 508b8e80941Smrg const struct ac_surf_config *config, 509b8e80941Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio, 510b8e80941Smrg struct radeon_surf *surf) 511b8e80941Smrg{ 512b8e80941Smrg surf->surf_alignment = csio->baseAlign; 513b8e80941Smrg surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1; 514b8e80941Smrg gfx6_set_micro_tile_mode(surf, info); 515b8e80941Smrg 516b8e80941Smrg /* For 2D modes only. */ 517b8e80941Smrg if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) { 518b8e80941Smrg surf->u.legacy.bankw = csio->pTileInfo->bankWidth; 519b8e80941Smrg surf->u.legacy.bankh = csio->pTileInfo->bankHeight; 520b8e80941Smrg surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio; 521b8e80941Smrg surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes; 522b8e80941Smrg surf->u.legacy.num_banks = csio->pTileInfo->banks; 523b8e80941Smrg surf->u.legacy.macro_tile_index = csio->macroModeIndex; 524b8e80941Smrg } else { 525b8e80941Smrg surf->u.legacy.macro_tile_index = 0; 526b8e80941Smrg } 527b8e80941Smrg 528b8e80941Smrg /* Compute tile swizzle. 
*/ 529b8e80941Smrg /* TODO: fix tile swizzle with mipmapping for SI */ 530b8e80941Smrg if ((info->chip_class >= CIK || config->info.levels == 1) && 531b8e80941Smrg config->info.surf_index && 532b8e80941Smrg surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && 533b8e80941Smrg !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && 534b8e80941Smrg !get_display_flag(config, surf)) { 535b8e80941Smrg ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; 536b8e80941Smrg ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0}; 537b8e80941Smrg 538b8e80941Smrg AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 539b8e80941Smrg AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 540b8e80941Smrg 541b8e80941Smrg AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 542b8e80941Smrg AddrBaseSwizzleIn.tileIndex = csio->tileIndex; 543b8e80941Smrg AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex; 544b8e80941Smrg AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo; 545b8e80941Smrg AddrBaseSwizzleIn.tileMode = csio->tileMode; 546b8e80941Smrg 547b8e80941Smrg int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 548b8e80941Smrg &AddrBaseSwizzleOut); 549b8e80941Smrg if (r != ADDR_OK) 550b8e80941Smrg return r; 551b8e80941Smrg 552b8e80941Smrg assert(AddrBaseSwizzleOut.tileSwizzle <= 553b8e80941Smrg u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 554b8e80941Smrg surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle; 555b8e80941Smrg } 556b8e80941Smrg return 0; 557b8e80941Smrg} 558b8e80941Smrg 559b8e80941Smrgvoid ac_compute_cmask(const struct radeon_info *info, 560b8e80941Smrg const struct ac_surf_config *config, 561b8e80941Smrg struct radeon_surf *surf) 562b8e80941Smrg{ 563b8e80941Smrg unsigned pipe_interleave_bytes = info->pipe_interleave_bytes; 564b8e80941Smrg unsigned num_pipes = info->num_tile_pipes; 565b8e80941Smrg unsigned cl_width, cl_height; 566b8e80941Smrg 567b8e80941Smrg if (surf->flags & 
RADEON_SURF_Z_OR_SBUFFER) 568b8e80941Smrg return; 569b8e80941Smrg 570b8e80941Smrg assert(info->chip_class <= VI); 571b8e80941Smrg 572b8e80941Smrg switch (num_pipes) { 573b8e80941Smrg case 2: 574b8e80941Smrg cl_width = 32; 575b8e80941Smrg cl_height = 16; 576b8e80941Smrg break; 577b8e80941Smrg case 4: 578b8e80941Smrg cl_width = 32; 579b8e80941Smrg cl_height = 32; 580b8e80941Smrg break; 581b8e80941Smrg case 8: 582b8e80941Smrg cl_width = 64; 583b8e80941Smrg cl_height = 32; 584b8e80941Smrg break; 585b8e80941Smrg case 16: /* Hawaii */ 586b8e80941Smrg cl_width = 64; 587b8e80941Smrg cl_height = 64; 588b8e80941Smrg break; 589b8e80941Smrg default: 590b8e80941Smrg assert(0); 591b8e80941Smrg return; 592b8e80941Smrg } 593b8e80941Smrg 594b8e80941Smrg unsigned base_align = num_pipes * pipe_interleave_bytes; 595b8e80941Smrg 596b8e80941Smrg unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); 597b8e80941Smrg unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); 598b8e80941Smrg unsigned slice_elements = (width * height) / (8*8); 599b8e80941Smrg 600b8e80941Smrg /* Each element of CMASK is a nibble. */ 601b8e80941Smrg unsigned slice_bytes = slice_elements / 2; 602b8e80941Smrg 603b8e80941Smrg surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128); 604b8e80941Smrg if (surf->u.legacy.cmask_slice_tile_max) 605b8e80941Smrg surf->u.legacy.cmask_slice_tile_max -= 1; 606b8e80941Smrg 607b8e80941Smrg unsigned num_layers; 608b8e80941Smrg if (config->is_3d) 609b8e80941Smrg num_layers = config->info.depth; 610b8e80941Smrg else if (config->is_cube) 611b8e80941Smrg num_layers = 6; 612b8e80941Smrg else 613b8e80941Smrg num_layers = config->info.array_size; 614b8e80941Smrg 615b8e80941Smrg surf->cmask_alignment = MAX2(256, base_align); 616b8e80941Smrg surf->cmask_size = align(slice_bytes, base_align) * num_layers; 617b8e80941Smrg} 618b8e80941Smrg 619b8e80941Smrg/** 620b8e80941Smrg * Fill in the tiling information in \p surf based on the given surface config. 
621b8e80941Smrg * 622b8e80941Smrg * The following fields of \p surf must be initialized by the caller: 623b8e80941Smrg * blk_w, blk_h, bpe, flags. 624b8e80941Smrg */ 625b8e80941Smrgstatic int gfx6_compute_surface(ADDR_HANDLE addrlib, 626b8e80941Smrg const struct radeon_info *info, 627b8e80941Smrg const struct ac_surf_config *config, 628b8e80941Smrg enum radeon_surf_mode mode, 629b8e80941Smrg struct radeon_surf *surf) 630b8e80941Smrg{ 631b8e80941Smrg unsigned level; 632b8e80941Smrg bool compressed; 633b8e80941Smrg ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 634b8e80941Smrg ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; 635b8e80941Smrg ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; 636b8e80941Smrg ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; 637b8e80941Smrg ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; 638b8e80941Smrg ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; 639b8e80941Smrg ADDR_TILEINFO AddrTileInfoIn = {0}; 640b8e80941Smrg ADDR_TILEINFO AddrTileInfoOut = {0}; 641b8e80941Smrg int r; 642b8e80941Smrg 643b8e80941Smrg AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); 644b8e80941Smrg AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); 645b8e80941Smrg AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); 646b8e80941Smrg AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); 647b8e80941Smrg AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); 648b8e80941Smrg AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); 649b8e80941Smrg AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; 650b8e80941Smrg 651b8e80941Smrg compressed = surf->blk_w == 4 && surf->blk_h == 4; 652b8e80941Smrg 653b8e80941Smrg /* MSAA requires 2D tiling. */ 654b8e80941Smrg if (config->info.samples > 1) 655b8e80941Smrg mode = RADEON_SURF_MODE_2D; 656b8e80941Smrg 657b8e80941Smrg /* DB doesn't support linear layouts. 
*/ 658b8e80941Smrg if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && 659b8e80941Smrg mode < RADEON_SURF_MODE_1D) 660b8e80941Smrg mode = RADEON_SURF_MODE_1D; 661b8e80941Smrg 662b8e80941Smrg /* Set the requested tiling mode. */ 663b8e80941Smrg switch (mode) { 664b8e80941Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 665b8e80941Smrg AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; 666b8e80941Smrg break; 667b8e80941Smrg case RADEON_SURF_MODE_1D: 668b8e80941Smrg AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; 669b8e80941Smrg break; 670b8e80941Smrg case RADEON_SURF_MODE_2D: 671b8e80941Smrg AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; 672b8e80941Smrg break; 673b8e80941Smrg default: 674b8e80941Smrg assert(0); 675b8e80941Smrg } 676b8e80941Smrg 677b8e80941Smrg /* The format must be set correctly for the allocation of compressed 678b8e80941Smrg * textures to work. In other cases, setting the bpp is sufficient. 679b8e80941Smrg */ 680b8e80941Smrg if (compressed) { 681b8e80941Smrg switch (surf->bpe) { 682b8e80941Smrg case 8: 683b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_BC1; 684b8e80941Smrg break; 685b8e80941Smrg case 16: 686b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_BC3; 687b8e80941Smrg break; 688b8e80941Smrg default: 689b8e80941Smrg assert(0); 690b8e80941Smrg } 691b8e80941Smrg } 692b8e80941Smrg else { 693b8e80941Smrg AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; 694b8e80941Smrg } 695b8e80941Smrg 696b8e80941Smrg AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = 697b8e80941Smrg MAX2(1, config->info.samples); 698b8e80941Smrg AddrSurfInfoIn.tileIndex = -1; 699b8e80941Smrg 700b8e80941Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { 701b8e80941Smrg AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = 702b8e80941Smrg MAX2(1, config->info.storage_samples); 703b8e80941Smrg } 704b8e80941Smrg 705b8e80941Smrg /* Set the micro tile type. 
*/ 706b8e80941Smrg if (surf->flags & RADEON_SURF_SCANOUT) 707b8e80941Smrg AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; 708b8e80941Smrg else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 709b8e80941Smrg AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; 710b8e80941Smrg else 711b8e80941Smrg AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; 712b8e80941Smrg 713b8e80941Smrg AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 714b8e80941Smrg AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 715b8e80941Smrg AddrSurfInfoIn.flags.cube = config->is_cube; 716b8e80941Smrg AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 717b8e80941Smrg AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1; 718b8e80941Smrg AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; 719b8e80941Smrg 720b8e80941Smrg /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been 721b8e80941Smrg * requested, because TC-compatible HTILE requires 2D tiling. 722b8e80941Smrg */ 723b8e80941Smrg AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && 724b8e80941Smrg !AddrSurfInfoIn.flags.fmask && 725b8e80941Smrg config->info.samples <= 1 && 726b8e80941Smrg (surf->flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); 727b8e80941Smrg 728b8e80941Smrg /* DCC notes: 729b8e80941Smrg * - If we add MSAA support, keep in mind that CB can't decompress 8bpp 730b8e80941Smrg * with samples >= 4. 731b8e80941Smrg * - Mipmapped array textures have low performance (discovered by a closed 732b8e80941Smrg * driver team). 
733b8e80941Smrg */ 734b8e80941Smrg AddrSurfInfoIn.flags.dccCompatible = 735b8e80941Smrg info->chip_class >= VI && 736b8e80941Smrg !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 737b8e80941Smrg !(surf->flags & RADEON_SURF_DISABLE_DCC) && 738b8e80941Smrg !compressed && 739b8e80941Smrg ((config->info.array_size == 1 && config->info.depth == 1) || 740b8e80941Smrg config->info.levels == 1); 741b8e80941Smrg 742b8e80941Smrg AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; 743b8e80941Smrg AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 744b8e80941Smrg 745b8e80941Smrg /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit) 746b8e80941Smrg * for Z and stencil. This can cause a number of problems which we work 747b8e80941Smrg * around here: 748b8e80941Smrg * 749b8e80941Smrg * - a depth part that is incompatible with mipmapped texturing 750b8e80941Smrg * - at least on Stoney, entirely incompatible Z/S aspects (e.g. 751b8e80941Smrg * incorrect tiling applied to the stencil part, stencil buffer 752b8e80941Smrg * memory accesses that go out of bounds) even without mipmapping 753b8e80941Smrg * 754b8e80941Smrg * Some piglit tests that are prone to different types of related 755b8e80941Smrg * failures: 756b8e80941Smrg * ./bin/ext_framebuffer_multisample-upsample 2 stencil 757b8e80941Smrg * ./bin/framebuffer-blit-levels {draw,read} stencil 758b8e80941Smrg * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample} 759b8e80941Smrg * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw} 760b8e80941Smrg * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8 761b8e80941Smrg */ 762b8e80941Smrg int stencil_tile_idx = -1; 763b8e80941Smrg 764b8e80941Smrg if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil && 765b8e80941Smrg (config->info.levels > 1 || info->family == CHIP_STONEY)) { 766b8e80941Smrg /* Compute stencilTileIdx that is 
compatible with the (depth) 767b8e80941Smrg * tileIdx. This degrades the depth surface if necessary to 768b8e80941Smrg * ensure that a matching stencilTileIdx exists. */ 769b8e80941Smrg AddrSurfInfoIn.flags.matchStencilTileCfg = 1; 770b8e80941Smrg 771b8e80941Smrg /* Keep the depth mip-tail compatible with texturing. */ 772b8e80941Smrg AddrSurfInfoIn.flags.noStencil = 1; 773b8e80941Smrg } 774b8e80941Smrg 775b8e80941Smrg /* Set preferred macrotile parameters. This is usually required 776b8e80941Smrg * for shared resources. This is for 2D tiling only. */ 777b8e80941Smrg if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && 778b8e80941Smrg surf->u.legacy.bankw && surf->u.legacy.bankh && 779b8e80941Smrg surf->u.legacy.mtilea && surf->u.legacy.tile_split) { 780b8e80941Smrg /* If any of these parameters are incorrect, the calculation 781b8e80941Smrg * will fail. */ 782b8e80941Smrg AddrTileInfoIn.banks = surf->u.legacy.num_banks; 783b8e80941Smrg AddrTileInfoIn.bankWidth = surf->u.legacy.bankw; 784b8e80941Smrg AddrTileInfoIn.bankHeight = surf->u.legacy.bankh; 785b8e80941Smrg AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea; 786b8e80941Smrg AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split; 787b8e80941Smrg AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */ 788b8e80941Smrg AddrSurfInfoIn.flags.opt4Space = 0; 789b8e80941Smrg AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; 790b8e80941Smrg 791b8e80941Smrg /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set 792b8e80941Smrg * the tile index, because we are expected to know it if 793b8e80941Smrg * we know the other parameters. 794b8e80941Smrg * 795b8e80941Smrg * This is something that can easily be fixed in Addrlib. 796b8e80941Smrg * For now, just figure it out here. 797b8e80941Smrg * Note that only 2D_TILE_THIN1 is handled here. 
798b8e80941Smrg */ 799b8e80941Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 800b8e80941Smrg assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); 801b8e80941Smrg 802b8e80941Smrg if (info->chip_class == SI) { 803b8e80941Smrg if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { 804b8e80941Smrg if (surf->bpe == 2) 805b8e80941Smrg AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ 806b8e80941Smrg else 807b8e80941Smrg AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ 808b8e80941Smrg } else { 809b8e80941Smrg if (surf->bpe == 1) 810b8e80941Smrg AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ 811b8e80941Smrg else if (surf->bpe == 2) 812b8e80941Smrg AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ 813b8e80941Smrg else if (surf->bpe == 4) 814b8e80941Smrg AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ 815b8e80941Smrg else 816b8e80941Smrg AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ 817b8e80941Smrg } 818b8e80941Smrg } else { 819b8e80941Smrg /* CIK - VI */ 820b8e80941Smrg if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) 821b8e80941Smrg AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ 822b8e80941Smrg else 823b8e80941Smrg AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ 824b8e80941Smrg 825b8e80941Smrg /* Addrlib doesn't set this if tileIndex is forced like above. */ 826b8e80941Smrg AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf); 827b8e80941Smrg } 828b8e80941Smrg } 829b8e80941Smrg 830b8e80941Smrg surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 831b8e80941Smrg surf->num_dcc_levels = 0; 832b8e80941Smrg surf->surf_size = 0; 833b8e80941Smrg surf->dcc_size = 0; 834b8e80941Smrg surf->dcc_alignment = 1; 835b8e80941Smrg surf->htile_size = 0; 836b8e80941Smrg surf->htile_slice_size = 0; 837b8e80941Smrg surf->htile_alignment = 1; 838b8e80941Smrg 839b8e80941Smrg const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) && 840b8e80941Smrg !(surf->flags & RADEON_SURF_ZBUFFER); 841b8e80941Smrg 842b8e80941Smrg /* Calculate texture layout information. 
*/ 843b8e80941Smrg if (!only_stencil) { 844b8e80941Smrg for (level = 0; level < config->info.levels; level++) { 845b8e80941Smrg r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, 846b8e80941Smrg &AddrSurfInfoIn, &AddrSurfInfoOut, 847b8e80941Smrg &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut); 848b8e80941Smrg if (r) 849b8e80941Smrg return r; 850b8e80941Smrg 851b8e80941Smrg if (level > 0) 852b8e80941Smrg continue; 853b8e80941Smrg 854b8e80941Smrg /* Check that we actually got a TC-compatible HTILE if 855b8e80941Smrg * we requested it (only for level 0, since we're not 856b8e80941Smrg * supporting HTILE on higher mip levels anyway). */ 857b8e80941Smrg assert(AddrSurfInfoOut.tcCompatible || 858b8e80941Smrg !AddrSurfInfoIn.flags.tcCompatible || 859b8e80941Smrg AddrSurfInfoIn.flags.matchStencilTileCfg); 860b8e80941Smrg 861b8e80941Smrg if (AddrSurfInfoIn.flags.matchStencilTileCfg) { 862b8e80941Smrg if (!AddrSurfInfoOut.tcCompatible) { 863b8e80941Smrg AddrSurfInfoIn.flags.tcCompatible = 0; 864b8e80941Smrg surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 865b8e80941Smrg } 866b8e80941Smrg 867b8e80941Smrg AddrSurfInfoIn.flags.matchStencilTileCfg = 0; 868b8e80941Smrg AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex; 869b8e80941Smrg stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx; 870b8e80941Smrg 871b8e80941Smrg assert(stencil_tile_idx >= 0); 872b8e80941Smrg } 873b8e80941Smrg 874b8e80941Smrg r = gfx6_surface_settings(addrlib, info, config, 875b8e80941Smrg &AddrSurfInfoOut, surf); 876b8e80941Smrg if (r) 877b8e80941Smrg return r; 878b8e80941Smrg } 879b8e80941Smrg } 880b8e80941Smrg 881b8e80941Smrg /* Calculate texture layout information for stencil. 
*/ 882b8e80941Smrg if (surf->flags & RADEON_SURF_SBUFFER) { 883b8e80941Smrg AddrSurfInfoIn.tileIndex = stencil_tile_idx; 884b8e80941Smrg AddrSurfInfoIn.bpp = 8; 885b8e80941Smrg AddrSurfInfoIn.flags.depth = 0; 886b8e80941Smrg AddrSurfInfoIn.flags.stencil = 1; 887b8e80941Smrg AddrSurfInfoIn.flags.tcCompatible = 0; 888b8e80941Smrg /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ 889b8e80941Smrg AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split; 890b8e80941Smrg 891b8e80941Smrg for (level = 0; level < config->info.levels; level++) { 892b8e80941Smrg r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, 893b8e80941Smrg &AddrSurfInfoIn, &AddrSurfInfoOut, 894b8e80941Smrg &AddrDccIn, &AddrDccOut, 895b8e80941Smrg NULL, NULL); 896b8e80941Smrg if (r) 897b8e80941Smrg return r; 898b8e80941Smrg 899b8e80941Smrg /* DB uses the depth pitch for both stencil and depth. */ 900b8e80941Smrg if (!only_stencil) { 901b8e80941Smrg if (surf->u.legacy.stencil_level[level].nblk_x != 902b8e80941Smrg surf->u.legacy.level[level].nblk_x) 903b8e80941Smrg surf->u.legacy.stencil_adjusted = true; 904b8e80941Smrg } else { 905b8e80941Smrg surf->u.legacy.level[level].nblk_x = 906b8e80941Smrg surf->u.legacy.stencil_level[level].nblk_x; 907b8e80941Smrg } 908b8e80941Smrg 909b8e80941Smrg if (level == 0) { 910b8e80941Smrg if (only_stencil) { 911b8e80941Smrg r = gfx6_surface_settings(addrlib, info, config, 912b8e80941Smrg &AddrSurfInfoOut, surf); 913b8e80941Smrg if (r) 914b8e80941Smrg return r; 915b8e80941Smrg } 916b8e80941Smrg 917b8e80941Smrg /* For 2D modes only. */ 918b8e80941Smrg if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { 919b8e80941Smrg surf->u.legacy.stencil_tile_split = 920b8e80941Smrg AddrSurfInfoOut.pTileInfo->tileSplitBytes; 921b8e80941Smrg } 922b8e80941Smrg } 923b8e80941Smrg } 924b8e80941Smrg } 925b8e80941Smrg 926b8e80941Smrg /* Compute FMASK. 
*/ 927b8e80941Smrg if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color) { 928b8e80941Smrg ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0}; 929b8e80941Smrg ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 930b8e80941Smrg ADDR_TILEINFO fmask_tile_info = {}; 931b8e80941Smrg 932b8e80941Smrg fin.size = sizeof(fin); 933b8e80941Smrg fout.size = sizeof(fout); 934b8e80941Smrg 935b8e80941Smrg fin.tileMode = AddrSurfInfoOut.tileMode; 936b8e80941Smrg fin.pitch = AddrSurfInfoOut.pitch; 937b8e80941Smrg fin.height = config->info.height; 938b8e80941Smrg fin.numSlices = AddrSurfInfoIn.numSlices; 939b8e80941Smrg fin.numSamples = AddrSurfInfoIn.numSamples; 940b8e80941Smrg fin.numFrags = AddrSurfInfoIn.numFrags; 941b8e80941Smrg fin.tileIndex = -1; 942b8e80941Smrg fout.pTileInfo = &fmask_tile_info; 943b8e80941Smrg 944b8e80941Smrg r = AddrComputeFmaskInfo(addrlib, &fin, &fout); 945b8e80941Smrg if (r) 946b8e80941Smrg return r; 947b8e80941Smrg 948b8e80941Smrg surf->fmask_size = fout.fmaskBytes; 949b8e80941Smrg surf->fmask_alignment = fout.baseAlign; 950b8e80941Smrg surf->fmask_tile_swizzle = 0; 951b8e80941Smrg 952b8e80941Smrg surf->u.legacy.fmask.slice_tile_max = 953b8e80941Smrg (fout.pitch * fout.height) / 64; 954b8e80941Smrg if (surf->u.legacy.fmask.slice_tile_max) 955b8e80941Smrg surf->u.legacy.fmask.slice_tile_max -= 1; 956b8e80941Smrg 957b8e80941Smrg surf->u.legacy.fmask.tiling_index = fout.tileIndex; 958b8e80941Smrg surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight; 959b8e80941Smrg surf->u.legacy.fmask.pitch_in_pixels = fout.pitch; 960b8e80941Smrg 961b8e80941Smrg /* Compute tile swizzle for FMASK. 
*/ 962b8e80941Smrg if (config->info.fmask_surf_index && 963b8e80941Smrg !(surf->flags & RADEON_SURF_SHAREABLE)) { 964b8e80941Smrg ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0}; 965b8e80941Smrg ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0}; 966b8e80941Smrg 967b8e80941Smrg xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 968b8e80941Smrg xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 969b8e80941Smrg 970b8e80941Smrg /* This counter starts from 1 instead of 0. */ 971b8e80941Smrg xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 972b8e80941Smrg xin.tileIndex = fout.tileIndex; 973b8e80941Smrg xin.macroModeIndex = fout.macroModeIndex; 974b8e80941Smrg xin.pTileInfo = fout.pTileInfo; 975b8e80941Smrg xin.tileMode = fin.tileMode; 976b8e80941Smrg 977b8e80941Smrg int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout); 978b8e80941Smrg if (r != ADDR_OK) 979b8e80941Smrg return r; 980b8e80941Smrg 981b8e80941Smrg assert(xout.tileSwizzle <= 982b8e80941Smrg u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 983b8e80941Smrg surf->fmask_tile_swizzle = xout.tileSwizzle; 984b8e80941Smrg } 985b8e80941Smrg } 986b8e80941Smrg 987b8e80941Smrg /* Recalculate the whole DCC miptree size including disabled levels. 988b8e80941Smrg * This is what addrlib does, but calling addrlib would be a lot more 989b8e80941Smrg * complicated. 990b8e80941Smrg */ 991b8e80941Smrg if (surf->dcc_size && config->info.levels > 1) { 992b8e80941Smrg /* The smallest miplevels that are never compressed by DCC 993b8e80941Smrg * still read the DCC buffer via TC if the base level uses DCC, 994b8e80941Smrg * and for some reason the DCC buffer needs to be larger if 995b8e80941Smrg * the miptree uses non-zero tile_swizzle. Otherwise there are 996b8e80941Smrg * VM faults. 997b8e80941Smrg * 998b8e80941Smrg * "dcc_alignment * 4" was determined by trial and error. 
999b8e80941Smrg */ 1000b8e80941Smrg surf->dcc_size = align64(surf->surf_size >> 8, 1001b8e80941Smrg surf->dcc_alignment * 4); 1002b8e80941Smrg } 1003b8e80941Smrg 1004b8e80941Smrg /* Make sure HTILE covers the whole miptree, because the shader reads 1005b8e80941Smrg * TC-compatible HTILE even for levels where it's disabled by DB. 1006b8e80941Smrg */ 1007b8e80941Smrg if (surf->htile_size && config->info.levels > 1 && 1008b8e80941Smrg surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) { 1009b8e80941Smrg /* MSAA can't occur with levels > 1, so ignore the sample count. */ 1010b8e80941Smrg const unsigned total_pixels = surf->surf_size / surf->bpe; 1011b8e80941Smrg const unsigned htile_block_size = 8 * 8; 1012b8e80941Smrg const unsigned htile_element_size = 4; 1013b8e80941Smrg 1014b8e80941Smrg surf->htile_size = (total_pixels / htile_block_size) * 1015b8e80941Smrg htile_element_size; 1016b8e80941Smrg surf->htile_size = align(surf->htile_size, surf->htile_alignment); 1017b8e80941Smrg } 1018b8e80941Smrg 1019b8e80941Smrg surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED; 1020b8e80941Smrg surf->is_displayable = surf->is_linear || 1021b8e80941Smrg surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || 1022b8e80941Smrg surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED; 1023b8e80941Smrg 1024b8e80941Smrg /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 1025b8e80941Smrg * used at the same time. This case is not currently expected to occur 1026b8e80941Smrg * because we don't use rotated. Enforce this restriction on all chips 1027b8e80941Smrg * to facilitate testing. 
1028b8e80941Smrg */ 1029b8e80941Smrg if (surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED) { 1030b8e80941Smrg assert(!"rotate micro tile mode is unsupported"); 1031b8e80941Smrg return ADDR_ERROR; 1032b8e80941Smrg } 1033b8e80941Smrg 1034b8e80941Smrg ac_compute_cmask(info, config, surf); 1035b8e80941Smrg return 0; 1036b8e80941Smrg} 1037b8e80941Smrg 1038b8e80941Smrg/* This is only called when expecting a tiled layout. */ 1039b8e80941Smrgstatic int 1040b8e80941Smrggfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, 1041b8e80941Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, 1042b8e80941Smrg bool is_fmask, AddrSwizzleMode *swizzle_mode) 1043b8e80941Smrg{ 1044b8e80941Smrg ADDR_E_RETURNCODE ret; 1045b8e80941Smrg ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0}; 1046b8e80941Smrg ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0}; 1047b8e80941Smrg 1048b8e80941Smrg sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT); 1049b8e80941Smrg sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT); 1050b8e80941Smrg 1051b8e80941Smrg sin.flags = in->flags; 1052b8e80941Smrg sin.resourceType = in->resourceType; 1053b8e80941Smrg sin.format = in->format; 1054b8e80941Smrg sin.resourceLoction = ADDR_RSRC_LOC_INVIS; 1055b8e80941Smrg /* TODO: We could allow some of these: */ 1056b8e80941Smrg sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */ 1057b8e80941Smrg sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */ 1058b8e80941Smrg sin.forbiddenBlock.linear = 1; /* don't allow linear swizzle modes */ 1059b8e80941Smrg sin.bpp = in->bpp; 1060b8e80941Smrg sin.width = in->width; 1061b8e80941Smrg sin.height = in->height; 1062b8e80941Smrg sin.numSlices = in->numSlices; 1063b8e80941Smrg sin.numMipLevels = in->numMipLevels; 1064b8e80941Smrg sin.numSamples = in->numSamples; 1065b8e80941Smrg sin.numFrags = in->numFrags; 1066b8e80941Smrg 1067b8e80941Smrg if (is_fmask) { 1068b8e80941Smrg sin.flags.display = 0; 1069b8e80941Smrg sin.flags.color = 0; 
1070b8e80941Smrg sin.flags.fmask = 1; 1071b8e80941Smrg } 1072b8e80941Smrg 1073b8e80941Smrg ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout); 1074b8e80941Smrg if (ret != ADDR_OK) 1075b8e80941Smrg return ret; 1076b8e80941Smrg 1077b8e80941Smrg *swizzle_mode = sout.swizzleMode; 1078b8e80941Smrg return 0; 1079b8e80941Smrg} 1080b8e80941Smrg 1081b8e80941Smrgstatic int gfx9_compute_miptree(ADDR_HANDLE addrlib, 1082b8e80941Smrg const struct radeon_info *info, 1083b8e80941Smrg const struct ac_surf_config *config, 1084b8e80941Smrg struct radeon_surf *surf, bool compressed, 1085b8e80941Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) 1086b8e80941Smrg{ 1087b8e80941Smrg ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {}; 1088b8e80941Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; 1089b8e80941Smrg ADDR_E_RETURNCODE ret; 1090b8e80941Smrg 1091b8e80941Smrg out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); 1092b8e80941Smrg out.pMipInfo = mip_info; 1093b8e80941Smrg 1094b8e80941Smrg ret = Addr2ComputeSurfaceInfo(addrlib, in, &out); 1095b8e80941Smrg if (ret != ADDR_OK) 1096b8e80941Smrg return ret; 1097b8e80941Smrg 1098b8e80941Smrg if (in->flags.stencil) { 1099b8e80941Smrg surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode; 1100b8e80941Smrg surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : 1101b8e80941Smrg out.mipChainPitch - 1; 1102b8e80941Smrg surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign); 1103b8e80941Smrg surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign); 1104b8e80941Smrg surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize; 1105b8e80941Smrg return 0; 1106b8e80941Smrg } 1107b8e80941Smrg 1108b8e80941Smrg surf->u.gfx9.surf.swizzle_mode = in->swizzleMode; 1109b8e80941Smrg surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : 1110b8e80941Smrg out.mipChainPitch - 1; 1111b8e80941Smrg 1112b8e80941Smrg /* CMASK fast clear uses these even if FMASK isn't allocated. 
1113b8e80941Smrg * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4. 1114b8e80941Smrg */ 1115b8e80941Smrg surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3; 1116b8e80941Smrg surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch; 1117b8e80941Smrg 1118b8e80941Smrg surf->u.gfx9.surf_slice_size = out.sliceSize; 1119b8e80941Smrg surf->u.gfx9.surf_pitch = out.pitch; 1120b8e80941Smrg surf->u.gfx9.surf_height = out.height; 1121b8e80941Smrg surf->surf_size = out.surfSize; 1122b8e80941Smrg surf->surf_alignment = out.baseAlign; 1123b8e80941Smrg 1124b8e80941Smrg if (in->swizzleMode == ADDR_SW_LINEAR) { 1125b8e80941Smrg for (unsigned i = 0; i < in->numMipLevels; i++) 1126b8e80941Smrg surf->u.gfx9.offset[i] = mip_info[i].offset; 1127b8e80941Smrg } 1128b8e80941Smrg 1129b8e80941Smrg if (in->flags.depth) { 1130b8e80941Smrg assert(in->swizzleMode != ADDR_SW_LINEAR); 1131b8e80941Smrg 1132b8e80941Smrg /* HTILE */ 1133b8e80941Smrg ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 1134b8e80941Smrg ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 1135b8e80941Smrg 1136b8e80941Smrg hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); 1137b8e80941Smrg hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); 1138b8e80941Smrg 1139b8e80941Smrg hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1140b8e80941Smrg hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned; 1141b8e80941Smrg hin.depthFlags = in->flags; 1142b8e80941Smrg hin.swizzleMode = in->swizzleMode; 1143b8e80941Smrg hin.unalignedWidth = in->width; 1144b8e80941Smrg hin.unalignedHeight = in->height; 1145b8e80941Smrg hin.numSlices = in->numSlices; 1146b8e80941Smrg hin.numMipLevels = in->numMipLevels; 1147b8e80941Smrg hin.firstMipIdInTail = out.firstMipIdInTail; 1148b8e80941Smrg 1149b8e80941Smrg ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout); 1150b8e80941Smrg if (ret != ADDR_OK) 1151b8e80941Smrg return ret; 1152b8e80941Smrg 1153b8e80941Smrg surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned; 
1154b8e80941Smrg surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned; 1155b8e80941Smrg surf->htile_size = hout.htileBytes; 1156b8e80941Smrg surf->htile_slice_size = hout.sliceSize; 1157b8e80941Smrg surf->htile_alignment = hout.baseAlign; 1158b8e80941Smrg } else { 1159b8e80941Smrg /* Compute tile swizzle for the color surface. 1160b8e80941Smrg * All *_X and *_T modes can use the swizzle. 1161b8e80941Smrg */ 1162b8e80941Smrg if (config->info.surf_index && 1163b8e80941Smrg in->swizzleMode >= ADDR_SW_64KB_Z_T && 1164b8e80941Smrg !out.mipChainInTail && 1165b8e80941Smrg !(surf->flags & RADEON_SURF_SHAREABLE) && 1166b8e80941Smrg !in->flags.display) { 1167b8e80941Smrg ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1168b8e80941Smrg ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1169b8e80941Smrg 1170b8e80941Smrg xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1171b8e80941Smrg xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1172b8e80941Smrg 1173b8e80941Smrg xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 1174b8e80941Smrg xin.flags = in->flags; 1175b8e80941Smrg xin.swizzleMode = in->swizzleMode; 1176b8e80941Smrg xin.resourceType = in->resourceType; 1177b8e80941Smrg xin.format = in->format; 1178b8e80941Smrg xin.numSamples = in->numSamples; 1179b8e80941Smrg xin.numFrags = in->numFrags; 1180b8e80941Smrg 1181b8e80941Smrg ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 1182b8e80941Smrg if (ret != ADDR_OK) 1183b8e80941Smrg return ret; 1184b8e80941Smrg 1185b8e80941Smrg assert(xout.pipeBankXor <= 1186b8e80941Smrg u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 1187b8e80941Smrg surf->tile_swizzle = xout.pipeBankXor; 1188b8e80941Smrg } 1189b8e80941Smrg 1190b8e80941Smrg /* DCC */ 1191b8e80941Smrg if (!(surf->flags & RADEON_SURF_DISABLE_DCC) && 1192b8e80941Smrg !compressed && 1193b8e80941Smrg in->swizzleMode != ADDR_SW_LINEAR) { 1194b8e80941Smrg ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; 1195b8e80941Smrg ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; 
1196b8e80941Smrg ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {}; 1197b8e80941Smrg 1198b8e80941Smrg din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); 1199b8e80941Smrg dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); 1200b8e80941Smrg dout.pMipInfo = meta_mip_info; 1201b8e80941Smrg 1202b8e80941Smrg din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1203b8e80941Smrg din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; 1204b8e80941Smrg din.colorFlags = in->flags; 1205b8e80941Smrg din.resourceType = in->resourceType; 1206b8e80941Smrg din.swizzleMode = in->swizzleMode; 1207b8e80941Smrg din.bpp = in->bpp; 1208b8e80941Smrg din.unalignedWidth = in->width; 1209b8e80941Smrg din.unalignedHeight = in->height; 1210b8e80941Smrg din.numSlices = in->numSlices; 1211b8e80941Smrg din.numFrags = in->numFrags; 1212b8e80941Smrg din.numMipLevels = in->numMipLevels; 1213b8e80941Smrg din.dataSurfaceSize = out.surfSize; 1214b8e80941Smrg din.firstMipIdInTail = out.firstMipIdInTail; 1215b8e80941Smrg 1216b8e80941Smrg ret = Addr2ComputeDccInfo(addrlib, &din, &dout); 1217b8e80941Smrg if (ret != ADDR_OK) 1218b8e80941Smrg return ret; 1219b8e80941Smrg 1220b8e80941Smrg surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned; 1221b8e80941Smrg surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; 1222b8e80941Smrg surf->dcc_size = dout.dccRamSize; 1223b8e80941Smrg surf->dcc_alignment = dout.dccRamBaseAlign; 1224b8e80941Smrg surf->num_dcc_levels = in->numMipLevels; 1225b8e80941Smrg 1226b8e80941Smrg /* Disable DCC for levels that are in the mip tail. 1227b8e80941Smrg * 1228b8e80941Smrg * There are two issues that this is intended to 1229b8e80941Smrg * address: 1230b8e80941Smrg * 1231b8e80941Smrg * 1. Multiple mip levels may share a cache line. This 1232b8e80941Smrg * can lead to corruption when switching between 1233b8e80941Smrg * rendering to different mip levels because the 1234b8e80941Smrg * RBs don't maintain coherency. 1235b8e80941Smrg * 1236b8e80941Smrg * 2. 
Texturing with metadata after rendering sometimes 1237b8e80941Smrg * fails with corruption, probably for a similar 1238b8e80941Smrg * reason. 1239b8e80941Smrg * 1240b8e80941Smrg * Working around these issues for all levels in the 1241b8e80941Smrg * mip tail may be overly conservative, but it's what 1242b8e80941Smrg * Vulkan does. 1243b8e80941Smrg * 1244b8e80941Smrg * Alternative solutions that also work but are worse: 1245b8e80941Smrg * - Disable DCC entirely. 1246b8e80941Smrg * - Flush TC L2 after rendering. 1247b8e80941Smrg */ 1248b8e80941Smrg for (unsigned i = 0; i < in->numMipLevels; i++) { 1249b8e80941Smrg if (meta_mip_info[i].inMiptail) { 1250b8e80941Smrg surf->num_dcc_levels = i; 1251b8e80941Smrg break; 1252b8e80941Smrg } 1253b8e80941Smrg } 1254b8e80941Smrg 1255b8e80941Smrg if (!surf->num_dcc_levels) 1256b8e80941Smrg surf->dcc_size = 0; 1257b8e80941Smrg 1258b8e80941Smrg surf->u.gfx9.display_dcc_size = surf->dcc_size; 1259b8e80941Smrg surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment; 1260b8e80941Smrg surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; 1261b8e80941Smrg 1262b8e80941Smrg /* Compute displayable DCC. */ 1263b8e80941Smrg if (in->flags.display && 1264b8e80941Smrg surf->num_dcc_levels && 1265b8e80941Smrg info->use_display_dcc_with_retile_blit) { 1266b8e80941Smrg /* Compute displayable DCC info. 
*/ 1267b8e80941Smrg din.dccKeyFlags.pipeAligned = 0; 1268b8e80941Smrg din.dccKeyFlags.rbAligned = 0; 1269b8e80941Smrg 1270b8e80941Smrg assert(din.numSlices == 1); 1271b8e80941Smrg assert(din.numMipLevels == 1); 1272b8e80941Smrg assert(din.numFrags == 1); 1273b8e80941Smrg assert(surf->tile_swizzle == 0); 1274b8e80941Smrg assert(surf->u.gfx9.dcc.pipe_aligned || 1275b8e80941Smrg surf->u.gfx9.dcc.rb_aligned); 1276b8e80941Smrg 1277b8e80941Smrg ret = Addr2ComputeDccInfo(addrlib, &din, &dout); 1278b8e80941Smrg if (ret != ADDR_OK) 1279b8e80941Smrg return ret; 1280b8e80941Smrg 1281b8e80941Smrg surf->u.gfx9.display_dcc_size = dout.dccRamSize; 1282b8e80941Smrg surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign; 1283b8e80941Smrg surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; 1284b8e80941Smrg assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size); 1285b8e80941Smrg 1286b8e80941Smrg /* Compute address mapping from non-displayable to displayable DCC. */ 1287b8e80941Smrg ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {}; 1288b8e80941Smrg addrin.size = sizeof(addrin); 1289b8e80941Smrg addrin.colorFlags.color = 1; 1290b8e80941Smrg addrin.swizzleMode = din.swizzleMode; 1291b8e80941Smrg addrin.resourceType = din.resourceType; 1292b8e80941Smrg addrin.bpp = din.bpp; 1293b8e80941Smrg addrin.unalignedWidth = din.unalignedWidth; 1294b8e80941Smrg addrin.unalignedHeight = din.unalignedHeight; 1295b8e80941Smrg addrin.numSlices = 1; 1296b8e80941Smrg addrin.numMipLevels = 1; 1297b8e80941Smrg addrin.numFrags = 1; 1298b8e80941Smrg 1299b8e80941Smrg ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {}; 1300b8e80941Smrg addrout.size = sizeof(addrout); 1301b8e80941Smrg 1302b8e80941Smrg surf->u.gfx9.dcc_retile_num_elements = 1303b8e80941Smrg DIV_ROUND_UP(in->width, dout.compressBlkWidth) * 1304b8e80941Smrg DIV_ROUND_UP(in->height, dout.compressBlkHeight) * 2; 1305b8e80941Smrg /* Align the size to 4 (for the compute shader). 
*/ 1306b8e80941Smrg surf->u.gfx9.dcc_retile_num_elements = 1307b8e80941Smrg align(surf->u.gfx9.dcc_retile_num_elements, 4); 1308b8e80941Smrg 1309b8e80941Smrg surf->u.gfx9.dcc_retile_map = 1310b8e80941Smrg malloc(surf->u.gfx9.dcc_retile_num_elements * 4); 1311b8e80941Smrg if (!surf->u.gfx9.dcc_retile_map) 1312b8e80941Smrg return ADDR_OUTOFMEMORY; 1313b8e80941Smrg 1314b8e80941Smrg unsigned index = 0; 1315b8e80941Smrg surf->u.gfx9.dcc_retile_use_uint16 = true; 1316b8e80941Smrg 1317b8e80941Smrg for (unsigned y = 0; y < in->height; y += dout.compressBlkHeight) { 1318b8e80941Smrg addrin.y = y; 1319b8e80941Smrg 1320b8e80941Smrg for (unsigned x = 0; x < in->width; x += dout.compressBlkWidth) { 1321b8e80941Smrg addrin.x = x; 1322b8e80941Smrg 1323b8e80941Smrg /* Compute src DCC address */ 1324b8e80941Smrg addrin.dccKeyFlags.pipeAligned = surf->u.gfx9.dcc.pipe_aligned; 1325b8e80941Smrg addrin.dccKeyFlags.rbAligned = surf->u.gfx9.dcc.rb_aligned; 1326b8e80941Smrg addrout.addr = 0; 1327b8e80941Smrg 1328b8e80941Smrg ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); 1329b8e80941Smrg if (ret != ADDR_OK) 1330b8e80941Smrg return ret; 1331b8e80941Smrg 1332b8e80941Smrg surf->u.gfx9.dcc_retile_map[index * 2] = addrout.addr; 1333b8e80941Smrg if (addrout.addr > USHRT_MAX) 1334b8e80941Smrg surf->u.gfx9.dcc_retile_use_uint16 = false; 1335b8e80941Smrg 1336b8e80941Smrg /* Compute dst DCC address */ 1337b8e80941Smrg addrin.dccKeyFlags.pipeAligned = 0; 1338b8e80941Smrg addrin.dccKeyFlags.rbAligned = 0; 1339b8e80941Smrg addrout.addr = 0; 1340b8e80941Smrg 1341b8e80941Smrg ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); 1342b8e80941Smrg if (ret != ADDR_OK) 1343b8e80941Smrg return ret; 1344b8e80941Smrg 1345b8e80941Smrg surf->u.gfx9.dcc_retile_map[index * 2 + 1] = addrout.addr; 1346b8e80941Smrg if (addrout.addr > USHRT_MAX) 1347b8e80941Smrg surf->u.gfx9.dcc_retile_use_uint16 = false; 1348b8e80941Smrg 1349b8e80941Smrg assert(index * 2 + 1 < 
surf->u.gfx9.dcc_retile_num_elements); 1350b8e80941Smrg index++; 1351b8e80941Smrg } 1352b8e80941Smrg } 1353b8e80941Smrg /* Fill the remaining pairs with the last one (for the compute shader). */ 1354b8e80941Smrg for (unsigned i = index * 2; i < surf->u.gfx9.dcc_retile_num_elements; i++) 1355b8e80941Smrg surf->u.gfx9.dcc_retile_map[i] = surf->u.gfx9.dcc_retile_map[i - 2]; 1356b8e80941Smrg } 1357b8e80941Smrg } 1358b8e80941Smrg 1359b8e80941Smrg /* FMASK */ 1360b8e80941Smrg if (in->numSamples > 1) { 1361b8e80941Smrg ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0}; 1362b8e80941Smrg ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 1363b8e80941Smrg 1364b8e80941Smrg fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT); 1365b8e80941Smrg fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); 1366b8e80941Smrg 1367b8e80941Smrg ret = gfx9_get_preferred_swizzle_mode(addrlib, in, 1368b8e80941Smrg true, &fin.swizzleMode); 1369b8e80941Smrg if (ret != ADDR_OK) 1370b8e80941Smrg return ret; 1371b8e80941Smrg 1372b8e80941Smrg fin.unalignedWidth = in->width; 1373b8e80941Smrg fin.unalignedHeight = in->height; 1374b8e80941Smrg fin.numSlices = in->numSlices; 1375b8e80941Smrg fin.numSamples = in->numSamples; 1376b8e80941Smrg fin.numFrags = in->numFrags; 1377b8e80941Smrg 1378b8e80941Smrg ret = Addr2ComputeFmaskInfo(addrlib, &fin, &fout); 1379b8e80941Smrg if (ret != ADDR_OK) 1380b8e80941Smrg return ret; 1381b8e80941Smrg 1382b8e80941Smrg surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode; 1383b8e80941Smrg surf->u.gfx9.fmask.epitch = fout.pitch - 1; 1384b8e80941Smrg surf->fmask_size = fout.fmaskBytes; 1385b8e80941Smrg surf->fmask_alignment = fout.baseAlign; 1386b8e80941Smrg 1387b8e80941Smrg /* Compute tile swizzle for the FMASK surface. 
*/ 1388b8e80941Smrg if (config->info.fmask_surf_index && 1389b8e80941Smrg fin.swizzleMode >= ADDR_SW_64KB_Z_T && 1390b8e80941Smrg !(surf->flags & RADEON_SURF_SHAREABLE)) { 1391b8e80941Smrg ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1392b8e80941Smrg ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1393b8e80941Smrg 1394b8e80941Smrg xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1395b8e80941Smrg xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1396b8e80941Smrg 1397b8e80941Smrg /* This counter starts from 1 instead of 0. */ 1398b8e80941Smrg xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 1399b8e80941Smrg xin.flags = in->flags; 1400b8e80941Smrg xin.swizzleMode = fin.swizzleMode; 1401b8e80941Smrg xin.resourceType = in->resourceType; 1402b8e80941Smrg xin.format = in->format; 1403b8e80941Smrg xin.numSamples = in->numSamples; 1404b8e80941Smrg xin.numFrags = in->numFrags; 1405b8e80941Smrg 1406b8e80941Smrg ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 1407b8e80941Smrg if (ret != ADDR_OK) 1408b8e80941Smrg return ret; 1409b8e80941Smrg 1410b8e80941Smrg assert(xout.pipeBankXor <= 1411b8e80941Smrg u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8)); 1412b8e80941Smrg surf->fmask_tile_swizzle = xout.pipeBankXor; 1413b8e80941Smrg } 1414b8e80941Smrg } 1415b8e80941Smrg 1416b8e80941Smrg /* CMASK */ 1417b8e80941Smrg if (in->swizzleMode != ADDR_SW_LINEAR) { 1418b8e80941Smrg ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; 1419b8e80941Smrg ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; 1420b8e80941Smrg 1421b8e80941Smrg cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); 1422b8e80941Smrg cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); 1423b8e80941Smrg 1424b8e80941Smrg if (in->numSamples > 1) { 1425b8e80941Smrg /* FMASK is always aligned. 
*/ 1426b8e80941Smrg cin.cMaskFlags.pipeAligned = 1; 1427b8e80941Smrg cin.cMaskFlags.rbAligned = 1; 1428b8e80941Smrg } else { 1429b8e80941Smrg cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1430b8e80941Smrg cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned; 1431b8e80941Smrg } 1432b8e80941Smrg cin.colorFlags = in->flags; 1433b8e80941Smrg cin.resourceType = in->resourceType; 1434b8e80941Smrg cin.unalignedWidth = in->width; 1435b8e80941Smrg cin.unalignedHeight = in->height; 1436b8e80941Smrg cin.numSlices = in->numSlices; 1437b8e80941Smrg 1438b8e80941Smrg if (in->numSamples > 1) 1439b8e80941Smrg cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode; 1440b8e80941Smrg else 1441b8e80941Smrg cin.swizzleMode = in->swizzleMode; 1442b8e80941Smrg 1443b8e80941Smrg ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout); 1444b8e80941Smrg if (ret != ADDR_OK) 1445b8e80941Smrg return ret; 1446b8e80941Smrg 1447b8e80941Smrg surf->u.gfx9.cmask.rb_aligned = cin.cMaskFlags.rbAligned; 1448b8e80941Smrg surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned; 1449b8e80941Smrg surf->cmask_size = cout.cmaskBytes; 1450b8e80941Smrg surf->cmask_alignment = cout.baseAlign; 1451b8e80941Smrg } 1452b8e80941Smrg } 1453b8e80941Smrg 1454b8e80941Smrg return 0; 1455b8e80941Smrg} 1456b8e80941Smrg 1457b8e80941Smrgstatic int gfx9_compute_surface(ADDR_HANDLE addrlib, 1458b8e80941Smrg const struct radeon_info *info, 1459b8e80941Smrg const struct ac_surf_config *config, 1460b8e80941Smrg enum radeon_surf_mode mode, 1461b8e80941Smrg struct radeon_surf *surf) 1462b8e80941Smrg{ 1463b8e80941Smrg bool compressed; 1464b8e80941Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 1465b8e80941Smrg int r; 1466b8e80941Smrg 1467b8e80941Smrg AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); 1468b8e80941Smrg 1469b8e80941Smrg compressed = surf->blk_w == 4 && surf->blk_h == 4; 1470b8e80941Smrg 1471b8e80941Smrg /* The format must be set correctly for the allocation of compressed 1472b8e80941Smrg 
* textures to work. In other cases, setting the bpp is sufficient. */ 1473b8e80941Smrg if (compressed) { 1474b8e80941Smrg switch (surf->bpe) { 1475b8e80941Smrg case 8: 1476b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_BC1; 1477b8e80941Smrg break; 1478b8e80941Smrg case 16: 1479b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_BC3; 1480b8e80941Smrg break; 1481b8e80941Smrg default: 1482b8e80941Smrg assert(0); 1483b8e80941Smrg } 1484b8e80941Smrg } else { 1485b8e80941Smrg switch (surf->bpe) { 1486b8e80941Smrg case 1: 1487b8e80941Smrg assert(!(surf->flags & RADEON_SURF_ZBUFFER)); 1488b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_8; 1489b8e80941Smrg break; 1490b8e80941Smrg case 2: 1491b8e80941Smrg assert(surf->flags & RADEON_SURF_ZBUFFER || 1492b8e80941Smrg !(surf->flags & RADEON_SURF_SBUFFER)); 1493b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_16; 1494b8e80941Smrg break; 1495b8e80941Smrg case 4: 1496b8e80941Smrg assert(surf->flags & RADEON_SURF_ZBUFFER || 1497b8e80941Smrg !(surf->flags & RADEON_SURF_SBUFFER)); 1498b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_32; 1499b8e80941Smrg break; 1500b8e80941Smrg case 8: 1501b8e80941Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1502b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32; 1503b8e80941Smrg break; 1504b8e80941Smrg case 12: 1505b8e80941Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1506b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32_32; 1507b8e80941Smrg break; 1508b8e80941Smrg case 16: 1509b8e80941Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1510b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32; 1511b8e80941Smrg break; 1512b8e80941Smrg default: 1513b8e80941Smrg assert(0); 1514b8e80941Smrg } 1515b8e80941Smrg AddrSurfInfoIn.bpp = surf->bpe * 8; 1516b8e80941Smrg } 1517b8e80941Smrg 1518b8e80941Smrg bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 1519b8e80941Smrg AddrSurfInfoIn.flags.color = is_color_surface && 1520b8e80941Smrg !(surf->flags & 
RADEON_SURF_NO_RENDER_TARGET); 1521b8e80941Smrg AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 1522b8e80941Smrg AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 1523b8e80941Smrg /* flags.texture currently refers to TC-compatible HTILE */ 1524b8e80941Smrg AddrSurfInfoIn.flags.texture = is_color_surface || 1525b8e80941Smrg surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; 1526b8e80941Smrg AddrSurfInfoIn.flags.opt4space = 1; 1527b8e80941Smrg 1528b8e80941Smrg AddrSurfInfoIn.numMipLevels = config->info.levels; 1529b8e80941Smrg AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 1530b8e80941Smrg AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples; 1531b8e80941Smrg 1532b8e80941Smrg if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) 1533b8e80941Smrg AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 1534b8e80941Smrg 1535b8e80941Smrg /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures 1536b8e80941Smrg * as 2D to avoid having shader variants for 1D vs 2D, so all shaders 1537b8e80941Smrg * must sample 1D textures as 2D. */ 1538b8e80941Smrg if (config->is_3d) 1539b8e80941Smrg AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D; 1540b8e80941Smrg else 1541b8e80941Smrg AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D; 1542b8e80941Smrg 1543b8e80941Smrg AddrSurfInfoIn.width = config->info.width; 1544b8e80941Smrg AddrSurfInfoIn.height = config->info.height; 1545b8e80941Smrg 1546b8e80941Smrg if (config->is_3d) 1547b8e80941Smrg AddrSurfInfoIn.numSlices = config->info.depth; 1548b8e80941Smrg else if (config->is_cube) 1549b8e80941Smrg AddrSurfInfoIn.numSlices = 6; 1550b8e80941Smrg else 1551b8e80941Smrg AddrSurfInfoIn.numSlices = config->info.array_size; 1552b8e80941Smrg 1553b8e80941Smrg /* This is propagated to HTILE/DCC/CMASK. 
*/ 1554b8e80941Smrg AddrSurfInfoIn.flags.metaPipeUnaligned = 0; 1555b8e80941Smrg AddrSurfInfoIn.flags.metaRbUnaligned = 0; 1556b8e80941Smrg 1557b8e80941Smrg /* The display hardware can only read DCC with RB_ALIGNED=0 and 1558b8e80941Smrg * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. 1559b8e80941Smrg * 1560b8e80941Smrg * The CB block requires RB_ALIGNED=1 except 1 RB chips. 1561b8e80941Smrg * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes 1562b8e80941Smrg * after rendering, so PIPE_ALIGNED=1 is recommended. 1563b8e80941Smrg */ 1564b8e80941Smrg if (info->use_display_dcc_unaligned && is_color_surface && 1565b8e80941Smrg AddrSurfInfoIn.flags.display) { 1566b8e80941Smrg AddrSurfInfoIn.flags.metaPipeUnaligned = 1; 1567b8e80941Smrg AddrSurfInfoIn.flags.metaRbUnaligned = 1; 1568b8e80941Smrg } 1569b8e80941Smrg 1570b8e80941Smrg switch (mode) { 1571b8e80941Smrg case RADEON_SURF_MODE_LINEAR_ALIGNED: 1572b8e80941Smrg assert(config->info.samples <= 1); 1573b8e80941Smrg assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1574b8e80941Smrg AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; 1575b8e80941Smrg break; 1576b8e80941Smrg 1577b8e80941Smrg case RADEON_SURF_MODE_1D: 1578b8e80941Smrg case RADEON_SURF_MODE_2D: 1579b8e80941Smrg if (surf->flags & RADEON_SURF_IMPORTED) { 1580b8e80941Smrg AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode; 1581b8e80941Smrg break; 1582b8e80941Smrg } 1583b8e80941Smrg 1584b8e80941Smrg r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, 1585b8e80941Smrg false, &AddrSurfInfoIn.swizzleMode); 1586b8e80941Smrg if (r) 1587b8e80941Smrg return r; 1588b8e80941Smrg break; 1589b8e80941Smrg 1590b8e80941Smrg default: 1591b8e80941Smrg assert(0); 1592b8e80941Smrg } 1593b8e80941Smrg 1594b8e80941Smrg surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; 1595b8e80941Smrg surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 1596b8e80941Smrg 1597b8e80941Smrg surf->num_dcc_levels = 0; 1598b8e80941Smrg surf->surf_size 
= 0; 1599b8e80941Smrg surf->fmask_size = 0; 1600b8e80941Smrg surf->dcc_size = 0; 1601b8e80941Smrg surf->htile_size = 0; 1602b8e80941Smrg surf->htile_slice_size = 0; 1603b8e80941Smrg surf->u.gfx9.surf_offset = 0; 1604b8e80941Smrg surf->u.gfx9.stencil_offset = 0; 1605b8e80941Smrg surf->cmask_size = 0; 1606b8e80941Smrg surf->u.gfx9.dcc_retile_use_uint16 = false; 1607b8e80941Smrg surf->u.gfx9.dcc_retile_num_elements = 0; 1608b8e80941Smrg surf->u.gfx9.dcc_retile_map = NULL; 1609b8e80941Smrg 1610b8e80941Smrg /* Calculate texture layout information. */ 1611b8e80941Smrg r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, 1612b8e80941Smrg &AddrSurfInfoIn); 1613b8e80941Smrg if (r) 1614b8e80941Smrg goto error; 1615b8e80941Smrg 1616b8e80941Smrg /* Calculate texture layout information for stencil. */ 1617b8e80941Smrg if (surf->flags & RADEON_SURF_SBUFFER) { 1618b8e80941Smrg AddrSurfInfoIn.flags.stencil = 1; 1619b8e80941Smrg AddrSurfInfoIn.bpp = 8; 1620b8e80941Smrg AddrSurfInfoIn.format = ADDR_FMT_8; 1621b8e80941Smrg 1622b8e80941Smrg if (!AddrSurfInfoIn.flags.depth) { 1623b8e80941Smrg r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, 1624b8e80941Smrg false, &AddrSurfInfoIn.swizzleMode); 1625b8e80941Smrg if (r) 1626b8e80941Smrg goto error; 1627b8e80941Smrg } else 1628b8e80941Smrg AddrSurfInfoIn.flags.depth = 0; 1629b8e80941Smrg 1630b8e80941Smrg r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, 1631b8e80941Smrg &AddrSurfInfoIn); 1632b8e80941Smrg if (r) 1633b8e80941Smrg goto error; 1634b8e80941Smrg } 1635b8e80941Smrg 1636b8e80941Smrg surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR; 1637b8e80941Smrg 1638b8e80941Smrg /* Query whether the surface is displayable. 
*/ 1639b8e80941Smrg bool displayable = false; 1640b8e80941Smrg if (!config->is_3d && !config->is_cube) { 1641b8e80941Smrg r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, 1642b8e80941Smrg surf->bpe * 8, &displayable); 1643b8e80941Smrg if (r) 1644b8e80941Smrg goto error; 1645b8e80941Smrg 1646b8e80941Smrg /* Display needs unaligned DCC. */ 1647b8e80941Smrg if (info->use_display_dcc_unaligned && 1648b8e80941Smrg surf->num_dcc_levels && 1649b8e80941Smrg (surf->u.gfx9.dcc.pipe_aligned || 1650b8e80941Smrg surf->u.gfx9.dcc.rb_aligned)) 1651b8e80941Smrg displayable = false; 1652b8e80941Smrg } 1653b8e80941Smrg surf->is_displayable = displayable; 1654b8e80941Smrg 1655b8e80941Smrg switch (surf->u.gfx9.surf.swizzle_mode) { 1656b8e80941Smrg /* S = standard. */ 1657b8e80941Smrg case ADDR_SW_256B_S: 1658b8e80941Smrg case ADDR_SW_4KB_S: 1659b8e80941Smrg case ADDR_SW_64KB_S: 1660b8e80941Smrg case ADDR_SW_VAR_S: 1661b8e80941Smrg case ADDR_SW_64KB_S_T: 1662b8e80941Smrg case ADDR_SW_4KB_S_X: 1663b8e80941Smrg case ADDR_SW_64KB_S_X: 1664b8e80941Smrg case ADDR_SW_VAR_S_X: 1665b8e80941Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_THIN; 1666b8e80941Smrg break; 1667b8e80941Smrg 1668b8e80941Smrg /* D = display. */ 1669b8e80941Smrg case ADDR_SW_LINEAR: 1670b8e80941Smrg case ADDR_SW_256B_D: 1671b8e80941Smrg case ADDR_SW_4KB_D: 1672b8e80941Smrg case ADDR_SW_64KB_D: 1673b8e80941Smrg case ADDR_SW_VAR_D: 1674b8e80941Smrg case ADDR_SW_64KB_D_T: 1675b8e80941Smrg case ADDR_SW_4KB_D_X: 1676b8e80941Smrg case ADDR_SW_64KB_D_X: 1677b8e80941Smrg case ADDR_SW_VAR_D_X: 1678b8e80941Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY; 1679b8e80941Smrg break; 1680b8e80941Smrg 1681b8e80941Smrg /* R = rotated. 
*/ 1682b8e80941Smrg case ADDR_SW_256B_R: 1683b8e80941Smrg case ADDR_SW_4KB_R: 1684b8e80941Smrg case ADDR_SW_64KB_R: 1685b8e80941Smrg case ADDR_SW_VAR_R: 1686b8e80941Smrg case ADDR_SW_64KB_R_T: 1687b8e80941Smrg case ADDR_SW_4KB_R_X: 1688b8e80941Smrg case ADDR_SW_64KB_R_X: 1689b8e80941Smrg case ADDR_SW_VAR_R_X: 1690b8e80941Smrg /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 1691b8e80941Smrg * used at the same time. This case is not currently expected to occur 1692b8e80941Smrg * because we don't use rotated. Enforce this restriction on all chips 1693b8e80941Smrg * to facilitate testing. 1694b8e80941Smrg */ 1695b8e80941Smrg assert(!"rotate micro tile mode is unsupported"); 1696b8e80941Smrg r = ADDR_ERROR; 1697b8e80941Smrg goto error; 1698b8e80941Smrg 1699b8e80941Smrg /* Z = depth. */ 1700b8e80941Smrg case ADDR_SW_4KB_Z: 1701b8e80941Smrg case ADDR_SW_64KB_Z: 1702b8e80941Smrg case ADDR_SW_VAR_Z: 1703b8e80941Smrg case ADDR_SW_64KB_Z_T: 1704b8e80941Smrg case ADDR_SW_4KB_Z_X: 1705b8e80941Smrg case ADDR_SW_64KB_Z_X: 1706b8e80941Smrg case ADDR_SW_VAR_Z_X: 1707b8e80941Smrg surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH; 1708b8e80941Smrg break; 1709b8e80941Smrg 1710b8e80941Smrg default: 1711b8e80941Smrg assert(0); 1712b8e80941Smrg } 1713b8e80941Smrg 1714b8e80941Smrg return 0; 1715b8e80941Smrg 1716b8e80941Smrgerror: 1717b8e80941Smrg free(surf->u.gfx9.dcc_retile_map); 1718b8e80941Smrg surf->u.gfx9.dcc_retile_map = NULL; 1719b8e80941Smrg return r; 1720b8e80941Smrg} 1721b8e80941Smrg 1722b8e80941Smrgint ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, 1723b8e80941Smrg const struct ac_surf_config *config, 1724b8e80941Smrg enum radeon_surf_mode mode, 1725b8e80941Smrg struct radeon_surf *surf) 1726b8e80941Smrg{ 1727b8e80941Smrg int r; 1728b8e80941Smrg 1729b8e80941Smrg r = surf_config_sanity(config, surf->flags); 1730b8e80941Smrg if (r) 1731b8e80941Smrg return r; 1732b8e80941Smrg 1733b8e80941Smrg if (info->chip_class >= GFX9) 
1734b8e80941Smrg return gfx9_compute_surface(addrlib, info, config, mode, surf); 1735b8e80941Smrg else 1736b8e80941Smrg return gfx6_compute_surface(addrlib, info, config, mode, surf); 1737b8e80941Smrg} 1738