/* ac_surface.c revision ed98bd31 */
/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
26 */ 27 28#include "ac_surface.h" 29#include "amd_family.h" 30#include "addrlib/src/amdgpu_asic_addr.h" 31#include "ac_gpu_info.h" 32#include "util/macros.h" 33#include "util/u_atomic.h" 34#include "util/u_math.h" 35 36#include <errno.h> 37#include <stdio.h> 38#include <stdlib.h> 39#include <amdgpu.h> 40#include <amdgpu_drm.h> 41 42#include "addrlib/inc/addrinterface.h" 43 44#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND 45#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A 46#endif 47 48#ifndef CIASICIDGFXENGINE_ARCTICISLAND 49#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D 50#endif 51 52static unsigned get_first(unsigned x, unsigned y) 53{ 54 return x; 55} 56 57static void addrlib_family_rev_id(enum radeon_family family, 58 unsigned *addrlib_family, 59 unsigned *addrlib_revid) 60{ 61 switch (family) { 62 case CHIP_TAHITI: 63 *addrlib_family = FAMILY_SI; 64 *addrlib_revid = get_first(AMDGPU_TAHITI_RANGE); 65 break; 66 case CHIP_PITCAIRN: 67 *addrlib_family = FAMILY_SI; 68 *addrlib_revid = get_first(AMDGPU_PITCAIRN_RANGE); 69 break; 70 case CHIP_VERDE: 71 *addrlib_family = FAMILY_SI; 72 *addrlib_revid = get_first(AMDGPU_CAPEVERDE_RANGE); 73 break; 74 case CHIP_OLAND: 75 *addrlib_family = FAMILY_SI; 76 *addrlib_revid = get_first(AMDGPU_OLAND_RANGE); 77 break; 78 case CHIP_HAINAN: 79 *addrlib_family = FAMILY_SI; 80 *addrlib_revid = get_first(AMDGPU_HAINAN_RANGE); 81 break; 82 case CHIP_BONAIRE: 83 *addrlib_family = FAMILY_CI; 84 *addrlib_revid = get_first(AMDGPU_BONAIRE_RANGE); 85 break; 86 case CHIP_KAVERI: 87 *addrlib_family = FAMILY_KV; 88 *addrlib_revid = get_first(AMDGPU_SPECTRE_RANGE); 89 break; 90 case CHIP_KABINI: 91 *addrlib_family = FAMILY_KV; 92 *addrlib_revid = get_first(AMDGPU_KALINDI_RANGE); 93 break; 94 case CHIP_HAWAII: 95 *addrlib_family = FAMILY_CI; 96 *addrlib_revid = get_first(AMDGPU_HAWAII_RANGE); 97 break; 98 case CHIP_MULLINS: 99 *addrlib_family = FAMILY_KV; 100 *addrlib_revid = get_first(AMDGPU_GODAVARI_RANGE); 101 break; 102 case CHIP_TONGA: 103 
*addrlib_family = FAMILY_VI; 104 *addrlib_revid = get_first(AMDGPU_TONGA_RANGE); 105 break; 106 case CHIP_ICELAND: 107 *addrlib_family = FAMILY_VI; 108 *addrlib_revid = get_first(AMDGPU_ICELAND_RANGE); 109 break; 110 case CHIP_CARRIZO: 111 *addrlib_family = FAMILY_CZ; 112 *addrlib_revid = get_first(AMDGPU_CARRIZO_RANGE); 113 break; 114 case CHIP_STONEY: 115 *addrlib_family = FAMILY_CZ; 116 *addrlib_revid = get_first(AMDGPU_STONEY_RANGE); 117 break; 118 case CHIP_FIJI: 119 *addrlib_family = FAMILY_VI; 120 *addrlib_revid = get_first(AMDGPU_FIJI_RANGE); 121 break; 122 case CHIP_POLARIS10: 123 *addrlib_family = FAMILY_VI; 124 *addrlib_revid = get_first(AMDGPU_POLARIS10_RANGE); 125 break; 126 case CHIP_POLARIS11: 127 *addrlib_family = FAMILY_VI; 128 *addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE); 129 break; 130 case CHIP_POLARIS12: 131 *addrlib_family = FAMILY_VI; 132 *addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE); 133 break; 134 case CHIP_VEGAM: 135 *addrlib_family = FAMILY_VI; 136 *addrlib_revid = get_first(AMDGPU_VEGAM_RANGE); 137 break; 138 case CHIP_VEGA10: 139 *addrlib_family = FAMILY_AI; 140 *addrlib_revid = get_first(AMDGPU_VEGA10_RANGE); 141 break; 142 case CHIP_VEGA12: 143 *addrlib_family = FAMILY_AI; 144 *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE); 145 break; 146 case CHIP_VEGA20: 147 *addrlib_family = FAMILY_AI; 148 *addrlib_revid = get_first(AMDGPU_VEGA20_RANGE); 149 break; 150 case CHIP_RAVEN: 151 *addrlib_family = FAMILY_RV; 152 *addrlib_revid = get_first(AMDGPU_RAVEN_RANGE); 153 break; 154 case CHIP_RAVEN2: 155 *addrlib_family = FAMILY_RV; 156 *addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE); 157 break; 158 default: 159 fprintf(stderr, "amdgpu: Unknown family.\n"); 160 } 161} 162 163static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput) 164{ 165 return malloc(pInput->sizeInBytes); 166} 167 168static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput) 169{ 170 free(pInput->pVirtAddr); 171 return 
ADDR_OK; 172} 173 174ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info, 175 const struct amdgpu_gpu_info *amdinfo, 176 uint64_t *max_alignment) 177{ 178 ADDR_CREATE_INPUT addrCreateInput = {0}; 179 ADDR_CREATE_OUTPUT addrCreateOutput = {0}; 180 ADDR_REGISTER_VALUE regValue = {0}; 181 ADDR_CREATE_FLAGS createFlags = {{0}}; 182 ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0}; 183 ADDR_E_RETURNCODE addrRet; 184 185 addrCreateInput.size = sizeof(ADDR_CREATE_INPUT); 186 addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT); 187 188 regValue.gbAddrConfig = amdinfo->gb_addr_cfg; 189 createFlags.value = 0; 190 191 addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision); 192 if (addrCreateInput.chipFamily == FAMILY_UNKNOWN) 193 return NULL; 194 195 if (addrCreateInput.chipFamily >= FAMILY_AI) { 196 addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND; 197 regValue.blockVarSizeLog2 = 0; 198 } else { 199 regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3; 200 regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2; 201 202 regValue.backendDisables = amdinfo->enabled_rb_pipes_mask; 203 regValue.pTileConfig = amdinfo->gb_tile_mode; 204 regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode); 205 if (addrCreateInput.chipFamily == FAMILY_SI) { 206 regValue.pMacroTileConfig = NULL; 207 regValue.noOfMacroEntries = 0; 208 } else { 209 regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode; 210 regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode); 211 } 212 213 createFlags.useTileIndex = 1; 214 createFlags.useHtileSliceAlign = 1; 215 216 addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; 217 } 218 219 addrCreateInput.callbacks.allocSysMem = allocSysMem; 220 addrCreateInput.callbacks.freeSysMem = freeSysMem; 221 addrCreateInput.callbacks.debugPrint = 0; 222 addrCreateInput.createFlags = createFlags; 223 addrCreateInput.regValue = regValue; 224 225 addrRet = AddrCreate(&addrCreateInput, 
&addrCreateOutput); 226 if (addrRet != ADDR_OK) 227 return NULL; 228 229 if (max_alignment) { 230 addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput); 231 if (addrRet == ADDR_OK){ 232 *max_alignment = addrGetMaxAlignmentsOutput.baseAlign; 233 } 234 } 235 return addrCreateOutput.hLib; 236} 237 238static int surf_config_sanity(const struct ac_surf_config *config, 239 unsigned flags) 240{ 241 /* FMASK is allocated together with the color surface and can't be 242 * allocated separately. 243 */ 244 assert(!(flags & RADEON_SURF_FMASK)); 245 if (flags & RADEON_SURF_FMASK) 246 return -EINVAL; 247 248 /* all dimension must be at least 1 ! */ 249 if (!config->info.width || !config->info.height || !config->info.depth || 250 !config->info.array_size || !config->info.levels) 251 return -EINVAL; 252 253 switch (config->info.samples) { 254 case 0: 255 case 1: 256 case 2: 257 case 4: 258 case 8: 259 break; 260 case 16: 261 if (flags & RADEON_SURF_Z_OR_SBUFFER) 262 return -EINVAL; 263 break; 264 default: 265 return -EINVAL; 266 } 267 268 if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) { 269 switch (config->info.storage_samples) { 270 case 0: 271 case 1: 272 case 2: 273 case 4: 274 case 8: 275 break; 276 default: 277 return -EINVAL; 278 } 279 } 280 281 if (config->is_3d && config->info.array_size > 1) 282 return -EINVAL; 283 if (config->is_cube && config->info.depth > 1) 284 return -EINVAL; 285 286 return 0; 287} 288 289static int gfx6_compute_level(ADDR_HANDLE addrlib, 290 const struct ac_surf_config *config, 291 struct radeon_surf *surf, bool is_stencil, 292 unsigned level, bool compressed, 293 ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, 294 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, 295 ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, 296 ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut, 297 ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn, 298 ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut) 299{ 300 struct legacy_surf_level *surf_level; 301 ADDR_E_RETURNCODE ret; 302 
303 AddrSurfInfoIn->mipLevel = level; 304 AddrSurfInfoIn->width = u_minify(config->info.width, level); 305 AddrSurfInfoIn->height = u_minify(config->info.height, level); 306 307 /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics, 308 * because GFX9 needs linear alignment of 256 bytes. 309 */ 310 if (config->info.levels == 1 && 311 AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED && 312 AddrSurfInfoIn->bpp && 313 util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) { 314 unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8); 315 316 AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment); 317 } 318 319 if (config->is_3d) 320 AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level); 321 else if (config->is_cube) 322 AddrSurfInfoIn->numSlices = 6; 323 else 324 AddrSurfInfoIn->numSlices = config->info.array_size; 325 326 if (level > 0) { 327 /* Set the base level pitch. This is needed for calculation 328 * of non-zero levels. */ 329 if (is_stencil) 330 AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x; 331 else 332 AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x; 333 334 /* Convert blocks to pixels for compressed formats. */ 335 if (compressed) 336 AddrSurfInfoIn->basePitch *= surf->blk_w; 337 } 338 339 ret = AddrComputeSurfaceInfo(addrlib, 340 AddrSurfInfoIn, 341 AddrSurfInfoOut); 342 if (ret != ADDR_OK) { 343 return ret; 344 } 345 346 surf_level = is_stencil ? 
&surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level]; 347 surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign); 348 surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4; 349 surf_level->nblk_x = AddrSurfInfoOut->pitch; 350 surf_level->nblk_y = AddrSurfInfoOut->height; 351 352 switch (AddrSurfInfoOut->tileMode) { 353 case ADDR_TM_LINEAR_ALIGNED: 354 surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 355 break; 356 case ADDR_TM_1D_TILED_THIN1: 357 surf_level->mode = RADEON_SURF_MODE_1D; 358 break; 359 case ADDR_TM_2D_TILED_THIN1: 360 surf_level->mode = RADEON_SURF_MODE_2D; 361 break; 362 default: 363 assert(0); 364 } 365 366 if (is_stencil) 367 surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex; 368 else 369 surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex; 370 371 surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize; 372 373 /* Clear DCC fields at the beginning. */ 374 surf_level->dcc_offset = 0; 375 376 /* The previous level's flag tells us if we can use DCC for this level. 
*/ 377 if (AddrSurfInfoIn->flags.dccCompatible && 378 (level == 0 || AddrDccOut->subLvlCompressible)) { 379 bool prev_level_clearable = level == 0 || 380 AddrDccOut->dccRamSizeAligned; 381 382 AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize; 383 AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; 384 AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; 385 AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; 386 AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 387 388 ret = AddrComputeDccInfo(addrlib, 389 AddrDccIn, 390 AddrDccOut); 391 392 if (ret == ADDR_OK) { 393 surf_level->dcc_offset = surf->dcc_size; 394 surf->num_dcc_levels = level + 1; 395 surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize; 396 surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign); 397 398 /* If the DCC size of a subresource (1 mip level or 1 slice) 399 * is not aligned, the DCC memory layout is not contiguous for 400 * that subresource, which means we can't use fast clear. 401 * 402 * We only do fast clears for whole mipmap levels. If we did 403 * per-slice fast clears, the same restriction would apply. 404 * (i.e. only compute the slice size and see if it's aligned) 405 * 406 * The last level can be non-contiguous and still be clearable 407 * if it's interleaved with the next level that doesn't exist. 408 */ 409 if (AddrDccOut->dccRamSizeAligned || 410 (prev_level_clearable && level == config->info.levels - 1)) 411 surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize; 412 else 413 surf_level->dcc_fast_clear_size = 0; 414 } 415 } 416 417 /* TC-compatible HTILE. 
*/ 418 if (!is_stencil && 419 AddrSurfInfoIn->flags.depth && 420 surf_level->mode == RADEON_SURF_MODE_2D && 421 level == 0) { 422 AddrHtileIn->flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible; 423 AddrHtileIn->pitch = AddrSurfInfoOut->pitch; 424 AddrHtileIn->height = AddrSurfInfoOut->height; 425 AddrHtileIn->numSlices = AddrSurfInfoOut->depth; 426 AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; 427 AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; 428 AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; 429 AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; 430 AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 431 432 ret = AddrComputeHtileInfo(addrlib, 433 AddrHtileIn, 434 AddrHtileOut); 435 436 if (ret == ADDR_OK) { 437 surf->htile_size = AddrHtileOut->htileBytes; 438 surf->htile_slice_size = AddrHtileOut->sliceSize; 439 surf->htile_alignment = AddrHtileOut->baseAlign; 440 } 441 } 442 443 return 0; 444} 445 446#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03) 447#define V_009910_ADDR_SURF_THICK_MICRO_TILING 0x03 448#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07) 449 450static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, 451 const struct radeon_info *info) 452{ 453 uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; 454 455 if (info->chip_class >= CIK) 456 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); 457 else 458 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); 459} 460 461static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) 462{ 463 unsigned index, tileb; 464 465 tileb = 8 * 8 * surf->bpe; 466 tileb = MIN2(surf->u.legacy.tile_split, tileb); 467 468 for (index = 0; tileb > 64; index++) 469 tileb >>= 1; 470 471 assert(index < 16); 472 return index; 473} 474 475static bool get_display_flag(const struct ac_surf_config *config, 476 const struct radeon_surf *surf) 477{ 478 unsigned num_channels = config->info.num_channels; 479 unsigned bpe = 
surf->bpe; 480 481 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 482 surf->flags & RADEON_SURF_SCANOUT && 483 config->info.samples <= 1 && 484 surf->blk_w <= 2 && surf->blk_h == 1) { 485 /* subsampled */ 486 if (surf->blk_w == 2 && surf->blk_h == 1) 487 return true; 488 489 if (/* RGBA8 or RGBA16F */ 490 (bpe >= 4 && bpe <= 8 && num_channels == 4) || 491 /* R5G6B5 or R5G5B5A1 */ 492 (bpe == 2 && num_channels >= 3) || 493 /* C8 palette */ 494 (bpe == 1 && num_channels == 1)) 495 return true; 496 } 497 return false; 498} 499 500/** 501 * This must be called after the first level is computed. 502 * 503 * Copy surface-global settings like pipe/bank config from level 0 surface 504 * computation, and compute tile swizzle. 505 */ 506static int gfx6_surface_settings(ADDR_HANDLE addrlib, 507 const struct radeon_info *info, 508 const struct ac_surf_config *config, 509 ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio, 510 struct radeon_surf *surf) 511{ 512 surf->surf_alignment = csio->baseAlign; 513 surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1; 514 gfx6_set_micro_tile_mode(surf, info); 515 516 /* For 2D modes only. */ 517 if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) { 518 surf->u.legacy.bankw = csio->pTileInfo->bankWidth; 519 surf->u.legacy.bankh = csio->pTileInfo->bankHeight; 520 surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio; 521 surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes; 522 surf->u.legacy.num_banks = csio->pTileInfo->banks; 523 surf->u.legacy.macro_tile_index = csio->macroModeIndex; 524 } else { 525 surf->u.legacy.macro_tile_index = 0; 526 } 527 528 /* Compute tile swizzle. 
*/ 529 /* TODO: fix tile swizzle with mipmapping for SI */ 530 if ((info->chip_class >= CIK || config->info.levels == 1) && 531 config->info.surf_index && 532 surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && 533 !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && 534 !get_display_flag(config, surf)) { 535 ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0}; 536 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0}; 537 538 AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 539 AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 540 541 AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 542 AddrBaseSwizzleIn.tileIndex = csio->tileIndex; 543 AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex; 544 AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo; 545 AddrBaseSwizzleIn.tileMode = csio->tileMode; 546 547 int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 548 &AddrBaseSwizzleOut); 549 if (r != ADDR_OK) 550 return r; 551 552 assert(AddrBaseSwizzleOut.tileSwizzle <= 553 u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 554 surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle; 555 } 556 return 0; 557} 558 559void ac_compute_cmask(const struct radeon_info *info, 560 const struct ac_surf_config *config, 561 struct radeon_surf *surf) 562{ 563 unsigned pipe_interleave_bytes = info->pipe_interleave_bytes; 564 unsigned num_pipes = info->num_tile_pipes; 565 unsigned cl_width, cl_height; 566 567 if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 568 return; 569 570 assert(info->chip_class <= VI); 571 572 switch (num_pipes) { 573 case 2: 574 cl_width = 32; 575 cl_height = 16; 576 break; 577 case 4: 578 cl_width = 32; 579 cl_height = 32; 580 break; 581 case 8: 582 cl_width = 64; 583 cl_height = 32; 584 break; 585 case 16: /* Hawaii */ 586 cl_width = 64; 587 cl_height = 64; 588 break; 589 default: 590 assert(0); 591 return; 592 } 593 594 unsigned base_align = num_pipes * 
pipe_interleave_bytes; 595 596 unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); 597 unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); 598 unsigned slice_elements = (width * height) / (8*8); 599 600 /* Each element of CMASK is a nibble. */ 601 unsigned slice_bytes = slice_elements / 2; 602 603 surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128); 604 if (surf->u.legacy.cmask_slice_tile_max) 605 surf->u.legacy.cmask_slice_tile_max -= 1; 606 607 unsigned num_layers; 608 if (config->is_3d) 609 num_layers = config->info.depth; 610 else if (config->is_cube) 611 num_layers = 6; 612 else 613 num_layers = config->info.array_size; 614 615 surf->cmask_alignment = MAX2(256, base_align); 616 surf->cmask_size = align(slice_bytes, base_align) * num_layers; 617} 618 619/** 620 * Fill in the tiling information in \p surf based on the given surface config. 621 * 622 * The following fields of \p surf must be initialized by the caller: 623 * blk_w, blk_h, bpe, flags. 
624 */ 625static int gfx6_compute_surface(ADDR_HANDLE addrlib, 626 const struct radeon_info *info, 627 const struct ac_surf_config *config, 628 enum radeon_surf_mode mode, 629 struct radeon_surf *surf) 630{ 631 unsigned level; 632 bool compressed; 633 ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 634 ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; 635 ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; 636 ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; 637 ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; 638 ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; 639 ADDR_TILEINFO AddrTileInfoIn = {0}; 640 ADDR_TILEINFO AddrTileInfoOut = {0}; 641 int r; 642 643 AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT); 644 AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); 645 AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); 646 AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); 647 AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); 648 AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); 649 AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; 650 651 compressed = surf->blk_w == 4 && surf->blk_h == 4; 652 653 /* MSAA requires 2D tiling. */ 654 if (config->info.samples > 1) 655 mode = RADEON_SURF_MODE_2D; 656 657 /* DB doesn't support linear layouts. */ 658 if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && 659 mode < RADEON_SURF_MODE_1D) 660 mode = RADEON_SURF_MODE_1D; 661 662 /* Set the requested tiling mode. */ 663 switch (mode) { 664 case RADEON_SURF_MODE_LINEAR_ALIGNED: 665 AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED; 666 break; 667 case RADEON_SURF_MODE_1D: 668 AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1; 669 break; 670 case RADEON_SURF_MODE_2D: 671 AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1; 672 break; 673 default: 674 assert(0); 675 } 676 677 /* The format must be set correctly for the allocation of compressed 678 * textures to work. In other cases, setting the bpp is sufficient. 
679 */ 680 if (compressed) { 681 switch (surf->bpe) { 682 case 8: 683 AddrSurfInfoIn.format = ADDR_FMT_BC1; 684 break; 685 case 16: 686 AddrSurfInfoIn.format = ADDR_FMT_BC3; 687 break; 688 default: 689 assert(0); 690 } 691 } 692 else { 693 AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8; 694 } 695 696 AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = 697 MAX2(1, config->info.samples); 698 AddrSurfInfoIn.tileIndex = -1; 699 700 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) { 701 AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = 702 MAX2(1, config->info.storage_samples); 703 } 704 705 /* Set the micro tile type. */ 706 if (surf->flags & RADEON_SURF_SCANOUT) 707 AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE; 708 else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 709 AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER; 710 else 711 AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE; 712 713 AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 714 AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 715 AddrSurfInfoIn.flags.cube = config->is_cube; 716 AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 717 AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1; 718 AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; 719 720 /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been 721 * requested, because TC-compatible HTILE requires 2D tiling. 722 */ 723 AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && 724 !AddrSurfInfoIn.flags.fmask && 725 config->info.samples <= 1 && 726 (surf->flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); 727 728 /* DCC notes: 729 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp 730 * with samples >= 4. 731 * - Mipmapped array textures have low performance (discovered by a closed 732 * driver team). 
733 */ 734 AddrSurfInfoIn.flags.dccCompatible = 735 info->chip_class >= VI && 736 !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 737 !(surf->flags & RADEON_SURF_DISABLE_DCC) && 738 !compressed && 739 ((config->info.array_size == 1 && config->info.depth == 1) || 740 config->info.levels == 1); 741 742 AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; 743 AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 744 745 /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit) 746 * for Z and stencil. This can cause a number of problems which we work 747 * around here: 748 * 749 * - a depth part that is incompatible with mipmapped texturing 750 * - at least on Stoney, entirely incompatible Z/S aspects (e.g. 751 * incorrect tiling applied to the stencil part, stencil buffer 752 * memory accesses that go out of bounds) even without mipmapping 753 * 754 * Some piglit tests that are prone to different types of related 755 * failures: 756 * ./bin/ext_framebuffer_multisample-upsample 2 stencil 757 * ./bin/framebuffer-blit-levels {draw,read} stencil 758 * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample} 759 * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw} 760 * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8 761 */ 762 int stencil_tile_idx = -1; 763 764 if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil && 765 (config->info.levels > 1 || info->family == CHIP_STONEY)) { 766 /* Compute stencilTileIdx that is compatible with the (depth) 767 * tileIdx. This degrades the depth surface if necessary to 768 * ensure that a matching stencilTileIdx exists. */ 769 AddrSurfInfoIn.flags.matchStencilTileCfg = 1; 770 771 /* Keep the depth mip-tail compatible with texturing. */ 772 AddrSurfInfoIn.flags.noStencil = 1; 773 } 774 775 /* Set preferred macrotile parameters. This is usually required 776 * for shared resources. 
This is for 2D tiling only. */ 777 if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && 778 surf->u.legacy.bankw && surf->u.legacy.bankh && 779 surf->u.legacy.mtilea && surf->u.legacy.tile_split) { 780 /* If any of these parameters are incorrect, the calculation 781 * will fail. */ 782 AddrTileInfoIn.banks = surf->u.legacy.num_banks; 783 AddrTileInfoIn.bankWidth = surf->u.legacy.bankw; 784 AddrTileInfoIn.bankHeight = surf->u.legacy.bankh; 785 AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea; 786 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split; 787 AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */ 788 AddrSurfInfoIn.flags.opt4Space = 0; 789 AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; 790 791 /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set 792 * the tile index, because we are expected to know it if 793 * we know the other parameters. 794 * 795 * This is something that can easily be fixed in Addrlib. 796 * For now, just figure it out here. 797 * Note that only 2D_TILE_THIN1 is handled here. 
798 */ 799 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 800 assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); 801 802 if (info->chip_class == SI) { 803 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { 804 if (surf->bpe == 2) 805 AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ 806 else 807 AddrSurfInfoIn.tileIndex = 12; /* 32bpp */ 808 } else { 809 if (surf->bpe == 1) 810 AddrSurfInfoIn.tileIndex = 14; /* 8bpp */ 811 else if (surf->bpe == 2) 812 AddrSurfInfoIn.tileIndex = 15; /* 16bpp */ 813 else if (surf->bpe == 4) 814 AddrSurfInfoIn.tileIndex = 16; /* 32bpp */ 815 else 816 AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ 817 } 818 } else { 819 /* CIK - VI */ 820 if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) 821 AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ 822 else 823 AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ 824 825 /* Addrlib doesn't set this if tileIndex is forced like above. */ 826 AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf); 827 } 828 } 829 830 surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 831 surf->num_dcc_levels = 0; 832 surf->surf_size = 0; 833 surf->dcc_size = 0; 834 surf->dcc_alignment = 1; 835 surf->htile_size = 0; 836 surf->htile_slice_size = 0; 837 surf->htile_alignment = 1; 838 839 const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) && 840 !(surf->flags & RADEON_SURF_ZBUFFER); 841 842 /* Calculate texture layout information. */ 843 if (!only_stencil) { 844 for (level = 0; level < config->info.levels; level++) { 845 r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, 846 &AddrSurfInfoIn, &AddrSurfInfoOut, 847 &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut); 848 if (r) 849 return r; 850 851 if (level > 0) 852 continue; 853 854 /* Check that we actually got a TC-compatible HTILE if 855 * we requested it (only for level 0, since we're not 856 * supporting HTILE on higher mip levels anyway). 
*/ 857 assert(AddrSurfInfoOut.tcCompatible || 858 !AddrSurfInfoIn.flags.tcCompatible || 859 AddrSurfInfoIn.flags.matchStencilTileCfg); 860 861 if (AddrSurfInfoIn.flags.matchStencilTileCfg) { 862 if (!AddrSurfInfoOut.tcCompatible) { 863 AddrSurfInfoIn.flags.tcCompatible = 0; 864 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 865 } 866 867 AddrSurfInfoIn.flags.matchStencilTileCfg = 0; 868 AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex; 869 stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx; 870 871 assert(stencil_tile_idx >= 0); 872 } 873 874 r = gfx6_surface_settings(addrlib, info, config, 875 &AddrSurfInfoOut, surf); 876 if (r) 877 return r; 878 } 879 } 880 881 /* Calculate texture layout information for stencil. */ 882 if (surf->flags & RADEON_SURF_SBUFFER) { 883 AddrSurfInfoIn.tileIndex = stencil_tile_idx; 884 AddrSurfInfoIn.bpp = 8; 885 AddrSurfInfoIn.flags.depth = 0; 886 AddrSurfInfoIn.flags.stencil = 1; 887 AddrSurfInfoIn.flags.tcCompatible = 0; 888 /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ 889 AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split; 890 891 for (level = 0; level < config->info.levels; level++) { 892 r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, 893 &AddrSurfInfoIn, &AddrSurfInfoOut, 894 &AddrDccIn, &AddrDccOut, 895 NULL, NULL); 896 if (r) 897 return r; 898 899 /* DB uses the depth pitch for both stencil and depth. */ 900 if (!only_stencil) { 901 if (surf->u.legacy.stencil_level[level].nblk_x != 902 surf->u.legacy.level[level].nblk_x) 903 surf->u.legacy.stencil_adjusted = true; 904 } else { 905 surf->u.legacy.level[level].nblk_x = 906 surf->u.legacy.stencil_level[level].nblk_x; 907 } 908 909 if (level == 0) { 910 if (only_stencil) { 911 r = gfx6_surface_settings(addrlib, info, config, 912 &AddrSurfInfoOut, surf); 913 if (r) 914 return r; 915 } 916 917 /* For 2D modes only. 
*/ 918 if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) { 919 surf->u.legacy.stencil_tile_split = 920 AddrSurfInfoOut.pTileInfo->tileSplitBytes; 921 } 922 } 923 } 924 } 925 926 /* Compute FMASK. */ 927 if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color) { 928 ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0}; 929 ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 930 ADDR_TILEINFO fmask_tile_info = {}; 931 932 fin.size = sizeof(fin); 933 fout.size = sizeof(fout); 934 935 fin.tileMode = AddrSurfInfoOut.tileMode; 936 fin.pitch = AddrSurfInfoOut.pitch; 937 fin.height = config->info.height; 938 fin.numSlices = AddrSurfInfoIn.numSlices; 939 fin.numSamples = AddrSurfInfoIn.numSamples; 940 fin.numFrags = AddrSurfInfoIn.numFrags; 941 fin.tileIndex = -1; 942 fout.pTileInfo = &fmask_tile_info; 943 944 r = AddrComputeFmaskInfo(addrlib, &fin, &fout); 945 if (r) 946 return r; 947 948 surf->fmask_size = fout.fmaskBytes; 949 surf->fmask_alignment = fout.baseAlign; 950 surf->fmask_tile_swizzle = 0; 951 952 surf->u.legacy.fmask.slice_tile_max = 953 (fout.pitch * fout.height) / 64; 954 if (surf->u.legacy.fmask.slice_tile_max) 955 surf->u.legacy.fmask.slice_tile_max -= 1; 956 957 surf->u.legacy.fmask.tiling_index = fout.tileIndex; 958 surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight; 959 surf->u.legacy.fmask.pitch_in_pixels = fout.pitch; 960 961 /* Compute tile swizzle for FMASK. */ 962 if (config->info.fmask_surf_index && 963 !(surf->flags & RADEON_SURF_SHAREABLE)) { 964 ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0}; 965 ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0}; 966 967 xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT); 968 xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT); 969 970 /* This counter starts from 1 instead of 0. 
*/ 971 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 972 xin.tileIndex = fout.tileIndex; 973 xin.macroModeIndex = fout.macroModeIndex; 974 xin.pTileInfo = fout.pTileInfo; 975 xin.tileMode = fin.tileMode; 976 977 int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout); 978 if (r != ADDR_OK) 979 return r; 980 981 assert(xout.tileSwizzle <= 982 u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 983 surf->fmask_tile_swizzle = xout.tileSwizzle; 984 } 985 } 986 987 /* Recalculate the whole DCC miptree size including disabled levels. 988 * This is what addrlib does, but calling addrlib would be a lot more 989 * complicated. 990 */ 991 if (surf->dcc_size && config->info.levels > 1) { 992 /* The smallest miplevels that are never compressed by DCC 993 * still read the DCC buffer via TC if the base level uses DCC, 994 * and for some reason the DCC buffer needs to be larger if 995 * the miptree uses non-zero tile_swizzle. Otherwise there are 996 * VM faults. 997 * 998 * "dcc_alignment * 4" was determined by trial and error. 999 */ 1000 surf->dcc_size = align64(surf->surf_size >> 8, 1001 surf->dcc_alignment * 4); 1002 } 1003 1004 /* Make sure HTILE covers the whole miptree, because the shader reads 1005 * TC-compatible HTILE even for levels where it's disabled by DB. 1006 */ 1007 if (surf->htile_size && config->info.levels > 1 && 1008 surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) { 1009 /* MSAA can't occur with levels > 1, so ignore the sample count. 
*/ 1010 const unsigned total_pixels = surf->surf_size / surf->bpe; 1011 const unsigned htile_block_size = 8 * 8; 1012 const unsigned htile_element_size = 4; 1013 1014 surf->htile_size = (total_pixels / htile_block_size) * 1015 htile_element_size; 1016 surf->htile_size = align(surf->htile_size, surf->htile_alignment); 1017 } 1018 1019 surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED; 1020 surf->is_displayable = surf->is_linear || 1021 surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || 1022 surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED; 1023 1024 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 1025 * used at the same time. This case is not currently expected to occur 1026 * because we don't use rotated. Enforce this restriction on all chips 1027 * to facilitate testing. 1028 */ 1029 if (surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED) { 1030 assert(!"rotate micro tile mode is unsupported"); 1031 return ADDR_ERROR; 1032 } 1033 1034 ac_compute_cmask(info, config, surf); 1035 return 0; 1036} 1037 1038/* This is only called when expecting a tiled layout. 
*/ 1039static int 1040gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, 1041 ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, 1042 bool is_fmask, AddrSwizzleMode *swizzle_mode) 1043{ 1044 ADDR_E_RETURNCODE ret; 1045 ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0}; 1046 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0}; 1047 1048 sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT); 1049 sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT); 1050 1051 sin.flags = in->flags; 1052 sin.resourceType = in->resourceType; 1053 sin.format = in->format; 1054 sin.resourceLoction = ADDR_RSRC_LOC_INVIS; 1055 /* TODO: We could allow some of these: */ 1056 sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */ 1057 sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */ 1058 sin.forbiddenBlock.linear = 1; /* don't allow linear swizzle modes */ 1059 sin.bpp = in->bpp; 1060 sin.width = in->width; 1061 sin.height = in->height; 1062 sin.numSlices = in->numSlices; 1063 sin.numMipLevels = in->numMipLevels; 1064 sin.numSamples = in->numSamples; 1065 sin.numFrags = in->numFrags; 1066 1067 if (is_fmask) { 1068 sin.flags.display = 0; 1069 sin.flags.color = 0; 1070 sin.flags.fmask = 1; 1071 } 1072 1073 ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout); 1074 if (ret != ADDR_OK) 1075 return ret; 1076 1077 *swizzle_mode = sout.swizzleMode; 1078 return 0; 1079} 1080 1081static int gfx9_compute_miptree(ADDR_HANDLE addrlib, 1082 const struct radeon_info *info, 1083 const struct ac_surf_config *config, 1084 struct radeon_surf *surf, bool compressed, 1085 ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) 1086{ 1087 ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {}; 1088 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; 1089 ADDR_E_RETURNCODE ret; 1090 1091 out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); 1092 out.pMipInfo = mip_info; 1093 1094 ret = Addr2ComputeSurfaceInfo(addrlib, in, &out); 1095 if (ret != ADDR_OK) 1096 return ret; 1097 1098 if 
(in->flags.stencil) { 1099 surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode; 1100 surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : 1101 out.mipChainPitch - 1; 1102 surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign); 1103 surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign); 1104 surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize; 1105 return 0; 1106 } 1107 1108 surf->u.gfx9.surf.swizzle_mode = in->swizzleMode; 1109 surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : 1110 out.mipChainPitch - 1; 1111 1112 /* CMASK fast clear uses these even if FMASK isn't allocated. 1113 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4. 1114 */ 1115 surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3; 1116 surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch; 1117 1118 surf->u.gfx9.surf_slice_size = out.sliceSize; 1119 surf->u.gfx9.surf_pitch = out.pitch; 1120 surf->u.gfx9.surf_height = out.height; 1121 surf->surf_size = out.surfSize; 1122 surf->surf_alignment = out.baseAlign; 1123 1124 if (in->swizzleMode == ADDR_SW_LINEAR) { 1125 for (unsigned i = 0; i < in->numMipLevels; i++) 1126 surf->u.gfx9.offset[i] = mip_info[i].offset; 1127 } 1128 1129 if (in->flags.depth) { 1130 assert(in->swizzleMode != ADDR_SW_LINEAR); 1131 1132 /* HTILE */ 1133 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 1134 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 1135 1136 hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); 1137 hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); 1138 1139 hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1140 hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned; 1141 hin.depthFlags = in->flags; 1142 hin.swizzleMode = in->swizzleMode; 1143 hin.unalignedWidth = in->width; 1144 hin.unalignedHeight = in->height; 1145 hin.numSlices = in->numSlices; 1146 hin.numMipLevels = in->numMipLevels; 1147 hin.firstMipIdInTail = out.firstMipIdInTail; 1148 
1149 ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout); 1150 if (ret != ADDR_OK) 1151 return ret; 1152 1153 surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned; 1154 surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned; 1155 surf->htile_size = hout.htileBytes; 1156 surf->htile_slice_size = hout.sliceSize; 1157 surf->htile_alignment = hout.baseAlign; 1158 } else { 1159 /* Compute tile swizzle for the color surface. 1160 * All *_X and *_T modes can use the swizzle. 1161 */ 1162 if (config->info.surf_index && 1163 in->swizzleMode >= ADDR_SW_64KB_Z_T && 1164 !out.mipChainInTail && 1165 !(surf->flags & RADEON_SURF_SHAREABLE) && 1166 !in->flags.display) { 1167 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1168 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1169 1170 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1171 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1172 1173 xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 1174 xin.flags = in->flags; 1175 xin.swizzleMode = in->swizzleMode; 1176 xin.resourceType = in->resourceType; 1177 xin.format = in->format; 1178 xin.numSamples = in->numSamples; 1179 xin.numFrags = in->numFrags; 1180 1181 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 1182 if (ret != ADDR_OK) 1183 return ret; 1184 1185 assert(xout.pipeBankXor <= 1186 u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 1187 surf->tile_swizzle = xout.pipeBankXor; 1188 } 1189 1190 /* DCC */ 1191 if (!(surf->flags & RADEON_SURF_DISABLE_DCC) && 1192 !compressed && 1193 in->swizzleMode != ADDR_SW_LINEAR) { 1194 ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; 1195 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; 1196 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {}; 1197 1198 din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); 1199 dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); 1200 dout.pMipInfo = meta_mip_info; 1201 1202 din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1203 din.dccKeyFlags.rbAligned = 
!in->flags.metaRbUnaligned; 1204 din.colorFlags = in->flags; 1205 din.resourceType = in->resourceType; 1206 din.swizzleMode = in->swizzleMode; 1207 din.bpp = in->bpp; 1208 din.unalignedWidth = in->width; 1209 din.unalignedHeight = in->height; 1210 din.numSlices = in->numSlices; 1211 din.numFrags = in->numFrags; 1212 din.numMipLevels = in->numMipLevels; 1213 din.dataSurfaceSize = out.surfSize; 1214 din.firstMipIdInTail = out.firstMipIdInTail; 1215 1216 ret = Addr2ComputeDccInfo(addrlib, &din, &dout); 1217 if (ret != ADDR_OK) 1218 return ret; 1219 1220 surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned; 1221 surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; 1222 surf->dcc_size = dout.dccRamSize; 1223 surf->dcc_alignment = dout.dccRamBaseAlign; 1224 surf->num_dcc_levels = in->numMipLevels; 1225 1226 /* Disable DCC for levels that are in the mip tail. 1227 * 1228 * There are two issues that this is intended to 1229 * address: 1230 * 1231 * 1. Multiple mip levels may share a cache line. This 1232 * can lead to corruption when switching between 1233 * rendering to different mip levels because the 1234 * RBs don't maintain coherency. 1235 * 1236 * 2. Texturing with metadata after rendering sometimes 1237 * fails with corruption, probably for a similar 1238 * reason. 1239 * 1240 * Working around these issues for all levels in the 1241 * mip tail may be overly conservative, but it's what 1242 * Vulkan does. 1243 * 1244 * Alternative solutions that also work but are worse: 1245 * - Disable DCC entirely. 1246 * - Flush TC L2 after rendering. 
1247 */ 1248 for (unsigned i = 0; i < in->numMipLevels; i++) { 1249 if (meta_mip_info[i].inMiptail) { 1250 surf->num_dcc_levels = i; 1251 break; 1252 } 1253 } 1254 1255 if (!surf->num_dcc_levels) 1256 surf->dcc_size = 0; 1257 1258 surf->u.gfx9.display_dcc_size = surf->dcc_size; 1259 surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment; 1260 surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; 1261 1262 /* Compute displayable DCC. */ 1263 if (in->flags.display && 1264 surf->num_dcc_levels && 1265 info->use_display_dcc_with_retile_blit) { 1266 /* Compute displayable DCC info. */ 1267 din.dccKeyFlags.pipeAligned = 0; 1268 din.dccKeyFlags.rbAligned = 0; 1269 1270 assert(din.numSlices == 1); 1271 assert(din.numMipLevels == 1); 1272 assert(din.numFrags == 1); 1273 assert(surf->tile_swizzle == 0); 1274 assert(surf->u.gfx9.dcc.pipe_aligned || 1275 surf->u.gfx9.dcc.rb_aligned); 1276 1277 ret = Addr2ComputeDccInfo(addrlib, &din, &dout); 1278 if (ret != ADDR_OK) 1279 return ret; 1280 1281 surf->u.gfx9.display_dcc_size = dout.dccRamSize; 1282 surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign; 1283 surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; 1284 assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size); 1285 1286 /* Compute address mapping from non-displayable to displayable DCC. 
*/ 1287 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {}; 1288 addrin.size = sizeof(addrin); 1289 addrin.colorFlags.color = 1; 1290 addrin.swizzleMode = din.swizzleMode; 1291 addrin.resourceType = din.resourceType; 1292 addrin.bpp = din.bpp; 1293 addrin.unalignedWidth = din.unalignedWidth; 1294 addrin.unalignedHeight = din.unalignedHeight; 1295 addrin.numSlices = 1; 1296 addrin.numMipLevels = 1; 1297 addrin.numFrags = 1; 1298 1299 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {}; 1300 addrout.size = sizeof(addrout); 1301 1302 surf->u.gfx9.dcc_retile_num_elements = 1303 DIV_ROUND_UP(in->width, dout.compressBlkWidth) * 1304 DIV_ROUND_UP(in->height, dout.compressBlkHeight) * 2; 1305 /* Align the size to 4 (for the compute shader). */ 1306 surf->u.gfx9.dcc_retile_num_elements = 1307 align(surf->u.gfx9.dcc_retile_num_elements, 4); 1308 1309 surf->u.gfx9.dcc_retile_map = 1310 malloc(surf->u.gfx9.dcc_retile_num_elements * 4); 1311 if (!surf->u.gfx9.dcc_retile_map) 1312 return ADDR_OUTOFMEMORY; 1313 1314 unsigned index = 0; 1315 surf->u.gfx9.dcc_retile_use_uint16 = true; 1316 1317 for (unsigned y = 0; y < in->height; y += dout.compressBlkHeight) { 1318 addrin.y = y; 1319 1320 for (unsigned x = 0; x < in->width; x += dout.compressBlkWidth) { 1321 addrin.x = x; 1322 1323 /* Compute src DCC address */ 1324 addrin.dccKeyFlags.pipeAligned = surf->u.gfx9.dcc.pipe_aligned; 1325 addrin.dccKeyFlags.rbAligned = surf->u.gfx9.dcc.rb_aligned; 1326 addrout.addr = 0; 1327 1328 ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); 1329 if (ret != ADDR_OK) 1330 return ret; 1331 1332 surf->u.gfx9.dcc_retile_map[index * 2] = addrout.addr; 1333 if (addrout.addr > USHRT_MAX) 1334 surf->u.gfx9.dcc_retile_use_uint16 = false; 1335 1336 /* Compute dst DCC address */ 1337 addrin.dccKeyFlags.pipeAligned = 0; 1338 addrin.dccKeyFlags.rbAligned = 0; 1339 addrout.addr = 0; 1340 1341 ret = Addr2ComputeDccAddrFromCoord(addrlib, &addrin, &addrout); 1342 if (ret != ADDR_OK) 1343 return ret; 
1344 1345 surf->u.gfx9.dcc_retile_map[index * 2 + 1] = addrout.addr; 1346 if (addrout.addr > USHRT_MAX) 1347 surf->u.gfx9.dcc_retile_use_uint16 = false; 1348 1349 assert(index * 2 + 1 < surf->u.gfx9.dcc_retile_num_elements); 1350 index++; 1351 } 1352 } 1353 /* Fill the remaining pairs with the last one (for the compute shader). */ 1354 for (unsigned i = index * 2; i < surf->u.gfx9.dcc_retile_num_elements; i++) 1355 surf->u.gfx9.dcc_retile_map[i] = surf->u.gfx9.dcc_retile_map[i - 2]; 1356 } 1357 } 1358 1359 /* FMASK */ 1360 if (in->numSamples > 1) { 1361 ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0}; 1362 ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 1363 1364 fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT); 1365 fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); 1366 1367 ret = gfx9_get_preferred_swizzle_mode(addrlib, in, 1368 true, &fin.swizzleMode); 1369 if (ret != ADDR_OK) 1370 return ret; 1371 1372 fin.unalignedWidth = in->width; 1373 fin.unalignedHeight = in->height; 1374 fin.numSlices = in->numSlices; 1375 fin.numSamples = in->numSamples; 1376 fin.numFrags = in->numFrags; 1377 1378 ret = Addr2ComputeFmaskInfo(addrlib, &fin, &fout); 1379 if (ret != ADDR_OK) 1380 return ret; 1381 1382 surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode; 1383 surf->u.gfx9.fmask.epitch = fout.pitch - 1; 1384 surf->fmask_size = fout.fmaskBytes; 1385 surf->fmask_alignment = fout.baseAlign; 1386 1387 /* Compute tile swizzle for the FMASK surface. */ 1388 if (config->info.fmask_surf_index && 1389 fin.swizzleMode >= ADDR_SW_64KB_Z_T && 1390 !(surf->flags & RADEON_SURF_SHAREABLE)) { 1391 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1392 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1393 1394 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1395 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1396 1397 /* This counter starts from 1 instead of 0. 
*/ 1398 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 1399 xin.flags = in->flags; 1400 xin.swizzleMode = fin.swizzleMode; 1401 xin.resourceType = in->resourceType; 1402 xin.format = in->format; 1403 xin.numSamples = in->numSamples; 1404 xin.numFrags = in->numFrags; 1405 1406 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout); 1407 if (ret != ADDR_OK) 1408 return ret; 1409 1410 assert(xout.pipeBankXor <= 1411 u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8)); 1412 surf->fmask_tile_swizzle = xout.pipeBankXor; 1413 } 1414 } 1415 1416 /* CMASK */ 1417 if (in->swizzleMode != ADDR_SW_LINEAR) { 1418 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; 1419 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; 1420 1421 cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); 1422 cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); 1423 1424 if (in->numSamples > 1) { 1425 /* FMASK is always aligned. */ 1426 cin.cMaskFlags.pipeAligned = 1; 1427 cin.cMaskFlags.rbAligned = 1; 1428 } else { 1429 cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1430 cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned; 1431 } 1432 cin.colorFlags = in->flags; 1433 cin.resourceType = in->resourceType; 1434 cin.unalignedWidth = in->width; 1435 cin.unalignedHeight = in->height; 1436 cin.numSlices = in->numSlices; 1437 1438 if (in->numSamples > 1) 1439 cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode; 1440 else 1441 cin.swizzleMode = in->swizzleMode; 1442 1443 ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout); 1444 if (ret != ADDR_OK) 1445 return ret; 1446 1447 surf->u.gfx9.cmask.rb_aligned = cin.cMaskFlags.rbAligned; 1448 surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned; 1449 surf->cmask_size = cout.cmaskBytes; 1450 surf->cmask_alignment = cout.baseAlign; 1451 } 1452 } 1453 1454 return 0; 1455} 1456 1457static int gfx9_compute_surface(ADDR_HANDLE addrlib, 1458 const struct radeon_info *info, 1459 const struct ac_surf_config *config, 1460 enum radeon_surf_mode mode, 
1461 struct radeon_surf *surf) 1462{ 1463 bool compressed; 1464 ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 1465 int r; 1466 1467 AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); 1468 1469 compressed = surf->blk_w == 4 && surf->blk_h == 4; 1470 1471 /* The format must be set correctly for the allocation of compressed 1472 * textures to work. In other cases, setting the bpp is sufficient. */ 1473 if (compressed) { 1474 switch (surf->bpe) { 1475 case 8: 1476 AddrSurfInfoIn.format = ADDR_FMT_BC1; 1477 break; 1478 case 16: 1479 AddrSurfInfoIn.format = ADDR_FMT_BC3; 1480 break; 1481 default: 1482 assert(0); 1483 } 1484 } else { 1485 switch (surf->bpe) { 1486 case 1: 1487 assert(!(surf->flags & RADEON_SURF_ZBUFFER)); 1488 AddrSurfInfoIn.format = ADDR_FMT_8; 1489 break; 1490 case 2: 1491 assert(surf->flags & RADEON_SURF_ZBUFFER || 1492 !(surf->flags & RADEON_SURF_SBUFFER)); 1493 AddrSurfInfoIn.format = ADDR_FMT_16; 1494 break; 1495 case 4: 1496 assert(surf->flags & RADEON_SURF_ZBUFFER || 1497 !(surf->flags & RADEON_SURF_SBUFFER)); 1498 AddrSurfInfoIn.format = ADDR_FMT_32; 1499 break; 1500 case 8: 1501 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1502 AddrSurfInfoIn.format = ADDR_FMT_32_32; 1503 break; 1504 case 12: 1505 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1506 AddrSurfInfoIn.format = ADDR_FMT_32_32_32; 1507 break; 1508 case 16: 1509 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1510 AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32; 1511 break; 1512 default: 1513 assert(0); 1514 } 1515 AddrSurfInfoIn.bpp = surf->bpe * 8; 1516 } 1517 1518 bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 1519 AddrSurfInfoIn.flags.color = is_color_surface && 1520 !(surf->flags & RADEON_SURF_NO_RENDER_TARGET); 1521 AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 1522 AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 1523 /* flags.texture currently refers to TC-compatible HTILE */ 1524 
AddrSurfInfoIn.flags.texture = is_color_surface || 1525 surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; 1526 AddrSurfInfoIn.flags.opt4space = 1; 1527 1528 AddrSurfInfoIn.numMipLevels = config->info.levels; 1529 AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 1530 AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples; 1531 1532 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) 1533 AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 1534 1535 /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures 1536 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders 1537 * must sample 1D textures as 2D. */ 1538 if (config->is_3d) 1539 AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D; 1540 else 1541 AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D; 1542 1543 AddrSurfInfoIn.width = config->info.width; 1544 AddrSurfInfoIn.height = config->info.height; 1545 1546 if (config->is_3d) 1547 AddrSurfInfoIn.numSlices = config->info.depth; 1548 else if (config->is_cube) 1549 AddrSurfInfoIn.numSlices = 6; 1550 else 1551 AddrSurfInfoIn.numSlices = config->info.array_size; 1552 1553 /* This is propagated to HTILE/DCC/CMASK. */ 1554 AddrSurfInfoIn.flags.metaPipeUnaligned = 0; 1555 AddrSurfInfoIn.flags.metaRbUnaligned = 0; 1556 1557 /* The display hardware can only read DCC with RB_ALIGNED=0 and 1558 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. 1559 * 1560 * The CB block requires RB_ALIGNED=1 except 1 RB chips. 1561 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes 1562 * after rendering, so PIPE_ALIGNED=1 is recommended. 
1563 */ 1564 if (info->use_display_dcc_unaligned && is_color_surface && 1565 AddrSurfInfoIn.flags.display) { 1566 AddrSurfInfoIn.flags.metaPipeUnaligned = 1; 1567 AddrSurfInfoIn.flags.metaRbUnaligned = 1; 1568 } 1569 1570 switch (mode) { 1571 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1572 assert(config->info.samples <= 1); 1573 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 1574 AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; 1575 break; 1576 1577 case RADEON_SURF_MODE_1D: 1578 case RADEON_SURF_MODE_2D: 1579 if (surf->flags & RADEON_SURF_IMPORTED) { 1580 AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode; 1581 break; 1582 } 1583 1584 r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, 1585 false, &AddrSurfInfoIn.swizzleMode); 1586 if (r) 1587 return r; 1588 break; 1589 1590 default: 1591 assert(0); 1592 } 1593 1594 surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; 1595 surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 1596 1597 surf->num_dcc_levels = 0; 1598 surf->surf_size = 0; 1599 surf->fmask_size = 0; 1600 surf->dcc_size = 0; 1601 surf->htile_size = 0; 1602 surf->htile_slice_size = 0; 1603 surf->u.gfx9.surf_offset = 0; 1604 surf->u.gfx9.stencil_offset = 0; 1605 surf->cmask_size = 0; 1606 surf->u.gfx9.dcc_retile_use_uint16 = false; 1607 surf->u.gfx9.dcc_retile_num_elements = 0; 1608 surf->u.gfx9.dcc_retile_map = NULL; 1609 1610 /* Calculate texture layout information. */ 1611 r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, 1612 &AddrSurfInfoIn); 1613 if (r) 1614 goto error; 1615 1616 /* Calculate texture layout information for stencil. 
*/ 1617 if (surf->flags & RADEON_SURF_SBUFFER) { 1618 AddrSurfInfoIn.flags.stencil = 1; 1619 AddrSurfInfoIn.bpp = 8; 1620 AddrSurfInfoIn.format = ADDR_FMT_8; 1621 1622 if (!AddrSurfInfoIn.flags.depth) { 1623 r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, 1624 false, &AddrSurfInfoIn.swizzleMode); 1625 if (r) 1626 goto error; 1627 } else 1628 AddrSurfInfoIn.flags.depth = 0; 1629 1630 r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, 1631 &AddrSurfInfoIn); 1632 if (r) 1633 goto error; 1634 } 1635 1636 surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR; 1637 1638 /* Query whether the surface is displayable. */ 1639 bool displayable = false; 1640 if (!config->is_3d && !config->is_cube) { 1641 r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, 1642 surf->bpe * 8, &displayable); 1643 if (r) 1644 goto error; 1645 1646 /* Display needs unaligned DCC. */ 1647 if (info->use_display_dcc_unaligned && 1648 surf->num_dcc_levels && 1649 (surf->u.gfx9.dcc.pipe_aligned || 1650 surf->u.gfx9.dcc.rb_aligned)) 1651 displayable = false; 1652 } 1653 surf->is_displayable = displayable; 1654 1655 switch (surf->u.gfx9.surf.swizzle_mode) { 1656 /* S = standard. */ 1657 case ADDR_SW_256B_S: 1658 case ADDR_SW_4KB_S: 1659 case ADDR_SW_64KB_S: 1660 case ADDR_SW_VAR_S: 1661 case ADDR_SW_64KB_S_T: 1662 case ADDR_SW_4KB_S_X: 1663 case ADDR_SW_64KB_S_X: 1664 case ADDR_SW_VAR_S_X: 1665 surf->micro_tile_mode = RADEON_MICRO_MODE_THIN; 1666 break; 1667 1668 /* D = display. */ 1669 case ADDR_SW_LINEAR: 1670 case ADDR_SW_256B_D: 1671 case ADDR_SW_4KB_D: 1672 case ADDR_SW_64KB_D: 1673 case ADDR_SW_VAR_D: 1674 case ADDR_SW_64KB_D_T: 1675 case ADDR_SW_4KB_D_X: 1676 case ADDR_SW_64KB_D_X: 1677 case ADDR_SW_VAR_D_X: 1678 surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY; 1679 break; 1680 1681 /* R = rotated. 
*/ 1682 case ADDR_SW_256B_R: 1683 case ADDR_SW_4KB_R: 1684 case ADDR_SW_64KB_R: 1685 case ADDR_SW_VAR_R: 1686 case ADDR_SW_64KB_R_T: 1687 case ADDR_SW_4KB_R_X: 1688 case ADDR_SW_64KB_R_X: 1689 case ADDR_SW_VAR_R_X: 1690 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 1691 * used at the same time. This case is not currently expected to occur 1692 * because we don't use rotated. Enforce this restriction on all chips 1693 * to facilitate testing. 1694 */ 1695 assert(!"rotate micro tile mode is unsupported"); 1696 r = ADDR_ERROR; 1697 goto error; 1698 1699 /* Z = depth. */ 1700 case ADDR_SW_4KB_Z: 1701 case ADDR_SW_64KB_Z: 1702 case ADDR_SW_VAR_Z: 1703 case ADDR_SW_64KB_Z_T: 1704 case ADDR_SW_4KB_Z_X: 1705 case ADDR_SW_64KB_Z_X: 1706 case ADDR_SW_VAR_Z_X: 1707 surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH; 1708 break; 1709 1710 default: 1711 assert(0); 1712 } 1713 1714 return 0; 1715 1716error: 1717 free(surf->u.gfx9.dcc_retile_map); 1718 surf->u.gfx9.dcc_retile_map = NULL; 1719 return r; 1720} 1721 1722int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, 1723 const struct ac_surf_config *config, 1724 enum radeon_surf_mode mode, 1725 struct radeon_surf *surf) 1726{ 1727 int r; 1728 1729 r = surf_config_sanity(config, surf->flags); 1730 if (r) 1731 return r; 1732 1733 if (info->chip_class >= GFX9) 1734 return gfx9_compute_surface(addrlib, info, config, mode, surf); 1735 else 1736 return gfx6_compute_surface(addrlib, info, config, mode, surf); 1737} 1738