1/* 2 * Copyright © 2007-2019 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 */ 26 27/** 28************************************************************************************************************************ 29* @file gfx10addrlib.cpp 30* @brief Contain the implementation for the Gfx10Lib class. 
************************************************************************************************************************
*/

#include "gfx10addrlib.h"
#include "gfx10_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace Addr
{
/**
************************************************************************************************************************
*   Gfx10HwlInit
*
*   @brief
*       Creates a Gfx10Lib object by forwarding pClient to V2::Gfx10Lib::CreateObj().
*
*   @return
*       Returns a Gfx10Lib object pointer (as the Addr::Lib base type).
************************************************************************************************************************
*/
Addr::Lib* Gfx10HwlInit(const Client* pClient)
{
    return V2::Gfx10Lib::CreateObj(pClient);
}

namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// One SwizzleModeFlags row per swizzle mode; the trailing comment on each row names the mode it describes.
// Rows marked "Reserved" have every flag cleared. The constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{// Column order: Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt, Reserved
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
    {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
    {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
    {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved

    {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
    {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
    {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
    {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X

    {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
    {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
};

// Five entries; HwlComputeDccInfo() indexes this table with Log2(bpp >> 3) and reads the members .w/.h/.d.
// The products of the three values are 256, 128, 64, 32 and 16.
// NOTE(review): presumably the element dimensions of a 256-byte thick block for 1..16 bytes/element - confirm.
const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

// Five entries each, stored as log2 values per the "_Log2" names. The exponents of each entry sum to
// 16, 15, 14, 13, 12 (64K table) and 12, 11, 10, 9, 8 (4K table).
// NOTE(review): presumably indexed by log2(bytes per element) like Block256_3d - no user is visible here, confirm.
const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};

/**
************************************************************************************************************************
*   Gfx10Lib::Gfx10Lib
*
*   @brief
*       Constructor
*
123************************************************************************************************************************ 124*/ 125Gfx10Lib::Gfx10Lib(const Client* pClient) 126 : 127 Lib(pClient), 128 m_numPkrLog2(0), 129 m_numSaLog2(0), 130 m_colorBaseIndex(0), 131 m_xmaskBaseIndex(0), 132 m_dccBaseIndex(0) 133{ 134 memset(&m_settings, 0, sizeof(m_settings)); 135 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); 136} 137 138/** 139************************************************************************************************************************ 140* Gfx10Lib::~Gfx10Lib 141* 142* @brief 143* Destructor 144************************************************************************************************************************ 145*/ 146Gfx10Lib::~Gfx10Lib() 147{ 148} 149 150/** 151************************************************************************************************************************ 152* Gfx10Lib::HwlComputeHtileInfo 153* 154* @brief 155* Interface function stub of AddrComputeHtilenfo 156* 157* @return 158* ADDR_E_RETURNCODE 159************************************************************************************************************************ 160*/ 161ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo( 162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure 163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure 164 ) const 165{ 166 ADDR_E_RETURNCODE ret = ADDR_OK; 167 168 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) && 169 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) || 170 (pIn->hTileFlags.pipeAligned != TRUE)) 171 { 172 ret = ADDR_INVALIDPARAMS; 173 } 174 else 175 { 176 Dim3d metaBlk = {}; 177 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil, 178 ADDR_RSRC_TEX_2D, 179 pIn->swizzleMode, 180 0, 181 0, 182 TRUE, 183 &metaBlk); 184 185 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 186 pOut->height = PowTwoAlign(pIn->unalignedHeight, 
metaBlk.h); 187 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u)); 188 pOut->metaBlkWidth = metaBlk.w; 189 pOut->metaBlkHeight = metaBlk.h; 190 191 if (pIn->numMipLevels > 1) 192 { 193 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 194 195 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize; 196 197 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--) 198 { 199 UINT_32 mipWidth, mipHeight; 200 201 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 202 203 mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 204 mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 205 206 const UINT_32 pitchInM = mipWidth / metaBlk.w; 207 const UINT_32 heightInM = mipHeight / metaBlk.h; 208 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; 209 210 if (pOut->pMipInfo != NULL) 211 { 212 pOut->pMipInfo[i].inMiptail = FALSE; 213 pOut->pMipInfo[i].offset = offset; 214 pOut->pMipInfo[i].sliceSize = mipSliceSize; 215 } 216 217 offset += mipSliceSize; 218 } 219 220 pOut->sliceSize = offset; 221 pOut->metaBlkNumPerSlice = offset / metaBlkSize; 222 pOut->htileBytes = pOut->sliceSize * pIn->numSlices; 223 224 if (pOut->pMipInfo != NULL) 225 { 226 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 227 { 228 pOut->pMipInfo[i].inMiptail = TRUE; 229 pOut->pMipInfo[i].offset = 0; 230 pOut->pMipInfo[i].sliceSize = 0; 231 } 232 233 if (pIn->firstMipIdInTail != pIn->numMipLevels) 234 { 235 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 236 } 237 } 238 } 239 else 240 { 241 const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 242 const UINT_32 heightInM = pOut->height / metaBlk.h; 243 244 pOut->metaBlkNumPerSlice = pitchInM * heightInM; 245 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 246 pOut->htileBytes = pOut->sliceSize * pIn->numSlices; 247 248 if (pOut->pMipInfo != NULL) 249 { 250 pOut->pMipInfo[0].inMiptail = FALSE; 251 pOut->pMipInfo[0].offset = 0; 
252 pOut->pMipInfo[0].sliceSize = pOut->sliceSize; 253 } 254 } 255 256 // Get the HTILE address equation (copied from HtileAddrFromCoord). 257 // HTILE addressing depends on the number of samples, but this code doesn't support it yet. 258 const UINT_32 index = m_xmaskBaseIndex; 259 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; 260 261 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2); 262 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]]; 263 } 264 265 return ret; 266} 267 268/** 269************************************************************************************************************************ 270* Gfx10Lib::HwlComputeCmaskInfo 271* 272* @brief 273* Interface function stub of AddrComputeCmaskInfo 274* 275* @return 276* ADDR_E_RETURNCODE 277************************************************************************************************************************ 278*/ 279ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo( 280 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure 281 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure 282 ) const 283{ 284 ADDR_E_RETURNCODE ret = ADDR_OK; 285 286 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) || 287 (pIn->cMaskFlags.pipeAligned != TRUE) || 288 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) && 289 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0)))) 290 { 291 ret = ADDR_INVALIDPARAMS; 292 } 293 else 294 { 295 Dim3d metaBlk = {}; 296 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask, 297 ADDR_RSRC_TEX_2D, 298 pIn->swizzleMode, 299 0, 300 0, 301 TRUE, 302 &metaBlk); 303 304 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 305 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); 306 pOut->baseAlign = metaBlkSize; 307 pOut->metaBlkWidth = metaBlk.w; 308 pOut->metaBlkHeight = metaBlk.h; 309 310 if (pIn->numMipLevels > 1) 311 { 312 
ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 313 314 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1; 315 316 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--) 317 { 318 UINT_32 mipWidth, mipHeight; 319 320 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 321 322 mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 323 mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 324 325 const UINT_32 pitchInM = mipWidth / metaBlk.w; 326 const UINT_32 heightInM = mipHeight / metaBlk.h; 327 328 if (pOut->pMipInfo != NULL) 329 { 330 pOut->pMipInfo[i].inMiptail = FALSE; 331 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize; 332 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize; 333 } 334 335 metaBlkPerSlice += pitchInM * heightInM; 336 } 337 338 pOut->metaBlkNumPerSlice = metaBlkPerSlice; 339 340 if (pOut->pMipInfo != NULL) 341 { 342 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 343 { 344 pOut->pMipInfo[i].inMiptail = TRUE; 345 pOut->pMipInfo[i].offset = 0; 346 pOut->pMipInfo[i].sliceSize = 0; 347 } 348 349 if (pIn->firstMipIdInTail != pIn->numMipLevels) 350 { 351 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 352 } 353 } 354 } 355 else 356 { 357 const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 358 const UINT_32 heightInM = pOut->height / metaBlk.h; 359 360 pOut->metaBlkNumPerSlice = pitchInM * heightInM; 361 362 if (pOut->pMipInfo != NULL) 363 { 364 pOut->pMipInfo[0].inMiptail = FALSE; 365 pOut->pMipInfo[0].offset = 0; 366 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 367 } 368 } 369 370 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 371 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices; 372 373 // Get the CMASK address equation (copied from CmaskAddrFromCoord) 374 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1); 375 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3); 376 const UINT_32 index = 
m_xmaskBaseIndex + fmaskElemLog2; 377 const UINT_8* patIdxTable = 378 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : 379 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); 380 381 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2); 382 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]]; 383 } 384 385 return ret; 386} 387 388/** 389************************************************************************************************************************ 390* Gfx10Lib::HwlComputeDccInfo 391* 392* @brief 393* Interface function to compute DCC key info 394* 395* @return 396* ADDR_E_RETURNCODE 397************************************************************************************************************************ 398*/ 399ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo( 400 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure 401 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure 402 ) const 403{ 404 ADDR_E_RETURNCODE ret = ADDR_OK; 405 406 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode)) 407 { 408 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only 409 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface. 410 ret = ADDR_INVALIDPARAMS; 411 } 412 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode)) 413 { 414 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1 415 ret = ADDR_INVALIDPARAMS; 416 } 417 else 418 { 419 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 420 421 { 422 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB 423 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode)); 424 425 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode); 426 427 pOut->compressBlkWidth = isThick ? 
Block256_3d[elemLog2].w : Block256_2d[elemLog2].w; 428 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h; 429 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1; 430 } 431 432 if (ret == ADDR_OK) 433 { 434 Dim3d metaBlk = {}; 435 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u)); 436 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor, 437 pIn->resourceType, 438 pIn->swizzleMode, 439 elemLog2, 440 numFragLog2, 441 pIn->dccKeyFlags.pipeAligned, 442 &metaBlk); 443 444 pOut->dccRamBaseAlign = metaBlkSize; 445 pOut->metaBlkWidth = metaBlk.w; 446 pOut->metaBlkHeight = metaBlk.h; 447 pOut->metaBlkDepth = metaBlk.d; 448 pOut->metaBlkSize = metaBlkSize; 449 450 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 451 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); 452 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d); 453 454 if (pIn->numMipLevels > 1) 455 { 456 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 457 458 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 
0 : metaBlkSize; 459 460 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--) 461 { 462 UINT_32 mipWidth, mipHeight; 463 464 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 465 466 mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 467 mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 468 469 const UINT_32 pitchInM = mipWidth / metaBlk.w; 470 const UINT_32 heightInM = mipHeight / metaBlk.h; 471 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; 472 473 if (pOut->pMipInfo != NULL) 474 { 475 pOut->pMipInfo[i].inMiptail = FALSE; 476 pOut->pMipInfo[i].offset = offset; 477 pOut->pMipInfo[i].sliceSize = mipSliceSize; 478 } 479 480 offset += mipSliceSize; 481 } 482 483 pOut->dccRamSliceSize = offset; 484 pOut->metaBlkNumPerSlice = offset / metaBlkSize; 485 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); 486 487 if (pOut->pMipInfo != NULL) 488 { 489 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 490 { 491 pOut->pMipInfo[i].inMiptail = TRUE; 492 pOut->pMipInfo[i].offset = 0; 493 pOut->pMipInfo[i].sliceSize = 0; 494 } 495 496 if (pIn->firstMipIdInTail != pIn->numMipLevels) 497 { 498 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 499 } 500 } 501 } 502 else 503 { 504 const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 505 const UINT_32 heightInM = pOut->height / metaBlk.h; 506 507 pOut->metaBlkNumPerSlice = pitchInM * heightInM; 508 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 509 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); 510 511 if (pOut->pMipInfo != NULL) 512 { 513 pOut->pMipInfo[0].inMiptail = FALSE; 514 pOut->pMipInfo[0].offset = 0; 515 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize; 516 } 517 } 518 519 // Get the DCC address equation (copied from DccAddrFromCoord) 520 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 521 const UINT_32 numPipeLog2 = m_pipesLog2; 522 UINT_32 index = m_dccBaseIndex + 
elemLog2; 523 const UINT_8* patIdxTable; 524 525 if (m_settings.supportRbPlus) 526 { 527 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX; 528 529 if (pIn->dccKeyFlags.pipeAligned) 530 { 531 index += MaxNumOfBpp; 532 533 if (m_numPkrLog2 < 2) 534 { 535 index += m_pipesLog2 * MaxNumOfBpp; 536 } 537 else 538 { 539 // 4 groups for "m_numPkrLog2 < 2" case 540 index += 4 * MaxNumOfBpp; 541 542 const UINT_32 dccPipePerPkr = 3; 543 544 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + 545 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; 546 } 547 } 548 } 549 else 550 { 551 patIdxTable = GFX10_DCC_64K_R_X_PATIDX; 552 553 if (pIn->dccKeyFlags.pipeAligned) 554 { 555 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; 556 } 557 else 558 { 559 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; 560 } 561 } 562 563 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2); 564 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]; 565 } 566 } 567 568 return ret; 569} 570 571/** 572************************************************************************************************************************ 573* Gfx10Lib::HwlComputeCmaskAddrFromCoord 574* 575* @brief 576* Interface function stub of AddrComputeCmaskAddrFromCoord 577* 578* @return 579* ADDR_E_RETURNCODE 580************************************************************************************************************************ 581*/ 582ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( 583 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 584 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 585{ 586 // Only support pipe aligned CMask 587 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE); 588 589 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {}; 590 input.size = sizeof(input); 591 input.cMaskFlags = pIn->cMaskFlags; 592 input.colorFlags = pIn->colorFlags; 593 input.unalignedWidth = 
Max(pIn->unalignedWidth, 1u); 594 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 595 input.numSlices = Max(pIn->numSlices, 1u); 596 input.swizzleMode = pIn->swizzleMode; 597 input.resourceType = pIn->resourceType; 598 599 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {}; 600 output.size = sizeof(output); 601 602 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); 603 604 if (returnCode == ADDR_OK) 605 { 606 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); 607 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3); 608 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; 609 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2; 610 const UINT_8* patIdxTable = 611 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : 612 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); 613 614 615 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7; 616 const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 617 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]], 618 blkSizeLog2 + 1, // +1 for nibble offset 619 pIn->x, 620 pIn->y, 621 pIn->slice, 622 0); 623 const UINT_32 xb = pIn->x / output.metaBlkWidth; 624 const UINT_32 yb = pIn->y / output.metaBlkHeight; 625 const UINT_32 pb = output.pitch / output.metaBlkWidth; 626 const UINT_32 blkIndex = (yb * pb) + xb; 627 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 628 629 pOut->addr = (output.sliceSize * pIn->slice) + 630 (blkIndex * (1 << blkSizeLog2)) + 631 ((blkOffset >> 1) ^ pipeXor); 632 pOut->bitPosition = (blkOffset & 1) << 2; 633 } 634 635 return returnCode; 636} 637 638/** 639************************************************************************************************************************ 640* Gfx10Lib::HwlComputeHtileAddrFromCoord 641* 642* @brief 643* Interface function stub of AddrComputeHtileAddrFromCoord 644* 645* 
@return 646* ADDR_E_RETURNCODE 647************************************************************************************************************************ 648*/ 649ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord( 650 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 651 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 652{ 653 ADDR_E_RETURNCODE returnCode = ADDR_OK; 654 655 if (pIn->numMipLevels > 1) 656 { 657 returnCode = ADDR_NOTIMPLEMENTED; 658 } 659 else 660 { 661 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {}; 662 input.size = sizeof(input); 663 input.hTileFlags = pIn->hTileFlags; 664 input.depthFlags = pIn->depthflags; 665 input.swizzleMode = pIn->swizzleMode; 666 input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 667 input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 668 input.numSlices = Max(pIn->numSlices, 1u); 669 input.numMipLevels = 1; 670 671 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {}; 672 output.size = sizeof(output); 673 674 returnCode = ComputeHtileInfo(&input, &output); 675 676 if (returnCode == ADDR_OK) 677 { 678 const UINT_32 numSampleLog2 = Log2(pIn->numSamples); 679 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; 680 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2; 681 const UINT_8* patIdxTable = m_settings.supportRbPlus ? 
GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; 682 683 684 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4; 685 const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 686 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]], 687 blkSizeLog2 + 1, // +1 for nibble offset 688 pIn->x, 689 pIn->y, 690 pIn->slice, 691 0); 692 const UINT_32 xb = pIn->x / output.metaBlkWidth; 693 const UINT_32 yb = pIn->y / output.metaBlkHeight; 694 const UINT_32 pb = output.pitch / output.metaBlkWidth; 695 const UINT_32 blkIndex = (yb * pb) + xb; 696 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 697 698 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) + 699 (blkIndex * (1 << blkSizeLog2)) + 700 ((blkOffset >> 1) ^ pipeXor); 701 } 702 } 703 704 return returnCode; 705} 706 707/** 708************************************************************************************************************************ 709* Gfx10Lib::HwlComputeHtileCoordFromAddr 710* 711* @brief 712* Interface function stub of AddrComputeHtileCoordFromAddr 713* 714* @return 715* ADDR_E_RETURNCODE 716************************************************************************************************************************ 717*/ 718ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr( 719 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure 720 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure 721{ 722 ADDR_NOT_IMPLEMENTED(); 723 724 return ADDR_OK; 725} 726 727/** 728************************************************************************************************************************ 729* Gfx10Lib::HwlSupportComputeDccAddrFromCoord 730* 731* @brief 732* Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter 733* 734* @return 735* ADDR_E_RETURNCODE 
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Supported only for a 2D, ADDR_SW_64KB_R_X, non-linear DCC key with a single fragment and a single mip level.
    if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
        (pIn->swizzleMode != ADDR_SW_64KB_R_X)  ||
        (pIn->dccKeyFlags.linear == TRUE)       ||
        (pIn->numFrags > 1)                     ||
        (pIn->numMipLevels > 1)                 ||
        (pIn->mipId > 0))
    {
        returnCode = ADDR_NOTSUPPORTED;
    }
    // HwlComputeDccAddrFromCoord() divides by metaBlkWidth/metaBlkHeight and reads pitch and dccRamSliceSize
    // straight from the input, so zero values there are rejected as well.
    else if ((pIn->pitch == 0)         ||
             (pIn->metaBlkWidth == 0)  ||
             (pIn->metaBlkHeight == 0) ||
             (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
    {
        returnCode = ADDR_NOTSUPPORTED;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeDccAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeDccAddrFromCoord. Writes the DCC key byte address for the coordinate
*       (x, y, slice) to pOut->addr, using the layout values (pitch, meta block size, slice size) supplied in pIn.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
    const UINT_32 numPipeLog2 = m_pipesLog2;
    const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
    UINT_32       index       = m_dccBaseIndex + elemLog2;
    const UINT_8* patIdxTable;

    // Pattern index selection; same computation as the equation lookup in HwlComputeDccInfo().
    if (m_settings.supportRbPlus)
    {
        patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;

        if (pIn->dccKeyFlags.pipeAligned)
        {
            index += MaxNumOfBpp;

            if (m_numPkrLog2 < 2)
            {
                index += m_pipesLog2 * MaxNumOfBpp;
            }
            else
            {
                // 4 groups for "m_numPkrLog2 < 2" case
                index += 4 * MaxNumOfBpp;

                const UINT_32 dccPipePerPkr = 3;

                index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
                         (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
            }
        }
    }
    else
    {
        patIdxTable = GFX10_DCC_64K_R_X_PATIDX;

        if (pIn->dccKeyFlags.pipeAligned)
        {
            index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
        }
        else
        {
            index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
        }
    }

    const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
    const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
    const UINT_32  blkOffset   =
        ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
                                        blkSizeLog2 + 1, // +1 for nibble offset
                                        pIn->x,
                                        pIn->y,
                                        pIn->slice,
                                        0);
    const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
    const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
    const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
    const UINT_32 blkIndex = (yb * pb) + xb;
    const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;

    // Slice offset + offset of the meta block within the slice + pipe-xor'ed byte offset inside the block
    // (blkOffset is in nibbles, hence the shift).
    pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
                 (blkIndex * (1 << blkSizeLog2)) +
                 ((blkOffset >> 1) ^ pipeXor);
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlInitGlobalParams
*
*   @brief
*       Initializes global parameters: decodes NUM_PIPES, PIPE_INTERLEAVE_SIZE and MAX_COMPRESSED_FRAGS (plus
*       NUM_PKRS when m_settings.supportRbPlus is set) from the GB_ADDR_CONFIG register value, advances the
*       pattern-table base indices accordingly and, if every field decoded, builds the equation table.
*
*   @return
*       TRUE if all settings are valid
*
************************************************************************************************************************
*/
BOOL_32 Gfx10Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    BOOL_32              valid = TRUE;
    GB_ADDR_CONFIG_GFX10 gbAddrConfig;

    gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;

    // These values are copied from CModel code
    switch (gbAddrConfig.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:
            m_pipes     = 1;
            m_pipesLog2 = 0;
            break;
        case ADDR_CONFIG_2_PIPE:
            m_pipes     = 2;
            m_pipesLog2 = 1;
            break;
        case ADDR_CONFIG_4_PIPE:
            m_pipes     = 4;
            m_pipesLog2 = 2;
            break;
        case ADDR_CONFIG_8_PIPE:
            m_pipes     = 8;
            m_pipesLog2 = 3;
            break;
        case ADDR_CONFIG_16_PIPE:
            m_pipes     = 16;
            m_pipesLog2 = 4;
            break;
        case ADDR_CONFIG_32_PIPE:
            m_pipes     = 32;
            m_pipesLog2 = 5;
            break;
        case ADDR_CONFIG_64_PIPE:
            m_pipes     = 64;
            m_pipesLog2 = 6;
            break;
        default:
            // Unknown encoding: m_pipes/m_pipesLog2 are left as they were; decoding of the other fields continues.
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
            break;
    }

    switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
            m_pipeInterleaveLog2  = 8;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
            m_pipeInterleaveLog2  = 9;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
            m_pipeInterleaveLog2  = 10;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
            m_pipeInterleaveLog2  = 11;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
            break;
    }

    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
    // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
    // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);

    switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
    {
        case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFrag     = 1;
            m_maxCompFragLog2 = 0;
            break;
        case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFrag     = 2;
            m_maxCompFragLog2 = 1;
            break;
        case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFrag     = 4;
            m_maxCompFragLog2 = 2;
            break;
        case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFrag     = 8;
            m_maxCompFragLog2 = 3;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
            break;
    }

    // The base indices are advanced with "+=" from the zero values set by the constructor.
    // NOTE(review): this looks like it assumes the function runs once per object - confirm against the caller.
    {
        // Skip unaligned case
        m_xmaskBaseIndex += MaxNumOfAA;

        m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
        m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;

        if (m_settings.supportRbPlus)
        {
            // NUM_PKRS is stored directly as a log2 value.
            m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
            m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;

            ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));

            // Compile-time check: the RB+ HTILE and CMASK index tables have the same number of entries.
            ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
                          sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));

            if (m_numPkrLog2 >= 2)
            {
                m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
                m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
            }
        }
        else
        {
            // Number of pipe encodings from ADDR_CONFIG_1_PIPE through ADDR_CONFIG_64_PIPE inclusive.
            const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
                                        static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
                                        1;

            // Compile-time checks: one group of MaxNumOfAA entries per pipe encoding plus one extra group, and
            // matching HTILE/CMASK index table sizes.
            ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);

            ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
                          sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
        }
    }

    if (m_settings.supportRbPlus)
    {
        // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
        // corresponding SW_64KB_* mode
        m_blockVarSizeLog2 = m_pipesLog2 + 14;
    }


    // The equation table is only built when every register field above decoded to a known value.
    if (valid)
    {
        InitEquationTable();
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlConvertChipFamily
*
*   @brief
*       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
*   @return
*       ChipFamily
************************************************************************************************************************
*/
ChipFamily Gfx10Lib::HwlConvertChipFamily(
    UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
    UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
{
    ChipFamily family = ADDR_CHIP_FAMILY_NAVI;

    m_settings.dccUnsup3DSwDis  = 1;
    m_settings.dsMipmapHtileFix = 1;

    switch (chipFamily)
    {
        case FAMILY_NV:
            if (ASICREV_IS_NAVI10_P(chipRevision))
            {
                m_settings.dsMipmapHtileFix = 0;
                m_settings.isDcn20          = 1;
            }

            if (ASICREV_IS_NAVI12_P(chipRevision))
            {
                m_settings.isDcn20 = 1;
            }

            if (ASICREV_IS_NAVI14_M(chipRevision))
            {
                m_settings.isDcn20 = 1;
            }

            if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_BEIGE_GOBY(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            break;

        case FAMILY_VGH:
            if
(ASICREV_IS_VANGOGH(chipRevision)) 1069 { 1070 m_settings.supportRbPlus = 1; 1071 m_settings.dccUnsup3DSwDis = 0; 1072 } 1073 else 1074 { 1075 ADDR_ASSERT(!"Unknown chip revision"); 1076 } 1077 1078 break; 1079 1080 case FAMILY_YC: 1081 if (ASICREV_IS_YELLOW_CARP(chipRevision)) 1082 { 1083 m_settings.supportRbPlus = 1; 1084 m_settings.dccUnsup3DSwDis = 0; 1085 } 1086 else 1087 { 1088 ADDR_ASSERT(!"Unknown chip revision"); 1089 } 1090 1091 break; 1092 1093 default: 1094 ADDR_ASSERT(!"Unknown chip family"); 1095 break; 1096 } 1097 1098 m_configFlags.use32bppFor422Fmt = TRUE; 1099 1100 return family; 1101} 1102 1103/** 1104************************************************************************************************************************ 1105* Gfx10Lib::GetBlk256SizeLog2 1106* 1107* @brief 1108* Get block 256 size 1109* 1110* @return 1111* N/A 1112************************************************************************************************************************ 1113*/ 1114void Gfx10Lib::GetBlk256SizeLog2( 1115 AddrResourceType resourceType, ///< [in] Resource type 1116 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 1117 UINT_32 elemLog2, ///< [in] element size log2 1118 UINT_32 numSamplesLog2, ///< [in] number of samples 1119 Dim3d* pBlock ///< [out] block size 1120 ) const 1121{ 1122 if (IsThin(resourceType, swizzleMode)) 1123 { 1124 UINT_32 blockBits = 8 - elemLog2; 1125 1126 if (IsZOrderSwizzle(swizzleMode)) 1127 { 1128 blockBits -= numSamplesLog2; 1129 } 1130 1131 pBlock->w = (blockBits >> 1) + (blockBits & 1); 1132 pBlock->h = (blockBits >> 1); 1133 pBlock->d = 0; 1134 } 1135 else 1136 { 1137 ADDR_ASSERT(IsThick(resourceType, swizzleMode)); 1138 1139 UINT_32 blockBits = 8 - elemLog2; 1140 1141 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0); 1142 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 
1 : 0); 1143 pBlock->h = (blockBits / 3); 1144 } 1145} 1146 1147/** 1148************************************************************************************************************************ 1149* Gfx10Lib::GetCompressedBlockSizeLog2 1150* 1151* @brief 1152* Get compress block size 1153* 1154* @return 1155* N/A 1156************************************************************************************************************************ 1157*/ 1158void Gfx10Lib::GetCompressedBlockSizeLog2( 1159 Gfx10DataType dataType, ///< [in] Data type 1160 AddrResourceType resourceType, ///< [in] Resource type 1161 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 1162 UINT_32 elemLog2, ///< [in] element size log2 1163 UINT_32 numSamplesLog2, ///< [in] number of samples 1164 Dim3d* pBlock ///< [out] block size 1165 ) const 1166{ 1167 if (dataType == Gfx10DataColor) 1168 { 1169 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock); 1170 } 1171 else 1172 { 1173 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask)); 1174 pBlock->w = 3; 1175 pBlock->h = 3; 1176 pBlock->d = 0; 1177 } 1178} 1179 1180/** 1181************************************************************************************************************************ 1182* Gfx10Lib::GetMetaOverlapLog2 1183* 1184* @brief 1185* Get meta block overlap 1186* 1187* @return 1188* N/A 1189************************************************************************************************************************ 1190*/ 1191INT_32 Gfx10Lib::GetMetaOverlapLog2( 1192 Gfx10DataType dataType, ///< [in] Data type 1193 AddrResourceType resourceType, ///< [in] Resource type 1194 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 1195 UINT_32 elemLog2, ///< [in] element size log2 1196 UINT_32 numSamplesLog2 ///< [in] number of samples 1197 ) const 1198{ 1199 Dim3d compBlock; 1200 Dim3d microBlock; 1201 1202 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, 
numSamplesLog2, &compBlock); 1203 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock); 1204 1205 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d; 1206 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d; 1207 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2); 1208 const INT_32 numPipesLog2 = GetEffectiveNumPipes(); 1209 INT_32 overlap = numPipesLog2 - maxSizeLog2; 1210 1211 if ((numPipesLog2 > 1) && m_settings.supportRbPlus) 1212 { 1213 overlap++; 1214 } 1215 1216 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4) 1217 if ((elemLog2 == 4) && (numSamplesLog2 == 3)) 1218 { 1219 overlap--; 1220 } 1221 overlap = Max(overlap, 0); 1222 return overlap; 1223} 1224 1225/** 1226************************************************************************************************************************ 1227* Gfx10Lib::Get3DMetaOverlapLog2 1228* 1229* @brief 1230* Get 3d meta block overlap 1231* 1232* @return 1233* N/A 1234************************************************************************************************************************ 1235*/ 1236INT_32 Gfx10Lib::Get3DMetaOverlapLog2( 1237 AddrResourceType resourceType, ///< [in] Resource type 1238 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 1239 UINT_32 elemLog2 ///< [in] element size log2 1240 ) const 1241{ 1242 Dim3d microBlock; 1243 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock); 1244 1245 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w); 1246 1247 if (m_settings.supportRbPlus) 1248 { 1249 overlap++; 1250 } 1251 1252 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE)) 1253 { 1254 overlap = 0; 1255 } 1256 return overlap; 1257} 1258 1259/** 1260************************************************************************************************************************ 1261* Gfx10Lib::GetPipeRotateAmount 1262* 1263* 
@brief 1264* Get pipe rotate amount 1265* 1266* @return 1267* Pipe rotate amount 1268************************************************************************************************************************ 1269*/ 1270 1271INT_32 Gfx10Lib::GetPipeRotateAmount( 1272 AddrResourceType resourceType, ///< [in] Resource type 1273 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode 1274 ) const 1275{ 1276 INT_32 amount = 0; 1277 1278 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1)) 1279 { 1280 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ? 1281 1 : m_pipesLog2 - (m_numSaLog2 + 1); 1282 } 1283 1284 return amount; 1285} 1286 1287/** 1288************************************************************************************************************************ 1289* Gfx10Lib::GetMetaBlkSize 1290* 1291* @brief 1292* Get metadata block size 1293* 1294* @return 1295* Meta block size 1296************************************************************************************************************************ 1297*/ 1298UINT_32 Gfx10Lib::GetMetaBlkSize( 1299 Gfx10DataType dataType, ///< [in] Data type 1300 AddrResourceType resourceType, ///< [in] Resource type 1301 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 1302 UINT_32 elemLog2, ///< [in] element size log2 1303 UINT_32 numSamplesLog2, ///< [in] number of samples 1304 BOOL_32 pipeAlign, ///< [in] pipe align 1305 Dim3d* pBlock ///< [out] block size 1306 ) const 1307{ 1308 INT_32 metablkSizeLog2; 1309 1310 { 1311 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType); 1312 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType); 1313 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2; 1314 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ? 
1315 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2); 1316 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode); 1317 INT_32 numPipesLog2 = m_pipesLog2; 1318 1319 if (IsThin(resourceType, swizzleMode)) 1320 { 1321 if ((pipeAlign == FALSE) || 1322 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) || 1323 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE)) 1324 { 1325 if (pipeAlign) 1326 { 1327 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12); 1328 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2); 1329 } 1330 else 1331 { 1332 metablkSizeLog2 = Min(dataBlkSizeLog2, 12); 1333 } 1334 } 1335 else 1336 { 1337 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1)) 1338 { 1339 numPipesLog2++; 1340 } 1341 1342 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode); 1343 1344 if (numPipesLog2 >= 4) 1345 { 1346 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2); 1347 1348 // In 16Bpe 8xaa, we have an extra overlap bit 1349 if ((pipeRotateLog2 > 0) && 1350 (elemLog2 == 4) && 1351 (numSamplesLog2 == 3) && 1352 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3))) 1353 { 1354 overlapLog2++; 1355 } 1356 1357 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; 1358 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2); 1359 1360 if (m_settings.supportRbPlus && 1361 IsRtOptSwizzle(swizzleMode) && 1362 (numPipesLog2 == 6) && 1363 (numSamplesLog2 == 3) && 1364 (m_maxCompFragLog2 == 3) && 1365 (metablkSizeLog2 < 15)) 1366 { 1367 metablkSizeLog2 = 15; 1368 } 1369 } 1370 else 1371 { 1372 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12); 1373 } 1374 1375 if (dataType == Gfx10DataDepthStencil) 1376 { 1377 // For htile surfaces, pad meta block size to 2K * num_pipes 1378 metablkSizeLog2 = Max(metablkSizeLog2, 11 + 
numPipesLog2); 1379 } 1380 1381 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2); 1382 1383 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1)) 1384 { 1385 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1); 1386 1387 metablkSizeLog2 = Max(metablkSizeLog2, tmp); 1388 } 1389 } 1390 1391 const INT_32 metablkBitsLog2 = 1392 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; 1393 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1)); 1394 pBlock->h = 1 << (metablkBitsLog2 >> 1); 1395 pBlock->d = 1; 1396 } 1397 else 1398 { 1399 ADDR_ASSERT(IsThick(resourceType, swizzleMode)); 1400 1401 if (pipeAlign) 1402 { 1403 if (m_settings.supportRbPlus && 1404 (m_pipesLog2 == m_numSaLog2 + 1) && 1405 (m_pipesLog2 > 1) && 1406 IsRbAligned(resourceType, swizzleMode)) 1407 { 1408 numPipesLog2++; 1409 } 1410 1411 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2); 1412 1413 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; 1414 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2); 1415 metablkSizeLog2 = Max(metablkSizeLog2, 12); 1416 } 1417 else 1418 { 1419 metablkSizeLog2 = 12; 1420 } 1421 1422 const INT_32 metablkBitsLog2 = 1423 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; 1424 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0)); 1425 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0)); 1426 pBlock->d = 1 << (metablkBitsLog2 / 3); 1427 } 1428 } 1429 1430 return (1 << static_cast<UINT_32>(metablkSizeLog2)); 1431} 1432 1433/** 1434************************************************************************************************************************ 1435* Gfx10Lib::ConvertSwizzlePatternToEquation 1436* 1437* @brief 1438* Convert swizzle pattern to equation. 
*
*   @return
*       N/A
*
*   @note
*       Channel numbers written below are 0 for x, 1 for y and 2 for z. x indices are offset by elemLog2, y and z
*       indices are not. Each address bit gets one addr term plus up to two extra terms in xor1/xor2.
************************************************************************************************************************
*/
VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
    UINT_32                elemLog2,  ///< [in] element bytes log2
    AddrResourceType       rsrcType,  ///< [in] resource type
    AddrSwizzleMode        swMode,    ///< [in] swizzle mode
    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
    const
{
    ADDR_BIT_SETTING fullSwizzlePattern[20];
    GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

    const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
    const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);

    pEquation->numBits            = blockSizeLog2;
    pEquation->stackedDepthSlices = FALSE;

    // The lowest elemLog2 bits map straight through on channel 0.
    for (UINT_32 i = 0; i < elemLog2; i++)
    {
        pEquation->addr[i].channel = 0;
        pEquation->addr[i].valid   = 1;
        pEquation->addr[i].index   = i;
    }

    if (IsXor(swMode) == FALSE)
    {
        // Non-xor modes: every pattern bit must be a single coordinate bit, so it maps to exactly one addr term.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            ADDR_ASSERT(IsPow2(pSwizzle[i].value));

            if (pSwizzle[i].x != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));

                pEquation->addr[i].channel = 0;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
            }
            else if (pSwizzle[i].y != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));

                pEquation->addr[i].channel = 1;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].y);
            }
            else
            {
                ADDR_ASSERT(pSwizzle[i].z != 0);
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                pEquation->addr[i].channel = 2;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].z);
            }

            pEquation->xor1[i].value = 0;
            pEquation->xor2[i].value = 0;
        }
    }
    else if (IsThin(rsrcType, swMode))
    {
        Dim3d dim;
        ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);

        const UINT_32 blkXLog2 = Log2(dim.w);
        const UINT_32 blkYLog2 = Log2(dim.h);
        const UINT_32 blkXMask = dim.w - 1;
        const UINT_32 blkYMask = dim.h - 1;

        // swizzle[]  : per-bit coordinate terms that still have to be placed
        // xMask/yMask: coordinate bits already assigned as an addr term
        // bMask      : address bits whose equation is complete
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: place single-coordinate bits directly; for multi-term bits, move z and the coordinate bits
        // lying outside the block (xHi/yHi) into xor1/xor2 and keep the in-block remainder in swizzle[].
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].y != 0);
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                if (pSwizzle[i].z != 0)
                {
                    ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                    pEquation->xor2[i].channel = 2;
                    pEquation->xor2[i].valid   = 1;
                    pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
                }

                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                const UINT_32 xHi = swizzle[i].x & (~blkXMask);

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                const UINT_32 yHi = swizzle[i].y & (~blkYMask);

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    // yHi takes xor1 unless xHi already used it.
                    if (xHi == 0)
                    {
                        ADDR_ASSERT(pEquation->xor1[i].value == 0);
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Iterate until every bit is complete. In each sweep a bit whose remainder is a single coordinate bit gets
        // that bit as its addr term; otherwise coordinate bits already assigned elsewhere (xMask/yMask) are moved
        // into xor1/xor2 and stripped from the remainder.
        // NOTE(review): termination relies on the pattern tables always being resolvable - confirm.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].y != 0);
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                    }
                }
            }
        }

        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
    }
    else
    {
        // Remaining case (xor mode, not thin): same scheme as the thin path with a third (z) coordinate. Block
        // dimensions come from the 4KB table when blockSizeLog2 == 12, otherwise from the 64KB table.
        const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
        const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
        const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
        const UINT_32 blkXMask = (1 << blkXLog2) - 1;
        const UINT_32 blkYMask = (1 << blkYLog2) - 1;
        const UINT_32 blkZMask = (1 << blkZLog2) - 1;

        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          zMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: place single-coordinate bits; move out-of-block coordinate bits into xor1/xor2.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else if (pSwizzle[i].y != 0)
                {
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].z != 0);
                    ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
                    zMask |= pSwizzle[i].z;

                    pEquation->addr[i].channel = 2;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].z);

                    ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = pSwizzle[i].z;
                swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
                const UINT_32 zHi = swizzle[i].z & (~blkZMask);

                // Only xor1 and xor2 are available, so at most two of the three may be out-of-block.
                ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                if (zHi != 0)
                {
                    ADDR_ASSERT(IsPow2(zHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 2;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(zHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 2;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(zHi);
                    }

                    swizzle[i].z &= blkZMask;
                }

                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Iterate until every bit is complete (see the thin path above for the scheme).
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else if (swizzle[i].y != 0)
                        {
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].z != 0);
                            ADDR_ASSERT((zMask & swizzle[i].z) == 0);
                            zMask |= swizzle[i].z;

                            pEquation->addr[i].channel = 2;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].z);

                            ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;
                        const UINT_32 z = swizzle[i].z & zMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        if (z != 0)
                        {
                            ADDR_ASSERT(IsPow2(z));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 2;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(z);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 2;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(z);
                            }
                        }

                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                        swizzle[i].z &= ~z;
                    }
                }
            }
        }

        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::InitEquationTable
*
*   @brief
*       Initialize Equation table: for every (resource type, swizzle mode, element size) combination that has a
*       swizzle pattern, build its equation, append it to m_equationTable and record the index in
*       m_equationLookupTable. Combinations without a usable equation get ADDR_INVALID_EQUATION_INDEX.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx10Lib::InitEquationTable()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
    {
        // Table index 0 corresponds to ADDR_RSRC_TEX_2D.
        const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);

        for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
        {
            const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);

            for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
            {
                UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
                const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);

                if (pPatInfo != NULL)
                {
                    ADDR_ASSERT(IsValidSwMode(swMode));

                    // An equation bit holds one addr term plus xor1/xor2, i.e. at most 3 items.
                    if (pPatInfo->maxItemCount <= 3)
                    {
                        ADDR_EQUATION equation = {};

                        ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);

                        equationIndex = m_numEquations;
                        // NOTE(review): the bound is only asserted; the store below is not otherwise guarded.
                        ADDR_ASSERT(equationIndex < EquationTableSize);

                        m_equationTable[equationIndex] = equation;

                        m_numEquations++;
                    }
                    else
                    {
                        // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
                        ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
                        ADDR_ASSERT(rsrcTypeIdx == 1);
                        ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
                        ADDR_ASSERT(m_settings.supportRbPlus == 1);
                    }
                }

                m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlGetEquationIndex
*
*   @brief
*       Interface function stub of GetEquationIndex. Also stores the index into every pOut->pMipInfo entry when
*       pMipInfo is provided.
*
*   @return
*       Equation index, or ADDR_INVALID_EQUATION_INDEX when the resource type is neither 2D nor 3D
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::HwlGetEquationIndex(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;

    if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
        (pIn->resourceType == ADDR_RSRC_TEX_3D))
    {
        // NOTE(review): the indices are not range-checked here; this assumes pIn->bpp >= 8 and a swizzle mode
        // below MaxSwModeType - confirm against callers.
        const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
        const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);

        equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
    }

    if (pOut->pMipInfo != NULL)
    {
        for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
        {
            pOut->pMipInfo[i].equationIndex = equationIdx;
        }
    }

    return equationIdx;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetValidDisplaySwizzleModes
*
*   @brief
*       Get valid swizzle modes mask for displayable surface
*
*   @return
*       Valid swizzle modes mask for displayable surface
2080************************************************************************************************************************ 2081*/ 2082UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes( 2083 UINT_32 bpp 2084 ) const 2085{ 2086 UINT_32 swModeMask = 0; 2087 2088 if (bpp <= 64) 2089 { 2090 if (m_settings.isDcn20) 2091 { 2092 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask; 2093 } 2094 else 2095 { 2096 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask; 2097 } 2098 } 2099 2100 return swModeMask; 2101} 2102 2103/** 2104************************************************************************************************************************ 2105* Gfx10Lib::IsValidDisplaySwizzleMode 2106* 2107* @brief 2108* Check if a swizzle mode is supported by display engine 2109* 2110* @return 2111* TRUE is swizzle mode is supported by display engine 2112************************************************************************************************************************ 2113*/ 2114BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode( 2115 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure 2116 ) const 2117{ 2118 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); 2119 2120 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? 
TRUE : FALSE; 2121} 2122 2123/** 2124************************************************************************************************************************ 2125* Gfx10Lib::GetMaxNumMipsInTail 2126* 2127* @brief 2128* Return max number of mips in tails 2129* 2130* @return 2131* Max number of mips in tails 2132************************************************************************************************************************ 2133*/ 2134UINT_32 Gfx10Lib::GetMaxNumMipsInTail( 2135 UINT_32 blockSizeLog2, ///< block size log2 2136 BOOL_32 isThin ///< is thin or thick 2137 ) const 2138{ 2139 UINT_32 effectiveLog2 = blockSizeLog2; 2140 2141 if (isThin == FALSE) 2142 { 2143 effectiveLog2 -= (blockSizeLog2 - 8) / 3; 2144 } 2145 2146 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4); 2147} 2148 2149/** 2150************************************************************************************************************************ 2151* Gfx10Lib::HwlComputePipeBankXor 2152* 2153* @brief 2154* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address 2155* 2156* @return 2157* PipeBankXor value 2158************************************************************************************************************************ 2159*/ 2160ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor( 2161 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure 2162 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure 2163 ) const 2164{ 2165 if (IsNonPrtXor(pIn->swizzleMode)) 2166 { 2167 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode)); 2168 2169 // No pipe xor... 
2170 const UINT_32 pipeXor = 0; 2171 UINT_32 bankXor = 0; 2172 2173 const UINT_32 XorPatternLen = 8; 2174 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1}; 2175 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1}; 2176 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7}; 2177 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14}; 2178 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b}; 2179 2180 switch (bankBits) 2181 { 2182 case 1: 2183 case 2: 2184 case 3: 2185 case 4: 2186 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits); 2187 break; 2188 default: 2189 // valid bank bits should be 0~4 2190 ADDR_ASSERT_ALWAYS(); 2191 case 0: 2192 break; 2193 } 2194 2195 pOut->pipeBankXor = bankXor | pipeXor; 2196 } 2197 else 2198 { 2199 pOut->pipeBankXor = 0; 2200 } 2201 2202 return ADDR_OK; 2203} 2204 2205/** 2206************************************************************************************************************************ 2207* Gfx10Lib::HwlComputeSlicePipeBankXor 2208* 2209* @brief 2210* Generate slice PipeBankXor value based on base PipeBankXor value and slice id 2211* 2212* @return 2213* PipeBankXor value 2214************************************************************************************************************************ 2215*/ 2216ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor( 2217 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure 2218 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure 2219 ) const 2220{ 2221 if (IsNonPrtXor(pIn->swizzleMode)) 2222 { 2223 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode); 2224 const UINT_32 pipeBits = GetPipeXorBits(blockBits); 2225 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); 2226 2227 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor; 2228 
2229 if (pIn->bpe != 0) 2230 { 2231 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode, 2232 pIn->resourceType, 2233 Log2(pIn->bpe >> 3), 2234 1); 2235 2236 if (pPatInfo != NULL) 2237 { 2238 ADDR_BIT_SETTING fullSwizzlePattern[20]; 2239 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); 2240 2241 const UINT_32 pipeBankXorOffset = 2242 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern), 2243 blockBits, 2244 0, 2245 0, 2246 pIn->slice, 2247 0); 2248 2249 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2; 2250 2251 // Should have no bit set under pipe interleave 2252 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset); 2253 2254 // This assertion firing means old approach doesn't calculate a correct sliceXor value... 2255 ADDR_ASSERT(pipeBankXor == pipeXor); 2256 2257 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor; 2258 } 2259 } 2260 } 2261 else 2262 { 2263 pOut->pipeBankXor = 0; 2264 } 2265 2266 return ADDR_OK; 2267} 2268 2269/** 2270************************************************************************************************************************ 2271* Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern 2272* 2273* @brief 2274* Compute sub resource offset to support swizzle pattern 2275* 2276* @return 2277* Offset 2278************************************************************************************************************************ 2279*/ 2280ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern( 2281 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure 2282 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure 2283 ) const 2284{ 2285 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); 2286 2287 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset; 2288 2289 return ADDR_OK; 2290} 2291 2292/** 
2293************************************************************************************************************************ 2294* Gfx10Lib::HwlComputeNonBlockCompressedView 2295* 2296* @brief 2297* Compute non-block-compressed view for a given mipmap level/slice. 2298* 2299* @return 2300* ADDR_E_RETURNCODE 2301************************************************************************************************************************ 2302*/ 2303ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( 2304 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure 2305 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure 2306 ) const 2307{ 2308 ADDR_E_RETURNCODE returnCode = ADDR_OK; 2309 2310 if (pIn->resourceType != ADDR_RSRC_TEX_2D) 2311 { 2312 // Only 2D resource can have a NonBC view... 2313 returnCode = ADDR_INVALIDPARAMS; 2314 } 2315 else if ((pIn->format != ADDR_FMT_ASTC_8x8) && 2316 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) 2317 { 2318 // Only support BC1~BC7 or ASTC_8x8 for now... 2319 returnCode = ADDR_NOTSUPPORTED; 2320 } 2321 else 2322 { 2323 UINT_32 bcWidth, bcHeight; 2324 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight); 2325 2326 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {}; 2327 infoIn.flags = pIn->flags; 2328 infoIn.swizzleMode = pIn->swizzleMode; 2329 infoIn.resourceType = pIn->resourceType; 2330 infoIn.bpp = bpp; 2331 infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth; 2332 infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight; 2333 infoIn.numSlices = pIn->numSlices; 2334 infoIn.numMipLevels = pIn->numMipLevels; 2335 infoIn.numSamples = 1; 2336 infoIn.numFrags = 1; 2337 2338 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {}; 2339 2340 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; 2341 infoOut.pMipInfo = mipInfo; 2342 2343 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? 
TRUE : FALSE; 2344 2345 if (tiled) 2346 { 2347 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut); 2348 } 2349 else 2350 { 2351 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut); 2352 } 2353 2354 if (returnCode == ADDR_OK) 2355 { 2356 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {}; 2357 subOffIn.swizzleMode = infoIn.swizzleMode; 2358 subOffIn.resourceType = infoIn.resourceType; 2359 subOffIn.slice = pIn->slice; 2360 subOffIn.sliceSize = infoOut.sliceSize; 2361 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset; 2362 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset; 2363 2364 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {}; 2365 2366 // For any mipmap level, move nonBc view base address by offset 2367 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut); 2368 pOut->offset = subOffOut.offset; 2369 2370 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {}; 2371 slicePbXorIn.bpe = infoIn.bpp; 2372 slicePbXorIn.swizzleMode = infoIn.swizzleMode; 2373 slicePbXorIn.resourceType = infoIn.resourceType; 2374 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor; 2375 slicePbXorIn.slice = pIn->slice; 2376 2377 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {}; 2378 2379 // For any mipmap level, nonBc view should use computed pbXor 2380 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut); 2381 pOut->pipeBankXor = slicePbXorOut.pipeBankXor; 2382 2383 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE; 2384 const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth; 2385 const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight; 2386 2387 if (inTail) 2388 { 2389 // For mipmap level that is in mip tail block, hack a lot of things... 
2390 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels 2391 // are fit in tail block: 2392 2393 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain) 2394 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail; 2395 2396 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!) 2397 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u); 2398 2399 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold 2400 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2); 2401 2402 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold 2403 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight); 2404 } 2405 // This check should cover at least mipId == 0 2406 else if (requestMipWidth << pIn->mipId == infoIn.width) 2407 { 2408 // For mipmap level [N] that is not in mip tail block and downgraded without losing element: 2409 // - only one mipmap level and mipId = 0 2410 pOut->mipId = 0; 2411 pOut->numMipLevels = 1; 2412 2413 // (mip0) width = requestMipWidth 2414 pOut->unalignedWidth = requestMipWidth; 2415 2416 // (mip0) height = requestMipHeight 2417 pOut->unalignedHeight = requestMipHeight; 2418 } 2419 else 2420 { 2421 // For mipmap level [N] that is not in mip tail block and downgraded with element losing, 2422 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed, 2423 // because single mip view may have different pitch value than original (multiple) mip view... 2424 // A simple case would be: 2425 // - 64KB block swizzle mode, 8 Bytes-Per-Element. 
Block dim = [0x80, 0x40] 2426 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view 2427 // mip0 width = 0x101/mip1 width = 0x80 2428 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in 2429 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes. 2430 2431 // - 2 levels and mipId = 1 2432 pOut->mipId = 1; 2433 pOut->numMipLevels = 2; 2434 2435 const UINT_32 upperMipWidth = 2436 PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth; 2437 const UINT_32 upperMipHeight = 2438 PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight; 2439 2440 const BOOL_32 needToAvoidInTail = 2441 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ? 2442 TRUE : FALSE; 2443 2444 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth); 2445 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight); 2446 2447 const BOOL_32 needExtraWidth = 2448 ((upperMipWidth < requestMipWidth * 2) || 2449 ((upperMipWidth == requestMipWidth * 2) && 2450 ((needToAvoidInTail == TRUE) || 2451 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE; 2452 2453 const BOOL_32 needExtraHeight = 2454 ((upperMipHeight < requestMipHeight * 2) || 2455 ((upperMipHeight == requestMipHeight * 2) && 2456 ((needToAvoidInTail == TRUE) || 2457 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE; 2458 2459 // (mip0) width = requestLastMipLevelWidth 2460 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0); 2461 2462 // (mip0) height = requestLastMipLevelHeight 2463 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 
1: 0); 2464 } 2465 2466 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width 2467 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth); 2468 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height 2469 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight); 2470 } 2471 } 2472 2473 return returnCode; 2474} 2475 2476/** 2477************************************************************************************************************************ 2478* Gfx10Lib::ValidateNonSwModeParams 2479* 2480* @brief 2481* Validate compute surface info params except swizzle mode 2482* 2483* @return 2484* TRUE if parameters are valid, FALSE otherwise 2485************************************************************************************************************************ 2486*/ 2487BOOL_32 Gfx10Lib::ValidateNonSwModeParams( 2488 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const 2489{ 2490 BOOL_32 valid = TRUE; 2491 2492 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) 2493 { 2494 ADDR_ASSERT_ALWAYS(); 2495 valid = FALSE; 2496 } 2497 2498 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE) 2499 { 2500 ADDR_ASSERT_ALWAYS(); 2501 valid = FALSE; 2502 } 2503 2504 const ADDR2_SURFACE_FLAGS flags = pIn->flags; 2505 const AddrResourceType rsrcType = pIn->resourceType; 2506 const BOOL_32 mipmap = (pIn->numMipLevels > 1); 2507 const BOOL_32 msaa = (pIn->numFrags > 1); 2508 const BOOL_32 display = flags.display; 2509 const BOOL_32 tex3d = IsTex3d(rsrcType); 2510 const BOOL_32 tex2d = IsTex2d(rsrcType); 2511 const BOOL_32 tex1d = IsTex1d(rsrcType); 2512 const BOOL_32 stereo = flags.qbStereo; 2513 2514 2515 // Resource type check 2516 if (tex1d) 2517 { 2518 if (msaa || display || stereo) 2519 { 2520 ADDR_ASSERT_ALWAYS(); 2521 valid = FALSE; 2522 } 2523 } 2524 else if (tex2d) 2525 { 2526 if ((msaa && mipmap) || (stereo 
&& msaa) || (stereo && mipmap)) 2527 { 2528 ADDR_ASSERT_ALWAYS(); 2529 valid = FALSE; 2530 } 2531 } 2532 else if (tex3d) 2533 { 2534 if (msaa || display || stereo) 2535 { 2536 ADDR_ASSERT_ALWAYS(); 2537 valid = FALSE; 2538 } 2539 } 2540 else 2541 { 2542 ADDR_ASSERT_ALWAYS(); 2543 valid = FALSE; 2544 } 2545 2546 return valid; 2547} 2548 2549/** 2550************************************************************************************************************************ 2551* Gfx10Lib::ValidateSwModeParams 2552* 2553* @brief 2554* Validate compute surface info related to swizzle mode 2555* 2556* @return 2557* TRUE if parameters are valid, FALSE otherwise 2558************************************************************************************************************************ 2559*/ 2560BOOL_32 Gfx10Lib::ValidateSwModeParams( 2561 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const 2562{ 2563 BOOL_32 valid = TRUE; 2564 2565 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE) 2566 { 2567 ADDR_ASSERT_ALWAYS(); 2568 valid = FALSE; 2569 } 2570 else if (IsValidSwMode(pIn->swizzleMode) == FALSE) 2571 { 2572 { 2573 ADDR_ASSERT_ALWAYS(); 2574 valid = FALSE; 2575 } 2576 } 2577 2578 const ADDR2_SURFACE_FLAGS flags = pIn->flags; 2579 const AddrResourceType rsrcType = pIn->resourceType; 2580 const AddrSwizzleMode swizzle = pIn->swizzleMode; 2581 const BOOL_32 msaa = (pIn->numFrags > 1); 2582 const BOOL_32 zbuffer = flags.depth || flags.stencil; 2583 const BOOL_32 color = flags.color; 2584 const BOOL_32 display = flags.display; 2585 const BOOL_32 tex3d = IsTex3d(rsrcType); 2586 const BOOL_32 tex2d = IsTex2d(rsrcType); 2587 const BOOL_32 tex1d = IsTex1d(rsrcType); 2588 const BOOL_32 thin3d = flags.view3dAs2dArray; 2589 const BOOL_32 linear = IsLinear(swizzle); 2590 const BOOL_32 blk256B = IsBlock256b(swizzle); 2591 const BOOL_32 blkVar = IsBlockVariable(swizzle); 2592 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); 2593 const BOOL_32 prt = flags.prt; 2594 const BOOL_32 fmask = flags.fmask; 
2595 2596 // Misc check 2597 if ((pIn->numFrags > 1) && 2598 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags))) 2599 { 2600 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples 2601 ADDR_ASSERT_ALWAYS(); 2602 valid = FALSE; 2603 } 2604 2605 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE)) 2606 { 2607 ADDR_ASSERT_ALWAYS(); 2608 valid = FALSE; 2609 } 2610 2611 if ((pIn->bpp == 96) && (linear == FALSE)) 2612 { 2613 ADDR_ASSERT_ALWAYS(); 2614 valid = FALSE; 2615 } 2616 2617 const UINT_32 swizzleMask = 1 << swizzle; 2618 2619 // Resource type check 2620 if (tex1d) 2621 { 2622 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0) 2623 { 2624 ADDR_ASSERT_ALWAYS(); 2625 valid = FALSE; 2626 } 2627 } 2628 else if (tex2d) 2629 { 2630 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) 2631 { 2632 { 2633 ADDR_ASSERT_ALWAYS(); 2634 valid = FALSE; 2635 } 2636 } 2637 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) || 2638 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0))) 2639 { 2640 ADDR_ASSERT_ALWAYS(); 2641 valid = FALSE; 2642 } 2643 2644 } 2645 else if (tex3d) 2646 { 2647 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) || 2648 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) || 2649 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0))) 2650 { 2651 ADDR_ASSERT_ALWAYS(); 2652 valid = FALSE; 2653 } 2654 } 2655 2656 // Swizzle type check 2657 if (linear) 2658 { 2659 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0)) 2660 { 2661 ADDR_ASSERT_ALWAYS(); 2662 valid = FALSE; 2663 } 2664 } 2665 else if (IsZOrderSwizzle(swizzle)) 2666 { 2667 if ((pIn->bpp > 64) || 2668 (msaa && (color || (pIn->bpp > 32))) || 2669 ElemLib::IsBlockCompressed(pIn->format) || 2670 ElemLib::IsMacroPixelPacked(pIn->format)) 2671 { 2672 ADDR_ASSERT_ALWAYS(); 2673 valid = FALSE; 2674 } 2675 } 2676 else if (IsStandardSwizzle(rsrcType, swizzle)) 2677 { 2678 if (zbuffer || msaa) 2679 { 2680 ADDR_ASSERT_ALWAYS(); 2681 valid = FALSE; 2682 
} 2683 } 2684 else if (IsDisplaySwizzle(rsrcType, swizzle)) 2685 { 2686 if (zbuffer || msaa) 2687 { 2688 ADDR_ASSERT_ALWAYS(); 2689 valid = FALSE; 2690 } 2691 } 2692 else if (IsRtOptSwizzle(swizzle)) 2693 { 2694 if (zbuffer) 2695 { 2696 ADDR_ASSERT_ALWAYS(); 2697 valid = FALSE; 2698 } 2699 } 2700 else 2701 { 2702 { 2703 ADDR_ASSERT_ALWAYS(); 2704 valid = FALSE; 2705 } 2706 } 2707 2708 // Block type check 2709 if (blk256B) 2710 { 2711 if (zbuffer || tex3d || msaa) 2712 { 2713 ADDR_ASSERT_ALWAYS(); 2714 valid = FALSE; 2715 } 2716 } 2717 else if (blkVar) 2718 { 2719 if (m_blockVarSizeLog2 == 0) 2720 { 2721 ADDR_ASSERT_ALWAYS(); 2722 valid = FALSE; 2723 } 2724 } 2725 2726 return valid; 2727} 2728 2729/** 2730************************************************************************************************************************ 2731* Gfx10Lib::HwlComputeSurfaceInfoSanityCheck 2732* 2733* @brief 2734* Compute surface info sanity check 2735* 2736* @return 2737* Offset 2738************************************************************************************************************************ 2739*/ 2740ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck( 2741 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure 2742 ) const 2743{ 2744 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? 
ADDR_OK : ADDR_INVALIDPARAMS; 2745} 2746 2747/** 2748************************************************************************************************************************ 2749* Gfx10Lib::HwlGetPreferredSurfaceSetting 2750* 2751* @brief 2752* Internal function to get suggested surface information for cliet to use 2753* 2754* @return 2755* ADDR_E_RETURNCODE 2756************************************************************************************************************************ 2757*/ 2758ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( 2759 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure 2760 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure 2761 ) const 2762{ 2763 ADDR_E_RETURNCODE returnCode = ADDR_OK; 2764 2765 if (pIn->flags.fmask) 2766 { 2767 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE; 2768 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0)); 2769 2770 if (forbid64KbBlockType && forbidVarBlockType) 2771 { 2772 // Invalid combination... 2773 ADDR_ASSERT_ALWAYS(); 2774 returnCode = ADDR_INVALIDPARAMS; 2775 } 2776 else 2777 { 2778 pOut->resourceType = ADDR_RSRC_TEX_2D; 2779 pOut->validBlockSet.value = 0; 2780 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1; 2781 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1; 2782 pOut->validSwModeSet.value = 0; 2783 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1; 2784 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1; 2785 pOut->canXor = TRUE; 2786 pOut->validSwTypeSet.value = AddrSwSetZ; 2787 pOut->clientPreferredSwSet = pOut->validSwTypeSet; 2788 2789 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE); 2790 2791 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE)) 2792 { 2793 const UINT_8 maxFmaskSwizzleModeType = 2; 2794 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 
3 : 2); 2795 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1); 2796 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); 2797 const UINT_32 numSlices = Max(pIn->numSlices, 1u); 2798 const UINT_32 width = Max(pIn->width, 1u); 2799 const UINT_32 height = Max(pIn->height, 1u); 2800 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u); 2801 2802 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X}; 2803 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}}; 2804 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}}; 2805 UINT_64 padSize[maxFmaskSwizzleModeType] = {}; 2806 2807 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++) 2808 { 2809 ComputeBlockDimensionForSurf(&blkDim[i].w, 2810 &blkDim[i].h, 2811 &blkDim[i].d, 2812 fmaskBpp, 2813 1, 2814 pOut->resourceType, 2815 swMode[i]); 2816 2817 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); 2818 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); 2819 } 2820 2821 if (BlockTypeWithinMemoryBudget(padSize[0], 2822 padSize[1], 2823 ratioLow, 2824 ratioHi, 2825 pIn->memoryBudget, 2826 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))) 2827 { 2828 use64KbBlockType = FALSE; 2829 } 2830 } 2831 else if (forbidVarBlockType) 2832 { 2833 use64KbBlockType = TRUE; 2834 } 2835 2836 if (use64KbBlockType) 2837 { 2838 pOut->swizzleMode = ADDR_SW_64KB_Z_X; 2839 } 2840 else 2841 { 2842 pOut->swizzleMode = ADDR_SW_VAR_Z_X; 2843 } 2844 } 2845 } 2846 else 2847 { 2848 UINT_32 bpp = pIn->bpp; 2849 UINT_32 width = Max(pIn->width, 1u); 2850 UINT_32 height = Max(pIn->height, 1u); 2851 2852 // Set format to INVALID will skip this conversion 2853 if (pIn->format != ADDR_FMT_INVALID) 2854 { 2855 ElemMode elemMode = ADDR_UNCOMPRESSED; 2856 UINT_32 expandX, expandY; 2857 2858 // Get compression/expansion factors and element mode which indicates compression/expansion 2859 bpp = 
GetElemLib()->GetBitsPerPixel(pIn->format, 2860 &elemMode, 2861 &expandX, 2862 &expandY); 2863 2864 UINT_32 basePitch = 0; 2865 GetElemLib()->AdjustSurfaceInfo(elemMode, 2866 expandX, 2867 expandY, 2868 &bpp, 2869 &basePitch, 2870 &width, 2871 &height); 2872 } 2873 2874 const UINT_32 numSlices = Max(pIn->numSlices, 1u); 2875 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); 2876 const UINT_32 numSamples = Max(pIn->numSamples, 1u); 2877 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; 2878 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1); 2879 2880 // Pre sanity check on non swizzle mode parameters 2881 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; 2882 localIn.flags = pIn->flags; 2883 localIn.resourceType = pIn->resourceType; 2884 localIn.format = pIn->format; 2885 localIn.bpp = bpp; 2886 localIn.width = width; 2887 localIn.height = height; 2888 localIn.numSlices = numSlices; 2889 localIn.numMipLevels = numMipLevels; 2890 localIn.numSamples = numSamples; 2891 localIn.numFrags = numFrags; 2892 2893 if (ValidateNonSwModeParams(&localIn)) 2894 { 2895 // Forbid swizzle mode(s) by client setting 2896 ADDR2_SWMODE_SET allowedSwModeSet = {}; 2897 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask; 2898 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask; 2899 allowedSwModeSet.value |= 2900 pIn->forbiddenBlock.macroThin4KB ? 0 : 2901 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask); 2902 allowedSwModeSet.value |= 2903 pIn->forbiddenBlock.macroThick4KB ? 0 : 2904 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0); 2905 allowedSwModeSet.value |= 2906 pIn->forbiddenBlock.macroThin64KB ? 0 : 2907 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask); 2908 allowedSwModeSet.value |= 2909 pIn->forbiddenBlock.macroThick64KB ? 0 : 2910 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 
Gfx10Rsrc3dThick64KBSwModeMask : 0); 2911 allowedSwModeSet.value |= 2912 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0); 2913 2914 if (pIn->preferredSwSet.value != 0) 2915 { 2916 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask; 2917 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask; 2918 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask; 2919 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask; 2920 } 2921 2922 if (pIn->noXor) 2923 { 2924 allowedSwModeSet.value &= ~Gfx10XorSwModeMask; 2925 } 2926 2927 if (pIn->maxAlign > 0) 2928 { 2929 if (pIn->maxAlign < (1u << m_blockVarSizeLog2)) 2930 { 2931 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask; 2932 } 2933 2934 if (pIn->maxAlign < Size64K) 2935 { 2936 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask; 2937 } 2938 2939 if (pIn->maxAlign < Size4K) 2940 { 2941 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask; 2942 } 2943 2944 if (pIn->maxAlign < Size256) 2945 { 2946 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask; 2947 } 2948 } 2949 2950 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions 2951 switch (pIn->resourceType) 2952 { 2953 case ADDR_RSRC_TEX_1D: 2954 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask; 2955 break; 2956 2957 case ADDR_RSRC_TEX_2D: 2958 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask; 2959 2960 break; 2961 2962 case ADDR_RSRC_TEX_3D: 2963 allowedSwModeSet.value &= pIn->flags.prt ? 
Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask; 2964 2965 if (pIn->flags.view3dAs2dArray) 2966 { 2967 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask; 2968 } 2969 break; 2970 2971 default: 2972 ADDR_ASSERT_ALWAYS(); 2973 allowedSwModeSet.value = 0; 2974 break; 2975 } 2976 2977 if (ElemLib::IsBlockCompressed(pIn->format) || 2978 ElemLib::IsMacroPixelPacked(pIn->format) || 2979 (bpp > 64) || 2980 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered))) 2981 { 2982 allowedSwModeSet.value &= ~Gfx10ZSwModeMask; 2983 } 2984 2985 if (pIn->format == ADDR_FMT_32_32_32) 2986 { 2987 allowedSwModeSet.value &= Gfx10LinearSwModeMask; 2988 } 2989 2990 if (msaa) 2991 { 2992 allowedSwModeSet.value &= Gfx10MsaaSwModeMask; 2993 } 2994 2995 if (pIn->flags.depth || pIn->flags.stencil) 2996 { 2997 allowedSwModeSet.value &= Gfx10ZSwModeMask; 2998 } 2999 3000 if (pIn->flags.display) 3001 { 3002 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp); 3003 } 3004 3005 if (allowedSwModeSet.value != 0) 3006 { 3007#if DEBUG 3008 // Post sanity check, at least AddrLib should accept the output generated by its own 3009 UINT_32 validateSwModeSet = allowedSwModeSet.value; 3010 3011 for (UINT_32 i = 0; validateSwModeSet != 0; i++) 3012 { 3013 if (validateSwModeSet & 1) 3014 { 3015 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i); 3016 ADDR_ASSERT(ValidateSwModeParams(&localIn)); 3017 } 3018 3019 validateSwModeSet >>= 1; 3020 } 3021#endif 3022 3023 pOut->resourceType = pIn->resourceType; 3024 pOut->validSwModeSet = allowedSwModeSet; 3025 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? 
TRUE : FALSE; 3026 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); 3027 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); 3028 3029 pOut->clientPreferredSwSet = pIn->preferredSwSet; 3030 3031 if (pOut->clientPreferredSwSet.value == 0) 3032 { 3033 pOut->clientPreferredSwSet.value = AddrSwSetAll; 3034 } 3035 3036 // Apply optional restrictions 3037 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig) 3038 { 3039 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0) 3040 { 3041 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from 3042 // the GL2 in VAR mode, so it should be avoided. 3043 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask; 3044 } 3045 else 3046 { 3047 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil. 3048 // But we have to suffer from low performance because there is no other choice... 3049 ADDR_ASSERT_ALWAYS(); 3050 } 3051 } 3052 3053 if (pIn->flags.needEquation) 3054 { 3055 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); 3056 } 3057 3058 if (allowedSwModeSet.value == Gfx10LinearSwModeMask) 3059 { 3060 pOut->swizzleMode = ADDR_SW_LINEAR; 3061 } 3062 else 3063 { 3064 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0); 3065 3066 if ((height > 1) && (computeMinSize == FALSE)) 3067 { 3068 // Always ignore linear swizzle mode if: 3069 // 1. This is a (2D/3D) resource with height > 1 3070 // 2. 
Client doesn't require computing minimize size 3071 allowedSwModeSet.swLinear = 0; 3072 } 3073 3074 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); 3075 3076 // Determine block size if there are 2 or more block type candidates 3077 if (IsPow2(allowedBlockSet.value) == FALSE) 3078 { 3079 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; 3080 3081 swMode[AddrBlockLinear] = ADDR_SW_LINEAR; 3082 3083 if (m_blockVarSizeLog2 != 0) 3084 { 3085 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X; 3086 } 3087 3088 if (pOut->resourceType == ADDR_RSRC_TEX_3D) 3089 { 3090 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S; 3091 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X; 3092 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S; 3093 } 3094 else 3095 { 3096 swMode[AddrBlockMicro] = ADDR_SW_256B_S; 3097 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S; 3098 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; 3099 } 3100 3101 UINT_64 padSize[AddrBlockMaxTiledType] = {}; 3102 3103 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); 3104 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 
2 : 1); 3105 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); 3106 UINT_32 minSizeBlk = AddrBlockMicro; 3107 UINT_64 minSize = 0; 3108 3109 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; 3110 3111 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) 3112 { 3113 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) 3114 { 3115 localIn.swizzleMode = swMode[i]; 3116 3117 if (localIn.swizzleMode == ADDR_SW_LINEAR) 3118 { 3119 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut); 3120 } 3121 else 3122 { 3123 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut); 3124 } 3125 3126 if (returnCode == ADDR_OK) 3127 { 3128 padSize[i] = localOut.surfSize; 3129 3130 if (minSize == 0) 3131 { 3132 minSize = padSize[i]; 3133 minSizeBlk = i; 3134 } 3135 else 3136 { 3137 if (BlockTypeWithinMemoryBudget( 3138 minSize, 3139 padSize[i], 3140 ratioLow, 3141 ratioHi, 3142 0.0, 3143 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))) 3144 { 3145 minSize = padSize[i]; 3146 minSizeBlk = i; 3147 } 3148 } 3149 } 3150 else 3151 { 3152 ADDR_ASSERT_ALWAYS(); 3153 break; 3154 } 3155 } 3156 } 3157 3158 if (pIn->memoryBudget > 1.0) 3159 { 3160 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check 3161 // smaller-block type again in coming loop 3162 switch (minSizeBlk) 3163 { 3164 case AddrBlockThick64KB: 3165 allowedBlockSet.macroThin64KB = 0; 3166 case AddrBlockThinVar: 3167 case AddrBlockThin64KB: 3168 allowedBlockSet.macroThick4KB = 0; 3169 case AddrBlockThick4KB: 3170 allowedBlockSet.macroThin4KB = 0; 3171 case AddrBlockThin4KB: 3172 allowedBlockSet.micro = 0; 3173 case AddrBlockMicro: 3174 allowedBlockSet.linear = 0; 3175 case AddrBlockLinear: 3176 break; 3177 3178 default: 3179 ADDR_ASSERT_ALWAYS(); 3180 break; 3181 } 3182 3183 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) 3184 { 3185 if ((i != minSizeBlk) && 3186 
IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) 3187 { 3188 if (BlockTypeWithinMemoryBudget( 3189 minSize, 3190 padSize[i], 3191 0, 3192 0, 3193 pIn->memoryBudget, 3194 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE) 3195 { 3196 // Clear the block type if the memory waste is unacceptable 3197 allowedBlockSet.value &= ~(1u << (i - 1)); 3198 } 3199 } 3200 } 3201 3202 // Remove VAR block type if bigger block type is allowed 3203 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X)) 3204 { 3205 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB) 3206 { 3207 allowedBlockSet.var = 0; 3208 } 3209 } 3210 3211 // Remove linear block type if 2 or more block types are allowed 3212 if (IsPow2(allowedBlockSet.value) == FALSE) 3213 { 3214 allowedBlockSet.linear = 0; 3215 } 3216 3217 // Select the biggest allowed block type 3218 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1; 3219 3220 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType)) 3221 { 3222 minSizeBlk = AddrBlockLinear; 3223 } 3224 } 3225 3226 switch (minSizeBlk) 3227 { 3228 case AddrBlockLinear: 3229 allowedSwModeSet.value &= Gfx10LinearSwModeMask; 3230 break; 3231 3232 case AddrBlockMicro: 3233 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); 3234 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask; 3235 break; 3236 3237 case AddrBlockThin4KB: 3238 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); 3239 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask; 3240 break; 3241 3242 case AddrBlockThick4KB: 3243 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); 3244 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask; 3245 break; 3246 3247 case AddrBlockThin64KB: 3248 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? 
3249 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask; 3250 break; 3251 3252 case AddrBlockThick64KB: 3253 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); 3254 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask; 3255 break; 3256 3257 case AddrBlockThinVar: 3258 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask; 3259 break; 3260 3261 default: 3262 ADDR_ASSERT_ALWAYS(); 3263 allowedSwModeSet.value = 0; 3264 break; 3265 } 3266 } 3267 3268 // Block type should be determined. 3269 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value)); 3270 3271 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); 3272 3273 // Determine swizzle type if there are 2 or more swizzle type candidates 3274 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE)) 3275 { 3276 if (ElemLib::IsBlockCompressed(pIn->format)) 3277 { 3278 if (allowedSwSet.sw_D) 3279 { 3280 allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 3281 } 3282 else if (allowedSwSet.sw_S) 3283 { 3284 allowedSwModeSet.value &= Gfx10StandardSwModeMask; 3285 } 3286 else 3287 { 3288 ADDR_ASSERT(allowedSwSet.sw_R); 3289 allowedSwModeSet.value &= Gfx10RenderSwModeMask; 3290 } 3291 } 3292 else if (ElemLib::IsMacroPixelPacked(pIn->format)) 3293 { 3294 if (allowedSwSet.sw_S) 3295 { 3296 allowedSwModeSet.value &= Gfx10StandardSwModeMask; 3297 } 3298 else if (allowedSwSet.sw_D) 3299 { 3300 allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 3301 } 3302 else 3303 { 3304 ADDR_ASSERT(allowedSwSet.sw_R); 3305 allowedSwModeSet.value &= Gfx10RenderSwModeMask; 3306 } 3307 } 3308 else if (pIn->resourceType == ADDR_RSRC_TEX_3D) 3309 { 3310 if (pIn->flags.color && 3311 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB && 3312 allowedSwSet.sw_D) 3313 { 3314 allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 3315 } 3316 else if (allowedSwSet.sw_S) 3317 { 3318 allowedSwModeSet.value &= Gfx10StandardSwModeMask; 3319 } 3320 else if (allowedSwSet.sw_R) 3321 { 3322 
allowedSwModeSet.value &= Gfx10RenderSwModeMask; 3323 } 3324 else 3325 { 3326 ADDR_ASSERT(allowedSwSet.sw_Z); 3327 allowedSwModeSet.value &= Gfx10ZSwModeMask; 3328 } 3329 } 3330 else 3331 { 3332 if (allowedSwSet.sw_R) 3333 { 3334 allowedSwModeSet.value &= Gfx10RenderSwModeMask; 3335 } 3336 else if (allowedSwSet.sw_D) 3337 { 3338 allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 3339 } 3340 else if (allowedSwSet.sw_S) 3341 { 3342 allowedSwModeSet.value &= Gfx10StandardSwModeMask; 3343 } 3344 else 3345 { 3346 ADDR_ASSERT(allowedSwSet.sw_Z); 3347 allowedSwModeSet.value &= Gfx10ZSwModeMask; 3348 } 3349 } 3350 3351 // Swizzle type should be determined. 3352 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); 3353 } 3354 3355 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + 3356 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's 3357 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9). 3358 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value)); 3359 } 3360 } 3361 else 3362 { 3363 // Invalid combination... 3364 ADDR_ASSERT_ALWAYS(); 3365 returnCode = ADDR_INVALIDPARAMS; 3366 } 3367 } 3368 else 3369 { 3370 // Invalid combination... 
3371 ADDR_ASSERT_ALWAYS(); 3372 returnCode = ADDR_INVALIDPARAMS; 3373 } 3374 } 3375 3376 return returnCode; 3377} 3378 3379/** 3380************************************************************************************************************************ 3381* Gfx10Lib::ComputeStereoInfo 3382* 3383* @brief 3384* Compute height alignment and right eye pipeBankXor for stereo surface 3385* 3386* @return 3387* Error code 3388* 3389************************************************************************************************************************ 3390*/ 3391ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo( 3392 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info 3393 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y 3394 UINT_32* pRightXor ///< Right eye xor 3395 ) const 3396{ 3397 ADDR_E_RETURNCODE ret = ADDR_OK; 3398 3399 *pRightXor = 0; 3400 3401 if (IsNonPrtXor(pIn->swizzleMode)) 3402 { 3403 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 3404 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 3405 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1; 3406 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode); 3407 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2]; 3408 3409 if (eqIndex != ADDR_INVALID_EQUATION_INDEX) 3410 { 3411 UINT_32 yMax = 0; 3412 UINT_32 yPosMask = 0; 3413 3414 // First get "max y bit" 3415 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++) 3416 { 3417 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1); 3418 3419 if ((m_equationTable[eqIndex].addr[i].channel == 1) && 3420 (m_equationTable[eqIndex].addr[i].index > yMax)) 3421 { 3422 yMax = m_equationTable[eqIndex].addr[i].index; 3423 } 3424 3425 if ((m_equationTable[eqIndex].xor1[i].valid == 1) && 3426 (m_equationTable[eqIndex].xor1[i].channel == 1) && 3427 (m_equationTable[eqIndex].xor1[i].index > yMax)) 3428 { 3429 yMax = m_equationTable[eqIndex].xor1[i].index; 3430 } 3431 3432 if 
((m_equationTable[eqIndex].xor2[i].valid == 1) && 3433 (m_equationTable[eqIndex].xor2[i].channel == 1) && 3434 (m_equationTable[eqIndex].xor2[i].index > yMax)) 3435 { 3436 yMax = m_equationTable[eqIndex].xor2[i].index; 3437 } 3438 } 3439 3440 // Then loop again for populating a position mask of "max Y bit" 3441 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++) 3442 { 3443 if ((m_equationTable[eqIndex].addr[i].channel == 1) && 3444 (m_equationTable[eqIndex].addr[i].index == yMax)) 3445 { 3446 yPosMask |= 1u << i; 3447 } 3448 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) && 3449 (m_equationTable[eqIndex].xor1[i].channel == 1) && 3450 (m_equationTable[eqIndex].xor1[i].index == yMax)) 3451 { 3452 yPosMask |= 1u << i; 3453 } 3454 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) && 3455 (m_equationTable[eqIndex].xor2[i].channel == 1) && 3456 (m_equationTable[eqIndex].xor2[i].index == yMax)) 3457 { 3458 yPosMask |= 1u << i; 3459 } 3460 } 3461 3462 const UINT_32 additionalAlign = 1 << yMax; 3463 3464 if (additionalAlign >= *pAlignY) 3465 { 3466 *pAlignY = additionalAlign; 3467 3468 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign); 3469 3470 if ((alignedHeight >> yMax) & 1) 3471 { 3472 *pRightXor = yPosMask >> m_pipeInterleaveLog2; 3473 } 3474 } 3475 } 3476 else 3477 { 3478 ret = ADDR_INVALIDPARAMS; 3479 } 3480 } 3481 3482 return ret; 3483} 3484 3485/** 3486************************************************************************************************************************ 3487* Gfx10Lib::HwlComputeSurfaceInfoTiled 3488* 3489* @brief 3490* Internal function to calculate alignment for tiled surface 3491* 3492* @return 3493* ADDR_E_RETURNCODE 3494************************************************************************************************************************ 3495*/ 3496ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled( 3497 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 3498 
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 3499 ) const 3500{ 3501 ADDR_E_RETURNCODE ret; 3502 3503 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value 3504 pOut->mipChainPitch = 0; 3505 pOut->mipChainHeight = 0; 3506 pOut->mipChainSlice = 0; 3507 pOut->epitchIsHeight = FALSE; 3508 3509 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary 3510 pOut->mipChainInTail = FALSE; 3511 pOut->firstMipIdInTail = pIn->numMipLevels; 3512 3513 if (IsBlock256b(pIn->swizzleMode)) 3514 { 3515 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut); 3516 } 3517 else 3518 { 3519 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut); 3520 } 3521 3522 return ret; 3523} 3524 3525 3526/** 3527************************************************************************************************************************ 3528* Gfx10Lib::ComputeSurfaceInfoMicroTiled 3529* 3530* @brief 3531* Internal function to calculate alignment for micro tiled surface 3532* 3533* @return 3534* ADDR_E_RETURNCODE 3535************************************************************************************************************************ 3536*/ 3537ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled( 3538 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 3539 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 3540 ) const 3541{ 3542 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth, 3543 &pOut->blockHeight, 3544 &pOut->blockSlices, 3545 pIn->bpp, 3546 pIn->numFrags, 3547 pIn->resourceType, 3548 pIn->swizzleMode); 3549 3550 if (ret == ADDR_OK) 3551 { 3552 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode); 3553 3554 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth); 3555 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); 3556 pOut->numSlices = pIn->numSlices; 3557 pOut->baseAlign = blockSize; 3558 3559 if (pIn->numMipLevels > 1) 3560 { 3561 const UINT_32 mip0Width 
= pIn->width; 3562 const UINT_32 mip0Height = pIn->height; 3563 UINT_64 mipSliceSize = 0; 3564 3565 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--) 3566 { 3567 UINT_32 mipWidth, mipHeight; 3568 3569 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight); 3570 3571 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth); 3572 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight); 3573 3574 if (pOut->pMipInfo != NULL) 3575 { 3576 pOut->pMipInfo[i].pitch = mipActualWidth; 3577 pOut->pMipInfo[i].height = mipActualHeight; 3578 pOut->pMipInfo[i].depth = 1; 3579 pOut->pMipInfo[i].offset = mipSliceSize; 3580 pOut->pMipInfo[i].mipTailOffset = 0; 3581 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize; 3582 } 3583 3584 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3); 3585 } 3586 3587 pOut->sliceSize = mipSliceSize; 3588 pOut->surfSize = mipSliceSize * pOut->numSlices; 3589 } 3590 else 3591 { 3592 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3); 3593 pOut->surfSize = pOut->sliceSize * pOut->numSlices; 3594 3595 if (pOut->pMipInfo != NULL) 3596 { 3597 pOut->pMipInfo[0].pitch = pOut->pitch; 3598 pOut->pMipInfo[0].height = pOut->height; 3599 pOut->pMipInfo[0].depth = 1; 3600 pOut->pMipInfo[0].offset = 0; 3601 pOut->pMipInfo[0].mipTailOffset = 0; 3602 pOut->pMipInfo[0].macroBlockOffset = 0; 3603 } 3604 } 3605 3606 } 3607 3608 return ret; 3609} 3610 3611/** 3612************************************************************************************************************************ 3613* Gfx10Lib::ComputeSurfaceInfoMacroTiled 3614* 3615* @brief 3616* Internal function to calculate alignment for macro tiled surface 3617* 3618* @return 3619* ADDR_E_RETURNCODE 3620************************************************************************************************************************ 3621*/ 3622ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( 3623 const 
ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 3624 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 3625 ) const 3626{ 3627 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, 3628 &pOut->blockHeight, 3629 &pOut->blockSlices, 3630 pIn->bpp, 3631 pIn->numFrags, 3632 pIn->resourceType, 3633 pIn->swizzleMode); 3634 3635 if (returnCode == ADDR_OK) 3636 { 3637 UINT_32 heightAlign = pOut->blockHeight; 3638 3639 if (pIn->flags.qbStereo) 3640 { 3641 UINT_32 rightXor = 0; 3642 3643 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor); 3644 3645 if (returnCode == ADDR_OK) 3646 { 3647 pOut->pStereoInfo->rightSwizzle = rightXor; 3648 } 3649 } 3650 3651 if (returnCode == ADDR_OK) 3652 { 3653 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 3654 const UINT_32 blockSize = 1 << blockSizeLog2; 3655 3656 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth); 3657 pOut->height = PowTwoAlign(pIn->height, heightAlign); 3658 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); 3659 pOut->baseAlign = blockSize; 3660 3661 if (pIn->numMipLevels > 1) 3662 { 3663 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, 3664 pIn->swizzleMode, 3665 pOut->blockWidth, 3666 pOut->blockHeight, 3667 pOut->blockSlices); 3668 const UINT_32 mip0Width = pIn->width; 3669 const UINT_32 mip0Height = pIn->height; 3670 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode); 3671 const UINT_32 mip0Depth = isThin ? 
1 : pIn->numSlices; 3672 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin); 3673 const UINT_32 index = Log2(pIn->bpp >> 3); 3674 UINT_32 firstMipInTail = pIn->numMipLevels; 3675 UINT_64 mipChainSliceSize = 0; 3676 UINT_64 mipSize[MaxMipLevels]; 3677 UINT_64 mipSliceSize[MaxMipLevels]; 3678 3679 Dim3d fixedTailMaxDim = tailMaxDim; 3680 3681 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1)) 3682 { 3683 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w; 3684 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h; 3685 } 3686 3687 for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 3688 { 3689 UINT_32 mipWidth, mipHeight, mipDepth; 3690 3691 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth); 3692 3693 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i)) 3694 { 3695 firstMipInTail = i; 3696 mipChainSliceSize += blockSize / pOut->blockSlices; 3697 break; 3698 } 3699 else 3700 { 3701 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth); 3702 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight); 3703 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices); 3704 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3); 3705 3706 mipSize[i] = sliceSize * depth; 3707 mipSliceSize[i] = sliceSize * pOut->blockSlices; 3708 mipChainSliceSize += sliceSize; 3709 3710 if (pOut->pMipInfo != NULL) 3711 { 3712 pOut->pMipInfo[i].pitch = pitch; 3713 pOut->pMipInfo[i].height = height; 3714 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; 3715 } 3716 } 3717 } 3718 3719 pOut->sliceSize = mipChainSliceSize; 3720 pOut->surfSize = mipChainSliceSize * pOut->numSlices; 3721 pOut->mipChainInTail = (firstMipInTail == 0) ? 
TRUE : FALSE; 3722 pOut->firstMipIdInTail = firstMipInTail; 3723 3724 if (pOut->pMipInfo != NULL) 3725 { 3726 UINT_64 offset = 0; 3727 UINT_64 macroBlkOffset = 0; 3728 UINT_32 tailMaxDepth = 0; 3729 3730 if (firstMipInTail != pIn->numMipLevels) 3731 { 3732 UINT_32 mipWidth, mipHeight; 3733 3734 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail, 3735 &mipWidth, &mipHeight, &tailMaxDepth); 3736 3737 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices; 3738 macroBlkOffset = blockSize; 3739 } 3740 3741 for (INT_32 i = firstMipInTail - 1; i >= 0; i--) 3742 { 3743 pOut->pMipInfo[i].offset = offset; 3744 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset; 3745 pOut->pMipInfo[i].mipTailOffset = 0; 3746 3747 offset += mipSize[i]; 3748 macroBlkOffset += mipSliceSize[i]; 3749 } 3750 3751 UINT_32 pitch = tailMaxDim.w; 3752 UINT_32 height = tailMaxDim.h; 3753 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d); 3754 3755 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d); 3756 3757 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++) 3758 { 3759 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail); 3760 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8); 3761 3762 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth; 3763 pOut->pMipInfo[i].mipTailOffset = mipOffset; 3764 pOut->pMipInfo[i].macroBlockOffset = 0; 3765 3766 pOut->pMipInfo[i].pitch = pitch; 3767 pOut->pMipInfo[i].height = height; 3768 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? 
pOut->numSlices : 1; 3769 3770 UINT_32 mipX = ((mipOffset >> 9) & 1) | 3771 ((mipOffset >> 10) & 2) | 3772 ((mipOffset >> 11) & 4) | 3773 ((mipOffset >> 12) & 8) | 3774 ((mipOffset >> 13) & 16) | 3775 ((mipOffset >> 14) & 32); 3776 UINT_32 mipY = ((mipOffset >> 8) & 1) | 3777 ((mipOffset >> 9) & 2) | 3778 ((mipOffset >> 10) & 4) | 3779 ((mipOffset >> 11) & 8) | 3780 ((mipOffset >> 12) & 16) | 3781 ((mipOffset >> 13) & 32); 3782 3783 if (blockSizeLog2 & 1) 3784 { 3785 const UINT_32 temp = mipX; 3786 mipX = mipY; 3787 mipY = temp; 3788 3789 if (index & 1) 3790 { 3791 mipY = (mipY << 1) | (mipX & 1); 3792 mipX = mipX >> 1; 3793 } 3794 } 3795 3796 if (isThin) 3797 { 3798 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w; 3799 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h; 3800 pOut->pMipInfo[i].mipTailCoordZ = 0; 3801 3802 pitch = Max(pitch >> 1, Block256_2d[index].w); 3803 height = Max(height >> 1, Block256_2d[index].h); 3804 } 3805 else 3806 { 3807 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w; 3808 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h; 3809 pOut->pMipInfo[i].mipTailCoordZ = 0; 3810 3811 pitch = Max(pitch >> 1, Block256_3d[index].w); 3812 height = Max(height >> 1, Block256_3d[index].h); 3813 } 3814 } 3815 } 3816 } 3817 else 3818 { 3819 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags; 3820 pOut->surfSize = pOut->sliceSize * pOut->numSlices; 3821 3822 if (pOut->pMipInfo != NULL) 3823 { 3824 pOut->pMipInfo[0].pitch = pOut->pitch; 3825 pOut->pMipInfo[0].height = pOut->height; 3826 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? 
pOut->numSlices : 1; 3827 pOut->pMipInfo[0].offset = 0; 3828 pOut->pMipInfo[0].mipTailOffset = 0; 3829 pOut->pMipInfo[0].macroBlockOffset = 0; 3830 pOut->pMipInfo[0].mipTailCoordX = 0; 3831 pOut->pMipInfo[0].mipTailCoordY = 0; 3832 pOut->pMipInfo[0].mipTailCoordZ = 0; 3833 } 3834 } 3835 } 3836 } 3837 3838 return returnCode; 3839} 3840 3841/** 3842************************************************************************************************************************ 3843* Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled 3844* 3845* @brief 3846* Internal function to calculate address from coord for tiled swizzle surface 3847* 3848* @return 3849* ADDR_E_RETURNCODE 3850************************************************************************************************************************ 3851*/ 3852ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled( 3853 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 3854 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 3855 ) const 3856{ 3857 ADDR_E_RETURNCODE ret; 3858 3859 if (IsBlock256b(pIn->swizzleMode)) 3860 { 3861 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut); 3862 } 3863 else 3864 { 3865 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut); 3866 } 3867 3868 return ret; 3869} 3870 3871/** 3872************************************************************************************************************************ 3873* Gfx10Lib::ComputeOffsetFromEquation 3874* 3875* @brief 3876* Compute offset from equation 3877* 3878* @return 3879* Offset 3880************************************************************************************************************************ 3881*/ 3882UINT_32 Gfx10Lib::ComputeOffsetFromEquation( 3883 const ADDR_EQUATION* pEq, ///< Equation 3884 UINT_32 x, ///< x coord in bytes 3885 UINT_32 y, ///< y coord in pixel 3886 UINT_32 z ///< z coord in slice 3887 ) const 3888{ 3889 UINT_32 offset = 0; 3890 3891 for (UINT_32 i = 0; i < 
pEq->numBits; i++) 3892 { 3893 UINT_32 v = 0; 3894 3895 if (pEq->addr[i].valid) 3896 { 3897 if (pEq->addr[i].channel == 0) 3898 { 3899 v ^= (x >> pEq->addr[i].index) & 1; 3900 } 3901 else if (pEq->addr[i].channel == 1) 3902 { 3903 v ^= (y >> pEq->addr[i].index) & 1; 3904 } 3905 else 3906 { 3907 ADDR_ASSERT(pEq->addr[i].channel == 2); 3908 v ^= (z >> pEq->addr[i].index) & 1; 3909 } 3910 } 3911 3912 if (pEq->xor1[i].valid) 3913 { 3914 if (pEq->xor1[i].channel == 0) 3915 { 3916 v ^= (x >> pEq->xor1[i].index) & 1; 3917 } 3918 else if (pEq->xor1[i].channel == 1) 3919 { 3920 v ^= (y >> pEq->xor1[i].index) & 1; 3921 } 3922 else 3923 { 3924 ADDR_ASSERT(pEq->xor1[i].channel == 2); 3925 v ^= (z >> pEq->xor1[i].index) & 1; 3926 } 3927 } 3928 3929 if (pEq->xor2[i].valid) 3930 { 3931 if (pEq->xor2[i].channel == 0) 3932 { 3933 v ^= (x >> pEq->xor2[i].index) & 1; 3934 } 3935 else if (pEq->xor2[i].channel == 1) 3936 { 3937 v ^= (y >> pEq->xor2[i].index) & 1; 3938 } 3939 else 3940 { 3941 ADDR_ASSERT(pEq->xor2[i].channel == 2); 3942 v ^= (z >> pEq->xor2[i].index) & 1; 3943 } 3944 } 3945 3946 offset |= (v << i); 3947 } 3948 3949 return offset; 3950} 3951 3952/** 3953************************************************************************************************************************ 3954* Gfx10Lib::ComputeOffsetFromSwizzlePattern 3955* 3956* @brief 3957* Compute offset from swizzle pattern 3958* 3959* @return 3960* Offset 3961************************************************************************************************************************ 3962*/ 3963UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern( 3964 const UINT_64* pPattern, ///< Swizzle pattern 3965 UINT_32 numBits, ///< Number of bits in pattern 3966 UINT_32 x, ///< x coord in pixel 3967 UINT_32 y, ///< y coord in pixel 3968 UINT_32 z, ///< z coord in slice 3969 UINT_32 s ///< sample id 3970 ) const 3971{ 3972 UINT_32 offset = 0; 3973 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const 
ADDR_BIT_SETTING*>(pPattern); 3974 3975 for (UINT_32 i = 0; i < numBits; i++) 3976 { 3977 UINT_32 v = 0; 3978 3979 if (pSwizzlePattern[i].x != 0) 3980 { 3981 UINT_16 mask = pSwizzlePattern[i].x; 3982 UINT_32 xBits = x; 3983 3984 while (mask != 0) 3985 { 3986 if (mask & 1) 3987 { 3988 v ^= xBits & 1; 3989 } 3990 3991 xBits >>= 1; 3992 mask >>= 1; 3993 } 3994 } 3995 3996 if (pSwizzlePattern[i].y != 0) 3997 { 3998 UINT_16 mask = pSwizzlePattern[i].y; 3999 UINT_32 yBits = y; 4000 4001 while (mask != 0) 4002 { 4003 if (mask & 1) 4004 { 4005 v ^= yBits & 1; 4006 } 4007 4008 yBits >>= 1; 4009 mask >>= 1; 4010 } 4011 } 4012 4013 if (pSwizzlePattern[i].z != 0) 4014 { 4015 UINT_16 mask = pSwizzlePattern[i].z; 4016 UINT_32 zBits = z; 4017 4018 while (mask != 0) 4019 { 4020 if (mask & 1) 4021 { 4022 v ^= zBits & 1; 4023 } 4024 4025 zBits >>= 1; 4026 mask >>= 1; 4027 } 4028 } 4029 4030 if (pSwizzlePattern[i].s != 0) 4031 { 4032 UINT_16 mask = pSwizzlePattern[i].s; 4033 UINT_32 sBits = s; 4034 4035 while (mask != 0) 4036 { 4037 if (mask & 1) 4038 { 4039 v ^= sBits & 1; 4040 } 4041 4042 sBits >>= 1; 4043 mask >>= 1; 4044 } 4045 } 4046 4047 offset |= (v << i); 4048 } 4049 4050 return offset; 4051} 4052 4053/** 4054************************************************************************************************************************ 4055* Gfx10Lib::GetSwizzlePatternInfo 4056* 4057* @brief 4058* Get swizzle pattern 4059* 4060* @return 4061* Swizzle pattern information 4062************************************************************************************************************************ 4063*/ 4064const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( 4065 AddrSwizzleMode swizzleMode, ///< Swizzle mode 4066 AddrResourceType resourceType, ///< Resource type 4067 UINT_32 elemLog2, ///< Element size in bytes log2 4068 UINT_32 numFrag ///< Number of fragment 4069 ) const 4070{ 4071 const UINT_32 index = IsXor(swizzleMode) ? 
(m_colorBaseIndex + elemLog2) : elemLog2; 4072 const ADDR_SW_PATINFO* patInfo = NULL; 4073 const UINT_32 swizzleMask = 1 << swizzleMode; 4074 4075 if (IsBlockVariable(swizzleMode)) 4076 { 4077 if (m_blockVarSizeLog2 != 0) 4078 { 4079 ADDR_ASSERT(m_settings.supportRbPlus); 4080 4081 if (IsRtOptSwizzle(swizzleMode)) 4082 { 4083 if (numFrag == 1) 4084 { 4085 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO; 4086 } 4087 else if (numFrag == 2) 4088 { 4089 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO; 4090 } 4091 else if (numFrag == 4) 4092 { 4093 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO; 4094 } 4095 else 4096 { 4097 ADDR_ASSERT(numFrag == 8); 4098 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO; 4099 } 4100 } 4101 else if (IsZOrderSwizzle(swizzleMode)) 4102 { 4103 if (numFrag == 1) 4104 { 4105 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO; 4106 } 4107 else if (numFrag == 2) 4108 { 4109 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO; 4110 } 4111 else if (numFrag == 4) 4112 { 4113 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO; 4114 } 4115 else 4116 { 4117 ADDR_ASSERT(numFrag == 8); 4118 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO; 4119 } 4120 } 4121 } 4122 } 4123 else if (IsLinear(swizzleMode) == FALSE) 4124 { 4125 if (resourceType == ADDR_RSRC_TEX_3D) 4126 { 4127 ADDR_ASSERT(numFrag == 1); 4128 4129 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0) 4130 { 4131 if (IsRtOptSwizzle(swizzleMode)) 4132 { 4133 patInfo = m_settings.supportRbPlus ? 4134 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; 4135 } 4136 else if (IsZOrderSwizzle(swizzleMode)) 4137 { 4138 patInfo = m_settings.supportRbPlus ? 4139 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; 4140 } 4141 else if (IsDisplaySwizzle(resourceType, swizzleMode)) 4142 { 4143 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X); 4144 patInfo = m_settings.supportRbPlus ? 
4145 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO; 4146 } 4147 else 4148 { 4149 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode)); 4150 4151 if (IsBlock4kb(swizzleMode)) 4152 { 4153 if (swizzleMode == ADDR_SW_4KB_S) 4154 { 4155 patInfo = m_settings.supportRbPlus ? 4156 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO; 4157 } 4158 else 4159 { 4160 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); 4161 patInfo = m_settings.supportRbPlus ? 4162 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO; 4163 } 4164 } 4165 else 4166 { 4167 if (swizzleMode == ADDR_SW_64KB_S) 4168 { 4169 patInfo = m_settings.supportRbPlus ? 4170 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO; 4171 } 4172 else if (swizzleMode == ADDR_SW_64KB_S_X) 4173 { 4174 patInfo = m_settings.supportRbPlus ? 4175 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO; 4176 } 4177 else 4178 { 4179 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); 4180 patInfo = m_settings.supportRbPlus ? 4181 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO; 4182 } 4183 } 4184 } 4185 } 4186 } 4187 else 4188 { 4189 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0) 4190 { 4191 if (IsBlock256b(swizzleMode)) 4192 { 4193 if (swizzleMode == ADDR_SW_256B_S) 4194 { 4195 patInfo = m_settings.supportRbPlus ? 4196 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO; 4197 } 4198 else 4199 { 4200 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D); 4201 patInfo = m_settings.supportRbPlus ? 4202 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO; 4203 } 4204 } 4205 else if (IsBlock4kb(swizzleMode)) 4206 { 4207 if (IsStandardSwizzle(resourceType, swizzleMode)) 4208 { 4209 if (swizzleMode == ADDR_SW_4KB_S) 4210 { 4211 patInfo = m_settings.supportRbPlus ? 4212 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO; 4213 } 4214 else 4215 { 4216 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); 4217 patInfo = m_settings.supportRbPlus ? 
4218 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO; 4219 } 4220 } 4221 else 4222 { 4223 if (swizzleMode == ADDR_SW_4KB_D) 4224 { 4225 patInfo = m_settings.supportRbPlus ? 4226 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO; 4227 } 4228 else 4229 { 4230 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X); 4231 patInfo = m_settings.supportRbPlus ? 4232 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO; 4233 } 4234 } 4235 } 4236 else 4237 { 4238 if (IsRtOptSwizzle(swizzleMode)) 4239 { 4240 if (numFrag == 1) 4241 { 4242 patInfo = m_settings.supportRbPlus ? 4243 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; 4244 } 4245 else if (numFrag == 2) 4246 { 4247 patInfo = m_settings.supportRbPlus ? 4248 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO; 4249 } 4250 else if (numFrag == 4) 4251 { 4252 patInfo = m_settings.supportRbPlus ? 4253 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO; 4254 } 4255 else 4256 { 4257 ADDR_ASSERT(numFrag == 8); 4258 patInfo = m_settings.supportRbPlus ? 4259 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO; 4260 } 4261 } 4262 else if (IsZOrderSwizzle(swizzleMode)) 4263 { 4264 if (numFrag == 1) 4265 { 4266 patInfo = m_settings.supportRbPlus ? 4267 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; 4268 } 4269 else if (numFrag == 2) 4270 { 4271 patInfo = m_settings.supportRbPlus ? 4272 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO; 4273 } 4274 else if (numFrag == 4) 4275 { 4276 patInfo = m_settings.supportRbPlus ? 4277 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO; 4278 } 4279 else 4280 { 4281 ADDR_ASSERT(numFrag == 8); 4282 patInfo = m_settings.supportRbPlus ? 
4283 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO; 4284 } 4285 } 4286 else if (IsDisplaySwizzle(resourceType, swizzleMode)) 4287 { 4288 if (swizzleMode == ADDR_SW_64KB_D) 4289 { 4290 patInfo = m_settings.supportRbPlus ? 4291 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO; 4292 } 4293 else if (swizzleMode == ADDR_SW_64KB_D_X) 4294 { 4295 patInfo = m_settings.supportRbPlus ? 4296 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO; 4297 } 4298 else 4299 { 4300 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T); 4301 patInfo = m_settings.supportRbPlus ? 4302 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO; 4303 } 4304 } 4305 else 4306 { 4307 if (swizzleMode == ADDR_SW_64KB_S) 4308 { 4309 patInfo = m_settings.supportRbPlus ? 4310 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO; 4311 } 4312 else if (swizzleMode == ADDR_SW_64KB_S_X) 4313 { 4314 patInfo = m_settings.supportRbPlus ? 4315 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO; 4316 } 4317 else 4318 { 4319 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); 4320 patInfo = m_settings.supportRbPlus ? 4321 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO; 4322 } 4323 } 4324 } 4325 } 4326 } 4327 } 4328 4329 return (patInfo != NULL) ? 
&patInfo[index] : NULL; 4330} 4331 4332 4333/** 4334************************************************************************************************************************ 4335* Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled 4336* 4337* @brief 4338* Internal function to calculate address from coord for micro tiled swizzle surface 4339* 4340* @return 4341* ADDR_E_RETURNCODE 4342************************************************************************************************************************ 4343*/ 4344ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled( 4345 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 4346 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 4347 ) const 4348{ 4349 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; 4350 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; 4351 ADDR2_MIP_INFO mipInfo[MaxMipLevels]; 4352 4353 localIn.swizzleMode = pIn->swizzleMode; 4354 localIn.flags = pIn->flags; 4355 localIn.resourceType = pIn->resourceType; 4356 localIn.bpp = pIn->bpp; 4357 localIn.width = Max(pIn->unalignedWidth, 1u); 4358 localIn.height = Max(pIn->unalignedHeight, 1u); 4359 localIn.numSlices = Max(pIn->numSlices, 1u); 4360 localIn.numMipLevels = Max(pIn->numMipLevels, 1u); 4361 localIn.numSamples = Max(pIn->numSamples, 1u); 4362 localIn.numFrags = Max(pIn->numFrags, 1u); 4363 localOut.pMipInfo = mipInfo; 4364 4365 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut); 4366 4367 if (ret == ADDR_OK) 4368 { 4369 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 4370 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1; 4371 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode); 4372 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2]; 4373 4374 if (eqIndex != ADDR_INVALID_EQUATION_INDEX) 4375 { 4376 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth; 4377 const UINT_32 yb = pIn->y / 
localOut.blockHeight; 4378 const UINT_32 xb = pIn->x / localOut.blockWidth; 4379 const UINT_32 blockIndex = yb * pb + xb; 4380 const UINT_32 blockSize = 256; 4381 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex], 4382 pIn->x << elemLog2, 4383 pIn->y, 4384 0); 4385 pOut->addr = localOut.sliceSize * pIn->slice + 4386 mipInfo[pIn->mipId].macroBlockOffset + 4387 (blockIndex * blockSize) + 4388 blk256Offset; 4389 } 4390 else 4391 { 4392 ret = ADDR_INVALIDPARAMS; 4393 } 4394 } 4395 4396 return ret; 4397} 4398 4399/** 4400************************************************************************************************************************ 4401* Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled 4402* 4403* @brief 4404* Internal function to calculate address from coord for macro tiled swizzle surface 4405* 4406* @return 4407* ADDR_E_RETURNCODE 4408************************************************************************************************************************ 4409*/ 4410ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled( 4411 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 4412 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 4413 ) const 4414{ 4415 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; 4416 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; 4417 ADDR2_MIP_INFO mipInfo[MaxMipLevels]; 4418 4419 localIn.swizzleMode = pIn->swizzleMode; 4420 localIn.flags = pIn->flags; 4421 localIn.resourceType = pIn->resourceType; 4422 localIn.bpp = pIn->bpp; 4423 localIn.width = Max(pIn->unalignedWidth, 1u); 4424 localIn.height = Max(pIn->unalignedHeight, 1u); 4425 localIn.numSlices = Max(pIn->numSlices, 1u); 4426 localIn.numMipLevels = Max(pIn->numMipLevels, 1u); 4427 localIn.numSamples = Max(pIn->numSamples, 1u); 4428 localIn.numFrags = Max(pIn->numFrags, 1u); 4429 localOut.pMipInfo = mipInfo; 4430 4431 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, 
&localOut); 4432 4433 if (ret == ADDR_OK) 4434 { 4435 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 4436 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 4437 const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 4438 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; 4439 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits); 4440 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ? 4441 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0; 4442 4443 if (localIn.numFrags > 1) 4444 { 4445 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode, 4446 pIn->resourceType, 4447 elemLog2, 4448 localIn.numFrags); 4449 4450 if (pPatInfo != NULL) 4451 { 4452 const UINT_32 pb = localOut.pitch / localOut.blockWidth; 4453 const UINT_32 yb = pIn->y / localOut.blockHeight; 4454 const UINT_32 xb = pIn->x / localOut.blockWidth; 4455 const UINT_64 blkIdx = yb * pb + xb; 4456 4457 ADDR_BIT_SETTING fullSwizzlePattern[20]; 4458 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); 4459 4460 const UINT_32 blkOffset = 4461 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern), 4462 blkSizeLog2, 4463 pIn->x, 4464 pIn->y, 4465 pIn->slice, 4466 pIn->sample); 4467 4468 pOut->addr = (localOut.sliceSize * pIn->slice) + 4469 (blkIdx << blkSizeLog2) + 4470 (blkOffset ^ pipeBankXor); 4471 } 4472 else 4473 { 4474 ret = ADDR_INVALIDPARAMS; 4475 } 4476 } 4477 else 4478 { 4479 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0; 4480 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode); 4481 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2]; 4482 4483 if (eqIndex != ADDR_INVALID_EQUATION_INDEX) 4484 { 4485 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE; 4486 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode); 4487 const UINT_64 sliceSize = isThin ? 
localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices); 4488 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices); 4489 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x; 4490 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y; 4491 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice; 4492 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth; 4493 const UINT_32 yb = pIn->y / localOut.blockHeight; 4494 const UINT_32 xb = pIn->x / localOut.blockWidth; 4495 const UINT_64 blkIdx = yb * pb + xb; 4496 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex], 4497 x << elemLog2, 4498 y, 4499 z); 4500 pOut->addr = sliceSize * sliceId + 4501 mipInfo[pIn->mipId].macroBlockOffset + 4502 (blkIdx << blkSizeLog2) + 4503 (blkOffset ^ pipeBankXor); 4504 } 4505 else 4506 { 4507 ret = ADDR_INVALIDPARAMS; 4508 } 4509 } 4510 } 4511 4512 return ret; 4513} 4514 4515/** 4516************************************************************************************************************************ 4517* Gfx10Lib::HwlComputeMaxBaseAlignments 4518* 4519* @brief 4520* Gets maximum alignments 4521* @return 4522* maximum alignments 4523************************************************************************************************************************ 4524*/ 4525UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const 4526{ 4527 return m_blockVarSizeLog2 ? 
Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K; 4528} 4529 4530/** 4531************************************************************************************************************************ 4532* Gfx10Lib::HwlComputeMaxMetaBaseAlignments 4533* 4534* @brief 4535* Gets maximum alignments for metadata 4536* @return 4537* maximum alignments for metadata 4538************************************************************************************************************************ 4539*/ 4540UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const 4541{ 4542 Dim3d metaBlk; 4543 4544 const AddrSwizzleMode ValidSwizzleModeForXmask[] = 4545 { 4546 ADDR_SW_64KB_Z_X, 4547 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X, 4548 }; 4549 4550 UINT_32 maxBaseAlignHtile = 0; 4551 UINT_32 maxBaseAlignCmask = 0; 4552 4553 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++) 4554 { 4555 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++) 4556 { 4557 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++) 4558 { 4559 // Max base alignment for Htile 4560 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil, 4561 ADDR_RSRC_TEX_2D, 4562 ValidSwizzleModeForXmask[swIdx], 4563 bppLog2, 4564 numFragLog2, 4565 TRUE, 4566 &metaBlk); 4567 4568 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile); 4569 } 4570 } 4571 4572 // Max base alignment for Cmask 4573 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask, 4574 ADDR_RSRC_TEX_2D, 4575 ValidSwizzleModeForXmask[swIdx], 4576 0, 4577 0, 4578 TRUE, 4579 &metaBlk); 4580 4581 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask); 4582 } 4583 4584 // Max base alignment for 2D Dcc 4585 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] = 4586 { 4587 ADDR_SW_64KB_S_X, 4588 ADDR_SW_64KB_D_X, 4589 ADDR_SW_64KB_R_X, 4590 m_blockVarSizeLog2 ? 
ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X, 4591 }; 4592 4593 UINT_32 maxBaseAlignDcc2D = 0; 4594 4595 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++) 4596 { 4597 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++) 4598 { 4599 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++) 4600 { 4601 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor, 4602 ADDR_RSRC_TEX_2D, 4603 ValidSwizzleModeForDcc2D[swIdx], 4604 bppLog2, 4605 numFragLog2, 4606 TRUE, 4607 &metaBlk); 4608 4609 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D); 4610 } 4611 } 4612 } 4613 4614 // Max base alignment for 3D Dcc 4615 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] = 4616 { 4617 ADDR_SW_64KB_Z_X, 4618 ADDR_SW_64KB_S_X, 4619 ADDR_SW_64KB_D_X, 4620 ADDR_SW_64KB_R_X, 4621 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X, 4622 }; 4623 4624 UINT_32 maxBaseAlignDcc3D = 0; 4625 4626 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++) 4627 { 4628 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++) 4629 { 4630 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor, 4631 ADDR_RSRC_TEX_3D, 4632 ValidSwizzleModeForDcc3D[swIdx], 4633 bppLog2, 4634 0, 4635 TRUE, 4636 &metaBlk); 4637 4638 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D); 4639 } 4640 } 4641 4642 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D)); 4643} 4644 4645/** 4646************************************************************************************************************************ 4647* Gfx10Lib::GetMetaElementSizeLog2 4648* 4649* @brief 4650* Gets meta data element size log2 4651* @return 4652* Meta data element size log2 4653************************************************************************************************************************ 4654*/ 4655INT_32 Gfx10Lib::GetMetaElementSizeLog2( 4656 Gfx10DataType 
dataType) ///< Data surface type 4657{ 4658 INT_32 elemSizeLog2 = 0; 4659 4660 if (dataType == Gfx10DataColor) 4661 { 4662 elemSizeLog2 = 0; 4663 } 4664 else if (dataType == Gfx10DataDepthStencil) 4665 { 4666 elemSizeLog2 = 2; 4667 } 4668 else 4669 { 4670 ADDR_ASSERT(dataType == Gfx10DataFmask); 4671 elemSizeLog2 = -1; 4672 } 4673 4674 return elemSizeLog2; 4675} 4676 4677/** 4678************************************************************************************************************************ 4679* Gfx10Lib::GetMetaCacheSizeLog2 4680* 4681* @brief 4682* Gets meta data cache line size log2 4683* @return 4684* Meta data cache line size log2 4685************************************************************************************************************************ 4686*/ 4687INT_32 Gfx10Lib::GetMetaCacheSizeLog2( 4688 Gfx10DataType dataType) ///< Data surface type 4689{ 4690 INT_32 cacheSizeLog2 = 0; 4691 4692 if (dataType == Gfx10DataColor) 4693 { 4694 cacheSizeLog2 = 6; 4695 } 4696 else if (dataType == Gfx10DataDepthStencil) 4697 { 4698 cacheSizeLog2 = 8; 4699 } 4700 else 4701 { 4702 ADDR_ASSERT(dataType == Gfx10DataFmask); 4703 cacheSizeLog2 = 8; 4704 } 4705 return cacheSizeLog2; 4706} 4707 4708/** 4709************************************************************************************************************************ 4710* Gfx10Lib::HwlComputeSurfaceInfoLinear 4711* 4712* @brief 4713* Internal function to calculate alignment for linear surface 4714* 4715* @return 4716* ADDR_E_RETURNCODE 4717************************************************************************************************************************ 4718*/ 4719ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear( 4720 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 4721 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 4722 ) const 4723{ 4724 ADDR_E_RETURNCODE returnCode = ADDR_OK; 4725 4726 if (IsTex1d(pIn->resourceType) && (pIn->height > 1)) 4727 { 
4728 returnCode = ADDR_INVALIDPARAMS; 4729 } 4730 else 4731 { 4732 const UINT_32 elementBytes = pIn->bpp >> 3; 4733 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes); 4734 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1; 4735 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign); 4736 UINT_32 actualHeight = pIn->height; 4737 UINT_64 sliceSize = 0; 4738 4739 if (pIn->numMipLevels > 1) 4740 { 4741 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--) 4742 { 4743 UINT_32 mipWidth, mipHeight; 4744 4745 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight); 4746 4747 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign); 4748 4749 if (pOut->pMipInfo != NULL) 4750 { 4751 pOut->pMipInfo[i].pitch = mipActualWidth; 4752 pOut->pMipInfo[i].height = mipHeight; 4753 pOut->pMipInfo[i].depth = mipDepth; 4754 pOut->pMipInfo[i].offset = sliceSize; 4755 pOut->pMipInfo[i].mipTailOffset = 0; 4756 pOut->pMipInfo[i].macroBlockOffset = sliceSize; 4757 } 4758 4759 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes; 4760 } 4761 } 4762 else 4763 { 4764 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight); 4765 4766 if (returnCode == ADDR_OK) 4767 { 4768 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes; 4769 4770 if (pOut->pMipInfo != NULL) 4771 { 4772 pOut->pMipInfo[0].pitch = pitch; 4773 pOut->pMipInfo[0].height = actualHeight; 4774 pOut->pMipInfo[0].depth = mipDepth; 4775 pOut->pMipInfo[0].offset = 0; 4776 pOut->pMipInfo[0].mipTailOffset = 0; 4777 pOut->pMipInfo[0].macroBlockOffset = 0; 4778 } 4779 } 4780 } 4781 4782 if (returnCode == ADDR_OK) 4783 { 4784 pOut->pitch = pitch; 4785 pOut->height = actualHeight; 4786 pOut->numSlices = pIn->numSlices; 4787 pOut->sliceSize = sliceSize; 4788 pOut->surfSize = sliceSize * pOut->numSlices; 4789 pOut->baseAlign = (pIn->swizzleMode == 
ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256; 4790 pOut->blockWidth = pitchAlign; 4791 pOut->blockHeight = 1; 4792 pOut->blockSlices = 1; 4793 4794 // Following members are useless on GFX10 4795 pOut->mipChainPitch = 0; 4796 pOut->mipChainHeight = 0; 4797 pOut->mipChainSlice = 0; 4798 pOut->epitchIsHeight = FALSE; 4799 4800 // Post calculation validate 4801 ADDR_ASSERT(pOut->sliceSize > 0); 4802 } 4803 } 4804 4805 return returnCode; 4806} 4807 4808} // V2 4809} // Addr 4810