/*
 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

/**
************************************************************************************************************************
* @file  gfx10addrlib.cpp
* @brief Contains the implementation for the Gfx10Lib class.
317ec681f3Smrg************************************************************************************************************************ 327ec681f3Smrg*/ 337ec681f3Smrg 347ec681f3Smrg#include "gfx10addrlib.h" 357ec681f3Smrg#include "gfx10_gb_reg.h" 367ec681f3Smrg 377ec681f3Smrg#include "amdgpu_asic_addr.h" 387ec681f3Smrg 397ec681f3Smrg//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 407ec681f3Smrg//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 417ec681f3Smrg 427ec681f3Smrgnamespace Addr 437ec681f3Smrg{ 447ec681f3Smrg/** 457ec681f3Smrg************************************************************************************************************************ 467ec681f3Smrg* Gfx10HwlInit 477ec681f3Smrg* 487ec681f3Smrg* @brief 497ec681f3Smrg* Creates an Gfx10Lib object. 507ec681f3Smrg* 517ec681f3Smrg* @return 527ec681f3Smrg* Returns an Gfx10Lib object pointer. 
537ec681f3Smrg************************************************************************************************************************ 547ec681f3Smrg*/ 557ec681f3SmrgAddr::Lib* Gfx10HwlInit(const Client* pClient) 567ec681f3Smrg{ 577ec681f3Smrg return V2::Gfx10Lib::CreateObj(pClient); 587ec681f3Smrg} 597ec681f3Smrg 607ec681f3Smrgnamespace V2 617ec681f3Smrg{ 627ec681f3Smrg 637ec681f3Smrg//////////////////////////////////////////////////////////////////////////////////////////////////// 647ec681f3Smrg// Static Const Member 657ec681f3Smrg//////////////////////////////////////////////////////////////////////////////////////////////////// 667ec681f3Smrg 677ec681f3Smrgconst SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = 687ec681f3Smrg{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved 697ec681f3Smrg {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR 707ec681f3Smrg {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S 717ec681f3Smrg {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D 727ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 737ec681f3Smrg 747ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 757ec681f3Smrg {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S 767ec681f3Smrg {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D 777ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 787ec681f3Smrg 797ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 807ec681f3Smrg {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S 817ec681f3Smrg {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D 827ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 837ec681f3Smrg 847ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 857ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 867ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 877ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 
887ec681f3Smrg 897ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 907ec681f3Smrg {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T 917ec681f3Smrg {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T 927ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 937ec681f3Smrg 947ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 957ec681f3Smrg {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X 967ec681f3Smrg {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X 977ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 987ec681f3Smrg 997ec681f3Smrg {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X 1007ec681f3Smrg {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X 1017ec681f3Smrg {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X 1027ec681f3Smrg {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X 1037ec681f3Smrg 1047ec681f3Smrg {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X 1057ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 1067ec681f3Smrg {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved 1077ec681f3Smrg {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X 1087ec681f3Smrg {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL 1097ec681f3Smrg}; 1107ec681f3Smrg 1117ec681f3Smrgconst Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; 1127ec681f3Smrg 1137ec681f3Smrgconst Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}}; 1147ec681f3Smrgconst Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}}; 1157ec681f3Smrg 1167ec681f3Smrg/** 1177ec681f3Smrg************************************************************************************************************************ 1187ec681f3Smrg* Gfx10Lib::Gfx10Lib 1197ec681f3Smrg* 1207ec681f3Smrg* @brief 1217ec681f3Smrg* Constructor 1227ec681f3Smrg* 
1237ec681f3Smrg************************************************************************************************************************ 1247ec681f3Smrg*/ 1257ec681f3SmrgGfx10Lib::Gfx10Lib(const Client* pClient) 1267ec681f3Smrg : 1277ec681f3Smrg Lib(pClient), 1287ec681f3Smrg m_numPkrLog2(0), 1297ec681f3Smrg m_numSaLog2(0), 1307ec681f3Smrg m_colorBaseIndex(0), 1317ec681f3Smrg m_xmaskBaseIndex(0), 1327ec681f3Smrg m_dccBaseIndex(0) 1337ec681f3Smrg{ 1347ec681f3Smrg memset(&m_settings, 0, sizeof(m_settings)); 1357ec681f3Smrg memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); 1367ec681f3Smrg} 1377ec681f3Smrg 1387ec681f3Smrg/** 1397ec681f3Smrg************************************************************************************************************************ 1407ec681f3Smrg* Gfx10Lib::~Gfx10Lib 1417ec681f3Smrg* 1427ec681f3Smrg* @brief 1437ec681f3Smrg* Destructor 1447ec681f3Smrg************************************************************************************************************************ 1457ec681f3Smrg*/ 1467ec681f3SmrgGfx10Lib::~Gfx10Lib() 1477ec681f3Smrg{ 1487ec681f3Smrg} 1497ec681f3Smrg 1507ec681f3Smrg/** 1517ec681f3Smrg************************************************************************************************************************ 1527ec681f3Smrg* Gfx10Lib::HwlComputeHtileInfo 1537ec681f3Smrg* 1547ec681f3Smrg* @brief 1557ec681f3Smrg* Interface function stub of AddrComputeHtilenfo 1567ec681f3Smrg* 1577ec681f3Smrg* @return 1587ec681f3Smrg* ADDR_E_RETURNCODE 1597ec681f3Smrg************************************************************************************************************************ 1607ec681f3Smrg*/ 1617ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo( 1627ec681f3Smrg const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure 1637ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure 1647ec681f3Smrg ) const 1657ec681f3Smrg{ 1667ec681f3Smrg ADDR_E_RETURNCODE ret = ADDR_OK; 
1677ec681f3Smrg 1687ec681f3Smrg if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) && 1697ec681f3Smrg ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) || 1707ec681f3Smrg (pIn->hTileFlags.pipeAligned != TRUE)) 1717ec681f3Smrg { 1727ec681f3Smrg ret = ADDR_INVALIDPARAMS; 1737ec681f3Smrg } 1747ec681f3Smrg else 1757ec681f3Smrg { 1767ec681f3Smrg Dim3d metaBlk = {}; 1777ec681f3Smrg const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil, 1787ec681f3Smrg ADDR_RSRC_TEX_2D, 1797ec681f3Smrg pIn->swizzleMode, 1807ec681f3Smrg 0, 1817ec681f3Smrg 0, 1827ec681f3Smrg TRUE, 1837ec681f3Smrg &metaBlk); 1847ec681f3Smrg 1857ec681f3Smrg pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 1867ec681f3Smrg pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); 1877ec681f3Smrg pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u)); 1887ec681f3Smrg pOut->metaBlkWidth = metaBlk.w; 1897ec681f3Smrg pOut->metaBlkHeight = metaBlk.h; 1907ec681f3Smrg 1917ec681f3Smrg if (pIn->numMipLevels > 1) 1927ec681f3Smrg { 1937ec681f3Smrg ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 1947ec681f3Smrg 1957ec681f3Smrg UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 
0 : metaBlkSize; 1967ec681f3Smrg 1977ec681f3Smrg for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--) 1987ec681f3Smrg { 1997ec681f3Smrg UINT_32 mipWidth, mipHeight; 2007ec681f3Smrg 2017ec681f3Smrg GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 2027ec681f3Smrg 2037ec681f3Smrg mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 2047ec681f3Smrg mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 2057ec681f3Smrg 2067ec681f3Smrg const UINT_32 pitchInM = mipWidth / metaBlk.w; 2077ec681f3Smrg const UINT_32 heightInM = mipHeight / metaBlk.h; 2087ec681f3Smrg const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; 2097ec681f3Smrg 2107ec681f3Smrg if (pOut->pMipInfo != NULL) 2117ec681f3Smrg { 2127ec681f3Smrg pOut->pMipInfo[i].inMiptail = FALSE; 2137ec681f3Smrg pOut->pMipInfo[i].offset = offset; 2147ec681f3Smrg pOut->pMipInfo[i].sliceSize = mipSliceSize; 2157ec681f3Smrg } 2167ec681f3Smrg 2177ec681f3Smrg offset += mipSliceSize; 2187ec681f3Smrg } 2197ec681f3Smrg 2207ec681f3Smrg pOut->sliceSize = offset; 2217ec681f3Smrg pOut->metaBlkNumPerSlice = offset / metaBlkSize; 2227ec681f3Smrg pOut->htileBytes = pOut->sliceSize * pIn->numSlices; 2237ec681f3Smrg 2247ec681f3Smrg if (pOut->pMipInfo != NULL) 2257ec681f3Smrg { 2267ec681f3Smrg for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 2277ec681f3Smrg { 2287ec681f3Smrg pOut->pMipInfo[i].inMiptail = TRUE; 2297ec681f3Smrg pOut->pMipInfo[i].offset = 0; 2307ec681f3Smrg pOut->pMipInfo[i].sliceSize = 0; 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg if (pIn->firstMipIdInTail != pIn->numMipLevels) 2347ec681f3Smrg { 2357ec681f3Smrg pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 2367ec681f3Smrg } 2377ec681f3Smrg } 2387ec681f3Smrg } 2397ec681f3Smrg else 2407ec681f3Smrg { 2417ec681f3Smrg const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 2427ec681f3Smrg const UINT_32 heightInM = pOut->height / metaBlk.h; 2437ec681f3Smrg 2447ec681f3Smrg 
pOut->metaBlkNumPerSlice = pitchInM * heightInM; 2457ec681f3Smrg pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 2467ec681f3Smrg pOut->htileBytes = pOut->sliceSize * pIn->numSlices; 2477ec681f3Smrg 2487ec681f3Smrg if (pOut->pMipInfo != NULL) 2497ec681f3Smrg { 2507ec681f3Smrg pOut->pMipInfo[0].inMiptail = FALSE; 2517ec681f3Smrg pOut->pMipInfo[0].offset = 0; 2527ec681f3Smrg pOut->pMipInfo[0].sliceSize = pOut->sliceSize; 2537ec681f3Smrg } 2547ec681f3Smrg } 2557ec681f3Smrg 2567ec681f3Smrg // Get the HTILE address equation (copied from HtileAddrFromCoord). 2577ec681f3Smrg // HTILE addressing depends on the number of samples, but this code doesn't support it yet. 2587ec681f3Smrg const UINT_32 index = m_xmaskBaseIndex; 2597ec681f3Smrg const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; 2607ec681f3Smrg 2617ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2); 2627ec681f3Smrg pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]]; 2637ec681f3Smrg } 2647ec681f3Smrg 2657ec681f3Smrg return ret; 2667ec681f3Smrg} 2677ec681f3Smrg 2687ec681f3Smrg/** 2697ec681f3Smrg************************************************************************************************************************ 2707ec681f3Smrg* Gfx10Lib::HwlComputeCmaskInfo 2717ec681f3Smrg* 2727ec681f3Smrg* @brief 2737ec681f3Smrg* Interface function stub of AddrComputeCmaskInfo 2747ec681f3Smrg* 2757ec681f3Smrg* @return 2767ec681f3Smrg* ADDR_E_RETURNCODE 2777ec681f3Smrg************************************************************************************************************************ 2787ec681f3Smrg*/ 2797ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo( 2807ec681f3Smrg const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure 2817ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure 2827ec681f3Smrg ) const 2837ec681f3Smrg{ 2847ec681f3Smrg ADDR_E_RETURNCODE 
ret = ADDR_OK; 2857ec681f3Smrg 2867ec681f3Smrg if ((pIn->resourceType != ADDR_RSRC_TEX_2D) || 2877ec681f3Smrg (pIn->cMaskFlags.pipeAligned != TRUE) || 2887ec681f3Smrg ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) && 2897ec681f3Smrg ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0)))) 2907ec681f3Smrg { 2917ec681f3Smrg ret = ADDR_INVALIDPARAMS; 2927ec681f3Smrg } 2937ec681f3Smrg else 2947ec681f3Smrg { 2957ec681f3Smrg Dim3d metaBlk = {}; 2967ec681f3Smrg const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask, 2977ec681f3Smrg ADDR_RSRC_TEX_2D, 2987ec681f3Smrg pIn->swizzleMode, 2997ec681f3Smrg 0, 3007ec681f3Smrg 0, 3017ec681f3Smrg TRUE, 3027ec681f3Smrg &metaBlk); 3037ec681f3Smrg 3047ec681f3Smrg pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 3057ec681f3Smrg pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); 3067ec681f3Smrg pOut->baseAlign = metaBlkSize; 3077ec681f3Smrg pOut->metaBlkWidth = metaBlk.w; 3087ec681f3Smrg pOut->metaBlkHeight = metaBlk.h; 3097ec681f3Smrg 3107ec681f3Smrg if (pIn->numMipLevels > 1) 3117ec681f3Smrg { 3127ec681f3Smrg ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 3137ec681f3Smrg 3147ec681f3Smrg UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 
0 : 1; 3157ec681f3Smrg 3167ec681f3Smrg for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--) 3177ec681f3Smrg { 3187ec681f3Smrg UINT_32 mipWidth, mipHeight; 3197ec681f3Smrg 3207ec681f3Smrg GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 3217ec681f3Smrg 3227ec681f3Smrg mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 3237ec681f3Smrg mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 3247ec681f3Smrg 3257ec681f3Smrg const UINT_32 pitchInM = mipWidth / metaBlk.w; 3267ec681f3Smrg const UINT_32 heightInM = mipHeight / metaBlk.h; 3277ec681f3Smrg 3287ec681f3Smrg if (pOut->pMipInfo != NULL) 3297ec681f3Smrg { 3307ec681f3Smrg pOut->pMipInfo[i].inMiptail = FALSE; 3317ec681f3Smrg pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize; 3327ec681f3Smrg pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize; 3337ec681f3Smrg } 3347ec681f3Smrg 3357ec681f3Smrg metaBlkPerSlice += pitchInM * heightInM; 3367ec681f3Smrg } 3377ec681f3Smrg 3387ec681f3Smrg pOut->metaBlkNumPerSlice = metaBlkPerSlice; 3397ec681f3Smrg 3407ec681f3Smrg if (pOut->pMipInfo != NULL) 3417ec681f3Smrg { 3427ec681f3Smrg for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 3437ec681f3Smrg { 3447ec681f3Smrg pOut->pMipInfo[i].inMiptail = TRUE; 3457ec681f3Smrg pOut->pMipInfo[i].offset = 0; 3467ec681f3Smrg pOut->pMipInfo[i].sliceSize = 0; 3477ec681f3Smrg } 3487ec681f3Smrg 3497ec681f3Smrg if (pIn->firstMipIdInTail != pIn->numMipLevels) 3507ec681f3Smrg { 3517ec681f3Smrg pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 3527ec681f3Smrg } 3537ec681f3Smrg } 3547ec681f3Smrg } 3557ec681f3Smrg else 3567ec681f3Smrg { 3577ec681f3Smrg const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 3587ec681f3Smrg const UINT_32 heightInM = pOut->height / metaBlk.h; 3597ec681f3Smrg 3607ec681f3Smrg pOut->metaBlkNumPerSlice = pitchInM * heightInM; 3617ec681f3Smrg 3627ec681f3Smrg if (pOut->pMipInfo != NULL) 3637ec681f3Smrg { 3647ec681f3Smrg 
pOut->pMipInfo[0].inMiptail = FALSE; 3657ec681f3Smrg pOut->pMipInfo[0].offset = 0; 3667ec681f3Smrg pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 3677ec681f3Smrg } 3687ec681f3Smrg } 3697ec681f3Smrg 3707ec681f3Smrg pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 3717ec681f3Smrg pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices; 3727ec681f3Smrg 3737ec681f3Smrg // Get the CMASK address equation (copied from CmaskAddrFromCoord) 3747ec681f3Smrg const UINT_32 fmaskBpp = GetFmaskBpp(1, 1); 3757ec681f3Smrg const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3); 3767ec681f3Smrg const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2; 3777ec681f3Smrg const UINT_8* patIdxTable = 3787ec681f3Smrg (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : 3797ec681f3Smrg (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); 3807ec681f3Smrg 3817ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2); 3827ec681f3Smrg pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]]; 3837ec681f3Smrg } 3847ec681f3Smrg 3857ec681f3Smrg return ret; 3867ec681f3Smrg} 3877ec681f3Smrg 3887ec681f3Smrg/** 3897ec681f3Smrg************************************************************************************************************************ 3907ec681f3Smrg* Gfx10Lib::HwlComputeDccInfo 3917ec681f3Smrg* 3927ec681f3Smrg* @brief 3937ec681f3Smrg* Interface function to compute DCC key info 3947ec681f3Smrg* 3957ec681f3Smrg* @return 3967ec681f3Smrg* ADDR_E_RETURNCODE 3977ec681f3Smrg************************************************************************************************************************ 3987ec681f3Smrg*/ 3997ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo( 4007ec681f3Smrg const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure 4017ec681f3Smrg ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure 4027ec681f3Smrg ) const 4037ec681f3Smrg{ 
4047ec681f3Smrg ADDR_E_RETURNCODE ret = ADDR_OK; 4057ec681f3Smrg 4067ec681f3Smrg if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode)) 4077ec681f3Smrg { 4087ec681f3Smrg // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only 4097ec681f3Smrg // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface. 4107ec681f3Smrg ret = ADDR_INVALIDPARAMS; 4117ec681f3Smrg } 4127ec681f3Smrg else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode)) 4137ec681f3Smrg { 4147ec681f3Smrg // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1 4157ec681f3Smrg ret = ADDR_INVALIDPARAMS; 4167ec681f3Smrg } 4177ec681f3Smrg else 4187ec681f3Smrg { 4197ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 4207ec681f3Smrg 4217ec681f3Smrg { 4227ec681f3Smrg // only SW_*_R_X surfaces may be DCC compressed when attached to the CB 4237ec681f3Smrg ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode)); 4247ec681f3Smrg 4257ec681f3Smrg const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode); 4267ec681f3Smrg 4277ec681f3Smrg pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w; 4287ec681f3Smrg pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h; 4297ec681f3Smrg pOut->compressBlkDepth = isThick ? 
Block256_3d[elemLog2].d : 1; 4307ec681f3Smrg } 4317ec681f3Smrg 4327ec681f3Smrg if (ret == ADDR_OK) 4337ec681f3Smrg { 4347ec681f3Smrg Dim3d metaBlk = {}; 4357ec681f3Smrg const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u)); 4367ec681f3Smrg const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor, 4377ec681f3Smrg pIn->resourceType, 4387ec681f3Smrg pIn->swizzleMode, 4397ec681f3Smrg elemLog2, 4407ec681f3Smrg numFragLog2, 4417ec681f3Smrg pIn->dccKeyFlags.pipeAligned, 4427ec681f3Smrg &metaBlk); 4437ec681f3Smrg 4447ec681f3Smrg pOut->dccRamBaseAlign = metaBlkSize; 4457ec681f3Smrg pOut->metaBlkWidth = metaBlk.w; 4467ec681f3Smrg pOut->metaBlkHeight = metaBlk.h; 4477ec681f3Smrg pOut->metaBlkDepth = metaBlk.d; 4487ec681f3Smrg pOut->metaBlkSize = metaBlkSize; 4497ec681f3Smrg 4507ec681f3Smrg pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); 4517ec681f3Smrg pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); 4527ec681f3Smrg pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d); 4537ec681f3Smrg 4547ec681f3Smrg if (pIn->numMipLevels > 1) 4557ec681f3Smrg { 4567ec681f3Smrg ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); 4577ec681f3Smrg 4587ec681f3Smrg UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 
0 : metaBlkSize; 4597ec681f3Smrg 4607ec681f3Smrg for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--) 4617ec681f3Smrg { 4627ec681f3Smrg UINT_32 mipWidth, mipHeight; 4637ec681f3Smrg 4647ec681f3Smrg GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); 4657ec681f3Smrg 4667ec681f3Smrg mipWidth = PowTwoAlign(mipWidth, metaBlk.w); 4677ec681f3Smrg mipHeight = PowTwoAlign(mipHeight, metaBlk.h); 4687ec681f3Smrg 4697ec681f3Smrg const UINT_32 pitchInM = mipWidth / metaBlk.w; 4707ec681f3Smrg const UINT_32 heightInM = mipHeight / metaBlk.h; 4717ec681f3Smrg const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; 4727ec681f3Smrg 4737ec681f3Smrg if (pOut->pMipInfo != NULL) 4747ec681f3Smrg { 4757ec681f3Smrg pOut->pMipInfo[i].inMiptail = FALSE; 4767ec681f3Smrg pOut->pMipInfo[i].offset = offset; 4777ec681f3Smrg pOut->pMipInfo[i].sliceSize = mipSliceSize; 4787ec681f3Smrg } 4797ec681f3Smrg 4807ec681f3Smrg offset += mipSliceSize; 4817ec681f3Smrg } 4827ec681f3Smrg 4837ec681f3Smrg pOut->dccRamSliceSize = offset; 4847ec681f3Smrg pOut->metaBlkNumPerSlice = offset / metaBlkSize; 4857ec681f3Smrg pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); 4867ec681f3Smrg 4877ec681f3Smrg if (pOut->pMipInfo != NULL) 4887ec681f3Smrg { 4897ec681f3Smrg for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) 4907ec681f3Smrg { 4917ec681f3Smrg pOut->pMipInfo[i].inMiptail = TRUE; 4927ec681f3Smrg pOut->pMipInfo[i].offset = 0; 4937ec681f3Smrg pOut->pMipInfo[i].sliceSize = 0; 4947ec681f3Smrg } 4957ec681f3Smrg 4967ec681f3Smrg if (pIn->firstMipIdInTail != pIn->numMipLevels) 4977ec681f3Smrg { 4987ec681f3Smrg pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; 4997ec681f3Smrg } 5007ec681f3Smrg } 5017ec681f3Smrg } 5027ec681f3Smrg else 5037ec681f3Smrg { 5047ec681f3Smrg const UINT_32 pitchInM = pOut->pitch / metaBlk.w; 5057ec681f3Smrg const UINT_32 heightInM = pOut->height / metaBlk.h; 5067ec681f3Smrg 5077ec681f3Smrg 
pOut->metaBlkNumPerSlice = pitchInM * heightInM; 5087ec681f3Smrg pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; 5097ec681f3Smrg pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); 5107ec681f3Smrg 5117ec681f3Smrg if (pOut->pMipInfo != NULL) 5127ec681f3Smrg { 5137ec681f3Smrg pOut->pMipInfo[0].inMiptail = FALSE; 5147ec681f3Smrg pOut->pMipInfo[0].offset = 0; 5157ec681f3Smrg pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize; 5167ec681f3Smrg } 5177ec681f3Smrg } 5187ec681f3Smrg 5197ec681f3Smrg // Get the DCC address equation (copied from DccAddrFromCoord) 5207ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 5217ec681f3Smrg const UINT_32 numPipeLog2 = m_pipesLog2; 5227ec681f3Smrg UINT_32 index = m_dccBaseIndex + elemLog2; 5237ec681f3Smrg const UINT_8* patIdxTable; 5247ec681f3Smrg 5257ec681f3Smrg if (m_settings.supportRbPlus) 5267ec681f3Smrg { 5277ec681f3Smrg patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX; 5287ec681f3Smrg 5297ec681f3Smrg if (pIn->dccKeyFlags.pipeAligned) 5307ec681f3Smrg { 5317ec681f3Smrg index += MaxNumOfBpp; 5327ec681f3Smrg 5337ec681f3Smrg if (m_numPkrLog2 < 2) 5347ec681f3Smrg { 5357ec681f3Smrg index += m_pipesLog2 * MaxNumOfBpp; 5367ec681f3Smrg } 5377ec681f3Smrg else 5387ec681f3Smrg { 5397ec681f3Smrg // 4 groups for "m_numPkrLog2 < 2" case 5407ec681f3Smrg index += 4 * MaxNumOfBpp; 5417ec681f3Smrg 5427ec681f3Smrg const UINT_32 dccPipePerPkr = 3; 5437ec681f3Smrg 5447ec681f3Smrg index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + 5457ec681f3Smrg (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; 5467ec681f3Smrg } 5477ec681f3Smrg } 5487ec681f3Smrg } 5497ec681f3Smrg else 5507ec681f3Smrg { 5517ec681f3Smrg patIdxTable = GFX10_DCC_64K_R_X_PATIDX; 5527ec681f3Smrg 5537ec681f3Smrg if (pIn->dccKeyFlags.pipeAligned) 5547ec681f3Smrg { 5557ec681f3Smrg index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; 5567ec681f3Smrg } 5577ec681f3Smrg else 5587ec681f3Smrg { 5597ec681f3Smrg index += Min(numPipeLog2, UnalignedDccType - 
1) * MaxNumOfBpp; 5607ec681f3Smrg } 5617ec681f3Smrg } 5627ec681f3Smrg 5637ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2); 5647ec681f3Smrg pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]; 5657ec681f3Smrg } 5667ec681f3Smrg } 5677ec681f3Smrg 5687ec681f3Smrg return ret; 5697ec681f3Smrg} 5707ec681f3Smrg 5717ec681f3Smrg/** 5727ec681f3Smrg************************************************************************************************************************ 5737ec681f3Smrg* Gfx10Lib::HwlComputeCmaskAddrFromCoord 5747ec681f3Smrg* 5757ec681f3Smrg* @brief 5767ec681f3Smrg* Interface function stub of AddrComputeCmaskAddrFromCoord 5777ec681f3Smrg* 5787ec681f3Smrg* @return 5797ec681f3Smrg* ADDR_E_RETURNCODE 5807ec681f3Smrg************************************************************************************************************************ 5817ec681f3Smrg*/ 5827ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( 5837ec681f3Smrg const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 5847ec681f3Smrg ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 5857ec681f3Smrg{ 5867ec681f3Smrg // Only support pipe aligned CMask 5877ec681f3Smrg ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE); 5887ec681f3Smrg 5897ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_INPUT input = {}; 5907ec681f3Smrg input.size = sizeof(input); 5917ec681f3Smrg input.cMaskFlags = pIn->cMaskFlags; 5927ec681f3Smrg input.colorFlags = pIn->colorFlags; 5937ec681f3Smrg input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 5947ec681f3Smrg input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 5957ec681f3Smrg input.numSlices = Max(pIn->numSlices, 1u); 5967ec681f3Smrg input.swizzleMode = pIn->swizzleMode; 5977ec681f3Smrg input.resourceType = pIn->resourceType; 5987ec681f3Smrg 5997ec681f3Smrg ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {}; 6007ec681f3Smrg output.size = sizeof(output); 
6017ec681f3Smrg 6027ec681f3Smrg ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); 6037ec681f3Smrg 6047ec681f3Smrg if (returnCode == ADDR_OK) 6057ec681f3Smrg { 6067ec681f3Smrg const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); 6077ec681f3Smrg const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3); 6087ec681f3Smrg const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; 6097ec681f3Smrg const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2; 6107ec681f3Smrg const UINT_8* patIdxTable = 6117ec681f3Smrg (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : 6127ec681f3Smrg (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); 6137ec681f3Smrg 6147ec681f3Smrg 6157ec681f3Smrg const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7; 6167ec681f3Smrg const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 6177ec681f3Smrg const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]], 6187ec681f3Smrg blkSizeLog2 + 1, // +1 for nibble offset 6197ec681f3Smrg pIn->x, 6207ec681f3Smrg pIn->y, 6217ec681f3Smrg pIn->slice, 6227ec681f3Smrg 0); 6237ec681f3Smrg const UINT_32 xb = pIn->x / output.metaBlkWidth; 6247ec681f3Smrg const UINT_32 yb = pIn->y / output.metaBlkHeight; 6257ec681f3Smrg const UINT_32 pb = output.pitch / output.metaBlkWidth; 6267ec681f3Smrg const UINT_32 blkIndex = (yb * pb) + xb; 6277ec681f3Smrg const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 6287ec681f3Smrg 6297ec681f3Smrg pOut->addr = (output.sliceSize * pIn->slice) + 6307ec681f3Smrg (blkIndex * (1 << blkSizeLog2)) + 6317ec681f3Smrg ((blkOffset >> 1) ^ pipeXor); 6327ec681f3Smrg pOut->bitPosition = (blkOffset & 1) << 2; 6337ec681f3Smrg } 6347ec681f3Smrg 6357ec681f3Smrg return returnCode; 6367ec681f3Smrg} 6377ec681f3Smrg 6387ec681f3Smrg/** 
6397ec681f3Smrg************************************************************************************************************************ 6407ec681f3Smrg* Gfx10Lib::HwlComputeHtileAddrFromCoord 6417ec681f3Smrg* 6427ec681f3Smrg* @brief 6437ec681f3Smrg* Interface function stub of AddrComputeHtileAddrFromCoord 6447ec681f3Smrg* 6457ec681f3Smrg* @return 6467ec681f3Smrg* ADDR_E_RETURNCODE 6477ec681f3Smrg************************************************************************************************************************ 6487ec681f3Smrg*/ 6497ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord( 6507ec681f3Smrg const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 6517ec681f3Smrg ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 6527ec681f3Smrg{ 6537ec681f3Smrg ADDR_E_RETURNCODE returnCode = ADDR_OK; 6547ec681f3Smrg 6557ec681f3Smrg if (pIn->numMipLevels > 1) 6567ec681f3Smrg { 6577ec681f3Smrg returnCode = ADDR_NOTIMPLEMENTED; 6587ec681f3Smrg } 6597ec681f3Smrg else 6607ec681f3Smrg { 6617ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_INPUT input = {}; 6627ec681f3Smrg input.size = sizeof(input); 6637ec681f3Smrg input.hTileFlags = pIn->hTileFlags; 6647ec681f3Smrg input.depthFlags = pIn->depthflags; 6657ec681f3Smrg input.swizzleMode = pIn->swizzleMode; 6667ec681f3Smrg input.unalignedWidth = Max(pIn->unalignedWidth, 1u); 6677ec681f3Smrg input.unalignedHeight = Max(pIn->unalignedHeight, 1u); 6687ec681f3Smrg input.numSlices = Max(pIn->numSlices, 1u); 6697ec681f3Smrg input.numMipLevels = 1; 6707ec681f3Smrg 6717ec681f3Smrg ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {}; 6727ec681f3Smrg output.size = sizeof(output); 6737ec681f3Smrg 6747ec681f3Smrg returnCode = ComputeHtileInfo(&input, &output); 6757ec681f3Smrg 6767ec681f3Smrg if (returnCode == ADDR_OK) 6777ec681f3Smrg { 6787ec681f3Smrg const UINT_32 numSampleLog2 = Log2(pIn->numSamples); 6797ec681f3Smrg const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; 6807ec681f3Smrg const UINT_32 
index = m_xmaskBaseIndex + numSampleLog2; 6817ec681f3Smrg const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; 6827ec681f3Smrg 6837ec681f3Smrg 6847ec681f3Smrg const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4; 6857ec681f3Smrg const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 6867ec681f3Smrg const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]], 6877ec681f3Smrg blkSizeLog2 + 1, // +1 for nibble offset 6887ec681f3Smrg pIn->x, 6897ec681f3Smrg pIn->y, 6907ec681f3Smrg pIn->slice, 6917ec681f3Smrg 0); 6927ec681f3Smrg const UINT_32 xb = pIn->x / output.metaBlkWidth; 6937ec681f3Smrg const UINT_32 yb = pIn->y / output.metaBlkHeight; 6947ec681f3Smrg const UINT_32 pb = output.pitch / output.metaBlkWidth; 6957ec681f3Smrg const UINT_32 blkIndex = (yb * pb) + xb; 6967ec681f3Smrg const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 6977ec681f3Smrg 6987ec681f3Smrg pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) + 6997ec681f3Smrg (blkIndex * (1 << blkSizeLog2)) + 7007ec681f3Smrg ((blkOffset >> 1) ^ pipeXor); 7017ec681f3Smrg } 7027ec681f3Smrg } 7037ec681f3Smrg 7047ec681f3Smrg return returnCode; 7057ec681f3Smrg} 7067ec681f3Smrg 7077ec681f3Smrg/** 7087ec681f3Smrg************************************************************************************************************************ 7097ec681f3Smrg* Gfx10Lib::HwlComputeHtileCoordFromAddr 7107ec681f3Smrg* 7117ec681f3Smrg* @brief 7127ec681f3Smrg* Interface function stub of AddrComputeHtileCoordFromAddr 7137ec681f3Smrg* 7147ec681f3Smrg* @return 7157ec681f3Smrg* ADDR_E_RETURNCODE 7167ec681f3Smrg************************************************************************************************************************ 7177ec681f3Smrg*/ 7187ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr( 7197ec681f3Smrg const 
ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure 7207ec681f3Smrg ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure 7217ec681f3Smrg{ 7227ec681f3Smrg ADDR_NOT_IMPLEMENTED(); 7237ec681f3Smrg 7247ec681f3Smrg return ADDR_OK; 7257ec681f3Smrg} 7267ec681f3Smrg 7277ec681f3Smrg/** 7287ec681f3Smrg************************************************************************************************************************ 7297ec681f3Smrg* Gfx10Lib::HwlSupportComputeDccAddrFromCoord 7307ec681f3Smrg* 7317ec681f3Smrg* @brief 7327ec681f3Smrg* Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter 7337ec681f3Smrg* 7347ec681f3Smrg* @return 7357ec681f3Smrg* ADDR_E_RETURNCODE 7367ec681f3Smrg************************************************************************************************************************ 7377ec681f3Smrg*/ 7387ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord( 7397ec681f3Smrg const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn) 7407ec681f3Smrg{ 7417ec681f3Smrg ADDR_E_RETURNCODE returnCode = ADDR_OK; 7427ec681f3Smrg 7437ec681f3Smrg if ((pIn->resourceType != ADDR_RSRC_TEX_2D) || 7447ec681f3Smrg (pIn->swizzleMode != ADDR_SW_64KB_R_X) || 7457ec681f3Smrg (pIn->dccKeyFlags.linear == TRUE) || 7467ec681f3Smrg (pIn->numFrags > 1) || 7477ec681f3Smrg (pIn->numMipLevels > 1) || 7487ec681f3Smrg (pIn->mipId > 0)) 7497ec681f3Smrg { 7507ec681f3Smrg returnCode = ADDR_NOTSUPPORTED; 7517ec681f3Smrg } 7527ec681f3Smrg else if ((pIn->pitch == 0) || 7537ec681f3Smrg (pIn->metaBlkWidth == 0) || 7547ec681f3Smrg (pIn->metaBlkHeight == 0) || 7557ec681f3Smrg (pIn->slice > 0 && pIn->dccRamSliceSize == 0)) 7567ec681f3Smrg { 7577ec681f3Smrg returnCode = ADDR_NOTSUPPORTED; 7587ec681f3Smrg } 7597ec681f3Smrg 7607ec681f3Smrg return returnCode; 7617ec681f3Smrg} 7627ec681f3Smrg 7637ec681f3Smrg/** 
7647ec681f3Smrg************************************************************************************************************************ 7657ec681f3Smrg* Gfx10Lib::HwlComputeDccAddrFromCoord 7667ec681f3Smrg* 7677ec681f3Smrg* @brief 7687ec681f3Smrg* Interface function stub of AddrComputeDccAddrFromCoord 7697ec681f3Smrg* 7707ec681f3Smrg* @return 7717ec681f3Smrg* N/A 7727ec681f3Smrg************************************************************************************************************************ 7737ec681f3Smrg*/ 7747ec681f3SmrgVOID Gfx10Lib::HwlComputeDccAddrFromCoord( 7757ec681f3Smrg const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 7767ec681f3Smrg ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure 7777ec681f3Smrg{ 7787ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 7797ec681f3Smrg const UINT_32 numPipeLog2 = m_pipesLog2; 7807ec681f3Smrg const UINT_32 pipeMask = (1 << numPipeLog2) - 1; 7817ec681f3Smrg UINT_32 index = m_dccBaseIndex + elemLog2; 7827ec681f3Smrg const UINT_8* patIdxTable; 7837ec681f3Smrg 7847ec681f3Smrg if (m_settings.supportRbPlus) 7857ec681f3Smrg { 7867ec681f3Smrg patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX; 7877ec681f3Smrg 7887ec681f3Smrg if (pIn->dccKeyFlags.pipeAligned) 7897ec681f3Smrg { 7907ec681f3Smrg index += MaxNumOfBpp; 7917ec681f3Smrg 7927ec681f3Smrg if (m_numPkrLog2 < 2) 7937ec681f3Smrg { 7947ec681f3Smrg index += m_pipesLog2 * MaxNumOfBpp; 7957ec681f3Smrg } 7967ec681f3Smrg else 7977ec681f3Smrg { 7987ec681f3Smrg // 4 groups for "m_numPkrLog2 < 2" case 7997ec681f3Smrg index += 4 * MaxNumOfBpp; 8007ec681f3Smrg 8017ec681f3Smrg const UINT_32 dccPipePerPkr = 3; 8027ec681f3Smrg 8037ec681f3Smrg index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + 8047ec681f3Smrg (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; 8057ec681f3Smrg } 8067ec681f3Smrg } 8077ec681f3Smrg } 8087ec681f3Smrg else 8097ec681f3Smrg { 8107ec681f3Smrg patIdxTable = GFX10_DCC_64K_R_X_PATIDX; 8117ec681f3Smrg 
8127ec681f3Smrg if (pIn->dccKeyFlags.pipeAligned) 8137ec681f3Smrg { 8147ec681f3Smrg index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; 8157ec681f3Smrg } 8167ec681f3Smrg else 8177ec681f3Smrg { 8187ec681f3Smrg index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; 8197ec681f3Smrg } 8207ec681f3Smrg } 8217ec681f3Smrg 8227ec681f3Smrg const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8; 8237ec681f3Smrg const UINT_32 blkMask = (1 << blkSizeLog2) - 1; 8247ec681f3Smrg const UINT_32 blkOffset = 8257ec681f3Smrg ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], 8267ec681f3Smrg blkSizeLog2 + 1, // +1 for nibble offset 8277ec681f3Smrg pIn->x, 8287ec681f3Smrg pIn->y, 8297ec681f3Smrg pIn->slice, 8307ec681f3Smrg 0); 8317ec681f3Smrg const UINT_32 xb = pIn->x / pIn->metaBlkWidth; 8327ec681f3Smrg const UINT_32 yb = pIn->y / pIn->metaBlkHeight; 8337ec681f3Smrg const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth; 8347ec681f3Smrg const UINT_32 blkIndex = (yb * pb) + xb; 8357ec681f3Smrg const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; 8367ec681f3Smrg 8377ec681f3Smrg pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) + 8387ec681f3Smrg (blkIndex * (1 << blkSizeLog2)) + 8397ec681f3Smrg ((blkOffset >> 1) ^ pipeXor); 8407ec681f3Smrg} 8417ec681f3Smrg 8427ec681f3Smrg/** 8437ec681f3Smrg************************************************************************************************************************ 8447ec681f3Smrg* Gfx10Lib::HwlInitGlobalParams 8457ec681f3Smrg* 8467ec681f3Smrg* @brief 8477ec681f3Smrg* Initializes global parameters 8487ec681f3Smrg* 8497ec681f3Smrg* @return 8507ec681f3Smrg* TRUE if all settings are valid 8517ec681f3Smrg* 8527ec681f3Smrg************************************************************************************************************************ 8537ec681f3Smrg*/ 8547ec681f3SmrgBOOL_32 Gfx10Lib::HwlInitGlobalParams( 
8557ec681f3Smrg const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input 8567ec681f3Smrg{ 8577ec681f3Smrg BOOL_32 valid = TRUE; 8587ec681f3Smrg GB_ADDR_CONFIG_GFX10 gbAddrConfig; 8597ec681f3Smrg 8607ec681f3Smrg gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; 8617ec681f3Smrg 8627ec681f3Smrg // These values are copied from CModel code 8637ec681f3Smrg switch (gbAddrConfig.bits.NUM_PIPES) 8647ec681f3Smrg { 8657ec681f3Smrg case ADDR_CONFIG_1_PIPE: 8667ec681f3Smrg m_pipes = 1; 8677ec681f3Smrg m_pipesLog2 = 0; 8687ec681f3Smrg break; 8697ec681f3Smrg case ADDR_CONFIG_2_PIPE: 8707ec681f3Smrg m_pipes = 2; 8717ec681f3Smrg m_pipesLog2 = 1; 8727ec681f3Smrg break; 8737ec681f3Smrg case ADDR_CONFIG_4_PIPE: 8747ec681f3Smrg m_pipes = 4; 8757ec681f3Smrg m_pipesLog2 = 2; 8767ec681f3Smrg break; 8777ec681f3Smrg case ADDR_CONFIG_8_PIPE: 8787ec681f3Smrg m_pipes = 8; 8797ec681f3Smrg m_pipesLog2 = 3; 8807ec681f3Smrg break; 8817ec681f3Smrg case ADDR_CONFIG_16_PIPE: 8827ec681f3Smrg m_pipes = 16; 8837ec681f3Smrg m_pipesLog2 = 4; 8847ec681f3Smrg break; 8857ec681f3Smrg case ADDR_CONFIG_32_PIPE: 8867ec681f3Smrg m_pipes = 32; 8877ec681f3Smrg m_pipesLog2 = 5; 8887ec681f3Smrg break; 8897ec681f3Smrg case ADDR_CONFIG_64_PIPE: 8907ec681f3Smrg m_pipes = 64; 8917ec681f3Smrg m_pipesLog2 = 6; 8927ec681f3Smrg break; 8937ec681f3Smrg default: 8947ec681f3Smrg ADDR_ASSERT_ALWAYS(); 8957ec681f3Smrg valid = FALSE; 8967ec681f3Smrg break; 8977ec681f3Smrg } 8987ec681f3Smrg 8997ec681f3Smrg switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) 9007ec681f3Smrg { 9017ec681f3Smrg case ADDR_CONFIG_PIPE_INTERLEAVE_256B: 9027ec681f3Smrg m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; 9037ec681f3Smrg m_pipeInterleaveLog2 = 8; 9047ec681f3Smrg break; 9057ec681f3Smrg case ADDR_CONFIG_PIPE_INTERLEAVE_512B: 9067ec681f3Smrg m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; 9077ec681f3Smrg m_pipeInterleaveLog2 = 9; 9087ec681f3Smrg break; 9097ec681f3Smrg case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: 9107ec681f3Smrg 
m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; 9117ec681f3Smrg m_pipeInterleaveLog2 = 10; 9127ec681f3Smrg break; 9137ec681f3Smrg case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: 9147ec681f3Smrg m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; 9157ec681f3Smrg m_pipeInterleaveLog2 = 11; 9167ec681f3Smrg break; 9177ec681f3Smrg default: 9187ec681f3Smrg ADDR_ASSERT_ALWAYS(); 9197ec681f3Smrg valid = FALSE; 9207ec681f3Smrg break; 9217ec681f3Smrg } 9227ec681f3Smrg 9237ec681f3Smrg // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and 9247ec681f3Smrg // any larger value requires a post-process (left shift) on the output pipeBankXor bits. 9257ec681f3Smrg // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case. 9267ec681f3Smrg ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); 9277ec681f3Smrg 9287ec681f3Smrg switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) 9297ec681f3Smrg { 9307ec681f3Smrg case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: 9317ec681f3Smrg m_maxCompFrag = 1; 9327ec681f3Smrg m_maxCompFragLog2 = 0; 9337ec681f3Smrg break; 9347ec681f3Smrg case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: 9357ec681f3Smrg m_maxCompFrag = 2; 9367ec681f3Smrg m_maxCompFragLog2 = 1; 9377ec681f3Smrg break; 9387ec681f3Smrg case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: 9397ec681f3Smrg m_maxCompFrag = 4; 9407ec681f3Smrg m_maxCompFragLog2 = 2; 9417ec681f3Smrg break; 9427ec681f3Smrg case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: 9437ec681f3Smrg m_maxCompFrag = 8; 9447ec681f3Smrg m_maxCompFragLog2 = 3; 9457ec681f3Smrg break; 9467ec681f3Smrg default: 9477ec681f3Smrg ADDR_ASSERT_ALWAYS(); 9487ec681f3Smrg valid = FALSE; 9497ec681f3Smrg break; 9507ec681f3Smrg } 9517ec681f3Smrg 9527ec681f3Smrg { 9537ec681f3Smrg // Skip unaligned case 9547ec681f3Smrg m_xmaskBaseIndex += MaxNumOfAA; 9557ec681f3Smrg 9567ec681f3Smrg m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA; 9577ec681f3Smrg m_colorBaseIndex += m_pipesLog2 * 
MaxNumOfBpp; 9587ec681f3Smrg 9597ec681f3Smrg if (m_settings.supportRbPlus) 9607ec681f3Smrg { 9617ec681f3Smrg m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS; 9627ec681f3Smrg m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0; 9637ec681f3Smrg 9647ec681f3Smrg ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2)); 9657ec681f3Smrg 9667ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) == 9677ec681f3Smrg sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0])); 9687ec681f3Smrg 9697ec681f3Smrg if (m_numPkrLog2 >= 2) 9707ec681f3Smrg { 9717ec681f3Smrg m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp; 9727ec681f3Smrg m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA; 9737ec681f3Smrg } 9747ec681f3Smrg } 9757ec681f3Smrg else 9767ec681f3Smrg { 9777ec681f3Smrg const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) - 9787ec681f3Smrg static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) + 9797ec681f3Smrg 1; 9807ec681f3Smrg 9817ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA); 9827ec681f3Smrg 9837ec681f3Smrg ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == 9847ec681f3Smrg sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0])); 9857ec681f3Smrg } 9867ec681f3Smrg } 9877ec681f3Smrg 9887ec681f3Smrg if (m_settings.supportRbPlus) 9897ec681f3Smrg { 9907ec681f3Smrg // VAR block size = 16K * num_pipes. 
For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the 9917ec681f3Smrg // corresponding SW_64KB_* mode 9927ec681f3Smrg m_blockVarSizeLog2 = m_pipesLog2 + 14; 9937ec681f3Smrg } 9947ec681f3Smrg 9957ec681f3Smrg 9967ec681f3Smrg if (valid) 9977ec681f3Smrg { 9987ec681f3Smrg InitEquationTable(); 9997ec681f3Smrg } 10007ec681f3Smrg 10017ec681f3Smrg return valid; 10027ec681f3Smrg} 10037ec681f3Smrg 10047ec681f3Smrg/** 10057ec681f3Smrg************************************************************************************************************************ 10067ec681f3Smrg* Gfx10Lib::HwlConvertChipFamily 10077ec681f3Smrg* 10087ec681f3Smrg* @brief 10097ec681f3Smrg* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision 10107ec681f3Smrg* @return 10117ec681f3Smrg* ChipFamily 10127ec681f3Smrg************************************************************************************************************************ 10137ec681f3Smrg*/ 10147ec681f3SmrgChipFamily Gfx10Lib::HwlConvertChipFamily( 10157ec681f3Smrg UINT_32 chipFamily, ///< [in] chip family defined in atiih.h 10167ec681f3Smrg UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h 10177ec681f3Smrg{ 10187ec681f3Smrg ChipFamily family = ADDR_CHIP_FAMILY_NAVI; 10197ec681f3Smrg 10207ec681f3Smrg m_settings.dccUnsup3DSwDis = 1; 10217ec681f3Smrg m_settings.dsMipmapHtileFix = 1; 10227ec681f3Smrg 10237ec681f3Smrg switch (chipFamily) 10247ec681f3Smrg { 10257ec681f3Smrg case FAMILY_NV: 10267ec681f3Smrg if (ASICREV_IS_NAVI10_P(chipRevision)) 10277ec681f3Smrg { 10287ec681f3Smrg m_settings.dsMipmapHtileFix = 0; 10297ec681f3Smrg m_settings.isDcn20 = 1; 10307ec681f3Smrg } 10317ec681f3Smrg 10327ec681f3Smrg if (ASICREV_IS_NAVI12_P(chipRevision)) 10337ec681f3Smrg { 10347ec681f3Smrg m_settings.isDcn20 = 1; 10357ec681f3Smrg } 10367ec681f3Smrg 10377ec681f3Smrg if (ASICREV_IS_NAVI14_M(chipRevision)) 10387ec681f3Smrg { 10397ec681f3Smrg m_settings.isDcn20 = 1; 10407ec681f3Smrg } 
10417ec681f3Smrg 10427ec681f3Smrg if (ASICREV_IS_SIENNA_CICHLID(chipRevision)) 10437ec681f3Smrg { 10447ec681f3Smrg m_settings.supportRbPlus = 1; 10457ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10467ec681f3Smrg } 10477ec681f3Smrg 10487ec681f3Smrg if (ASICREV_IS_NAVY_FLOUNDER(chipRevision)) 10497ec681f3Smrg { 10507ec681f3Smrg m_settings.supportRbPlus = 1; 10517ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10527ec681f3Smrg } 10537ec681f3Smrg 10547ec681f3Smrg if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision)) 10557ec681f3Smrg { 10567ec681f3Smrg m_settings.supportRbPlus = 1; 10577ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10587ec681f3Smrg } 10597ec681f3Smrg 10607ec681f3Smrg if (ASICREV_IS_BEIGE_GOBY(chipRevision)) 10617ec681f3Smrg { 10627ec681f3Smrg m_settings.supportRbPlus = 1; 10637ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10647ec681f3Smrg } 10657ec681f3Smrg break; 10667ec681f3Smrg 10677ec681f3Smrg case FAMILY_VGH: 10687ec681f3Smrg if (ASICREV_IS_VANGOGH(chipRevision)) 10697ec681f3Smrg { 10707ec681f3Smrg m_settings.supportRbPlus = 1; 10717ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10727ec681f3Smrg } 10737ec681f3Smrg else 10747ec681f3Smrg { 10757ec681f3Smrg ADDR_ASSERT(!"Unknown chip revision"); 10767ec681f3Smrg } 10777ec681f3Smrg 10787ec681f3Smrg break; 10797ec681f3Smrg 10807ec681f3Smrg case FAMILY_YC: 10817ec681f3Smrg if (ASICREV_IS_YELLOW_CARP(chipRevision)) 10827ec681f3Smrg { 10837ec681f3Smrg m_settings.supportRbPlus = 1; 10847ec681f3Smrg m_settings.dccUnsup3DSwDis = 0; 10857ec681f3Smrg } 10867ec681f3Smrg else 10877ec681f3Smrg { 10887ec681f3Smrg ADDR_ASSERT(!"Unknown chip revision"); 10897ec681f3Smrg } 10907ec681f3Smrg 10917ec681f3Smrg break; 10927ec681f3Smrg 10937ec681f3Smrg default: 10947ec681f3Smrg ADDR_ASSERT(!"Unknown chip family"); 10957ec681f3Smrg break; 10967ec681f3Smrg } 10977ec681f3Smrg 10987ec681f3Smrg m_configFlags.use32bppFor422Fmt = TRUE; 10997ec681f3Smrg 11007ec681f3Smrg return family; 11017ec681f3Smrg} 11027ec681f3Smrg 11037ec681f3Smrg/** 
11047ec681f3Smrg************************************************************************************************************************ 11057ec681f3Smrg* Gfx10Lib::GetBlk256SizeLog2 11067ec681f3Smrg* 11077ec681f3Smrg* @brief 11087ec681f3Smrg* Get block 256 size 11097ec681f3Smrg* 11107ec681f3Smrg* @return 11117ec681f3Smrg* N/A 11127ec681f3Smrg************************************************************************************************************************ 11137ec681f3Smrg*/ 11147ec681f3Smrgvoid Gfx10Lib::GetBlk256SizeLog2( 11157ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 11167ec681f3Smrg AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 11177ec681f3Smrg UINT_32 elemLog2, ///< [in] element size log2 11187ec681f3Smrg UINT_32 numSamplesLog2, ///< [in] number of samples 11197ec681f3Smrg Dim3d* pBlock ///< [out] block size 11207ec681f3Smrg ) const 11217ec681f3Smrg{ 11227ec681f3Smrg if (IsThin(resourceType, swizzleMode)) 11237ec681f3Smrg { 11247ec681f3Smrg UINT_32 blockBits = 8 - elemLog2; 11257ec681f3Smrg 11267ec681f3Smrg if (IsZOrderSwizzle(swizzleMode)) 11277ec681f3Smrg { 11287ec681f3Smrg blockBits -= numSamplesLog2; 11297ec681f3Smrg } 11307ec681f3Smrg 11317ec681f3Smrg pBlock->w = (blockBits >> 1) + (blockBits & 1); 11327ec681f3Smrg pBlock->h = (blockBits >> 1); 11337ec681f3Smrg pBlock->d = 0; 11347ec681f3Smrg } 11357ec681f3Smrg else 11367ec681f3Smrg { 11377ec681f3Smrg ADDR_ASSERT(IsThick(resourceType, swizzleMode)); 11387ec681f3Smrg 11397ec681f3Smrg UINT_32 blockBits = 8 - elemLog2; 11407ec681f3Smrg 11417ec681f3Smrg pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0); 11427ec681f3Smrg pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 
1 : 0); 11437ec681f3Smrg pBlock->h = (blockBits / 3); 11447ec681f3Smrg } 11457ec681f3Smrg} 11467ec681f3Smrg 11477ec681f3Smrg/** 11487ec681f3Smrg************************************************************************************************************************ 11497ec681f3Smrg* Gfx10Lib::GetCompressedBlockSizeLog2 11507ec681f3Smrg* 11517ec681f3Smrg* @brief 11527ec681f3Smrg* Get compress block size 11537ec681f3Smrg* 11547ec681f3Smrg* @return 11557ec681f3Smrg* N/A 11567ec681f3Smrg************************************************************************************************************************ 11577ec681f3Smrg*/ 11587ec681f3Smrgvoid Gfx10Lib::GetCompressedBlockSizeLog2( 11597ec681f3Smrg Gfx10DataType dataType, ///< [in] Data type 11607ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 11617ec681f3Smrg AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 11627ec681f3Smrg UINT_32 elemLog2, ///< [in] element size log2 11637ec681f3Smrg UINT_32 numSamplesLog2, ///< [in] number of samples 11647ec681f3Smrg Dim3d* pBlock ///< [out] block size 11657ec681f3Smrg ) const 11667ec681f3Smrg{ 11677ec681f3Smrg if (dataType == Gfx10DataColor) 11687ec681f3Smrg { 11697ec681f3Smrg GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock); 11707ec681f3Smrg } 11717ec681f3Smrg else 11727ec681f3Smrg { 11737ec681f3Smrg ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask)); 11747ec681f3Smrg pBlock->w = 3; 11757ec681f3Smrg pBlock->h = 3; 11767ec681f3Smrg pBlock->d = 0; 11777ec681f3Smrg } 11787ec681f3Smrg} 11797ec681f3Smrg 11807ec681f3Smrg/** 11817ec681f3Smrg************************************************************************************************************************ 11827ec681f3Smrg* Gfx10Lib::GetMetaOverlapLog2 11837ec681f3Smrg* 11847ec681f3Smrg* @brief 11857ec681f3Smrg* Get meta block overlap 11867ec681f3Smrg* 11877ec681f3Smrg* @return 11887ec681f3Smrg* N/A 
11897ec681f3Smrg************************************************************************************************************************ 11907ec681f3Smrg*/ 11917ec681f3SmrgINT_32 Gfx10Lib::GetMetaOverlapLog2( 11927ec681f3Smrg Gfx10DataType dataType, ///< [in] Data type 11937ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 11947ec681f3Smrg AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 11957ec681f3Smrg UINT_32 elemLog2, ///< [in] element size log2 11967ec681f3Smrg UINT_32 numSamplesLog2 ///< [in] number of samples 11977ec681f3Smrg ) const 11987ec681f3Smrg{ 11997ec681f3Smrg Dim3d compBlock; 12007ec681f3Smrg Dim3d microBlock; 12017ec681f3Smrg 12027ec681f3Smrg GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock); 12037ec681f3Smrg GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock); 12047ec681f3Smrg 12057ec681f3Smrg const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d; 12067ec681f3Smrg const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d; 12077ec681f3Smrg const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2); 12087ec681f3Smrg const INT_32 numPipesLog2 = GetEffectiveNumPipes(); 12097ec681f3Smrg INT_32 overlap = numPipesLog2 - maxSizeLog2; 12107ec681f3Smrg 12117ec681f3Smrg if ((numPipesLog2 > 1) && m_settings.supportRbPlus) 12127ec681f3Smrg { 12137ec681f3Smrg overlap++; 12147ec681f3Smrg } 12157ec681f3Smrg 12167ec681f3Smrg // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4) 12177ec681f3Smrg if ((elemLog2 == 4) && (numSamplesLog2 == 3)) 12187ec681f3Smrg { 12197ec681f3Smrg overlap--; 12207ec681f3Smrg } 12217ec681f3Smrg overlap = Max(overlap, 0); 12227ec681f3Smrg return overlap; 12237ec681f3Smrg} 12247ec681f3Smrg 12257ec681f3Smrg/** 12267ec681f3Smrg************************************************************************************************************************ 12277ec681f3Smrg* 
Gfx10Lib::Get3DMetaOverlapLog2 12287ec681f3Smrg* 12297ec681f3Smrg* @brief 12307ec681f3Smrg* Get 3d meta block overlap 12317ec681f3Smrg* 12327ec681f3Smrg* @return 12337ec681f3Smrg* N/A 12347ec681f3Smrg************************************************************************************************************************ 12357ec681f3Smrg*/ 12367ec681f3SmrgINT_32 Gfx10Lib::Get3DMetaOverlapLog2( 12377ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 12387ec681f3Smrg AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 12397ec681f3Smrg UINT_32 elemLog2 ///< [in] element size log2 12407ec681f3Smrg ) const 12417ec681f3Smrg{ 12427ec681f3Smrg Dim3d microBlock; 12437ec681f3Smrg GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock); 12447ec681f3Smrg 12457ec681f3Smrg INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w); 12467ec681f3Smrg 12477ec681f3Smrg if (m_settings.supportRbPlus) 12487ec681f3Smrg { 12497ec681f3Smrg overlap++; 12507ec681f3Smrg } 12517ec681f3Smrg 12527ec681f3Smrg if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE)) 12537ec681f3Smrg { 12547ec681f3Smrg overlap = 0; 12557ec681f3Smrg } 12567ec681f3Smrg return overlap; 12577ec681f3Smrg} 12587ec681f3Smrg 12597ec681f3Smrg/** 12607ec681f3Smrg************************************************************************************************************************ 12617ec681f3Smrg* Gfx10Lib::GetPipeRotateAmount 12627ec681f3Smrg* 12637ec681f3Smrg* @brief 12647ec681f3Smrg* Get pipe rotate amount 12657ec681f3Smrg* 12667ec681f3Smrg* @return 12677ec681f3Smrg* Pipe rotate amount 12687ec681f3Smrg************************************************************************************************************************ 12697ec681f3Smrg*/ 12707ec681f3Smrg 12717ec681f3SmrgINT_32 Gfx10Lib::GetPipeRotateAmount( 12727ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 12737ec681f3Smrg AddrSwizzleMode swizzleMode ///< [in] Swizzle mode 
12747ec681f3Smrg ) const 12757ec681f3Smrg{ 12767ec681f3Smrg INT_32 amount = 0; 12777ec681f3Smrg 12787ec681f3Smrg if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1)) 12797ec681f3Smrg { 12807ec681f3Smrg amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ? 12817ec681f3Smrg 1 : m_pipesLog2 - (m_numSaLog2 + 1); 12827ec681f3Smrg } 12837ec681f3Smrg 12847ec681f3Smrg return amount; 12857ec681f3Smrg} 12867ec681f3Smrg 12877ec681f3Smrg/** 12887ec681f3Smrg************************************************************************************************************************ 12897ec681f3Smrg* Gfx10Lib::GetMetaBlkSize 12907ec681f3Smrg* 12917ec681f3Smrg* @brief 12927ec681f3Smrg* Get metadata block size 12937ec681f3Smrg* 12947ec681f3Smrg* @return 12957ec681f3Smrg* Meta block size 12967ec681f3Smrg************************************************************************************************************************ 12977ec681f3Smrg*/ 12987ec681f3SmrgUINT_32 Gfx10Lib::GetMetaBlkSize( 12997ec681f3Smrg Gfx10DataType dataType, ///< [in] Data type 13007ec681f3Smrg AddrResourceType resourceType, ///< [in] Resource type 13017ec681f3Smrg AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode 13027ec681f3Smrg UINT_32 elemLog2, ///< [in] element size log2 13037ec681f3Smrg UINT_32 numSamplesLog2, ///< [in] number of samples 13047ec681f3Smrg BOOL_32 pipeAlign, ///< [in] pipe align 13057ec681f3Smrg Dim3d* pBlock ///< [out] block size 13067ec681f3Smrg ) const 13077ec681f3Smrg{ 13087ec681f3Smrg INT_32 metablkSizeLog2; 13097ec681f3Smrg 13107ec681f3Smrg { 13117ec681f3Smrg const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType); 13127ec681f3Smrg const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType); 13137ec681f3Smrg const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2; 13147ec681f3Smrg const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ? 
13157ec681f3Smrg numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2); 13167ec681f3Smrg const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode); 13177ec681f3Smrg INT_32 numPipesLog2 = m_pipesLog2; 13187ec681f3Smrg 13197ec681f3Smrg if (IsThin(resourceType, swizzleMode)) 13207ec681f3Smrg { 13217ec681f3Smrg if ((pipeAlign == FALSE) || 13227ec681f3Smrg (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) || 13237ec681f3Smrg (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE)) 13247ec681f3Smrg { 13257ec681f3Smrg if (pipeAlign) 13267ec681f3Smrg { 13277ec681f3Smrg metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12); 13287ec681f3Smrg metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2); 13297ec681f3Smrg } 13307ec681f3Smrg else 13317ec681f3Smrg { 13327ec681f3Smrg metablkSizeLog2 = Min(dataBlkSizeLog2, 12); 13337ec681f3Smrg } 13347ec681f3Smrg } 13357ec681f3Smrg else 13367ec681f3Smrg { 13377ec681f3Smrg if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1)) 13387ec681f3Smrg { 13397ec681f3Smrg numPipesLog2++; 13407ec681f3Smrg } 13417ec681f3Smrg 13427ec681f3Smrg INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode); 13437ec681f3Smrg 13447ec681f3Smrg if (numPipesLog2 >= 4) 13457ec681f3Smrg { 13467ec681f3Smrg INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2); 13477ec681f3Smrg 13487ec681f3Smrg // In 16Bpe 8xaa, we have an extra overlap bit 13497ec681f3Smrg if ((pipeRotateLog2 > 0) && 13507ec681f3Smrg (elemLog2 == 4) && 13517ec681f3Smrg (numSamplesLog2 == 3) && 13527ec681f3Smrg (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3))) 13537ec681f3Smrg { 13547ec681f3Smrg overlapLog2++; 13557ec681f3Smrg } 13567ec681f3Smrg 13577ec681f3Smrg metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; 13587ec681f3Smrg metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2); 13597ec681f3Smrg 
13607ec681f3Smrg if (m_settings.supportRbPlus && 13617ec681f3Smrg IsRtOptSwizzle(swizzleMode) && 13627ec681f3Smrg (numPipesLog2 == 6) && 13637ec681f3Smrg (numSamplesLog2 == 3) && 13647ec681f3Smrg (m_maxCompFragLog2 == 3) && 13657ec681f3Smrg (metablkSizeLog2 < 15)) 13667ec681f3Smrg { 13677ec681f3Smrg metablkSizeLog2 = 15; 13687ec681f3Smrg } 13697ec681f3Smrg } 13707ec681f3Smrg else 13717ec681f3Smrg { 13727ec681f3Smrg metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12); 13737ec681f3Smrg } 13747ec681f3Smrg 13757ec681f3Smrg if (dataType == Gfx10DataDepthStencil) 13767ec681f3Smrg { 13777ec681f3Smrg // For htile surfaces, pad meta block size to 2K * num_pipes 13787ec681f3Smrg metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2); 13797ec681f3Smrg } 13807ec681f3Smrg 13817ec681f3Smrg const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2); 13827ec681f3Smrg 13837ec681f3Smrg if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1)) 13847ec681f3Smrg { 13857ec681f3Smrg const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1); 13867ec681f3Smrg 13877ec681f3Smrg metablkSizeLog2 = Max(metablkSizeLog2, tmp); 13887ec681f3Smrg } 13897ec681f3Smrg } 13907ec681f3Smrg 13917ec681f3Smrg const INT_32 metablkBitsLog2 = 13927ec681f3Smrg metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; 13937ec681f3Smrg pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1)); 13947ec681f3Smrg pBlock->h = 1 << (metablkBitsLog2 >> 1); 13957ec681f3Smrg pBlock->d = 1; 13967ec681f3Smrg } 13977ec681f3Smrg else 13987ec681f3Smrg { 13997ec681f3Smrg ADDR_ASSERT(IsThick(resourceType, swizzleMode)); 14007ec681f3Smrg 14017ec681f3Smrg if (pipeAlign) 14027ec681f3Smrg { 14037ec681f3Smrg if (m_settings.supportRbPlus && 14047ec681f3Smrg (m_pipesLog2 == m_numSaLog2 + 1) && 14057ec681f3Smrg (m_pipesLog2 > 1) && 14067ec681f3Smrg IsRbAligned(resourceType, swizzleMode)) 14077ec681f3Smrg { 
14087ec681f3Smrg numPipesLog2++; 14097ec681f3Smrg } 14107ec681f3Smrg 14117ec681f3Smrg const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2); 14127ec681f3Smrg 14137ec681f3Smrg metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; 14147ec681f3Smrg metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2); 14157ec681f3Smrg metablkSizeLog2 = Max(metablkSizeLog2, 12); 14167ec681f3Smrg } 14177ec681f3Smrg else 14187ec681f3Smrg { 14197ec681f3Smrg metablkSizeLog2 = 12; 14207ec681f3Smrg } 14217ec681f3Smrg 14227ec681f3Smrg const INT_32 metablkBitsLog2 = 14237ec681f3Smrg metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; 14247ec681f3Smrg pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0)); 14257ec681f3Smrg pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0)); 14267ec681f3Smrg pBlock->d = 1 << (metablkBitsLog2 / 3); 14277ec681f3Smrg } 14287ec681f3Smrg } 14297ec681f3Smrg 14307ec681f3Smrg return (1 << static_cast<UINT_32>(metablkSizeLog2)); 14317ec681f3Smrg} 14327ec681f3Smrg 14337ec681f3Smrg/** 14347ec681f3Smrg************************************************************************************************************************ 14357ec681f3Smrg* Gfx10Lib::ConvertSwizzlePatternToEquation 14367ec681f3Smrg* 14377ec681f3Smrg* @brief 14387ec681f3Smrg* Convert swizzle pattern to equation. 
14397ec681f3Smrg* 14407ec681f3Smrg* @return 14417ec681f3Smrg* N/A 14427ec681f3Smrg************************************************************************************************************************ 14437ec681f3Smrg*/ 14447ec681f3SmrgVOID Gfx10Lib::ConvertSwizzlePatternToEquation( 14457ec681f3Smrg UINT_32 elemLog2, ///< [in] element bytes log2 14467ec681f3Smrg AddrResourceType rsrcType, ///< [in] resource type 14477ec681f3Smrg AddrSwizzleMode swMode, ///< [in] swizzle mode 14487ec681f3Smrg const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor 14497ec681f3Smrg ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern 14507ec681f3Smrg const 14517ec681f3Smrg{ 14527ec681f3Smrg ADDR_BIT_SETTING fullSwizzlePattern[20]; 14537ec681f3Smrg GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); 14547ec681f3Smrg 14557ec681f3Smrg const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern; 14567ec681f3Smrg const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); 14577ec681f3Smrg 14587ec681f3Smrg pEquation->numBits = blockSizeLog2; 14597ec681f3Smrg pEquation->stackedDepthSlices = FALSE; 14607ec681f3Smrg 14617ec681f3Smrg for (UINT_32 i = 0; i < elemLog2; i++) 14627ec681f3Smrg { 14637ec681f3Smrg pEquation->addr[i].channel = 0; 14647ec681f3Smrg pEquation->addr[i].valid = 1; 14657ec681f3Smrg pEquation->addr[i].index = i; 14667ec681f3Smrg } 14677ec681f3Smrg 14687ec681f3Smrg if (IsXor(swMode) == FALSE) 14697ec681f3Smrg { 14707ec681f3Smrg for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) 14717ec681f3Smrg { 14727ec681f3Smrg ADDR_ASSERT(IsPow2(pSwizzle[i].value)); 14737ec681f3Smrg 14747ec681f3Smrg if (pSwizzle[i].x != 0) 14757ec681f3Smrg { 14767ec681f3Smrg ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x))); 14777ec681f3Smrg 14787ec681f3Smrg pEquation->addr[i].channel = 0; 14797ec681f3Smrg pEquation->addr[i].valid = 1; 14807ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2; 14817ec681f3Smrg } 14827ec681f3Smrg else if 
(pSwizzle[i].y != 0) 14837ec681f3Smrg { 14847ec681f3Smrg ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y))); 14857ec681f3Smrg 14867ec681f3Smrg pEquation->addr[i].channel = 1; 14877ec681f3Smrg pEquation->addr[i].valid = 1; 14887ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].y); 14897ec681f3Smrg } 14907ec681f3Smrg else 14917ec681f3Smrg { 14927ec681f3Smrg ADDR_ASSERT(pSwizzle[i].z != 0); 14937ec681f3Smrg ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z))); 14947ec681f3Smrg 14957ec681f3Smrg pEquation->addr[i].channel = 2; 14967ec681f3Smrg pEquation->addr[i].valid = 1; 14977ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].z); 14987ec681f3Smrg } 14997ec681f3Smrg 15007ec681f3Smrg pEquation->xor1[i].value = 0; 15017ec681f3Smrg pEquation->xor2[i].value = 0; 15027ec681f3Smrg } 15037ec681f3Smrg } 15047ec681f3Smrg else if (IsThin(rsrcType, swMode)) 15057ec681f3Smrg { 15067ec681f3Smrg Dim3d dim; 15077ec681f3Smrg ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode); 15087ec681f3Smrg 15097ec681f3Smrg const UINT_32 blkXLog2 = Log2(dim.w); 15107ec681f3Smrg const UINT_32 blkYLog2 = Log2(dim.h); 15117ec681f3Smrg const UINT_32 blkXMask = dim.w - 1; 15127ec681f3Smrg const UINT_32 blkYMask = dim.h - 1; 15137ec681f3Smrg 15147ec681f3Smrg ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; 15157ec681f3Smrg UINT_32 xMask = 0; 15167ec681f3Smrg UINT_32 yMask = 0; 15177ec681f3Smrg UINT_32 bMask = (1 << elemLog2) - 1; 15187ec681f3Smrg 15197ec681f3Smrg for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) 15207ec681f3Smrg { 15217ec681f3Smrg if (IsPow2(pSwizzle[i].value)) 15227ec681f3Smrg { 15237ec681f3Smrg if (pSwizzle[i].x != 0) 15247ec681f3Smrg { 15257ec681f3Smrg ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); 15267ec681f3Smrg xMask |= pSwizzle[i].x; 15277ec681f3Smrg 15287ec681f3Smrg const UINT_32 xLog2 = Log2(pSwizzle[i].x); 15297ec681f3Smrg 15307ec681f3Smrg ADDR_ASSERT(xLog2 < blkXLog2); 15317ec681f3Smrg 15327ec681f3Smrg 
pEquation->addr[i].channel = 0; 15337ec681f3Smrg pEquation->addr[i].valid = 1; 15347ec681f3Smrg pEquation->addr[i].index = xLog2 + elemLog2; 15357ec681f3Smrg } 15367ec681f3Smrg else 15377ec681f3Smrg { 15387ec681f3Smrg ADDR_ASSERT(pSwizzle[i].y != 0); 15397ec681f3Smrg ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); 15407ec681f3Smrg yMask |= pSwizzle[i].y; 15417ec681f3Smrg 15427ec681f3Smrg pEquation->addr[i].channel = 1; 15437ec681f3Smrg pEquation->addr[i].valid = 1; 15447ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].y); 15457ec681f3Smrg 15467ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); 15477ec681f3Smrg } 15487ec681f3Smrg 15497ec681f3Smrg swizzle[i].value = 0; 15507ec681f3Smrg bMask |= 1 << i; 15517ec681f3Smrg } 15527ec681f3Smrg else 15537ec681f3Smrg { 15547ec681f3Smrg if (pSwizzle[i].z != 0) 15557ec681f3Smrg { 15567ec681f3Smrg ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z))); 15577ec681f3Smrg 15587ec681f3Smrg pEquation->xor2[i].channel = 2; 15597ec681f3Smrg pEquation->xor2[i].valid = 1; 15607ec681f3Smrg pEquation->xor2[i].index = Log2(pSwizzle[i].z); 15617ec681f3Smrg } 15627ec681f3Smrg 15637ec681f3Smrg swizzle[i].x = pSwizzle[i].x; 15647ec681f3Smrg swizzle[i].y = pSwizzle[i].y; 15657ec681f3Smrg swizzle[i].z = swizzle[i].s = 0; 15667ec681f3Smrg 15677ec681f3Smrg ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); 15687ec681f3Smrg 15697ec681f3Smrg const UINT_32 xHi = swizzle[i].x & (~blkXMask); 15707ec681f3Smrg 15717ec681f3Smrg if (xHi != 0) 15727ec681f3Smrg { 15737ec681f3Smrg ADDR_ASSERT(IsPow2(xHi)); 15747ec681f3Smrg ADDR_ASSERT(pEquation->xor1[i].value == 0); 15757ec681f3Smrg 15767ec681f3Smrg pEquation->xor1[i].channel = 0; 15777ec681f3Smrg pEquation->xor1[i].valid = 1; 15787ec681f3Smrg pEquation->xor1[i].index = Log2(xHi) + elemLog2; 15797ec681f3Smrg 15807ec681f3Smrg swizzle[i].x &= blkXMask; 15817ec681f3Smrg } 15827ec681f3Smrg 15837ec681f3Smrg const UINT_32 yHi = swizzle[i].y & (~blkYMask); 15847ec681f3Smrg 15857ec681f3Smrg if (yHi != 0) 
15867ec681f3Smrg { 15877ec681f3Smrg ADDR_ASSERT(IsPow2(yHi)); 15887ec681f3Smrg 15897ec681f3Smrg if (xHi == 0) 15907ec681f3Smrg { 15917ec681f3Smrg ADDR_ASSERT(pEquation->xor1[i].value == 0); 15927ec681f3Smrg pEquation->xor1[i].channel = 1; 15937ec681f3Smrg pEquation->xor1[i].valid = 1; 15947ec681f3Smrg pEquation->xor1[i].index = Log2(yHi); 15957ec681f3Smrg } 15967ec681f3Smrg else 15977ec681f3Smrg { 15987ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 15997ec681f3Smrg pEquation->xor2[i].channel = 1; 16007ec681f3Smrg pEquation->xor2[i].valid = 1; 16017ec681f3Smrg pEquation->xor2[i].index = Log2(yHi); 16027ec681f3Smrg } 16037ec681f3Smrg 16047ec681f3Smrg swizzle[i].y &= blkYMask; 16057ec681f3Smrg } 16067ec681f3Smrg 16077ec681f3Smrg if (swizzle[i].value == 0) 16087ec681f3Smrg { 16097ec681f3Smrg bMask |= 1 << i; 16107ec681f3Smrg } 16117ec681f3Smrg } 16127ec681f3Smrg } 16137ec681f3Smrg 16147ec681f3Smrg const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; 16157ec681f3Smrg const UINT_32 blockMask = (1 << blockSizeLog2) - 1; 16167ec681f3Smrg 16177ec681f3Smrg ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); 16187ec681f3Smrg 16197ec681f3Smrg while (bMask != blockMask) 16207ec681f3Smrg { 16217ec681f3Smrg for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) 16227ec681f3Smrg { 16237ec681f3Smrg if ((bMask & (1 << i)) == 0) 16247ec681f3Smrg { 16257ec681f3Smrg if (IsPow2(swizzle[i].value)) 16267ec681f3Smrg { 16277ec681f3Smrg if (swizzle[i].x != 0) 16287ec681f3Smrg { 16297ec681f3Smrg ADDR_ASSERT((xMask & swizzle[i].x) == 0); 16307ec681f3Smrg xMask |= swizzle[i].x; 16317ec681f3Smrg 16327ec681f3Smrg const UINT_32 xLog2 = Log2(swizzle[i].x); 16337ec681f3Smrg 16347ec681f3Smrg ADDR_ASSERT(xLog2 < blkXLog2); 16357ec681f3Smrg 16367ec681f3Smrg pEquation->addr[i].channel = 0; 16377ec681f3Smrg pEquation->addr[i].valid = 1; 16387ec681f3Smrg pEquation->addr[i].index = xLog2 + elemLog2; 16397ec681f3Smrg } 16407ec681f3Smrg else 16417ec681f3Smrg { 16427ec681f3Smrg 
ADDR_ASSERT(swizzle[i].y != 0); 16437ec681f3Smrg ADDR_ASSERT((yMask & swizzle[i].y) == 0); 16447ec681f3Smrg yMask |= swizzle[i].y; 16457ec681f3Smrg 16467ec681f3Smrg pEquation->addr[i].channel = 1; 16477ec681f3Smrg pEquation->addr[i].valid = 1; 16487ec681f3Smrg pEquation->addr[i].index = Log2(swizzle[i].y); 16497ec681f3Smrg 16507ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); 16517ec681f3Smrg } 16527ec681f3Smrg 16537ec681f3Smrg swizzle[i].value = 0; 16547ec681f3Smrg bMask |= 1 << i; 16557ec681f3Smrg } 16567ec681f3Smrg else 16577ec681f3Smrg { 16587ec681f3Smrg const UINT_32 x = swizzle[i].x & xMask; 16597ec681f3Smrg const UINT_32 y = swizzle[i].y & yMask; 16607ec681f3Smrg 16617ec681f3Smrg if (x != 0) 16627ec681f3Smrg { 16637ec681f3Smrg ADDR_ASSERT(IsPow2(x)); 16647ec681f3Smrg 16657ec681f3Smrg if (pEquation->xor1[i].value == 0) 16667ec681f3Smrg { 16677ec681f3Smrg pEquation->xor1[i].channel = 0; 16687ec681f3Smrg pEquation->xor1[i].valid = 1; 16697ec681f3Smrg pEquation->xor1[i].index = Log2(x) + elemLog2; 16707ec681f3Smrg } 16717ec681f3Smrg else 16727ec681f3Smrg { 16737ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 16747ec681f3Smrg pEquation->xor2[i].channel = 0; 16757ec681f3Smrg pEquation->xor2[i].valid = 1; 16767ec681f3Smrg pEquation->xor2[i].index = Log2(x) + elemLog2; 16777ec681f3Smrg } 16787ec681f3Smrg } 16797ec681f3Smrg 16807ec681f3Smrg if (y != 0) 16817ec681f3Smrg { 16827ec681f3Smrg ADDR_ASSERT(IsPow2(y)); 16837ec681f3Smrg 16847ec681f3Smrg if (pEquation->xor1[i].value == 0) 16857ec681f3Smrg { 16867ec681f3Smrg pEquation->xor1[i].channel = 1; 16877ec681f3Smrg pEquation->xor1[i].valid = 1; 16887ec681f3Smrg pEquation->xor1[i].index = Log2(y); 16897ec681f3Smrg } 16907ec681f3Smrg else 16917ec681f3Smrg { 16927ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 16937ec681f3Smrg pEquation->xor2[i].channel = 1; 16947ec681f3Smrg pEquation->xor2[i].valid = 1; 16957ec681f3Smrg pEquation->xor2[i].index = Log2(y); 16967ec681f3Smrg } 16977ec681f3Smrg } 
16987ec681f3Smrg 16997ec681f3Smrg swizzle[i].x &= ~x; 17007ec681f3Smrg swizzle[i].y &= ~y; 17017ec681f3Smrg } 17027ec681f3Smrg } 17037ec681f3Smrg } 17047ec681f3Smrg } 17057ec681f3Smrg 17067ec681f3Smrg ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask)); 17077ec681f3Smrg } 17087ec681f3Smrg else 17097ec681f3Smrg { 17107ec681f3Smrg const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w; 17117ec681f3Smrg const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h; 17127ec681f3Smrg const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d; 17137ec681f3Smrg const UINT_32 blkXMask = (1 << blkXLog2) - 1; 17147ec681f3Smrg const UINT_32 blkYMask = (1 << blkYLog2) - 1; 17157ec681f3Smrg const UINT_32 blkZMask = (1 << blkZLog2) - 1; 17167ec681f3Smrg 17177ec681f3Smrg ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {}; 17187ec681f3Smrg UINT_32 xMask = 0; 17197ec681f3Smrg UINT_32 yMask = 0; 17207ec681f3Smrg UINT_32 zMask = 0; 17217ec681f3Smrg UINT_32 bMask = (1 << elemLog2) - 1; 17227ec681f3Smrg 17237ec681f3Smrg for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) 17247ec681f3Smrg { 17257ec681f3Smrg if (IsPow2(pSwizzle[i].value)) 17267ec681f3Smrg { 17277ec681f3Smrg if (pSwizzle[i].x != 0) 17287ec681f3Smrg { 17297ec681f3Smrg ADDR_ASSERT((xMask & pSwizzle[i].x) == 0); 17307ec681f3Smrg xMask |= pSwizzle[i].x; 17317ec681f3Smrg 17327ec681f3Smrg const UINT_32 xLog2 = Log2(pSwizzle[i].x); 17337ec681f3Smrg 17347ec681f3Smrg ADDR_ASSERT(xLog2 < blkXLog2); 17357ec681f3Smrg 17367ec681f3Smrg pEquation->addr[i].channel = 0; 17377ec681f3Smrg pEquation->addr[i].valid = 1; 17387ec681f3Smrg pEquation->addr[i].index = xLog2 + elemLog2; 17397ec681f3Smrg } 17407ec681f3Smrg else if (pSwizzle[i].y != 0) 17417ec681f3Smrg { 17427ec681f3Smrg ADDR_ASSERT((yMask & pSwizzle[i].y) == 0); 17437ec681f3Smrg yMask |= pSwizzle[i].y; 17447ec681f3Smrg 
17457ec681f3Smrg pEquation->addr[i].channel = 1; 17467ec681f3Smrg pEquation->addr[i].valid = 1; 17477ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].y); 17487ec681f3Smrg 17497ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); 17507ec681f3Smrg } 17517ec681f3Smrg else 17527ec681f3Smrg { 17537ec681f3Smrg ADDR_ASSERT(pSwizzle[i].z != 0); 17547ec681f3Smrg ADDR_ASSERT((zMask & pSwizzle[i].z) == 0); 17557ec681f3Smrg zMask |= pSwizzle[i].z; 17567ec681f3Smrg 17577ec681f3Smrg pEquation->addr[i].channel = 2; 17587ec681f3Smrg pEquation->addr[i].valid = 1; 17597ec681f3Smrg pEquation->addr[i].index = Log2(pSwizzle[i].z); 17607ec681f3Smrg 17617ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); 17627ec681f3Smrg } 17637ec681f3Smrg 17647ec681f3Smrg swizzle[i].value = 0; 17657ec681f3Smrg bMask |= 1 << i; 17667ec681f3Smrg } 17677ec681f3Smrg else 17687ec681f3Smrg { 17697ec681f3Smrg swizzle[i].x = pSwizzle[i].x; 17707ec681f3Smrg swizzle[i].y = pSwizzle[i].y; 17717ec681f3Smrg swizzle[i].z = pSwizzle[i].z; 17727ec681f3Smrg swizzle[i].s = 0; 17737ec681f3Smrg 17747ec681f3Smrg ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE); 17757ec681f3Smrg 17767ec681f3Smrg const UINT_32 xHi = swizzle[i].x & (~blkXMask); 17777ec681f3Smrg const UINT_32 yHi = swizzle[i].y & (~blkYMask); 17787ec681f3Smrg const UINT_32 zHi = swizzle[i].z & (~blkZMask); 17797ec681f3Smrg 17807ec681f3Smrg ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0)); 17817ec681f3Smrg 17827ec681f3Smrg if (xHi != 0) 17837ec681f3Smrg { 17847ec681f3Smrg ADDR_ASSERT(IsPow2(xHi)); 17857ec681f3Smrg ADDR_ASSERT(pEquation->xor1[i].value == 0); 17867ec681f3Smrg 17877ec681f3Smrg pEquation->xor1[i].channel = 0; 17887ec681f3Smrg pEquation->xor1[i].valid = 1; 17897ec681f3Smrg pEquation->xor1[i].index = Log2(xHi) + elemLog2; 17907ec681f3Smrg 17917ec681f3Smrg swizzle[i].x &= blkXMask; 17927ec681f3Smrg } 17937ec681f3Smrg 17947ec681f3Smrg if (yHi != 0) 17957ec681f3Smrg { 17967ec681f3Smrg ADDR_ASSERT(IsPow2(yHi)); 17977ec681f3Smrg 
17987ec681f3Smrg if (pEquation->xor1[i].value == 0) 17997ec681f3Smrg { 18007ec681f3Smrg pEquation->xor1[i].channel = 1; 18017ec681f3Smrg pEquation->xor1[i].valid = 1; 18027ec681f3Smrg pEquation->xor1[i].index = Log2(yHi); 18037ec681f3Smrg } 18047ec681f3Smrg else 18057ec681f3Smrg { 18067ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 18077ec681f3Smrg pEquation->xor2[i].channel = 1; 18087ec681f3Smrg pEquation->xor2[i].valid = 1; 18097ec681f3Smrg pEquation->xor2[i].index = Log2(yHi); 18107ec681f3Smrg } 18117ec681f3Smrg 18127ec681f3Smrg swizzle[i].y &= blkYMask; 18137ec681f3Smrg } 18147ec681f3Smrg 18157ec681f3Smrg if (zHi != 0) 18167ec681f3Smrg { 18177ec681f3Smrg ADDR_ASSERT(IsPow2(zHi)); 18187ec681f3Smrg 18197ec681f3Smrg if (pEquation->xor1[i].value == 0) 18207ec681f3Smrg { 18217ec681f3Smrg pEquation->xor1[i].channel = 2; 18227ec681f3Smrg pEquation->xor1[i].valid = 1; 18237ec681f3Smrg pEquation->xor1[i].index = Log2(zHi); 18247ec681f3Smrg } 18257ec681f3Smrg else 18267ec681f3Smrg { 18277ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 18287ec681f3Smrg pEquation->xor2[i].channel = 2; 18297ec681f3Smrg pEquation->xor2[i].valid = 1; 18307ec681f3Smrg pEquation->xor2[i].index = Log2(zHi); 18317ec681f3Smrg } 18327ec681f3Smrg 18337ec681f3Smrg swizzle[i].z &= blkZMask; 18347ec681f3Smrg } 18357ec681f3Smrg 18367ec681f3Smrg if (swizzle[i].value == 0) 18377ec681f3Smrg { 18387ec681f3Smrg bMask |= 1 << i; 18397ec681f3Smrg } 18407ec681f3Smrg } 18417ec681f3Smrg } 18427ec681f3Smrg 18437ec681f3Smrg const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1; 18447ec681f3Smrg const UINT_32 blockMask = (1 << blockSizeLog2) - 1; 18457ec681f3Smrg 18467ec681f3Smrg ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask); 18477ec681f3Smrg 18487ec681f3Smrg while (bMask != blockMask) 18497ec681f3Smrg { 18507ec681f3Smrg for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++) 18517ec681f3Smrg { 18527ec681f3Smrg if ((bMask & (1 << i)) == 0) 18537ec681f3Smrg { 18547ec681f3Smrg if 
(IsPow2(swizzle[i].value)) 18557ec681f3Smrg { 18567ec681f3Smrg if (swizzle[i].x != 0) 18577ec681f3Smrg { 18587ec681f3Smrg ADDR_ASSERT((xMask & swizzle[i].x) == 0); 18597ec681f3Smrg xMask |= swizzle[i].x; 18607ec681f3Smrg 18617ec681f3Smrg const UINT_32 xLog2 = Log2(swizzle[i].x); 18627ec681f3Smrg 18637ec681f3Smrg ADDR_ASSERT(xLog2 < blkXLog2); 18647ec681f3Smrg 18657ec681f3Smrg pEquation->addr[i].channel = 0; 18667ec681f3Smrg pEquation->addr[i].valid = 1; 18677ec681f3Smrg pEquation->addr[i].index = xLog2 + elemLog2; 18687ec681f3Smrg } 18697ec681f3Smrg else if (swizzle[i].y != 0) 18707ec681f3Smrg { 18717ec681f3Smrg ADDR_ASSERT((yMask & swizzle[i].y) == 0); 18727ec681f3Smrg yMask |= swizzle[i].y; 18737ec681f3Smrg 18747ec681f3Smrg pEquation->addr[i].channel = 1; 18757ec681f3Smrg pEquation->addr[i].valid = 1; 18767ec681f3Smrg pEquation->addr[i].index = Log2(swizzle[i].y); 18777ec681f3Smrg 18787ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkYLog2); 18797ec681f3Smrg } 18807ec681f3Smrg else 18817ec681f3Smrg { 18827ec681f3Smrg ADDR_ASSERT(swizzle[i].z != 0); 18837ec681f3Smrg ADDR_ASSERT((zMask & swizzle[i].z) == 0); 18847ec681f3Smrg zMask |= swizzle[i].z; 18857ec681f3Smrg 18867ec681f3Smrg pEquation->addr[i].channel = 2; 18877ec681f3Smrg pEquation->addr[i].valid = 1; 18887ec681f3Smrg pEquation->addr[i].index = Log2(swizzle[i].z); 18897ec681f3Smrg 18907ec681f3Smrg ADDR_ASSERT(pEquation->addr[i].index < blkZLog2); 18917ec681f3Smrg } 18927ec681f3Smrg 18937ec681f3Smrg swizzle[i].value = 0; 18947ec681f3Smrg bMask |= 1 << i; 18957ec681f3Smrg } 18967ec681f3Smrg else 18977ec681f3Smrg { 18987ec681f3Smrg const UINT_32 x = swizzle[i].x & xMask; 18997ec681f3Smrg const UINT_32 y = swizzle[i].y & yMask; 19007ec681f3Smrg const UINT_32 z = swizzle[i].z & zMask; 19017ec681f3Smrg 19027ec681f3Smrg if (x != 0) 19037ec681f3Smrg { 19047ec681f3Smrg ADDR_ASSERT(IsPow2(x)); 19057ec681f3Smrg 19067ec681f3Smrg if (pEquation->xor1[i].value == 0) 19077ec681f3Smrg { 19087ec681f3Smrg 
pEquation->xor1[i].channel = 0; 19097ec681f3Smrg pEquation->xor1[i].valid = 1; 19107ec681f3Smrg pEquation->xor1[i].index = Log2(x) + elemLog2; 19117ec681f3Smrg } 19127ec681f3Smrg else 19137ec681f3Smrg { 19147ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 19157ec681f3Smrg pEquation->xor2[i].channel = 0; 19167ec681f3Smrg pEquation->xor2[i].valid = 1; 19177ec681f3Smrg pEquation->xor2[i].index = Log2(x) + elemLog2; 19187ec681f3Smrg } 19197ec681f3Smrg } 19207ec681f3Smrg 19217ec681f3Smrg if (y != 0) 19227ec681f3Smrg { 19237ec681f3Smrg ADDR_ASSERT(IsPow2(y)); 19247ec681f3Smrg 19257ec681f3Smrg if (pEquation->xor1[i].value == 0) 19267ec681f3Smrg { 19277ec681f3Smrg pEquation->xor1[i].channel = 1; 19287ec681f3Smrg pEquation->xor1[i].valid = 1; 19297ec681f3Smrg pEquation->xor1[i].index = Log2(y); 19307ec681f3Smrg } 19317ec681f3Smrg else 19327ec681f3Smrg { 19337ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 19347ec681f3Smrg pEquation->xor2[i].channel = 1; 19357ec681f3Smrg pEquation->xor2[i].valid = 1; 19367ec681f3Smrg pEquation->xor2[i].index = Log2(y); 19377ec681f3Smrg } 19387ec681f3Smrg } 19397ec681f3Smrg 19407ec681f3Smrg if (z != 0) 19417ec681f3Smrg { 19427ec681f3Smrg ADDR_ASSERT(IsPow2(z)); 19437ec681f3Smrg 19447ec681f3Smrg if (pEquation->xor1[i].value == 0) 19457ec681f3Smrg { 19467ec681f3Smrg pEquation->xor1[i].channel = 2; 19477ec681f3Smrg pEquation->xor1[i].valid = 1; 19487ec681f3Smrg pEquation->xor1[i].index = Log2(z); 19497ec681f3Smrg } 19507ec681f3Smrg else 19517ec681f3Smrg { 19527ec681f3Smrg ADDR_ASSERT(pEquation->xor2[i].value == 0); 19537ec681f3Smrg pEquation->xor2[i].channel = 2; 19547ec681f3Smrg pEquation->xor2[i].valid = 1; 19557ec681f3Smrg pEquation->xor2[i].index = Log2(z); 19567ec681f3Smrg } 19577ec681f3Smrg } 19587ec681f3Smrg 19597ec681f3Smrg swizzle[i].x &= ~x; 19607ec681f3Smrg swizzle[i].y &= ~y; 19617ec681f3Smrg swizzle[i].z &= ~z; 19627ec681f3Smrg } 19637ec681f3Smrg } 19647ec681f3Smrg } 19657ec681f3Smrg } 19667ec681f3Smrg 
19677ec681f3Smrg ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask)); 19687ec681f3Smrg } 19697ec681f3Smrg} 19707ec681f3Smrg 19717ec681f3Smrg/** 19727ec681f3Smrg************************************************************************************************************************ 19737ec681f3Smrg* Gfx10Lib::InitEquationTable 19747ec681f3Smrg* 19757ec681f3Smrg* @brief 19767ec681f3Smrg* Initialize Equation table. 19777ec681f3Smrg* 19787ec681f3Smrg* @return 19797ec681f3Smrg* N/A 19807ec681f3Smrg************************************************************************************************************************ 19817ec681f3Smrg*/ 19827ec681f3SmrgVOID Gfx10Lib::InitEquationTable() 19837ec681f3Smrg{ 19847ec681f3Smrg memset(m_equationTable, 0, sizeof(m_equationTable)); 19857ec681f3Smrg 19867ec681f3Smrg for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) 19877ec681f3Smrg { 19887ec681f3Smrg const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D); 19897ec681f3Smrg 19907ec681f3Smrg for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++) 19917ec681f3Smrg { 19927ec681f3Smrg const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx); 19937ec681f3Smrg 19947ec681f3Smrg for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++) 19957ec681f3Smrg { 19967ec681f3Smrg UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; 19977ec681f3Smrg const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1); 19987ec681f3Smrg 19997ec681f3Smrg if (pPatInfo != NULL) 20007ec681f3Smrg { 20017ec681f3Smrg ADDR_ASSERT(IsValidSwMode(swMode)); 20027ec681f3Smrg 20037ec681f3Smrg if (pPatInfo->maxItemCount <= 3) 20047ec681f3Smrg { 20057ec681f3Smrg ADDR_EQUATION equation = {}; 20067ec681f3Smrg 20077ec681f3Smrg ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); 20087ec681f3Smrg 20097ec681f3Smrg equationIndex = m_numEquations; 
20107ec681f3Smrg ADDR_ASSERT(equationIndex < EquationTableSize); 20117ec681f3Smrg 20127ec681f3Smrg m_equationTable[equationIndex] = equation; 20137ec681f3Smrg 20147ec681f3Smrg m_numEquations++; 20157ec681f3Smrg } 20167ec681f3Smrg else 20177ec681f3Smrg { 20187ec681f3Smrg // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case 20197ec681f3Smrg ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); 20207ec681f3Smrg ADDR_ASSERT(rsrcTypeIdx == 1); 20217ec681f3Smrg ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X); 20227ec681f3Smrg ADDR_ASSERT(m_settings.supportRbPlus == 1); 20237ec681f3Smrg } 20247ec681f3Smrg } 20257ec681f3Smrg 20267ec681f3Smrg m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex; 20277ec681f3Smrg } 20287ec681f3Smrg } 20297ec681f3Smrg } 20307ec681f3Smrg} 20317ec681f3Smrg 20327ec681f3Smrg/** 20337ec681f3Smrg************************************************************************************************************************ 20347ec681f3Smrg* Gfx10Lib::HwlGetEquationIndex 20357ec681f3Smrg* 20367ec681f3Smrg* @brief 20377ec681f3Smrg* Interface function stub of GetEquationIndex 20387ec681f3Smrg* 20397ec681f3Smrg* @return 20407ec681f3Smrg* ADDR_E_RETURNCODE 20417ec681f3Smrg************************************************************************************************************************ 20427ec681f3Smrg*/ 20437ec681f3SmrgUINT_32 Gfx10Lib::HwlGetEquationIndex( 20447ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 20457ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 20467ec681f3Smrg ) const 20477ec681f3Smrg{ 20487ec681f3Smrg UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX; 20497ec681f3Smrg 20507ec681f3Smrg if ((pIn->resourceType == ADDR_RSRC_TEX_2D) || 20517ec681f3Smrg (pIn->resourceType == ADDR_RSRC_TEX_3D)) 20527ec681f3Smrg { 20537ec681f3Smrg const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1; 20547ec681f3Smrg const UINT_32 
swModeIdx = static_cast<UINT_32>(pIn->swizzleMode); 20557ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 20567ec681f3Smrg 20577ec681f3Smrg equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2]; 20587ec681f3Smrg } 20597ec681f3Smrg 20607ec681f3Smrg if (pOut->pMipInfo != NULL) 20617ec681f3Smrg { 20627ec681f3Smrg for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 20637ec681f3Smrg { 20647ec681f3Smrg pOut->pMipInfo[i].equationIndex = equationIdx; 20657ec681f3Smrg } 20667ec681f3Smrg } 20677ec681f3Smrg 20687ec681f3Smrg return equationIdx; 20697ec681f3Smrg} 20707ec681f3Smrg 20717ec681f3Smrg/** 20727ec681f3Smrg************************************************************************************************************************ 20737ec681f3Smrg* Gfx10Lib::GetValidDisplaySwizzleModes 20747ec681f3Smrg* 20757ec681f3Smrg* @brief 20767ec681f3Smrg* Get valid swizzle modes mask for displayable surface 20777ec681f3Smrg* 20787ec681f3Smrg* @return 20797ec681f3Smrg* Valid swizzle modes mask for displayable surface 20807ec681f3Smrg************************************************************************************************************************ 20817ec681f3Smrg*/ 20827ec681f3SmrgUINT_32 Gfx10Lib::GetValidDisplaySwizzleModes( 20837ec681f3Smrg UINT_32 bpp 20847ec681f3Smrg ) const 20857ec681f3Smrg{ 20867ec681f3Smrg UINT_32 swModeMask = 0; 20877ec681f3Smrg 20887ec681f3Smrg if (bpp <= 64) 20897ec681f3Smrg { 20907ec681f3Smrg if (m_settings.isDcn20) 20917ec681f3Smrg { 20927ec681f3Smrg swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask; 20937ec681f3Smrg } 20947ec681f3Smrg else 20957ec681f3Smrg { 20967ec681f3Smrg swModeMask = (bpp == 64) ? 
Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask; 20977ec681f3Smrg } 20987ec681f3Smrg } 20997ec681f3Smrg 21007ec681f3Smrg return swModeMask; 21017ec681f3Smrg} 21027ec681f3Smrg 21037ec681f3Smrg/** 21047ec681f3Smrg************************************************************************************************************************ 21057ec681f3Smrg* Gfx10Lib::IsValidDisplaySwizzleMode 21067ec681f3Smrg* 21077ec681f3Smrg* @brief 21087ec681f3Smrg* Check if a swizzle mode is supported by display engine 21097ec681f3Smrg* 21107ec681f3Smrg* @return 21117ec681f3Smrg* TRUE is swizzle mode is supported by display engine 21127ec681f3Smrg************************************************************************************************************************ 21137ec681f3Smrg*/ 21147ec681f3SmrgBOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode( 21157ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure 21167ec681f3Smrg ) const 21177ec681f3Smrg{ 21187ec681f3Smrg ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); 21197ec681f3Smrg 21207ec681f3Smrg return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? 
TRUE : FALSE; 21217ec681f3Smrg} 21227ec681f3Smrg 21237ec681f3Smrg/** 21247ec681f3Smrg************************************************************************************************************************ 21257ec681f3Smrg* Gfx10Lib::GetMaxNumMipsInTail 21267ec681f3Smrg* 21277ec681f3Smrg* @brief 21287ec681f3Smrg* Return max number of mips in tails 21297ec681f3Smrg* 21307ec681f3Smrg* @return 21317ec681f3Smrg* Max number of mips in tails 21327ec681f3Smrg************************************************************************************************************************ 21337ec681f3Smrg*/ 21347ec681f3SmrgUINT_32 Gfx10Lib::GetMaxNumMipsInTail( 21357ec681f3Smrg UINT_32 blockSizeLog2, ///< block size log2 21367ec681f3Smrg BOOL_32 isThin ///< is thin or thick 21377ec681f3Smrg ) const 21387ec681f3Smrg{ 21397ec681f3Smrg UINT_32 effectiveLog2 = blockSizeLog2; 21407ec681f3Smrg 21417ec681f3Smrg if (isThin == FALSE) 21427ec681f3Smrg { 21437ec681f3Smrg effectiveLog2 -= (blockSizeLog2 - 8) / 3; 21447ec681f3Smrg } 21457ec681f3Smrg 21467ec681f3Smrg return (effectiveLog2 <= 11) ? 
(1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4); 21477ec681f3Smrg} 21487ec681f3Smrg 21497ec681f3Smrg/** 21507ec681f3Smrg************************************************************************************************************************ 21517ec681f3Smrg* Gfx10Lib::HwlComputePipeBankXor 21527ec681f3Smrg* 21537ec681f3Smrg* @brief 21547ec681f3Smrg* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address 21557ec681f3Smrg* 21567ec681f3Smrg* @return 21577ec681f3Smrg* PipeBankXor value 21587ec681f3Smrg************************************************************************************************************************ 21597ec681f3Smrg*/ 21607ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor( 21617ec681f3Smrg const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure 21627ec681f3Smrg ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure 21637ec681f3Smrg ) const 21647ec681f3Smrg{ 21657ec681f3Smrg if (IsNonPrtXor(pIn->swizzleMode)) 21667ec681f3Smrg { 21677ec681f3Smrg const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode)); 21687ec681f3Smrg 21697ec681f3Smrg // No pipe xor... 
21707ec681f3Smrg const UINT_32 pipeXor = 0; 21717ec681f3Smrg UINT_32 bankXor = 0; 21727ec681f3Smrg 21737ec681f3Smrg const UINT_32 XorPatternLen = 8; 21747ec681f3Smrg static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1}; 21757ec681f3Smrg static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1}; 21767ec681f3Smrg static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7}; 21777ec681f3Smrg static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14}; 21787ec681f3Smrg static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b}; 21797ec681f3Smrg 21807ec681f3Smrg switch (bankBits) 21817ec681f3Smrg { 21827ec681f3Smrg case 1: 21837ec681f3Smrg case 2: 21847ec681f3Smrg case 3: 21857ec681f3Smrg case 4: 21867ec681f3Smrg bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits); 21877ec681f3Smrg break; 21887ec681f3Smrg default: 21897ec681f3Smrg // valid bank bits should be 0~4 21907ec681f3Smrg ADDR_ASSERT_ALWAYS(); 21917ec681f3Smrg case 0: 21927ec681f3Smrg break; 21937ec681f3Smrg } 21947ec681f3Smrg 21957ec681f3Smrg pOut->pipeBankXor = bankXor | pipeXor; 21967ec681f3Smrg } 21977ec681f3Smrg else 21987ec681f3Smrg { 21997ec681f3Smrg pOut->pipeBankXor = 0; 22007ec681f3Smrg } 22017ec681f3Smrg 22027ec681f3Smrg return ADDR_OK; 22037ec681f3Smrg} 22047ec681f3Smrg 22057ec681f3Smrg/** 22067ec681f3Smrg************************************************************************************************************************ 22077ec681f3Smrg* Gfx10Lib::HwlComputeSlicePipeBankXor 22087ec681f3Smrg* 22097ec681f3Smrg* @brief 22107ec681f3Smrg* Generate slice PipeBankXor value based on base PipeBankXor value and slice id 22117ec681f3Smrg* 22127ec681f3Smrg* @return 22137ec681f3Smrg* PipeBankXor value 22147ec681f3Smrg************************************************************************************************************************ 
22157ec681f3Smrg*/ 22167ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor( 22177ec681f3Smrg const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure 22187ec681f3Smrg ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure 22197ec681f3Smrg ) const 22207ec681f3Smrg{ 22217ec681f3Smrg if (IsNonPrtXor(pIn->swizzleMode)) 22227ec681f3Smrg { 22237ec681f3Smrg const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode); 22247ec681f3Smrg const UINT_32 pipeBits = GetPipeXorBits(blockBits); 22257ec681f3Smrg const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); 22267ec681f3Smrg 22277ec681f3Smrg pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor; 22287ec681f3Smrg 22297ec681f3Smrg if (pIn->bpe != 0) 22307ec681f3Smrg { 22317ec681f3Smrg const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode, 22327ec681f3Smrg pIn->resourceType, 22337ec681f3Smrg Log2(pIn->bpe >> 3), 22347ec681f3Smrg 1); 22357ec681f3Smrg 22367ec681f3Smrg if (pPatInfo != NULL) 22377ec681f3Smrg { 22387ec681f3Smrg ADDR_BIT_SETTING fullSwizzlePattern[20]; 22397ec681f3Smrg GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); 22407ec681f3Smrg 22417ec681f3Smrg const UINT_32 pipeBankXorOffset = 22427ec681f3Smrg ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern), 22437ec681f3Smrg blockBits, 22447ec681f3Smrg 0, 22457ec681f3Smrg 0, 22467ec681f3Smrg pIn->slice, 22477ec681f3Smrg 0); 22487ec681f3Smrg 22497ec681f3Smrg const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2; 22507ec681f3Smrg 22517ec681f3Smrg // Should have no bit set under pipe interleave 22527ec681f3Smrg ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset); 22537ec681f3Smrg 22547ec681f3Smrg // This assertion firing means old approach doesn't calculate a correct sliceXor value... 
22557ec681f3Smrg ADDR_ASSERT(pipeBankXor == pipeXor); 22567ec681f3Smrg 22577ec681f3Smrg pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor; 22587ec681f3Smrg } 22597ec681f3Smrg } 22607ec681f3Smrg } 22617ec681f3Smrg else 22627ec681f3Smrg { 22637ec681f3Smrg pOut->pipeBankXor = 0; 22647ec681f3Smrg } 22657ec681f3Smrg 22667ec681f3Smrg return ADDR_OK; 22677ec681f3Smrg} 22687ec681f3Smrg 22697ec681f3Smrg/** 22707ec681f3Smrg************************************************************************************************************************ 22717ec681f3Smrg* Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern 22727ec681f3Smrg* 22737ec681f3Smrg* @brief 22747ec681f3Smrg* Compute sub resource offset to support swizzle pattern 22757ec681f3Smrg* 22767ec681f3Smrg* @return 22777ec681f3Smrg* Offset 22787ec681f3Smrg************************************************************************************************************************ 22797ec681f3Smrg*/ 22807ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern( 22817ec681f3Smrg const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure 22827ec681f3Smrg ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure 22837ec681f3Smrg ) const 22847ec681f3Smrg{ 22857ec681f3Smrg ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); 22867ec681f3Smrg 22877ec681f3Smrg pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset; 22887ec681f3Smrg 22897ec681f3Smrg return ADDR_OK; 22907ec681f3Smrg} 22917ec681f3Smrg 22927ec681f3Smrg/** 22937ec681f3Smrg************************************************************************************************************************ 22947ec681f3Smrg* Gfx10Lib::HwlComputeNonBlockCompressedView 22957ec681f3Smrg* 22967ec681f3Smrg* @brief 22977ec681f3Smrg* Compute non-block-compressed view for a given mipmap level/slice. 
22987ec681f3Smrg* 22997ec681f3Smrg* @return 23007ec681f3Smrg* ADDR_E_RETURNCODE 23017ec681f3Smrg************************************************************************************************************************ 23027ec681f3Smrg*/ 23037ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( 23047ec681f3Smrg const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure 23057ec681f3Smrg ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure 23067ec681f3Smrg ) const 23077ec681f3Smrg{ 23087ec681f3Smrg ADDR_E_RETURNCODE returnCode = ADDR_OK; 23097ec681f3Smrg 23107ec681f3Smrg if (pIn->resourceType != ADDR_RSRC_TEX_2D) 23117ec681f3Smrg { 23127ec681f3Smrg // Only 2D resource can have a NonBC view... 23137ec681f3Smrg returnCode = ADDR_INVALIDPARAMS; 23147ec681f3Smrg } 23157ec681f3Smrg else if ((pIn->format != ADDR_FMT_ASTC_8x8) && 23167ec681f3Smrg ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) 23177ec681f3Smrg { 23187ec681f3Smrg // Only support BC1~BC7 or ASTC_8x8 for now... 
23197ec681f3Smrg returnCode = ADDR_NOTSUPPORTED; 23207ec681f3Smrg } 23217ec681f3Smrg else 23227ec681f3Smrg { 23237ec681f3Smrg UINT_32 bcWidth, bcHeight; 23247ec681f3Smrg UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight); 23257ec681f3Smrg 23267ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {}; 23277ec681f3Smrg infoIn.flags = pIn->flags; 23287ec681f3Smrg infoIn.swizzleMode = pIn->swizzleMode; 23297ec681f3Smrg infoIn.resourceType = pIn->resourceType; 23307ec681f3Smrg infoIn.bpp = bpp; 23317ec681f3Smrg infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth; 23327ec681f3Smrg infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight; 23337ec681f3Smrg infoIn.numSlices = pIn->numSlices; 23347ec681f3Smrg infoIn.numMipLevels = pIn->numMipLevels; 23357ec681f3Smrg infoIn.numSamples = 1; 23367ec681f3Smrg infoIn.numFrags = 1; 23377ec681f3Smrg 23387ec681f3Smrg ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {}; 23397ec681f3Smrg 23407ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; 23417ec681f3Smrg infoOut.pMipInfo = mipInfo; 23427ec681f3Smrg 23437ec681f3Smrg const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? 
TRUE : FALSE; 23447ec681f3Smrg 23457ec681f3Smrg if (tiled) 23467ec681f3Smrg { 23477ec681f3Smrg returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut); 23487ec681f3Smrg } 23497ec681f3Smrg else 23507ec681f3Smrg { 23517ec681f3Smrg returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut); 23527ec681f3Smrg } 23537ec681f3Smrg 23547ec681f3Smrg if (returnCode == ADDR_OK) 23557ec681f3Smrg { 23567ec681f3Smrg ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {}; 23577ec681f3Smrg subOffIn.swizzleMode = infoIn.swizzleMode; 23587ec681f3Smrg subOffIn.resourceType = infoIn.resourceType; 23597ec681f3Smrg subOffIn.slice = pIn->slice; 23607ec681f3Smrg subOffIn.sliceSize = infoOut.sliceSize; 23617ec681f3Smrg subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset; 23627ec681f3Smrg subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset; 23637ec681f3Smrg 23647ec681f3Smrg ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {}; 23657ec681f3Smrg 23667ec681f3Smrg // For any mipmap level, move nonBc view base address by offset 23677ec681f3Smrg HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut); 23687ec681f3Smrg pOut->offset = subOffOut.offset; 23697ec681f3Smrg 23707ec681f3Smrg ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {}; 23717ec681f3Smrg slicePbXorIn.bpe = infoIn.bpp; 23727ec681f3Smrg slicePbXorIn.swizzleMode = infoIn.swizzleMode; 23737ec681f3Smrg slicePbXorIn.resourceType = infoIn.resourceType; 23747ec681f3Smrg slicePbXorIn.basePipeBankXor = pIn->pipeBankXor; 23757ec681f3Smrg slicePbXorIn.slice = pIn->slice; 23767ec681f3Smrg 23777ec681f3Smrg ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {}; 23787ec681f3Smrg 23797ec681f3Smrg // For any mipmap level, nonBc view should use computed pbXor 23807ec681f3Smrg HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut); 23817ec681f3Smrg pOut->pipeBankXor = slicePbXorOut.pipeBankXor; 23827ec681f3Smrg 23837ec681f3Smrg const BOOL_32 inTail = tiled && (pIn->mipId 
>= infoOut.firstMipIdInTail) ? TRUE : FALSE; 23847ec681f3Smrg const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth; 23857ec681f3Smrg const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight; 23867ec681f3Smrg 23877ec681f3Smrg if (inTail) 23887ec681f3Smrg { 23897ec681f3Smrg // For mipmap level that is in mip tail block, hack a lot of things... 23907ec681f3Smrg // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels 23917ec681f3Smrg // are fit in tail block: 23927ec681f3Smrg 23937ec681f3Smrg // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain) 23947ec681f3Smrg pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail; 23957ec681f3Smrg 23967ec681f3Smrg // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!) 23977ec681f3Smrg pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u); 23987ec681f3Smrg 23997ec681f3Smrg // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold 24007ec681f3Smrg pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2); 24017ec681f3Smrg 24027ec681f3Smrg // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold 24037ec681f3Smrg pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight); 24047ec681f3Smrg } 24057ec681f3Smrg // This check should cover at least mipId == 0 24067ec681f3Smrg else if (requestMipWidth << pIn->mipId == infoIn.width) 24077ec681f3Smrg { 24087ec681f3Smrg // For mipmap level [N] that is not in mip tail block and downgraded without losing element: 24097ec681f3Smrg // - only one mipmap level and mipId = 0 24107ec681f3Smrg pOut->mipId = 0; 24117ec681f3Smrg pOut->numMipLevels = 1; 24127ec681f3Smrg 24137ec681f3Smrg // (mip0) width = requestMipWidth 24147ec681f3Smrg 
pOut->unalignedWidth = requestMipWidth; 24157ec681f3Smrg 24167ec681f3Smrg // (mip0) height = requestMipHeight 24177ec681f3Smrg pOut->unalignedHeight = requestMipHeight; 24187ec681f3Smrg } 24197ec681f3Smrg else 24207ec681f3Smrg { 24217ec681f3Smrg // For mipmap level [N] that is not in mip tail block and downgraded with element losing, 24227ec681f3Smrg // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed, 24237ec681f3Smrg // because single mip view may have different pitch value than original (multiple) mip view... 24247ec681f3Smrg // A simple case would be: 24257ec681f3Smrg // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40] 24267ec681f3Smrg // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view 24277ec681f3Smrg // mip0 width = 0x101/mip1 width = 0x80 24287ec681f3Smrg // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in 24297ec681f3Smrg // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes. 24307ec681f3Smrg 24317ec681f3Smrg // - 2 levels and mipId = 1 24327ec681f3Smrg pOut->mipId = 1; 24337ec681f3Smrg pOut->numMipLevels = 2; 24347ec681f3Smrg 24357ec681f3Smrg const UINT_32 upperMipWidth = 24367ec681f3Smrg PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth; 24377ec681f3Smrg const UINT_32 upperMipHeight = 24387ec681f3Smrg PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight; 24397ec681f3Smrg 24407ec681f3Smrg const BOOL_32 needToAvoidInTail = 24417ec681f3Smrg tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ? 
24427ec681f3Smrg TRUE : FALSE; 24437ec681f3Smrg 24447ec681f3Smrg const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth); 24457ec681f3Smrg const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight); 24467ec681f3Smrg 24477ec681f3Smrg const BOOL_32 needExtraWidth = 24487ec681f3Smrg ((upperMipWidth < requestMipWidth * 2) || 24497ec681f3Smrg ((upperMipWidth == requestMipWidth * 2) && 24507ec681f3Smrg ((needToAvoidInTail == TRUE) || 24517ec681f3Smrg (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE; 24527ec681f3Smrg 24537ec681f3Smrg const BOOL_32 needExtraHeight = 24547ec681f3Smrg ((upperMipHeight < requestMipHeight * 2) || 24557ec681f3Smrg ((upperMipHeight == requestMipHeight * 2) && 24567ec681f3Smrg ((needToAvoidInTail == TRUE) || 24577ec681f3Smrg (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE; 24587ec681f3Smrg 24597ec681f3Smrg // (mip0) width = requestLastMipLevelWidth 24607ec681f3Smrg pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0); 24617ec681f3Smrg 24627ec681f3Smrg // (mip0) height = requestLastMipLevelHeight 24637ec681f3Smrg pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 
1: 0); 24647ec681f3Smrg } 24657ec681f3Smrg 24667ec681f3Smrg // Assert the downgrading from this mip[0] width would still generate correct mip[N] width 24677ec681f3Smrg ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth); 24687ec681f3Smrg // Assert the downgrading from this mip[0] height would still generate correct mip[N] height 24697ec681f3Smrg ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight); 24707ec681f3Smrg } 24717ec681f3Smrg } 24727ec681f3Smrg 24737ec681f3Smrg return returnCode; 24747ec681f3Smrg} 24757ec681f3Smrg 24767ec681f3Smrg/** 24777ec681f3Smrg************************************************************************************************************************ 24787ec681f3Smrg* Gfx10Lib::ValidateNonSwModeParams 24797ec681f3Smrg* 24807ec681f3Smrg* @brief 24817ec681f3Smrg* Validate compute surface info params except swizzle mode 24827ec681f3Smrg* 24837ec681f3Smrg* @return 24847ec681f3Smrg* TRUE if parameters are valid, FALSE otherwise 24857ec681f3Smrg************************************************************************************************************************ 24867ec681f3Smrg*/ 24877ec681f3SmrgBOOL_32 Gfx10Lib::ValidateNonSwModeParams( 24887ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const 24897ec681f3Smrg{ 24907ec681f3Smrg BOOL_32 valid = TRUE; 24917ec681f3Smrg 24927ec681f3Smrg if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) 24937ec681f3Smrg { 24947ec681f3Smrg ADDR_ASSERT_ALWAYS(); 24957ec681f3Smrg valid = FALSE; 24967ec681f3Smrg } 24977ec681f3Smrg 24987ec681f3Smrg if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE) 24997ec681f3Smrg { 25007ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25017ec681f3Smrg valid = FALSE; 25027ec681f3Smrg } 25037ec681f3Smrg 25047ec681f3Smrg const ADDR2_SURFACE_FLAGS flags = pIn->flags; 25057ec681f3Smrg const AddrResourceType rsrcType = pIn->resourceType; 25067ec681f3Smrg const BOOL_32 mipmap = 
(pIn->numMipLevels > 1); 25077ec681f3Smrg const BOOL_32 msaa = (pIn->numFrags > 1); 25087ec681f3Smrg const BOOL_32 display = flags.display; 25097ec681f3Smrg const BOOL_32 tex3d = IsTex3d(rsrcType); 25107ec681f3Smrg const BOOL_32 tex2d = IsTex2d(rsrcType); 25117ec681f3Smrg const BOOL_32 tex1d = IsTex1d(rsrcType); 25127ec681f3Smrg const BOOL_32 stereo = flags.qbStereo; 25137ec681f3Smrg 25147ec681f3Smrg 25157ec681f3Smrg // Resource type check 25167ec681f3Smrg if (tex1d) 25177ec681f3Smrg { 25187ec681f3Smrg if (msaa || display || stereo) 25197ec681f3Smrg { 25207ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25217ec681f3Smrg valid = FALSE; 25227ec681f3Smrg } 25237ec681f3Smrg } 25247ec681f3Smrg else if (tex2d) 25257ec681f3Smrg { 25267ec681f3Smrg if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap)) 25277ec681f3Smrg { 25287ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25297ec681f3Smrg valid = FALSE; 25307ec681f3Smrg } 25317ec681f3Smrg } 25327ec681f3Smrg else if (tex3d) 25337ec681f3Smrg { 25347ec681f3Smrg if (msaa || display || stereo) 25357ec681f3Smrg { 25367ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25377ec681f3Smrg valid = FALSE; 25387ec681f3Smrg } 25397ec681f3Smrg } 25407ec681f3Smrg else 25417ec681f3Smrg { 25427ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25437ec681f3Smrg valid = FALSE; 25447ec681f3Smrg } 25457ec681f3Smrg 25467ec681f3Smrg return valid; 25477ec681f3Smrg} 25487ec681f3Smrg 25497ec681f3Smrg/** 25507ec681f3Smrg************************************************************************************************************************ 25517ec681f3Smrg* Gfx10Lib::ValidateSwModeParams 25527ec681f3Smrg* 25537ec681f3Smrg* @brief 25547ec681f3Smrg* Validate compute surface info related to swizzle mode 25557ec681f3Smrg* 25567ec681f3Smrg* @return 25577ec681f3Smrg* TRUE if parameters are valid, FALSE otherwise 25587ec681f3Smrg************************************************************************************************************************ 25597ec681f3Smrg*/ 25607ec681f3SmrgBOOL_32 
Gfx10Lib::ValidateSwModeParams( 25617ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const 25627ec681f3Smrg{ 25637ec681f3Smrg BOOL_32 valid = TRUE; 25647ec681f3Smrg 25657ec681f3Smrg if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE) 25667ec681f3Smrg { 25677ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25687ec681f3Smrg valid = FALSE; 25697ec681f3Smrg } 25707ec681f3Smrg else if (IsValidSwMode(pIn->swizzleMode) == FALSE) 25717ec681f3Smrg { 25727ec681f3Smrg { 25737ec681f3Smrg ADDR_ASSERT_ALWAYS(); 25747ec681f3Smrg valid = FALSE; 25757ec681f3Smrg } 25767ec681f3Smrg } 25777ec681f3Smrg 25787ec681f3Smrg const ADDR2_SURFACE_FLAGS flags = pIn->flags; 25797ec681f3Smrg const AddrResourceType rsrcType = pIn->resourceType; 25807ec681f3Smrg const AddrSwizzleMode swizzle = pIn->swizzleMode; 25817ec681f3Smrg const BOOL_32 msaa = (pIn->numFrags > 1); 25827ec681f3Smrg const BOOL_32 zbuffer = flags.depth || flags.stencil; 25837ec681f3Smrg const BOOL_32 color = flags.color; 25847ec681f3Smrg const BOOL_32 display = flags.display; 25857ec681f3Smrg const BOOL_32 tex3d = IsTex3d(rsrcType); 25867ec681f3Smrg const BOOL_32 tex2d = IsTex2d(rsrcType); 25877ec681f3Smrg const BOOL_32 tex1d = IsTex1d(rsrcType); 25887ec681f3Smrg const BOOL_32 thin3d = flags.view3dAs2dArray; 25897ec681f3Smrg const BOOL_32 linear = IsLinear(swizzle); 25907ec681f3Smrg const BOOL_32 blk256B = IsBlock256b(swizzle); 25917ec681f3Smrg const BOOL_32 blkVar = IsBlockVariable(swizzle); 25927ec681f3Smrg const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); 25937ec681f3Smrg const BOOL_32 prt = flags.prt; 25947ec681f3Smrg const BOOL_32 fmask = flags.fmask; 25957ec681f3Smrg 25967ec681f3Smrg // Misc check 25977ec681f3Smrg if ((pIn->numFrags > 1) && 25987ec681f3Smrg (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags))) 25997ec681f3Smrg { 26007ec681f3Smrg // MSAA surface must have blk_bytes/pipe_interleave >= num_samples 26017ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26027ec681f3Smrg valid = FALSE; 26037ec681f3Smrg } 26047ec681f3Smrg 
26057ec681f3Smrg if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE)) 26067ec681f3Smrg { 26077ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26087ec681f3Smrg valid = FALSE; 26097ec681f3Smrg } 26107ec681f3Smrg 26117ec681f3Smrg if ((pIn->bpp == 96) && (linear == FALSE)) 26127ec681f3Smrg { 26137ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26147ec681f3Smrg valid = FALSE; 26157ec681f3Smrg } 26167ec681f3Smrg 26177ec681f3Smrg const UINT_32 swizzleMask = 1 << swizzle; 26187ec681f3Smrg 26197ec681f3Smrg // Resource type check 26207ec681f3Smrg if (tex1d) 26217ec681f3Smrg { 26227ec681f3Smrg if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0) 26237ec681f3Smrg { 26247ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26257ec681f3Smrg valid = FALSE; 26267ec681f3Smrg } 26277ec681f3Smrg } 26287ec681f3Smrg else if (tex2d) 26297ec681f3Smrg { 26307ec681f3Smrg if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) 26317ec681f3Smrg { 26327ec681f3Smrg { 26337ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26347ec681f3Smrg valid = FALSE; 26357ec681f3Smrg } 26367ec681f3Smrg } 26377ec681f3Smrg else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) || 26387ec681f3Smrg (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0))) 26397ec681f3Smrg { 26407ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26417ec681f3Smrg valid = FALSE; 26427ec681f3Smrg } 26437ec681f3Smrg 26447ec681f3Smrg } 26457ec681f3Smrg else if (tex3d) 26467ec681f3Smrg { 26477ec681f3Smrg if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) || 26487ec681f3Smrg (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) || 26497ec681f3Smrg (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0))) 26507ec681f3Smrg { 26517ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26527ec681f3Smrg valid = FALSE; 26537ec681f3Smrg } 26547ec681f3Smrg } 26557ec681f3Smrg 26567ec681f3Smrg // Swizzle type check 26577ec681f3Smrg if (linear) 26587ec681f3Smrg { 26597ec681f3Smrg if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0)) 26607ec681f3Smrg { 26617ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26627ec681f3Smrg valid = FALSE; 
26637ec681f3Smrg } 26647ec681f3Smrg } 26657ec681f3Smrg else if (IsZOrderSwizzle(swizzle)) 26667ec681f3Smrg { 26677ec681f3Smrg if ((pIn->bpp > 64) || 26687ec681f3Smrg (msaa && (color || (pIn->bpp > 32))) || 26697ec681f3Smrg ElemLib::IsBlockCompressed(pIn->format) || 26707ec681f3Smrg ElemLib::IsMacroPixelPacked(pIn->format)) 26717ec681f3Smrg { 26727ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26737ec681f3Smrg valid = FALSE; 26747ec681f3Smrg } 26757ec681f3Smrg } 26767ec681f3Smrg else if (IsStandardSwizzle(rsrcType, swizzle)) 26777ec681f3Smrg { 26787ec681f3Smrg if (zbuffer || msaa) 26797ec681f3Smrg { 26807ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26817ec681f3Smrg valid = FALSE; 26827ec681f3Smrg } 26837ec681f3Smrg } 26847ec681f3Smrg else if (IsDisplaySwizzle(rsrcType, swizzle)) 26857ec681f3Smrg { 26867ec681f3Smrg if (zbuffer || msaa) 26877ec681f3Smrg { 26887ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26897ec681f3Smrg valid = FALSE; 26907ec681f3Smrg } 26917ec681f3Smrg } 26927ec681f3Smrg else if (IsRtOptSwizzle(swizzle)) 26937ec681f3Smrg { 26947ec681f3Smrg if (zbuffer) 26957ec681f3Smrg { 26967ec681f3Smrg ADDR_ASSERT_ALWAYS(); 26977ec681f3Smrg valid = FALSE; 26987ec681f3Smrg } 26997ec681f3Smrg } 27007ec681f3Smrg else 27017ec681f3Smrg { 27027ec681f3Smrg { 27037ec681f3Smrg ADDR_ASSERT_ALWAYS(); 27047ec681f3Smrg valid = FALSE; 27057ec681f3Smrg } 27067ec681f3Smrg } 27077ec681f3Smrg 27087ec681f3Smrg // Block type check 27097ec681f3Smrg if (blk256B) 27107ec681f3Smrg { 27117ec681f3Smrg if (zbuffer || tex3d || msaa) 27127ec681f3Smrg { 27137ec681f3Smrg ADDR_ASSERT_ALWAYS(); 27147ec681f3Smrg valid = FALSE; 27157ec681f3Smrg } 27167ec681f3Smrg } 27177ec681f3Smrg else if (blkVar) 27187ec681f3Smrg { 27197ec681f3Smrg if (m_blockVarSizeLog2 == 0) 27207ec681f3Smrg { 27217ec681f3Smrg ADDR_ASSERT_ALWAYS(); 27227ec681f3Smrg valid = FALSE; 27237ec681f3Smrg } 27247ec681f3Smrg } 27257ec681f3Smrg 27267ec681f3Smrg return valid; 27277ec681f3Smrg} 27287ec681f3Smrg 27297ec681f3Smrg/** 
27307ec681f3Smrg************************************************************************************************************************ 27317ec681f3Smrg* Gfx10Lib::HwlComputeSurfaceInfoSanityCheck 27327ec681f3Smrg* 27337ec681f3Smrg* @brief 27347ec681f3Smrg* Compute surface info sanity check 27357ec681f3Smrg* 27367ec681f3Smrg* @return 27377ec681f3Smrg* Offset 27387ec681f3Smrg************************************************************************************************************************ 27397ec681f3Smrg*/ 27407ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck( 27417ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure 27427ec681f3Smrg ) const 27437ec681f3Smrg{ 27447ec681f3Smrg return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS; 27457ec681f3Smrg} 27467ec681f3Smrg 27477ec681f3Smrg/** 27487ec681f3Smrg************************************************************************************************************************ 27497ec681f3Smrg* Gfx10Lib::HwlGetPreferredSurfaceSetting 27507ec681f3Smrg* 27517ec681f3Smrg* @brief 27527ec681f3Smrg* Internal function to get suggested surface information for cliet to use 27537ec681f3Smrg* 27547ec681f3Smrg* @return 27557ec681f3Smrg* ADDR_E_RETURNCODE 27567ec681f3Smrg************************************************************************************************************************ 27577ec681f3Smrg*/ 27587ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( 27597ec681f3Smrg const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure 27607ec681f3Smrg ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure 27617ec681f3Smrg ) const 27627ec681f3Smrg{ 27637ec681f3Smrg ADDR_E_RETURNCODE returnCode = ADDR_OK; 27647ec681f3Smrg 27657ec681f3Smrg if (pIn->flags.fmask) 27667ec681f3Smrg { 27677ec681f3Smrg const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? 
TRUE : FALSE; 27687ec681f3Smrg const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0)); 27697ec681f3Smrg 27707ec681f3Smrg if (forbid64KbBlockType && forbidVarBlockType) 27717ec681f3Smrg { 27727ec681f3Smrg // Invalid combination... 27737ec681f3Smrg ADDR_ASSERT_ALWAYS(); 27747ec681f3Smrg returnCode = ADDR_INVALIDPARAMS; 27757ec681f3Smrg } 27767ec681f3Smrg else 27777ec681f3Smrg { 27787ec681f3Smrg pOut->resourceType = ADDR_RSRC_TEX_2D; 27797ec681f3Smrg pOut->validBlockSet.value = 0; 27807ec681f3Smrg pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1; 27817ec681f3Smrg pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1; 27827ec681f3Smrg pOut->validSwModeSet.value = 0; 27837ec681f3Smrg pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1; 27847ec681f3Smrg pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1; 27857ec681f3Smrg pOut->canXor = TRUE; 27867ec681f3Smrg pOut->validSwTypeSet.value = AddrSwSetZ; 27877ec681f3Smrg pOut->clientPreferredSwSet = pOut->validSwTypeSet; 27887ec681f3Smrg 27897ec681f3Smrg BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE); 27907ec681f3Smrg 27917ec681f3Smrg if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE)) 27927ec681f3Smrg { 27937ec681f3Smrg const UINT_8 maxFmaskSwizzleModeType = 2; 27947ec681f3Smrg const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); 27957ec681f3Smrg const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 
2 : 1); 27967ec681f3Smrg const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); 27977ec681f3Smrg const UINT_32 numSlices = Max(pIn->numSlices, 1u); 27987ec681f3Smrg const UINT_32 width = Max(pIn->width, 1u); 27997ec681f3Smrg const UINT_32 height = Max(pIn->height, 1u); 28007ec681f3Smrg const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u); 28017ec681f3Smrg 28027ec681f3Smrg AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X}; 28037ec681f3Smrg Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}}; 28047ec681f3Smrg Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}}; 28057ec681f3Smrg UINT_64 padSize[maxFmaskSwizzleModeType] = {}; 28067ec681f3Smrg 28077ec681f3Smrg for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++) 28087ec681f3Smrg { 28097ec681f3Smrg ComputeBlockDimensionForSurf(&blkDim[i].w, 28107ec681f3Smrg &blkDim[i].h, 28117ec681f3Smrg &blkDim[i].d, 28127ec681f3Smrg fmaskBpp, 28137ec681f3Smrg 1, 28147ec681f3Smrg pOut->resourceType, 28157ec681f3Smrg swMode[i]); 28167ec681f3Smrg 28177ec681f3Smrg padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); 28187ec681f3Smrg padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); 28197ec681f3Smrg } 28207ec681f3Smrg 28217ec681f3Smrg if (BlockTypeWithinMemoryBudget(padSize[0], 28227ec681f3Smrg padSize[1], 28237ec681f3Smrg ratioLow, 28247ec681f3Smrg ratioHi, 28257ec681f3Smrg pIn->memoryBudget, 28267ec681f3Smrg GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))) 28277ec681f3Smrg { 28287ec681f3Smrg use64KbBlockType = FALSE; 28297ec681f3Smrg } 28307ec681f3Smrg } 28317ec681f3Smrg else if (forbidVarBlockType) 28327ec681f3Smrg { 28337ec681f3Smrg use64KbBlockType = TRUE; 28347ec681f3Smrg } 28357ec681f3Smrg 28367ec681f3Smrg if (use64KbBlockType) 28377ec681f3Smrg { 28387ec681f3Smrg pOut->swizzleMode = ADDR_SW_64KB_Z_X; 28397ec681f3Smrg } 28407ec681f3Smrg else 28417ec681f3Smrg { 28427ec681f3Smrg pOut->swizzleMode = 
ADDR_SW_VAR_Z_X; 28437ec681f3Smrg } 28447ec681f3Smrg } 28457ec681f3Smrg } 28467ec681f3Smrg else 28477ec681f3Smrg { 28487ec681f3Smrg UINT_32 bpp = pIn->bpp; 28497ec681f3Smrg UINT_32 width = Max(pIn->width, 1u); 28507ec681f3Smrg UINT_32 height = Max(pIn->height, 1u); 28517ec681f3Smrg 28527ec681f3Smrg // Set format to INVALID will skip this conversion 28537ec681f3Smrg if (pIn->format != ADDR_FMT_INVALID) 28547ec681f3Smrg { 28557ec681f3Smrg ElemMode elemMode = ADDR_UNCOMPRESSED; 28567ec681f3Smrg UINT_32 expandX, expandY; 28577ec681f3Smrg 28587ec681f3Smrg // Get compression/expansion factors and element mode which indicates compression/expansion 28597ec681f3Smrg bpp = GetElemLib()->GetBitsPerPixel(pIn->format, 28607ec681f3Smrg &elemMode, 28617ec681f3Smrg &expandX, 28627ec681f3Smrg &expandY); 28637ec681f3Smrg 28647ec681f3Smrg UINT_32 basePitch = 0; 28657ec681f3Smrg GetElemLib()->AdjustSurfaceInfo(elemMode, 28667ec681f3Smrg expandX, 28677ec681f3Smrg expandY, 28687ec681f3Smrg &bpp, 28697ec681f3Smrg &basePitch, 28707ec681f3Smrg &width, 28717ec681f3Smrg &height); 28727ec681f3Smrg } 28737ec681f3Smrg 28747ec681f3Smrg const UINT_32 numSlices = Max(pIn->numSlices, 1u); 28757ec681f3Smrg const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); 28767ec681f3Smrg const UINT_32 numSamples = Max(pIn->numSamples, 1u); 28777ec681f3Smrg const UINT_32 numFrags = (pIn->numFrags == 0) ? 
numSamples : pIn->numFrags; 28787ec681f3Smrg const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1); 28797ec681f3Smrg 28807ec681f3Smrg // Pre sanity check on non swizzle mode parameters 28817ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; 28827ec681f3Smrg localIn.flags = pIn->flags; 28837ec681f3Smrg localIn.resourceType = pIn->resourceType; 28847ec681f3Smrg localIn.format = pIn->format; 28857ec681f3Smrg localIn.bpp = bpp; 28867ec681f3Smrg localIn.width = width; 28877ec681f3Smrg localIn.height = height; 28887ec681f3Smrg localIn.numSlices = numSlices; 28897ec681f3Smrg localIn.numMipLevels = numMipLevels; 28907ec681f3Smrg localIn.numSamples = numSamples; 28917ec681f3Smrg localIn.numFrags = numFrags; 28927ec681f3Smrg 28937ec681f3Smrg if (ValidateNonSwModeParams(&localIn)) 28947ec681f3Smrg { 28957ec681f3Smrg // Forbid swizzle mode(s) by client setting 28967ec681f3Smrg ADDR2_SWMODE_SET allowedSwModeSet = {}; 28977ec681f3Smrg allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask; 28987ec681f3Smrg allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask; 28997ec681f3Smrg allowedSwModeSet.value |= 29007ec681f3Smrg pIn->forbiddenBlock.macroThin4KB ? 0 : 29017ec681f3Smrg ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask); 29027ec681f3Smrg allowedSwModeSet.value |= 29037ec681f3Smrg pIn->forbiddenBlock.macroThick4KB ? 0 : 29047ec681f3Smrg ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0); 29057ec681f3Smrg allowedSwModeSet.value |= 29067ec681f3Smrg pIn->forbiddenBlock.macroThin64KB ? 0 : 29077ec681f3Smrg ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask); 29087ec681f3Smrg allowedSwModeSet.value |= 29097ec681f3Smrg pIn->forbiddenBlock.macroThick64KB ? 0 : 29107ec681f3Smrg ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 
Gfx10Rsrc3dThick64KBSwModeMask : 0); 29117ec681f3Smrg allowedSwModeSet.value |= 29127ec681f3Smrg pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0); 29137ec681f3Smrg 29147ec681f3Smrg if (pIn->preferredSwSet.value != 0) 29157ec681f3Smrg { 29167ec681f3Smrg allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask; 29177ec681f3Smrg allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask; 29187ec681f3Smrg allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask; 29197ec681f3Smrg allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask; 29207ec681f3Smrg } 29217ec681f3Smrg 29227ec681f3Smrg if (pIn->noXor) 29237ec681f3Smrg { 29247ec681f3Smrg allowedSwModeSet.value &= ~Gfx10XorSwModeMask; 29257ec681f3Smrg } 29267ec681f3Smrg 29277ec681f3Smrg if (pIn->maxAlign > 0) 29287ec681f3Smrg { 29297ec681f3Smrg if (pIn->maxAlign < (1u << m_blockVarSizeLog2)) 29307ec681f3Smrg { 29317ec681f3Smrg allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask; 29327ec681f3Smrg } 29337ec681f3Smrg 29347ec681f3Smrg if (pIn->maxAlign < Size64K) 29357ec681f3Smrg { 29367ec681f3Smrg allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask; 29377ec681f3Smrg } 29387ec681f3Smrg 29397ec681f3Smrg if (pIn->maxAlign < Size4K) 29407ec681f3Smrg { 29417ec681f3Smrg allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask; 29427ec681f3Smrg } 29437ec681f3Smrg 29447ec681f3Smrg if (pIn->maxAlign < Size256) 29457ec681f3Smrg { 29467ec681f3Smrg allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask; 29477ec681f3Smrg } 29487ec681f3Smrg } 29497ec681f3Smrg 29507ec681f3Smrg // Filter out invalid swizzle mode(s) by image attributes and HW restrictions 29517ec681f3Smrg switch (pIn->resourceType) 29527ec681f3Smrg { 29537ec681f3Smrg case ADDR_RSRC_TEX_1D: 29547ec681f3Smrg allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask; 29557ec681f3Smrg break; 29567ec681f3Smrg 29577ec681f3Smrg case ADDR_RSRC_TEX_2D: 29587ec681f3Smrg 
allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask; 29597ec681f3Smrg 29607ec681f3Smrg break; 29617ec681f3Smrg 29627ec681f3Smrg case ADDR_RSRC_TEX_3D: 29637ec681f3Smrg allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask; 29647ec681f3Smrg 29657ec681f3Smrg if (pIn->flags.view3dAs2dArray) 29667ec681f3Smrg { 29677ec681f3Smrg allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask; 29687ec681f3Smrg } 29697ec681f3Smrg break; 29707ec681f3Smrg 29717ec681f3Smrg default: 29727ec681f3Smrg ADDR_ASSERT_ALWAYS(); 29737ec681f3Smrg allowedSwModeSet.value = 0; 29747ec681f3Smrg break; 29757ec681f3Smrg } 29767ec681f3Smrg 29777ec681f3Smrg if (ElemLib::IsBlockCompressed(pIn->format) || 29787ec681f3Smrg ElemLib::IsMacroPixelPacked(pIn->format) || 29797ec681f3Smrg (bpp > 64) || 29807ec681f3Smrg (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered))) 29817ec681f3Smrg { 29827ec681f3Smrg allowedSwModeSet.value &= ~Gfx10ZSwModeMask; 29837ec681f3Smrg } 29847ec681f3Smrg 29857ec681f3Smrg if (pIn->format == ADDR_FMT_32_32_32) 29867ec681f3Smrg { 29877ec681f3Smrg allowedSwModeSet.value &= Gfx10LinearSwModeMask; 29887ec681f3Smrg } 29897ec681f3Smrg 29907ec681f3Smrg if (msaa) 29917ec681f3Smrg { 29927ec681f3Smrg allowedSwModeSet.value &= Gfx10MsaaSwModeMask; 29937ec681f3Smrg } 29947ec681f3Smrg 29957ec681f3Smrg if (pIn->flags.depth || pIn->flags.stencil) 29967ec681f3Smrg { 29977ec681f3Smrg allowedSwModeSet.value &= Gfx10ZSwModeMask; 29987ec681f3Smrg } 29997ec681f3Smrg 30007ec681f3Smrg if (pIn->flags.display) 30017ec681f3Smrg { 30027ec681f3Smrg allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp); 30037ec681f3Smrg } 30047ec681f3Smrg 30057ec681f3Smrg if (allowedSwModeSet.value != 0) 30067ec681f3Smrg { 30077ec681f3Smrg#if DEBUG 30087ec681f3Smrg // Post sanity check, at least AddrLib should accept the output generated by its own 30097ec681f3Smrg UINT_32 validateSwModeSet = allowedSwModeSet.value; 30107ec681f3Smrg 
30117ec681f3Smrg for (UINT_32 i = 0; validateSwModeSet != 0; i++) 30127ec681f3Smrg { 30137ec681f3Smrg if (validateSwModeSet & 1) 30147ec681f3Smrg { 30157ec681f3Smrg localIn.swizzleMode = static_cast<AddrSwizzleMode>(i); 30167ec681f3Smrg ADDR_ASSERT(ValidateSwModeParams(&localIn)); 30177ec681f3Smrg } 30187ec681f3Smrg 30197ec681f3Smrg validateSwModeSet >>= 1; 30207ec681f3Smrg } 30217ec681f3Smrg#endif 30227ec681f3Smrg 30237ec681f3Smrg pOut->resourceType = pIn->resourceType; 30247ec681f3Smrg pOut->validSwModeSet = allowedSwModeSet; 30257ec681f3Smrg pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE; 30267ec681f3Smrg pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); 30277ec681f3Smrg pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); 30287ec681f3Smrg 30297ec681f3Smrg pOut->clientPreferredSwSet = pIn->preferredSwSet; 30307ec681f3Smrg 30317ec681f3Smrg if (pOut->clientPreferredSwSet.value == 0) 30327ec681f3Smrg { 30337ec681f3Smrg pOut->clientPreferredSwSet.value = AddrSwSetAll; 30347ec681f3Smrg } 30357ec681f3Smrg 30367ec681f3Smrg // Apply optional restrictions 30377ec681f3Smrg if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig) 30387ec681f3Smrg { 30397ec681f3Smrg if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0) 30407ec681f3Smrg { 30417ec681f3Smrg // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from 30427ec681f3Smrg // the GL2 in VAR mode, so it should be avoided. 30437ec681f3Smrg allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask; 30447ec681f3Smrg } 30457ec681f3Smrg else 30467ec681f3Smrg { 30477ec681f3Smrg // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil. 30487ec681f3Smrg // But we have to suffer from low performance because there is no other choice... 
30497ec681f3Smrg ADDR_ASSERT_ALWAYS(); 30507ec681f3Smrg } 30517ec681f3Smrg } 30527ec681f3Smrg 30537ec681f3Smrg if (pIn->flags.needEquation) 30547ec681f3Smrg { 30557ec681f3Smrg FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); 30567ec681f3Smrg } 30577ec681f3Smrg 30587ec681f3Smrg if (allowedSwModeSet.value == Gfx10LinearSwModeMask) 30597ec681f3Smrg { 30607ec681f3Smrg pOut->swizzleMode = ADDR_SW_LINEAR; 30617ec681f3Smrg } 30627ec681f3Smrg else 30637ec681f3Smrg { 30647ec681f3Smrg const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0); 30657ec681f3Smrg 30667ec681f3Smrg if ((height > 1) && (computeMinSize == FALSE)) 30677ec681f3Smrg { 30687ec681f3Smrg // Always ignore linear swizzle mode if: 30697ec681f3Smrg // 1. This is a (2D/3D) resource with height > 1 30707ec681f3Smrg // 2. Client doesn't require computing minimize size 30717ec681f3Smrg allowedSwModeSet.swLinear = 0; 30727ec681f3Smrg } 30737ec681f3Smrg 30747ec681f3Smrg ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); 30757ec681f3Smrg 30767ec681f3Smrg // Determine block size if there are 2 or more block type candidates 30777ec681f3Smrg if (IsPow2(allowedBlockSet.value) == FALSE) 30787ec681f3Smrg { 30797ec681f3Smrg AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; 30807ec681f3Smrg 30817ec681f3Smrg swMode[AddrBlockLinear] = ADDR_SW_LINEAR; 30827ec681f3Smrg 30837ec681f3Smrg if (m_blockVarSizeLog2 != 0) 30847ec681f3Smrg { 30857ec681f3Smrg swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X; 30867ec681f3Smrg } 30877ec681f3Smrg 30887ec681f3Smrg if (pOut->resourceType == ADDR_RSRC_TEX_3D) 30897ec681f3Smrg { 30907ec681f3Smrg swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S; 30917ec681f3Smrg swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X; 30927ec681f3Smrg swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S; 30937ec681f3Smrg } 30947ec681f3Smrg else 30957ec681f3Smrg { 30967ec681f3Smrg swMode[AddrBlockMicro] = ADDR_SW_256B_S; 30977ec681f3Smrg 
swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S; 30987ec681f3Smrg swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; 30997ec681f3Smrg } 31007ec681f3Smrg 31017ec681f3Smrg UINT_64 padSize[AddrBlockMaxTiledType] = {}; 31027ec681f3Smrg 31037ec681f3Smrg const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); 31047ec681f3Smrg const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1); 31057ec681f3Smrg const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); 31067ec681f3Smrg UINT_32 minSizeBlk = AddrBlockMicro; 31077ec681f3Smrg UINT_64 minSize = 0; 31087ec681f3Smrg 31097ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; 31107ec681f3Smrg 31117ec681f3Smrg for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) 31127ec681f3Smrg { 31137ec681f3Smrg if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) 31147ec681f3Smrg { 31157ec681f3Smrg localIn.swizzleMode = swMode[i]; 31167ec681f3Smrg 31177ec681f3Smrg if (localIn.swizzleMode == ADDR_SW_LINEAR) 31187ec681f3Smrg { 31197ec681f3Smrg returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut); 31207ec681f3Smrg } 31217ec681f3Smrg else 31227ec681f3Smrg { 31237ec681f3Smrg returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut); 31247ec681f3Smrg } 31257ec681f3Smrg 31267ec681f3Smrg if (returnCode == ADDR_OK) 31277ec681f3Smrg { 31287ec681f3Smrg padSize[i] = localOut.surfSize; 31297ec681f3Smrg 31307ec681f3Smrg if (minSize == 0) 31317ec681f3Smrg { 31327ec681f3Smrg minSize = padSize[i]; 31337ec681f3Smrg minSizeBlk = i; 31347ec681f3Smrg } 31357ec681f3Smrg else 31367ec681f3Smrg { 31377ec681f3Smrg if (BlockTypeWithinMemoryBudget( 31387ec681f3Smrg minSize, 31397ec681f3Smrg padSize[i], 31407ec681f3Smrg ratioLow, 31417ec681f3Smrg ratioHi, 31427ec681f3Smrg 0.0, 31437ec681f3Smrg GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))) 31447ec681f3Smrg { 31457ec681f3Smrg minSize = padSize[i]; 31467ec681f3Smrg minSizeBlk = i; 
31477ec681f3Smrg } 31487ec681f3Smrg } 31497ec681f3Smrg } 31507ec681f3Smrg else 31517ec681f3Smrg { 31527ec681f3Smrg ADDR_ASSERT_ALWAYS(); 31537ec681f3Smrg break; 31547ec681f3Smrg } 31557ec681f3Smrg } 31567ec681f3Smrg } 31577ec681f3Smrg 31587ec681f3Smrg if (pIn->memoryBudget > 1.0) 31597ec681f3Smrg { 31607ec681f3Smrg // If minimum size is given by swizzle mode with bigger-block type, then don't ever check 31617ec681f3Smrg // smaller-block type again in coming loop 31627ec681f3Smrg switch (minSizeBlk) 31637ec681f3Smrg { 31647ec681f3Smrg case AddrBlockThick64KB: 31657ec681f3Smrg allowedBlockSet.macroThin64KB = 0; 31667ec681f3Smrg case AddrBlockThinVar: 31677ec681f3Smrg case AddrBlockThin64KB: 31687ec681f3Smrg allowedBlockSet.macroThick4KB = 0; 31697ec681f3Smrg case AddrBlockThick4KB: 31707ec681f3Smrg allowedBlockSet.macroThin4KB = 0; 31717ec681f3Smrg case AddrBlockThin4KB: 31727ec681f3Smrg allowedBlockSet.micro = 0; 31737ec681f3Smrg case AddrBlockMicro: 31747ec681f3Smrg allowedBlockSet.linear = 0; 31757ec681f3Smrg case AddrBlockLinear: 31767ec681f3Smrg break; 31777ec681f3Smrg 31787ec681f3Smrg default: 31797ec681f3Smrg ADDR_ASSERT_ALWAYS(); 31807ec681f3Smrg break; 31817ec681f3Smrg } 31827ec681f3Smrg 31837ec681f3Smrg for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) 31847ec681f3Smrg { 31857ec681f3Smrg if ((i != minSizeBlk) && 31867ec681f3Smrg IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i))) 31877ec681f3Smrg { 31887ec681f3Smrg if (BlockTypeWithinMemoryBudget( 31897ec681f3Smrg minSize, 31907ec681f3Smrg padSize[i], 31917ec681f3Smrg 0, 31927ec681f3Smrg 0, 31937ec681f3Smrg pIn->memoryBudget, 31947ec681f3Smrg GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE) 31957ec681f3Smrg { 31967ec681f3Smrg // Clear the block type if the memory waste is unacceptable 31977ec681f3Smrg allowedBlockSet.value &= ~(1u << (i - 1)); 31987ec681f3Smrg } 31997ec681f3Smrg } 32007ec681f3Smrg } 32017ec681f3Smrg 32027ec681f3Smrg // Remove VAR 
block type if bigger block type is allowed 32037ec681f3Smrg if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X)) 32047ec681f3Smrg { 32057ec681f3Smrg if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB) 32067ec681f3Smrg { 32077ec681f3Smrg allowedBlockSet.var = 0; 32087ec681f3Smrg } 32097ec681f3Smrg } 32107ec681f3Smrg 32117ec681f3Smrg // Remove linear block type if 2 or more block types are allowed 32127ec681f3Smrg if (IsPow2(allowedBlockSet.value) == FALSE) 32137ec681f3Smrg { 32147ec681f3Smrg allowedBlockSet.linear = 0; 32157ec681f3Smrg } 32167ec681f3Smrg 32177ec681f3Smrg // Select the biggest allowed block type 32187ec681f3Smrg minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1; 32197ec681f3Smrg 32207ec681f3Smrg if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType)) 32217ec681f3Smrg { 32227ec681f3Smrg minSizeBlk = AddrBlockLinear; 32237ec681f3Smrg } 32247ec681f3Smrg } 32257ec681f3Smrg 32267ec681f3Smrg switch (minSizeBlk) 32277ec681f3Smrg { 32287ec681f3Smrg case AddrBlockLinear: 32297ec681f3Smrg allowedSwModeSet.value &= Gfx10LinearSwModeMask; 32307ec681f3Smrg break; 32317ec681f3Smrg 32327ec681f3Smrg case AddrBlockMicro: 32337ec681f3Smrg ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); 32347ec681f3Smrg allowedSwModeSet.value &= Gfx10Blk256BSwModeMask; 32357ec681f3Smrg break; 32367ec681f3Smrg 32377ec681f3Smrg case AddrBlockThin4KB: 32387ec681f3Smrg ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D); 32397ec681f3Smrg allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask; 32407ec681f3Smrg break; 32417ec681f3Smrg 32427ec681f3Smrg case AddrBlockThick4KB: 32437ec681f3Smrg ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); 32447ec681f3Smrg allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask; 32457ec681f3Smrg break; 32467ec681f3Smrg 32477ec681f3Smrg case AddrBlockThin64KB: 32487ec681f3Smrg allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ? 
32497ec681f3Smrg Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask; 32507ec681f3Smrg break; 32517ec681f3Smrg 32527ec681f3Smrg case AddrBlockThick64KB: 32537ec681f3Smrg ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D); 32547ec681f3Smrg allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask; 32557ec681f3Smrg break; 32567ec681f3Smrg 32577ec681f3Smrg case AddrBlockThinVar: 32587ec681f3Smrg allowedSwModeSet.value &= Gfx10BlkVarSwModeMask; 32597ec681f3Smrg break; 32607ec681f3Smrg 32617ec681f3Smrg default: 32627ec681f3Smrg ADDR_ASSERT_ALWAYS(); 32637ec681f3Smrg allowedSwModeSet.value = 0; 32647ec681f3Smrg break; 32657ec681f3Smrg } 32667ec681f3Smrg } 32677ec681f3Smrg 32687ec681f3Smrg // Block type should be determined. 32697ec681f3Smrg ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value)); 32707ec681f3Smrg 32717ec681f3Smrg ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); 32727ec681f3Smrg 32737ec681f3Smrg // Determine swizzle type if there are 2 or more swizzle type candidates 32747ec681f3Smrg if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE)) 32757ec681f3Smrg { 32767ec681f3Smrg if (ElemLib::IsBlockCompressed(pIn->format)) 32777ec681f3Smrg { 32787ec681f3Smrg if (allowedSwSet.sw_D) 32797ec681f3Smrg { 32807ec681f3Smrg allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 32817ec681f3Smrg } 32827ec681f3Smrg else if (allowedSwSet.sw_S) 32837ec681f3Smrg { 32847ec681f3Smrg allowedSwModeSet.value &= Gfx10StandardSwModeMask; 32857ec681f3Smrg } 32867ec681f3Smrg else 32877ec681f3Smrg { 32887ec681f3Smrg ADDR_ASSERT(allowedSwSet.sw_R); 32897ec681f3Smrg allowedSwModeSet.value &= Gfx10RenderSwModeMask; 32907ec681f3Smrg } 32917ec681f3Smrg } 32927ec681f3Smrg else if (ElemLib::IsMacroPixelPacked(pIn->format)) 32937ec681f3Smrg { 32947ec681f3Smrg if (allowedSwSet.sw_S) 32957ec681f3Smrg { 32967ec681f3Smrg allowedSwModeSet.value &= Gfx10StandardSwModeMask; 32977ec681f3Smrg } 32987ec681f3Smrg else if (allowedSwSet.sw_D) 
32997ec681f3Smrg { 33007ec681f3Smrg allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 33017ec681f3Smrg } 33027ec681f3Smrg else 33037ec681f3Smrg { 33047ec681f3Smrg ADDR_ASSERT(allowedSwSet.sw_R); 33057ec681f3Smrg allowedSwModeSet.value &= Gfx10RenderSwModeMask; 33067ec681f3Smrg } 33077ec681f3Smrg } 33087ec681f3Smrg else if (pIn->resourceType == ADDR_RSRC_TEX_3D) 33097ec681f3Smrg { 33107ec681f3Smrg if (pIn->flags.color && 33117ec681f3Smrg GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB && 33127ec681f3Smrg allowedSwSet.sw_D) 33137ec681f3Smrg { 33147ec681f3Smrg allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 33157ec681f3Smrg } 33167ec681f3Smrg else if (allowedSwSet.sw_S) 33177ec681f3Smrg { 33187ec681f3Smrg allowedSwModeSet.value &= Gfx10StandardSwModeMask; 33197ec681f3Smrg } 33207ec681f3Smrg else if (allowedSwSet.sw_R) 33217ec681f3Smrg { 33227ec681f3Smrg allowedSwModeSet.value &= Gfx10RenderSwModeMask; 33237ec681f3Smrg } 33247ec681f3Smrg else 33257ec681f3Smrg { 33267ec681f3Smrg ADDR_ASSERT(allowedSwSet.sw_Z); 33277ec681f3Smrg allowedSwModeSet.value &= Gfx10ZSwModeMask; 33287ec681f3Smrg } 33297ec681f3Smrg } 33307ec681f3Smrg else 33317ec681f3Smrg { 33327ec681f3Smrg if (allowedSwSet.sw_R) 33337ec681f3Smrg { 33347ec681f3Smrg allowedSwModeSet.value &= Gfx10RenderSwModeMask; 33357ec681f3Smrg } 33367ec681f3Smrg else if (allowedSwSet.sw_D) 33377ec681f3Smrg { 33387ec681f3Smrg allowedSwModeSet.value &= Gfx10DisplaySwModeMask; 33397ec681f3Smrg } 33407ec681f3Smrg else if (allowedSwSet.sw_S) 33417ec681f3Smrg { 33427ec681f3Smrg allowedSwModeSet.value &= Gfx10StandardSwModeMask; 33437ec681f3Smrg } 33447ec681f3Smrg else 33457ec681f3Smrg { 33467ec681f3Smrg ADDR_ASSERT(allowedSwSet.sw_Z); 33477ec681f3Smrg allowedSwModeSet.value &= Gfx10ZSwModeMask; 33487ec681f3Smrg } 33497ec681f3Smrg } 33507ec681f3Smrg 33517ec681f3Smrg // Swizzle type should be determined. 
33527ec681f3Smrg ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); 33537ec681f3Smrg } 33547ec681f3Smrg 33557ec681f3Smrg // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + 33567ec681f3Smrg // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's 33577ec681f3Smrg // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9). 33587ec681f3Smrg pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value)); 33597ec681f3Smrg } 33607ec681f3Smrg } 33617ec681f3Smrg else 33627ec681f3Smrg { 33637ec681f3Smrg // Invalid combination... 33647ec681f3Smrg ADDR_ASSERT_ALWAYS(); 33657ec681f3Smrg returnCode = ADDR_INVALIDPARAMS; 33667ec681f3Smrg } 33677ec681f3Smrg } 33687ec681f3Smrg else 33697ec681f3Smrg { 33707ec681f3Smrg // Invalid combination... 33717ec681f3Smrg ADDR_ASSERT_ALWAYS(); 33727ec681f3Smrg returnCode = ADDR_INVALIDPARAMS; 33737ec681f3Smrg } 33747ec681f3Smrg } 33757ec681f3Smrg 33767ec681f3Smrg return returnCode; 33777ec681f3Smrg} 33787ec681f3Smrg 33797ec681f3Smrg/** 33807ec681f3Smrg************************************************************************************************************************ 33817ec681f3Smrg* Gfx10Lib::ComputeStereoInfo 33827ec681f3Smrg* 33837ec681f3Smrg* @brief 33847ec681f3Smrg* Compute height alignment and right eye pipeBankXor for stereo surface 33857ec681f3Smrg* 33867ec681f3Smrg* @return 33877ec681f3Smrg* Error code 33887ec681f3Smrg* 33897ec681f3Smrg************************************************************************************************************************ 33907ec681f3Smrg*/ 33917ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo( 33927ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info 33937ec681f3Smrg UINT_32* pAlignY, ///< Stereo requested additional alignment in Y 33947ec681f3Smrg UINT_32* pRightXor ///< Right eye xor 
33957ec681f3Smrg ) const 33967ec681f3Smrg{ 33977ec681f3Smrg ADDR_E_RETURNCODE ret = ADDR_OK; 33987ec681f3Smrg 33997ec681f3Smrg *pRightXor = 0; 34007ec681f3Smrg 34017ec681f3Smrg if (IsNonPrtXor(pIn->swizzleMode)) 34027ec681f3Smrg { 34037ec681f3Smrg const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 34047ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 34057ec681f3Smrg const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1; 34067ec681f3Smrg const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode); 34077ec681f3Smrg const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2]; 34087ec681f3Smrg 34097ec681f3Smrg if (eqIndex != ADDR_INVALID_EQUATION_INDEX) 34107ec681f3Smrg { 34117ec681f3Smrg UINT_32 yMax = 0; 34127ec681f3Smrg UINT_32 yPosMask = 0; 34137ec681f3Smrg 34147ec681f3Smrg // First get "max y bit" 34157ec681f3Smrg for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++) 34167ec681f3Smrg { 34177ec681f3Smrg ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1); 34187ec681f3Smrg 34197ec681f3Smrg if ((m_equationTable[eqIndex].addr[i].channel == 1) && 34207ec681f3Smrg (m_equationTable[eqIndex].addr[i].index > yMax)) 34217ec681f3Smrg { 34227ec681f3Smrg yMax = m_equationTable[eqIndex].addr[i].index; 34237ec681f3Smrg } 34247ec681f3Smrg 34257ec681f3Smrg if ((m_equationTable[eqIndex].xor1[i].valid == 1) && 34267ec681f3Smrg (m_equationTable[eqIndex].xor1[i].channel == 1) && 34277ec681f3Smrg (m_equationTable[eqIndex].xor1[i].index > yMax)) 34287ec681f3Smrg { 34297ec681f3Smrg yMax = m_equationTable[eqIndex].xor1[i].index; 34307ec681f3Smrg } 34317ec681f3Smrg 34327ec681f3Smrg if ((m_equationTable[eqIndex].xor2[i].valid == 1) && 34337ec681f3Smrg (m_equationTable[eqIndex].xor2[i].channel == 1) && 34347ec681f3Smrg (m_equationTable[eqIndex].xor2[i].index > yMax)) 34357ec681f3Smrg { 34367ec681f3Smrg yMax = m_equationTable[eqIndex].xor2[i].index; 34377ec681f3Smrg } 34387ec681f3Smrg } 34397ec681f3Smrg 34407ec681f3Smrg // Then 
loop again for populating a position mask of "max Y bit" 34417ec681f3Smrg for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++) 34427ec681f3Smrg { 34437ec681f3Smrg if ((m_equationTable[eqIndex].addr[i].channel == 1) && 34447ec681f3Smrg (m_equationTable[eqIndex].addr[i].index == yMax)) 34457ec681f3Smrg { 34467ec681f3Smrg yPosMask |= 1u << i; 34477ec681f3Smrg } 34487ec681f3Smrg else if ((m_equationTable[eqIndex].xor1[i].valid == 1) && 34497ec681f3Smrg (m_equationTable[eqIndex].xor1[i].channel == 1) && 34507ec681f3Smrg (m_equationTable[eqIndex].xor1[i].index == yMax)) 34517ec681f3Smrg { 34527ec681f3Smrg yPosMask |= 1u << i; 34537ec681f3Smrg } 34547ec681f3Smrg else if ((m_equationTable[eqIndex].xor2[i].valid == 1) && 34557ec681f3Smrg (m_equationTable[eqIndex].xor2[i].channel == 1) && 34567ec681f3Smrg (m_equationTable[eqIndex].xor2[i].index == yMax)) 34577ec681f3Smrg { 34587ec681f3Smrg yPosMask |= 1u << i; 34597ec681f3Smrg } 34607ec681f3Smrg } 34617ec681f3Smrg 34627ec681f3Smrg const UINT_32 additionalAlign = 1 << yMax; 34637ec681f3Smrg 34647ec681f3Smrg if (additionalAlign >= *pAlignY) 34657ec681f3Smrg { 34667ec681f3Smrg *pAlignY = additionalAlign; 34677ec681f3Smrg 34687ec681f3Smrg const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign); 34697ec681f3Smrg 34707ec681f3Smrg if ((alignedHeight >> yMax) & 1) 34717ec681f3Smrg { 34727ec681f3Smrg *pRightXor = yPosMask >> m_pipeInterleaveLog2; 34737ec681f3Smrg } 34747ec681f3Smrg } 34757ec681f3Smrg } 34767ec681f3Smrg else 34777ec681f3Smrg { 34787ec681f3Smrg ret = ADDR_INVALIDPARAMS; 34797ec681f3Smrg } 34807ec681f3Smrg } 34817ec681f3Smrg 34827ec681f3Smrg return ret; 34837ec681f3Smrg} 34847ec681f3Smrg 34857ec681f3Smrg/** 34867ec681f3Smrg************************************************************************************************************************ 34877ec681f3Smrg* Gfx10Lib::HwlComputeSurfaceInfoTiled 34887ec681f3Smrg* 34897ec681f3Smrg* @brief 34907ec681f3Smrg* Internal function to calculate alignment 
for tiled surface 34917ec681f3Smrg* 34927ec681f3Smrg* @return 34937ec681f3Smrg* ADDR_E_RETURNCODE 34947ec681f3Smrg************************************************************************************************************************ 34957ec681f3Smrg*/ 34967ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled( 34977ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 34987ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 34997ec681f3Smrg ) const 35007ec681f3Smrg{ 35017ec681f3Smrg ADDR_E_RETURNCODE ret; 35027ec681f3Smrg 35037ec681f3Smrg // Mip chain dimesion and epitch has no meaning in GFX10, set to default value 35047ec681f3Smrg pOut->mipChainPitch = 0; 35057ec681f3Smrg pOut->mipChainHeight = 0; 35067ec681f3Smrg pOut->mipChainSlice = 0; 35077ec681f3Smrg pOut->epitchIsHeight = FALSE; 35087ec681f3Smrg 35097ec681f3Smrg // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary 35107ec681f3Smrg pOut->mipChainInTail = FALSE; 35117ec681f3Smrg pOut->firstMipIdInTail = pIn->numMipLevels; 35127ec681f3Smrg 35137ec681f3Smrg if (IsBlock256b(pIn->swizzleMode)) 35147ec681f3Smrg { 35157ec681f3Smrg ret = ComputeSurfaceInfoMicroTiled(pIn, pOut); 35167ec681f3Smrg } 35177ec681f3Smrg else 35187ec681f3Smrg { 35197ec681f3Smrg ret = ComputeSurfaceInfoMacroTiled(pIn, pOut); 35207ec681f3Smrg } 35217ec681f3Smrg 35227ec681f3Smrg return ret; 35237ec681f3Smrg} 35247ec681f3Smrg 35257ec681f3Smrg 35267ec681f3Smrg/** 35277ec681f3Smrg************************************************************************************************************************ 35287ec681f3Smrg* Gfx10Lib::ComputeSurfaceInfoMicroTiled 35297ec681f3Smrg* 35307ec681f3Smrg* @brief 35317ec681f3Smrg* Internal function to calculate alignment for micro tiled surface 35327ec681f3Smrg* 35337ec681f3Smrg* @return 35347ec681f3Smrg* ADDR_E_RETURNCODE 
35357ec681f3Smrg************************************************************************************************************************ 35367ec681f3Smrg*/ 35377ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled( 35387ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 35397ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 35407ec681f3Smrg ) const 35417ec681f3Smrg{ 35427ec681f3Smrg ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth, 35437ec681f3Smrg &pOut->blockHeight, 35447ec681f3Smrg &pOut->blockSlices, 35457ec681f3Smrg pIn->bpp, 35467ec681f3Smrg pIn->numFrags, 35477ec681f3Smrg pIn->resourceType, 35487ec681f3Smrg pIn->swizzleMode); 35497ec681f3Smrg 35507ec681f3Smrg if (ret == ADDR_OK) 35517ec681f3Smrg { 35527ec681f3Smrg const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode); 35537ec681f3Smrg 35547ec681f3Smrg pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth); 35557ec681f3Smrg pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); 35567ec681f3Smrg pOut->numSlices = pIn->numSlices; 35577ec681f3Smrg pOut->baseAlign = blockSize; 35587ec681f3Smrg 35597ec681f3Smrg if (pIn->numMipLevels > 1) 35607ec681f3Smrg { 35617ec681f3Smrg const UINT_32 mip0Width = pIn->width; 35627ec681f3Smrg const UINT_32 mip0Height = pIn->height; 35637ec681f3Smrg UINT_64 mipSliceSize = 0; 35647ec681f3Smrg 35657ec681f3Smrg for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--) 35667ec681f3Smrg { 35677ec681f3Smrg UINT_32 mipWidth, mipHeight; 35687ec681f3Smrg 35697ec681f3Smrg GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight); 35707ec681f3Smrg 35717ec681f3Smrg const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth); 35727ec681f3Smrg const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight); 35737ec681f3Smrg 35747ec681f3Smrg if (pOut->pMipInfo != NULL) 35757ec681f3Smrg { 35767ec681f3Smrg pOut->pMipInfo[i].pitch = mipActualWidth; 
35777ec681f3Smrg pOut->pMipInfo[i].height = mipActualHeight; 35787ec681f3Smrg pOut->pMipInfo[i].depth = 1; 35797ec681f3Smrg pOut->pMipInfo[i].offset = mipSliceSize; 35807ec681f3Smrg pOut->pMipInfo[i].mipTailOffset = 0; 35817ec681f3Smrg pOut->pMipInfo[i].macroBlockOffset = mipSliceSize; 35827ec681f3Smrg } 35837ec681f3Smrg 35847ec681f3Smrg mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3); 35857ec681f3Smrg } 35867ec681f3Smrg 35877ec681f3Smrg pOut->sliceSize = mipSliceSize; 35887ec681f3Smrg pOut->surfSize = mipSliceSize * pOut->numSlices; 35897ec681f3Smrg } 35907ec681f3Smrg else 35917ec681f3Smrg { 35927ec681f3Smrg pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3); 35937ec681f3Smrg pOut->surfSize = pOut->sliceSize * pOut->numSlices; 35947ec681f3Smrg 35957ec681f3Smrg if (pOut->pMipInfo != NULL) 35967ec681f3Smrg { 35977ec681f3Smrg pOut->pMipInfo[0].pitch = pOut->pitch; 35987ec681f3Smrg pOut->pMipInfo[0].height = pOut->height; 35997ec681f3Smrg pOut->pMipInfo[0].depth = 1; 36007ec681f3Smrg pOut->pMipInfo[0].offset = 0; 36017ec681f3Smrg pOut->pMipInfo[0].mipTailOffset = 0; 36027ec681f3Smrg pOut->pMipInfo[0].macroBlockOffset = 0; 36037ec681f3Smrg } 36047ec681f3Smrg } 36057ec681f3Smrg 36067ec681f3Smrg } 36077ec681f3Smrg 36087ec681f3Smrg return ret; 36097ec681f3Smrg} 36107ec681f3Smrg 36117ec681f3Smrg/** 36127ec681f3Smrg************************************************************************************************************************ 36137ec681f3Smrg* Gfx10Lib::ComputeSurfaceInfoMacroTiled 36147ec681f3Smrg* 36157ec681f3Smrg* @brief 36167ec681f3Smrg* Internal function to calculate alignment for macro tiled surface 36177ec681f3Smrg* 36187ec681f3Smrg* @return 36197ec681f3Smrg* ADDR_E_RETURNCODE 36207ec681f3Smrg************************************************************************************************************************ 36217ec681f3Smrg*/ 36227ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( 
36237ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 36247ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 36257ec681f3Smrg ) const 36267ec681f3Smrg{ 36277ec681f3Smrg ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, 36287ec681f3Smrg &pOut->blockHeight, 36297ec681f3Smrg &pOut->blockSlices, 36307ec681f3Smrg pIn->bpp, 36317ec681f3Smrg pIn->numFrags, 36327ec681f3Smrg pIn->resourceType, 36337ec681f3Smrg pIn->swizzleMode); 36347ec681f3Smrg 36357ec681f3Smrg if (returnCode == ADDR_OK) 36367ec681f3Smrg { 36377ec681f3Smrg UINT_32 heightAlign = pOut->blockHeight; 36387ec681f3Smrg 36397ec681f3Smrg if (pIn->flags.qbStereo) 36407ec681f3Smrg { 36417ec681f3Smrg UINT_32 rightXor = 0; 36427ec681f3Smrg 36437ec681f3Smrg returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor); 36447ec681f3Smrg 36457ec681f3Smrg if (returnCode == ADDR_OK) 36467ec681f3Smrg { 36477ec681f3Smrg pOut->pStereoInfo->rightSwizzle = rightXor; 36487ec681f3Smrg } 36497ec681f3Smrg } 36507ec681f3Smrg 36517ec681f3Smrg if (returnCode == ADDR_OK) 36527ec681f3Smrg { 36537ec681f3Smrg const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); 36547ec681f3Smrg const UINT_32 blockSize = 1 << blockSizeLog2; 36557ec681f3Smrg 36567ec681f3Smrg pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth); 36577ec681f3Smrg pOut->height = PowTwoAlign(pIn->height, heightAlign); 36587ec681f3Smrg pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); 36597ec681f3Smrg pOut->baseAlign = blockSize; 36607ec681f3Smrg 36617ec681f3Smrg if (pIn->numMipLevels > 1) 36627ec681f3Smrg { 36637ec681f3Smrg const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, 36647ec681f3Smrg pIn->swizzleMode, 36657ec681f3Smrg pOut->blockWidth, 36667ec681f3Smrg pOut->blockHeight, 36677ec681f3Smrg pOut->blockSlices); 36687ec681f3Smrg const UINT_32 mip0Width = pIn->width; 36697ec681f3Smrg const UINT_32 mip0Height = pIn->height; 36707ec681f3Smrg const 
BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode); 36717ec681f3Smrg const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices; 36727ec681f3Smrg const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin); 36737ec681f3Smrg const UINT_32 index = Log2(pIn->bpp >> 3); 36747ec681f3Smrg UINT_32 firstMipInTail = pIn->numMipLevels; 36757ec681f3Smrg UINT_64 mipChainSliceSize = 0; 36767ec681f3Smrg UINT_64 mipSize[MaxMipLevels]; 36777ec681f3Smrg UINT_64 mipSliceSize[MaxMipLevels]; 36787ec681f3Smrg 36797ec681f3Smrg Dim3d fixedTailMaxDim = tailMaxDim; 36807ec681f3Smrg 36817ec681f3Smrg if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1)) 36827ec681f3Smrg { 36837ec681f3Smrg fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w; 36847ec681f3Smrg fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h; 36857ec681f3Smrg } 36867ec681f3Smrg 36877ec681f3Smrg for (UINT_32 i = 0; i < pIn->numMipLevels; i++) 36887ec681f3Smrg { 36897ec681f3Smrg UINT_32 mipWidth, mipHeight, mipDepth; 36907ec681f3Smrg 36917ec681f3Smrg GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth); 36927ec681f3Smrg 36937ec681f3Smrg if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i)) 36947ec681f3Smrg { 36957ec681f3Smrg firstMipInTail = i; 36967ec681f3Smrg mipChainSliceSize += blockSize / pOut->blockSlices; 36977ec681f3Smrg break; 36987ec681f3Smrg } 36997ec681f3Smrg else 37007ec681f3Smrg { 37017ec681f3Smrg const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth); 37027ec681f3Smrg const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight); 37037ec681f3Smrg const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices); 37047ec681f3Smrg const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3); 37057ec681f3Smrg 37067ec681f3Smrg mipSize[i] = sliceSize * depth; 37077ec681f3Smrg mipSliceSize[i] = sliceSize * pOut->blockSlices; 37087ec681f3Smrg 
mipChainSliceSize += sliceSize; 37097ec681f3Smrg 37107ec681f3Smrg if (pOut->pMipInfo != NULL) 37117ec681f3Smrg { 37127ec681f3Smrg pOut->pMipInfo[i].pitch = pitch; 37137ec681f3Smrg pOut->pMipInfo[i].height = height; 37147ec681f3Smrg pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; 37157ec681f3Smrg } 37167ec681f3Smrg } 37177ec681f3Smrg } 37187ec681f3Smrg 37197ec681f3Smrg pOut->sliceSize = mipChainSliceSize; 37207ec681f3Smrg pOut->surfSize = mipChainSliceSize * pOut->numSlices; 37217ec681f3Smrg pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE; 37227ec681f3Smrg pOut->firstMipIdInTail = firstMipInTail; 37237ec681f3Smrg 37247ec681f3Smrg if (pOut->pMipInfo != NULL) 37257ec681f3Smrg { 37267ec681f3Smrg UINT_64 offset = 0; 37277ec681f3Smrg UINT_64 macroBlkOffset = 0; 37287ec681f3Smrg UINT_32 tailMaxDepth = 0; 37297ec681f3Smrg 37307ec681f3Smrg if (firstMipInTail != pIn->numMipLevels) 37317ec681f3Smrg { 37327ec681f3Smrg UINT_32 mipWidth, mipHeight; 37337ec681f3Smrg 37347ec681f3Smrg GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail, 37357ec681f3Smrg &mipWidth, &mipHeight, &tailMaxDepth); 37367ec681f3Smrg 37377ec681f3Smrg offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices; 37387ec681f3Smrg macroBlkOffset = blockSize; 37397ec681f3Smrg } 37407ec681f3Smrg 37417ec681f3Smrg for (INT_32 i = firstMipInTail - 1; i >= 0; i--) 37427ec681f3Smrg { 37437ec681f3Smrg pOut->pMipInfo[i].offset = offset; 37447ec681f3Smrg pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset; 37457ec681f3Smrg pOut->pMipInfo[i].mipTailOffset = 0; 37467ec681f3Smrg 37477ec681f3Smrg offset += mipSize[i]; 37487ec681f3Smrg macroBlkOffset += mipSliceSize[i]; 37497ec681f3Smrg } 37507ec681f3Smrg 37517ec681f3Smrg UINT_32 pitch = tailMaxDim.w; 37527ec681f3Smrg UINT_32 height = tailMaxDim.h; 37537ec681f3Smrg UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d); 37547ec681f3Smrg 37557ec681f3Smrg tailMaxDepth = isThin ? 
1 : (depth / Block256_3d[index].d); 37567ec681f3Smrg 37577ec681f3Smrg for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++) 37587ec681f3Smrg { 37597ec681f3Smrg const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail); 37607ec681f3Smrg const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8); 37617ec681f3Smrg 37627ec681f3Smrg pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth; 37637ec681f3Smrg pOut->pMipInfo[i].mipTailOffset = mipOffset; 37647ec681f3Smrg pOut->pMipInfo[i].macroBlockOffset = 0; 37657ec681f3Smrg 37667ec681f3Smrg pOut->pMipInfo[i].pitch = pitch; 37677ec681f3Smrg pOut->pMipInfo[i].height = height; 37687ec681f3Smrg pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; 37697ec681f3Smrg 37707ec681f3Smrg UINT_32 mipX = ((mipOffset >> 9) & 1) | 37717ec681f3Smrg ((mipOffset >> 10) & 2) | 37727ec681f3Smrg ((mipOffset >> 11) & 4) | 37737ec681f3Smrg ((mipOffset >> 12) & 8) | 37747ec681f3Smrg ((mipOffset >> 13) & 16) | 37757ec681f3Smrg ((mipOffset >> 14) & 32); 37767ec681f3Smrg UINT_32 mipY = ((mipOffset >> 8) & 1) | 37777ec681f3Smrg ((mipOffset >> 9) & 2) | 37787ec681f3Smrg ((mipOffset >> 10) & 4) | 37797ec681f3Smrg ((mipOffset >> 11) & 8) | 37807ec681f3Smrg ((mipOffset >> 12) & 16) | 37817ec681f3Smrg ((mipOffset >> 13) & 32); 37827ec681f3Smrg 37837ec681f3Smrg if (blockSizeLog2 & 1) 37847ec681f3Smrg { 37857ec681f3Smrg const UINT_32 temp = mipX; 37867ec681f3Smrg mipX = mipY; 37877ec681f3Smrg mipY = temp; 37887ec681f3Smrg 37897ec681f3Smrg if (index & 1) 37907ec681f3Smrg { 37917ec681f3Smrg mipY = (mipY << 1) | (mipX & 1); 37927ec681f3Smrg mipX = mipX >> 1; 37937ec681f3Smrg } 37947ec681f3Smrg } 37957ec681f3Smrg 37967ec681f3Smrg if (isThin) 37977ec681f3Smrg { 37987ec681f3Smrg pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w; 37997ec681f3Smrg pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h; 38007ec681f3Smrg pOut->pMipInfo[i].mipTailCoordZ = 0; 38017ec681f3Smrg 38027ec681f3Smrg pitch = Max(pitch >> 1, 
Block256_2d[index].w); 38037ec681f3Smrg height = Max(height >> 1, Block256_2d[index].h); 38047ec681f3Smrg } 38057ec681f3Smrg else 38067ec681f3Smrg { 38077ec681f3Smrg pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w; 38087ec681f3Smrg pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h; 38097ec681f3Smrg pOut->pMipInfo[i].mipTailCoordZ = 0; 38107ec681f3Smrg 38117ec681f3Smrg pitch = Max(pitch >> 1, Block256_3d[index].w); 38127ec681f3Smrg height = Max(height >> 1, Block256_3d[index].h); 38137ec681f3Smrg } 38147ec681f3Smrg } 38157ec681f3Smrg } 38167ec681f3Smrg } 38177ec681f3Smrg else 38187ec681f3Smrg { 38197ec681f3Smrg pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags; 38207ec681f3Smrg pOut->surfSize = pOut->sliceSize * pOut->numSlices; 38217ec681f3Smrg 38227ec681f3Smrg if (pOut->pMipInfo != NULL) 38237ec681f3Smrg { 38247ec681f3Smrg pOut->pMipInfo[0].pitch = pOut->pitch; 38257ec681f3Smrg pOut->pMipInfo[0].height = pOut->height; 38267ec681f3Smrg pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? 
pOut->numSlices : 1; 38277ec681f3Smrg pOut->pMipInfo[0].offset = 0; 38287ec681f3Smrg pOut->pMipInfo[0].mipTailOffset = 0; 38297ec681f3Smrg pOut->pMipInfo[0].macroBlockOffset = 0; 38307ec681f3Smrg pOut->pMipInfo[0].mipTailCoordX = 0; 38317ec681f3Smrg pOut->pMipInfo[0].mipTailCoordY = 0; 38327ec681f3Smrg pOut->pMipInfo[0].mipTailCoordZ = 0; 38337ec681f3Smrg } 38347ec681f3Smrg } 38357ec681f3Smrg } 38367ec681f3Smrg } 38377ec681f3Smrg 38387ec681f3Smrg return returnCode; 38397ec681f3Smrg} 38407ec681f3Smrg 38417ec681f3Smrg/** 38427ec681f3Smrg************************************************************************************************************************ 38437ec681f3Smrg* Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled 38447ec681f3Smrg* 38457ec681f3Smrg* @brief 38467ec681f3Smrg* Internal function to calculate address from coord for tiled swizzle surface 38477ec681f3Smrg* 38487ec681f3Smrg* @return 38497ec681f3Smrg* ADDR_E_RETURNCODE 38507ec681f3Smrg************************************************************************************************************************ 38517ec681f3Smrg*/ 38527ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled( 38537ec681f3Smrg const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 38547ec681f3Smrg ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 38557ec681f3Smrg ) const 38567ec681f3Smrg{ 38577ec681f3Smrg ADDR_E_RETURNCODE ret; 38587ec681f3Smrg 38597ec681f3Smrg if (IsBlock256b(pIn->swizzleMode)) 38607ec681f3Smrg { 38617ec681f3Smrg ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut); 38627ec681f3Smrg } 38637ec681f3Smrg else 38647ec681f3Smrg { 38657ec681f3Smrg ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut); 38667ec681f3Smrg } 38677ec681f3Smrg 38687ec681f3Smrg return ret; 38697ec681f3Smrg} 38707ec681f3Smrg 38717ec681f3Smrg/** 
38727ec681f3Smrg************************************************************************************************************************ 38737ec681f3Smrg* Gfx10Lib::ComputeOffsetFromEquation 38747ec681f3Smrg* 38757ec681f3Smrg* @brief 38767ec681f3Smrg* Compute offset from equation 38777ec681f3Smrg* 38787ec681f3Smrg* @return 38797ec681f3Smrg* Offset 38807ec681f3Smrg************************************************************************************************************************ 38817ec681f3Smrg*/ 38827ec681f3SmrgUINT_32 Gfx10Lib::ComputeOffsetFromEquation( 38837ec681f3Smrg const ADDR_EQUATION* pEq, ///< Equation 38847ec681f3Smrg UINT_32 x, ///< x coord in bytes 38857ec681f3Smrg UINT_32 y, ///< y coord in pixel 38867ec681f3Smrg UINT_32 z ///< z coord in slice 38877ec681f3Smrg ) const 38887ec681f3Smrg{ 38897ec681f3Smrg UINT_32 offset = 0; 38907ec681f3Smrg 38917ec681f3Smrg for (UINT_32 i = 0; i < pEq->numBits; i++) 38927ec681f3Smrg { 38937ec681f3Smrg UINT_32 v = 0; 38947ec681f3Smrg 38957ec681f3Smrg if (pEq->addr[i].valid) 38967ec681f3Smrg { 38977ec681f3Smrg if (pEq->addr[i].channel == 0) 38987ec681f3Smrg { 38997ec681f3Smrg v ^= (x >> pEq->addr[i].index) & 1; 39007ec681f3Smrg } 39017ec681f3Smrg else if (pEq->addr[i].channel == 1) 39027ec681f3Smrg { 39037ec681f3Smrg v ^= (y >> pEq->addr[i].index) & 1; 39047ec681f3Smrg } 39057ec681f3Smrg else 39067ec681f3Smrg { 39077ec681f3Smrg ADDR_ASSERT(pEq->addr[i].channel == 2); 39087ec681f3Smrg v ^= (z >> pEq->addr[i].index) & 1; 39097ec681f3Smrg } 39107ec681f3Smrg } 39117ec681f3Smrg 39127ec681f3Smrg if (pEq->xor1[i].valid) 39137ec681f3Smrg { 39147ec681f3Smrg if (pEq->xor1[i].channel == 0) 39157ec681f3Smrg { 39167ec681f3Smrg v ^= (x >> pEq->xor1[i].index) & 1; 39177ec681f3Smrg } 39187ec681f3Smrg else if (pEq->xor1[i].channel == 1) 39197ec681f3Smrg { 39207ec681f3Smrg v ^= (y >> pEq->xor1[i].index) & 1; 39217ec681f3Smrg } 39227ec681f3Smrg else 39237ec681f3Smrg { 39247ec681f3Smrg ADDR_ASSERT(pEq->xor1[i].channel == 2); 
39257ec681f3Smrg v ^= (z >> pEq->xor1[i].index) & 1; 39267ec681f3Smrg } 39277ec681f3Smrg } 39287ec681f3Smrg 39297ec681f3Smrg if (pEq->xor2[i].valid) 39307ec681f3Smrg { 39317ec681f3Smrg if (pEq->xor2[i].channel == 0) 39327ec681f3Smrg { 39337ec681f3Smrg v ^= (x >> pEq->xor2[i].index) & 1; 39347ec681f3Smrg } 39357ec681f3Smrg else if (pEq->xor2[i].channel == 1) 39367ec681f3Smrg { 39377ec681f3Smrg v ^= (y >> pEq->xor2[i].index) & 1; 39387ec681f3Smrg } 39397ec681f3Smrg else 39407ec681f3Smrg { 39417ec681f3Smrg ADDR_ASSERT(pEq->xor2[i].channel == 2); 39427ec681f3Smrg v ^= (z >> pEq->xor2[i].index) & 1; 39437ec681f3Smrg } 39447ec681f3Smrg } 39457ec681f3Smrg 39467ec681f3Smrg offset |= (v << i); 39477ec681f3Smrg } 39487ec681f3Smrg 39497ec681f3Smrg return offset; 39507ec681f3Smrg} 39517ec681f3Smrg 39527ec681f3Smrg/** 39537ec681f3Smrg************************************************************************************************************************ 39547ec681f3Smrg* Gfx10Lib::ComputeOffsetFromSwizzlePattern 39557ec681f3Smrg* 39567ec681f3Smrg* @brief 39577ec681f3Smrg* Compute offset from swizzle pattern 39587ec681f3Smrg* 39597ec681f3Smrg* @return 39607ec681f3Smrg* Offset 39617ec681f3Smrg************************************************************************************************************************ 39627ec681f3Smrg*/ 39637ec681f3SmrgUINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern( 39647ec681f3Smrg const UINT_64* pPattern, ///< Swizzle pattern 39657ec681f3Smrg UINT_32 numBits, ///< Number of bits in pattern 39667ec681f3Smrg UINT_32 x, ///< x coord in pixel 39677ec681f3Smrg UINT_32 y, ///< y coord in pixel 39687ec681f3Smrg UINT_32 z, ///< z coord in slice 39697ec681f3Smrg UINT_32 s ///< sample id 39707ec681f3Smrg ) const 39717ec681f3Smrg{ 39727ec681f3Smrg UINT_32 offset = 0; 39737ec681f3Smrg const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern); 39747ec681f3Smrg 39757ec681f3Smrg for (UINT_32 i = 0; i < numBits; i++) 
39767ec681f3Smrg { 39777ec681f3Smrg UINT_32 v = 0; 39787ec681f3Smrg 39797ec681f3Smrg if (pSwizzlePattern[i].x != 0) 39807ec681f3Smrg { 39817ec681f3Smrg UINT_16 mask = pSwizzlePattern[i].x; 39827ec681f3Smrg UINT_32 xBits = x; 39837ec681f3Smrg 39847ec681f3Smrg while (mask != 0) 39857ec681f3Smrg { 39867ec681f3Smrg if (mask & 1) 39877ec681f3Smrg { 39887ec681f3Smrg v ^= xBits & 1; 39897ec681f3Smrg } 39907ec681f3Smrg 39917ec681f3Smrg xBits >>= 1; 39927ec681f3Smrg mask >>= 1; 39937ec681f3Smrg } 39947ec681f3Smrg } 39957ec681f3Smrg 39967ec681f3Smrg if (pSwizzlePattern[i].y != 0) 39977ec681f3Smrg { 39987ec681f3Smrg UINT_16 mask = pSwizzlePattern[i].y; 39997ec681f3Smrg UINT_32 yBits = y; 40007ec681f3Smrg 40017ec681f3Smrg while (mask != 0) 40027ec681f3Smrg { 40037ec681f3Smrg if (mask & 1) 40047ec681f3Smrg { 40057ec681f3Smrg v ^= yBits & 1; 40067ec681f3Smrg } 40077ec681f3Smrg 40087ec681f3Smrg yBits >>= 1; 40097ec681f3Smrg mask >>= 1; 40107ec681f3Smrg } 40117ec681f3Smrg } 40127ec681f3Smrg 40137ec681f3Smrg if (pSwizzlePattern[i].z != 0) 40147ec681f3Smrg { 40157ec681f3Smrg UINT_16 mask = pSwizzlePattern[i].z; 40167ec681f3Smrg UINT_32 zBits = z; 40177ec681f3Smrg 40187ec681f3Smrg while (mask != 0) 40197ec681f3Smrg { 40207ec681f3Smrg if (mask & 1) 40217ec681f3Smrg { 40227ec681f3Smrg v ^= zBits & 1; 40237ec681f3Smrg } 40247ec681f3Smrg 40257ec681f3Smrg zBits >>= 1; 40267ec681f3Smrg mask >>= 1; 40277ec681f3Smrg } 40287ec681f3Smrg } 40297ec681f3Smrg 40307ec681f3Smrg if (pSwizzlePattern[i].s != 0) 40317ec681f3Smrg { 40327ec681f3Smrg UINT_16 mask = pSwizzlePattern[i].s; 40337ec681f3Smrg UINT_32 sBits = s; 40347ec681f3Smrg 40357ec681f3Smrg while (mask != 0) 40367ec681f3Smrg { 40377ec681f3Smrg if (mask & 1) 40387ec681f3Smrg { 40397ec681f3Smrg v ^= sBits & 1; 40407ec681f3Smrg } 40417ec681f3Smrg 40427ec681f3Smrg sBits >>= 1; 40437ec681f3Smrg mask >>= 1; 40447ec681f3Smrg } 40457ec681f3Smrg } 40467ec681f3Smrg 40477ec681f3Smrg offset |= (v << i); 40487ec681f3Smrg } 40497ec681f3Smrg 
40507ec681f3Smrg return offset; 40517ec681f3Smrg} 40527ec681f3Smrg 40537ec681f3Smrg/** 40547ec681f3Smrg************************************************************************************************************************ 40557ec681f3Smrg* Gfx10Lib::GetSwizzlePatternInfo 40567ec681f3Smrg* 40577ec681f3Smrg* @brief 40587ec681f3Smrg* Get swizzle pattern 40597ec681f3Smrg* 40607ec681f3Smrg* @return 40617ec681f3Smrg* Swizzle pattern information 40627ec681f3Smrg************************************************************************************************************************ 40637ec681f3Smrg*/ 40647ec681f3Smrgconst ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( 40657ec681f3Smrg AddrSwizzleMode swizzleMode, ///< Swizzle mode 40667ec681f3Smrg AddrResourceType resourceType, ///< Resource type 40677ec681f3Smrg UINT_32 elemLog2, ///< Element size in bytes log2 40687ec681f3Smrg UINT_32 numFrag ///< Number of fragment 40697ec681f3Smrg ) const 40707ec681f3Smrg{ 40717ec681f3Smrg const UINT_32 index = IsXor(swizzleMode) ? 
(m_colorBaseIndex + elemLog2) : elemLog2; 40727ec681f3Smrg const ADDR_SW_PATINFO* patInfo = NULL; 40737ec681f3Smrg const UINT_32 swizzleMask = 1 << swizzleMode; 40747ec681f3Smrg 40757ec681f3Smrg if (IsBlockVariable(swizzleMode)) 40767ec681f3Smrg { 40777ec681f3Smrg if (m_blockVarSizeLog2 != 0) 40787ec681f3Smrg { 40797ec681f3Smrg ADDR_ASSERT(m_settings.supportRbPlus); 40807ec681f3Smrg 40817ec681f3Smrg if (IsRtOptSwizzle(swizzleMode)) 40827ec681f3Smrg { 40837ec681f3Smrg if (numFrag == 1) 40847ec681f3Smrg { 40857ec681f3Smrg patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO; 40867ec681f3Smrg } 40877ec681f3Smrg else if (numFrag == 2) 40887ec681f3Smrg { 40897ec681f3Smrg patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO; 40907ec681f3Smrg } 40917ec681f3Smrg else if (numFrag == 4) 40927ec681f3Smrg { 40937ec681f3Smrg patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO; 40947ec681f3Smrg } 40957ec681f3Smrg else 40967ec681f3Smrg { 40977ec681f3Smrg ADDR_ASSERT(numFrag == 8); 40987ec681f3Smrg patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO; 40997ec681f3Smrg } 41007ec681f3Smrg } 41017ec681f3Smrg else if (IsZOrderSwizzle(swizzleMode)) 41027ec681f3Smrg { 41037ec681f3Smrg if (numFrag == 1) 41047ec681f3Smrg { 41057ec681f3Smrg patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO; 41067ec681f3Smrg } 41077ec681f3Smrg else if (numFrag == 2) 41087ec681f3Smrg { 41097ec681f3Smrg patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO; 41107ec681f3Smrg } 41117ec681f3Smrg else if (numFrag == 4) 41127ec681f3Smrg { 41137ec681f3Smrg patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO; 41147ec681f3Smrg } 41157ec681f3Smrg else 41167ec681f3Smrg { 41177ec681f3Smrg ADDR_ASSERT(numFrag == 8); 41187ec681f3Smrg patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO; 41197ec681f3Smrg } 41207ec681f3Smrg } 41217ec681f3Smrg } 41227ec681f3Smrg } 41237ec681f3Smrg else if (IsLinear(swizzleMode) == FALSE) 41247ec681f3Smrg { 41257ec681f3Smrg if (resourceType == ADDR_RSRC_TEX_3D) 41267ec681f3Smrg { 41277ec681f3Smrg ADDR_ASSERT(numFrag == 1); 
41287ec681f3Smrg 41297ec681f3Smrg if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0) 41307ec681f3Smrg { 41317ec681f3Smrg if (IsRtOptSwizzle(swizzleMode)) 41327ec681f3Smrg { 41337ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41347ec681f3Smrg GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; 41357ec681f3Smrg } 41367ec681f3Smrg else if (IsZOrderSwizzle(swizzleMode)) 41377ec681f3Smrg { 41387ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41397ec681f3Smrg GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; 41407ec681f3Smrg } 41417ec681f3Smrg else if (IsDisplaySwizzle(resourceType, swizzleMode)) 41427ec681f3Smrg { 41437ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X); 41447ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41457ec681f3Smrg GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO; 41467ec681f3Smrg } 41477ec681f3Smrg else 41487ec681f3Smrg { 41497ec681f3Smrg ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode)); 41507ec681f3Smrg 41517ec681f3Smrg if (IsBlock4kb(swizzleMode)) 41527ec681f3Smrg { 41537ec681f3Smrg if (swizzleMode == ADDR_SW_4KB_S) 41547ec681f3Smrg { 41557ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41567ec681f3Smrg GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO; 41577ec681f3Smrg } 41587ec681f3Smrg else 41597ec681f3Smrg { 41607ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); 41617ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41627ec681f3Smrg GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO; 41637ec681f3Smrg } 41647ec681f3Smrg } 41657ec681f3Smrg else 41667ec681f3Smrg { 41677ec681f3Smrg if (swizzleMode == ADDR_SW_64KB_S) 41687ec681f3Smrg { 41697ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41707ec681f3Smrg GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO; 41717ec681f3Smrg } 41727ec681f3Smrg else if (swizzleMode == ADDR_SW_64KB_S_X) 41737ec681f3Smrg { 41747ec681f3Smrg patInfo = m_settings.supportRbPlus ? 
41757ec681f3Smrg GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO; 41767ec681f3Smrg } 41777ec681f3Smrg else 41787ec681f3Smrg { 41797ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); 41807ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41817ec681f3Smrg GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO; 41827ec681f3Smrg } 41837ec681f3Smrg } 41847ec681f3Smrg } 41857ec681f3Smrg } 41867ec681f3Smrg } 41877ec681f3Smrg else 41887ec681f3Smrg { 41897ec681f3Smrg if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0) 41907ec681f3Smrg { 41917ec681f3Smrg if (IsBlock256b(swizzleMode)) 41927ec681f3Smrg { 41937ec681f3Smrg if (swizzleMode == ADDR_SW_256B_S) 41947ec681f3Smrg { 41957ec681f3Smrg patInfo = m_settings.supportRbPlus ? 41967ec681f3Smrg GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO; 41977ec681f3Smrg } 41987ec681f3Smrg else 41997ec681f3Smrg { 42007ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D); 42017ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42027ec681f3Smrg GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO; 42037ec681f3Smrg } 42047ec681f3Smrg } 42057ec681f3Smrg else if (IsBlock4kb(swizzleMode)) 42067ec681f3Smrg { 42077ec681f3Smrg if (IsStandardSwizzle(resourceType, swizzleMode)) 42087ec681f3Smrg { 42097ec681f3Smrg if (swizzleMode == ADDR_SW_4KB_S) 42107ec681f3Smrg { 42117ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42127ec681f3Smrg GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO; 42137ec681f3Smrg } 42147ec681f3Smrg else 42157ec681f3Smrg { 42167ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); 42177ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42187ec681f3Smrg GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO; 42197ec681f3Smrg } 42207ec681f3Smrg } 42217ec681f3Smrg else 42227ec681f3Smrg { 42237ec681f3Smrg if (swizzleMode == ADDR_SW_4KB_D) 42247ec681f3Smrg { 42257ec681f3Smrg patInfo = m_settings.supportRbPlus ? 
42267ec681f3Smrg GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO; 42277ec681f3Smrg } 42287ec681f3Smrg else 42297ec681f3Smrg { 42307ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X); 42317ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42327ec681f3Smrg GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO; 42337ec681f3Smrg } 42347ec681f3Smrg } 42357ec681f3Smrg } 42367ec681f3Smrg else 42377ec681f3Smrg { 42387ec681f3Smrg if (IsRtOptSwizzle(swizzleMode)) 42397ec681f3Smrg { 42407ec681f3Smrg if (numFrag == 1) 42417ec681f3Smrg { 42427ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42437ec681f3Smrg GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; 42447ec681f3Smrg } 42457ec681f3Smrg else if (numFrag == 2) 42467ec681f3Smrg { 42477ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42487ec681f3Smrg GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO; 42497ec681f3Smrg } 42507ec681f3Smrg else if (numFrag == 4) 42517ec681f3Smrg { 42527ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42537ec681f3Smrg GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO; 42547ec681f3Smrg } 42557ec681f3Smrg else 42567ec681f3Smrg { 42577ec681f3Smrg ADDR_ASSERT(numFrag == 8); 42587ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42597ec681f3Smrg GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO; 42607ec681f3Smrg } 42617ec681f3Smrg } 42627ec681f3Smrg else if (IsZOrderSwizzle(swizzleMode)) 42637ec681f3Smrg { 42647ec681f3Smrg if (numFrag == 1) 42657ec681f3Smrg { 42667ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42677ec681f3Smrg GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; 42687ec681f3Smrg } 42697ec681f3Smrg else if (numFrag == 2) 42707ec681f3Smrg { 42717ec681f3Smrg patInfo = m_settings.supportRbPlus ? 
42727ec681f3Smrg GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO; 42737ec681f3Smrg } 42747ec681f3Smrg else if (numFrag == 4) 42757ec681f3Smrg { 42767ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42777ec681f3Smrg GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO; 42787ec681f3Smrg } 42797ec681f3Smrg else 42807ec681f3Smrg { 42817ec681f3Smrg ADDR_ASSERT(numFrag == 8); 42827ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42837ec681f3Smrg GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO; 42847ec681f3Smrg } 42857ec681f3Smrg } 42867ec681f3Smrg else if (IsDisplaySwizzle(resourceType, swizzleMode)) 42877ec681f3Smrg { 42887ec681f3Smrg if (swizzleMode == ADDR_SW_64KB_D) 42897ec681f3Smrg { 42907ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42917ec681f3Smrg GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO; 42927ec681f3Smrg } 42937ec681f3Smrg else if (swizzleMode == ADDR_SW_64KB_D_X) 42947ec681f3Smrg { 42957ec681f3Smrg patInfo = m_settings.supportRbPlus ? 42967ec681f3Smrg GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO; 42977ec681f3Smrg } 42987ec681f3Smrg else 42997ec681f3Smrg { 43007ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T); 43017ec681f3Smrg patInfo = m_settings.supportRbPlus ? 43027ec681f3Smrg GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO; 43037ec681f3Smrg } 43047ec681f3Smrg } 43057ec681f3Smrg else 43067ec681f3Smrg { 43077ec681f3Smrg if (swizzleMode == ADDR_SW_64KB_S) 43087ec681f3Smrg { 43097ec681f3Smrg patInfo = m_settings.supportRbPlus ? 43107ec681f3Smrg GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO; 43117ec681f3Smrg } 43127ec681f3Smrg else if (swizzleMode == ADDR_SW_64KB_S_X) 43137ec681f3Smrg { 43147ec681f3Smrg patInfo = m_settings.supportRbPlus ? 
43157ec681f3Smrg GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO; 43167ec681f3Smrg } 43177ec681f3Smrg else 43187ec681f3Smrg { 43197ec681f3Smrg ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); 43207ec681f3Smrg patInfo = m_settings.supportRbPlus ? 43217ec681f3Smrg GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO; 43227ec681f3Smrg } 43237ec681f3Smrg } 43247ec681f3Smrg } 43257ec681f3Smrg } 43267ec681f3Smrg } 43277ec681f3Smrg } 43287ec681f3Smrg 43297ec681f3Smrg return (patInfo != NULL) ? &patInfo[index] : NULL; 43307ec681f3Smrg} 43317ec681f3Smrg 43327ec681f3Smrg 43337ec681f3Smrg/** 43347ec681f3Smrg************************************************************************************************************************ 43357ec681f3Smrg* Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled 43367ec681f3Smrg* 43377ec681f3Smrg* @brief 43387ec681f3Smrg* Internal function to calculate address from coord for micro tiled swizzle surface 43397ec681f3Smrg* 43407ec681f3Smrg* @return 43417ec681f3Smrg* ADDR_E_RETURNCODE 43427ec681f3Smrg************************************************************************************************************************ 43437ec681f3Smrg*/ 43447ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled( 43457ec681f3Smrg const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure 43467ec681f3Smrg ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure 43477ec681f3Smrg ) const 43487ec681f3Smrg{ 43497ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; 43507ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; 43517ec681f3Smrg ADDR2_MIP_INFO mipInfo[MaxMipLevels]; 43527ec681f3Smrg 43537ec681f3Smrg localIn.swizzleMode = pIn->swizzleMode; 43547ec681f3Smrg localIn.flags = pIn->flags; 43557ec681f3Smrg localIn.resourceType = pIn->resourceType; 43567ec681f3Smrg localIn.bpp = pIn->bpp; 43577ec681f3Smrg localIn.width = Max(pIn->unalignedWidth, 1u); 43587ec681f3Smrg localIn.height 
= Max(pIn->unalignedHeight, 1u); 43597ec681f3Smrg localIn.numSlices = Max(pIn->numSlices, 1u); 43607ec681f3Smrg localIn.numMipLevels = Max(pIn->numMipLevels, 1u); 43617ec681f3Smrg localIn.numSamples = Max(pIn->numSamples, 1u); 43627ec681f3Smrg localIn.numFrags = Max(pIn->numFrags, 1u); 43637ec681f3Smrg localOut.pMipInfo = mipInfo; 43647ec681f3Smrg 43657ec681f3Smrg ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut); 43667ec681f3Smrg 43677ec681f3Smrg if (ret == ADDR_OK) 43687ec681f3Smrg { 43697ec681f3Smrg const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); 43707ec681f3Smrg const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1; 43717ec681f3Smrg const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode); 43727ec681f3Smrg const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2]; 43737ec681f3Smrg 43747ec681f3Smrg if (eqIndex != ADDR_INVALID_EQUATION_INDEX) 43757ec681f3Smrg { 43767ec681f3Smrg const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth; 43777ec681f3Smrg const UINT_32 yb = pIn->y / localOut.blockHeight; 43787ec681f3Smrg const UINT_32 xb = pIn->x / localOut.blockWidth; 43797ec681f3Smrg const UINT_32 blockIndex = yb * pb + xb; 43807ec681f3Smrg const UINT_32 blockSize = 256; 43817ec681f3Smrg const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex], 43827ec681f3Smrg pIn->x << elemLog2, 43837ec681f3Smrg pIn->y, 43847ec681f3Smrg 0); 43857ec681f3Smrg pOut->addr = localOut.sliceSize * pIn->slice + 43867ec681f3Smrg mipInfo[pIn->mipId].macroBlockOffset + 43877ec681f3Smrg (blockIndex * blockSize) + 43887ec681f3Smrg blk256Offset; 43897ec681f3Smrg } 43907ec681f3Smrg else 43917ec681f3Smrg { 43927ec681f3Smrg ret = ADDR_INVALIDPARAMS; 43937ec681f3Smrg } 43947ec681f3Smrg } 43957ec681f3Smrg 43967ec681f3Smrg return ret; 43977ec681f3Smrg} 43987ec681f3Smrg 43997ec681f3Smrg/** 
************************************************************************************************************************
*   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
*
*   @brief
*       Internal function to calculate address from coord for macro tiled swizzle surface. The surface layout is
*       first recomputed with ComputeSurfaceInfoMacroTiled(). Multi-fragment surfaces then derive the offset inside
*       a block from the swizzle pattern tables; single-fragment surfaces derive it from the address equation and
*       also account for mip levels (macroBlockOffset and mip tail coordinates). For xor swizzle modes the
*       in-block offset is XOR-ed with the masked, shifted pipeBankXor value.
*
*   @return
*       ADDR_E_RETURNCODE: the error from ComputeSurfaceInfoMacroTiled(), ADDR_INVALIDPARAMS when no swizzle
*       pattern / equation is available for the request, otherwise ADDR_OK
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
    const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];

    // Zero counts/dimensions from the caller are treated as 1.
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.flags        = pIn->flags;
    localIn.resourceType = pIn->resourceType;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unalignedWidth,  1u);
    localIn.height       = Max(pIn->unalignedHeight, 1u);
    localIn.numSlices    = Max(pIn->numSlices,       1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
    localIn.numSamples   = Max(pIn->numSamples,      1u);
    localIn.numFrags     = Max(pIn->numFrags,        1u);
    localOut.pMipInfo    = mipInfo;

    ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);

    if (ret == ADDR_OK)
    {
        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
        const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
        const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
        const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
        // Only xor modes use the caller's pipeBankXor: keep its pipe and bank bits, shift them above the pipe
        // interleave bits and clamp the result to the block size.
        const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
                                    (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;

        if (localIn.numFrags > 1)
        {
            // Multi-fragment path: in-block offset comes from the swizzle pattern (x, y, slice and sample).
            const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                    pIn->resourceType,
                                                                    elemLog2,
                                                                    localIn.numFrags);

            if (pPatInfo != NULL)
            {
                const UINT_32 pb     = localOut.pitch / localOut.blockWidth;    // blocks per row
                const UINT_32 yb     = pIn->y / localOut.blockHeight;
                const UINT_32 xb     = pIn->x / localOut.blockWidth;
                const UINT_64 blkIdx = yb * pb + xb;

                // NOTE(review): the expanded pattern buffer holds 20 entries while blkSizeLog2 entries are
                // consumed below - presumably blkSizeLog2 never exceeds 20 here; confirm for variable-size blocks.
                ADDR_BIT_SETTING fullSwizzlePattern[20];
                GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

                const UINT_32 blkOffset =
                    ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
                                                    blkSizeLog2,
                                                    pIn->x,
                                                    pIn->y,
                                                    pIn->slice,
                                                    pIn->sample);

                pOut->addr = (localOut.sliceSize * pIn->slice) +
                             (blkIdx << blkSizeLog2) +
                             (blkOffset ^ pipeBankXor);
            }
            else
            {
                ret = ADDR_INVALIDPARAMS;
            }
        }
        else
        {
            // Single-fragment path: in-block offset comes from the address equation. Row 1 of the lookup table
            // is used for 3D resources, row 0 for everything else.
            const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
            const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
            const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];

            if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
            {
                // A mip level is treated as living in the mip tail when its mipTailOffset is non-zero; its
                // coordinates are then biased by the mip's tail coordinates before evaluating the equation.
                const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
                // Non-thin swizzles address whole groups of blockSlices slices at a time.
                const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
                const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
                const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
                const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
                const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
                const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
                // The block index is built from the caller's unbiased x/y and the pitch of the selected mip.
                const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
                const UINT_32 yb        = pIn->y / localOut.blockHeight;
                const UINT_32 xb        = pIn->x / localOut.blockWidth;
                const UINT_64 blkIdx    = yb * pb + xb;
                const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
                                                                    x << elemLog2,
                                                                    y,
                                                                    z);
                pOut->addr = sliceSize * sliceId +
                             mipInfo[pIn->mipId].macroBlockOffset +
                             (blkIdx << blkSizeLog2) +
                             (blkOffset ^ pipeBankXor);
            }
            else
            {
                ret = ADDR_INVALIDPARAMS;
            }
        }
    }

    return ret;
}

/**
45167ec681f3Smrg************************************************************************************************************************ 45177ec681f3Smrg* Gfx10Lib::HwlComputeMaxBaseAlignments 45187ec681f3Smrg* 45197ec681f3Smrg* @brief 45207ec681f3Smrg* Gets maximum alignments 45217ec681f3Smrg* @return 45227ec681f3Smrg* maximum alignments 45237ec681f3Smrg************************************************************************************************************************ 45247ec681f3Smrg*/ 45257ec681f3SmrgUINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const 45267ec681f3Smrg{ 45277ec681f3Smrg return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K; 45287ec681f3Smrg} 45297ec681f3Smrg 45307ec681f3Smrg/** 45317ec681f3Smrg************************************************************************************************************************ 45327ec681f3Smrg* Gfx10Lib::HwlComputeMaxMetaBaseAlignments 45337ec681f3Smrg* 45347ec681f3Smrg* @brief 45357ec681f3Smrg* Gets maximum alignments for metadata 45367ec681f3Smrg* @return 45377ec681f3Smrg* maximum alignments for metadata 45387ec681f3Smrg************************************************************************************************************************ 45397ec681f3Smrg*/ 45407ec681f3SmrgUINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const 45417ec681f3Smrg{ 45427ec681f3Smrg Dim3d metaBlk; 45437ec681f3Smrg 45447ec681f3Smrg const AddrSwizzleMode ValidSwizzleModeForXmask[] = 45457ec681f3Smrg { 45467ec681f3Smrg ADDR_SW_64KB_Z_X, 45477ec681f3Smrg m_blockVarSizeLog2 ? 
ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X, 45487ec681f3Smrg }; 45497ec681f3Smrg 45507ec681f3Smrg UINT_32 maxBaseAlignHtile = 0; 45517ec681f3Smrg UINT_32 maxBaseAlignCmask = 0; 45527ec681f3Smrg 45537ec681f3Smrg for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++) 45547ec681f3Smrg { 45557ec681f3Smrg for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++) 45567ec681f3Smrg { 45577ec681f3Smrg for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++) 45587ec681f3Smrg { 45597ec681f3Smrg // Max base alignment for Htile 45607ec681f3Smrg const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil, 45617ec681f3Smrg ADDR_RSRC_TEX_2D, 45627ec681f3Smrg ValidSwizzleModeForXmask[swIdx], 45637ec681f3Smrg bppLog2, 45647ec681f3Smrg numFragLog2, 45657ec681f3Smrg TRUE, 45667ec681f3Smrg &metaBlk); 45677ec681f3Smrg 45687ec681f3Smrg maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile); 45697ec681f3Smrg } 45707ec681f3Smrg } 45717ec681f3Smrg 45727ec681f3Smrg // Max base alignment for Cmask 45737ec681f3Smrg const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask, 45747ec681f3Smrg ADDR_RSRC_TEX_2D, 45757ec681f3Smrg ValidSwizzleModeForXmask[swIdx], 45767ec681f3Smrg 0, 45777ec681f3Smrg 0, 45787ec681f3Smrg TRUE, 45797ec681f3Smrg &metaBlk); 45807ec681f3Smrg 45817ec681f3Smrg maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask); 45827ec681f3Smrg } 45837ec681f3Smrg 45847ec681f3Smrg // Max base alignment for 2D Dcc 45857ec681f3Smrg const AddrSwizzleMode ValidSwizzleModeForDcc2D[] = 45867ec681f3Smrg { 45877ec681f3Smrg ADDR_SW_64KB_S_X, 45887ec681f3Smrg ADDR_SW_64KB_D_X, 45897ec681f3Smrg ADDR_SW_64KB_R_X, 45907ec681f3Smrg m_blockVarSizeLog2 ? 
ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X, 45917ec681f3Smrg }; 45927ec681f3Smrg 45937ec681f3Smrg UINT_32 maxBaseAlignDcc2D = 0; 45947ec681f3Smrg 45957ec681f3Smrg for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++) 45967ec681f3Smrg { 45977ec681f3Smrg for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++) 45987ec681f3Smrg { 45997ec681f3Smrg for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++) 46007ec681f3Smrg { 46017ec681f3Smrg const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor, 46027ec681f3Smrg ADDR_RSRC_TEX_2D, 46037ec681f3Smrg ValidSwizzleModeForDcc2D[swIdx], 46047ec681f3Smrg bppLog2, 46057ec681f3Smrg numFragLog2, 46067ec681f3Smrg TRUE, 46077ec681f3Smrg &metaBlk); 46087ec681f3Smrg 46097ec681f3Smrg maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D); 46107ec681f3Smrg } 46117ec681f3Smrg } 46127ec681f3Smrg } 46137ec681f3Smrg 46147ec681f3Smrg // Max base alignment for 3D Dcc 46157ec681f3Smrg const AddrSwizzleMode ValidSwizzleModeForDcc3D[] = 46167ec681f3Smrg { 46177ec681f3Smrg ADDR_SW_64KB_Z_X, 46187ec681f3Smrg ADDR_SW_64KB_S_X, 46197ec681f3Smrg ADDR_SW_64KB_D_X, 46207ec681f3Smrg ADDR_SW_64KB_R_X, 46217ec681f3Smrg m_blockVarSizeLog2 ? 
ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X, 46227ec681f3Smrg }; 46237ec681f3Smrg 46247ec681f3Smrg UINT_32 maxBaseAlignDcc3D = 0; 46257ec681f3Smrg 46267ec681f3Smrg for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++) 46277ec681f3Smrg { 46287ec681f3Smrg for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++) 46297ec681f3Smrg { 46307ec681f3Smrg const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor, 46317ec681f3Smrg ADDR_RSRC_TEX_3D, 46327ec681f3Smrg ValidSwizzleModeForDcc3D[swIdx], 46337ec681f3Smrg bppLog2, 46347ec681f3Smrg 0, 46357ec681f3Smrg TRUE, 46367ec681f3Smrg &metaBlk); 46377ec681f3Smrg 46387ec681f3Smrg maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D); 46397ec681f3Smrg } 46407ec681f3Smrg } 46417ec681f3Smrg 46427ec681f3Smrg return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D)); 46437ec681f3Smrg} 46447ec681f3Smrg 46457ec681f3Smrg/** 46467ec681f3Smrg************************************************************************************************************************ 46477ec681f3Smrg* Gfx10Lib::GetMetaElementSizeLog2 46487ec681f3Smrg* 46497ec681f3Smrg* @brief 46507ec681f3Smrg* Gets meta data element size log2 46517ec681f3Smrg* @return 46527ec681f3Smrg* Meta data element size log2 46537ec681f3Smrg************************************************************************************************************************ 46547ec681f3Smrg*/ 46557ec681f3SmrgINT_32 Gfx10Lib::GetMetaElementSizeLog2( 46567ec681f3Smrg Gfx10DataType dataType) ///< Data surface type 46577ec681f3Smrg{ 46587ec681f3Smrg INT_32 elemSizeLog2 = 0; 46597ec681f3Smrg 46607ec681f3Smrg if (dataType == Gfx10DataColor) 46617ec681f3Smrg { 46627ec681f3Smrg elemSizeLog2 = 0; 46637ec681f3Smrg } 46647ec681f3Smrg else if (dataType == Gfx10DataDepthStencil) 46657ec681f3Smrg { 46667ec681f3Smrg elemSizeLog2 = 2; 46677ec681f3Smrg } 46687ec681f3Smrg else 46697ec681f3Smrg { 46707ec681f3Smrg 
ADDR_ASSERT(dataType == Gfx10DataFmask); 46717ec681f3Smrg elemSizeLog2 = -1; 46727ec681f3Smrg } 46737ec681f3Smrg 46747ec681f3Smrg return elemSizeLog2; 46757ec681f3Smrg} 46767ec681f3Smrg 46777ec681f3Smrg/** 46787ec681f3Smrg************************************************************************************************************************ 46797ec681f3Smrg* Gfx10Lib::GetMetaCacheSizeLog2 46807ec681f3Smrg* 46817ec681f3Smrg* @brief 46827ec681f3Smrg* Gets meta data cache line size log2 46837ec681f3Smrg* @return 46847ec681f3Smrg* Meta data cache line size log2 46857ec681f3Smrg************************************************************************************************************************ 46867ec681f3Smrg*/ 46877ec681f3SmrgINT_32 Gfx10Lib::GetMetaCacheSizeLog2( 46887ec681f3Smrg Gfx10DataType dataType) ///< Data surface type 46897ec681f3Smrg{ 46907ec681f3Smrg INT_32 cacheSizeLog2 = 0; 46917ec681f3Smrg 46927ec681f3Smrg if (dataType == Gfx10DataColor) 46937ec681f3Smrg { 46947ec681f3Smrg cacheSizeLog2 = 6; 46957ec681f3Smrg } 46967ec681f3Smrg else if (dataType == Gfx10DataDepthStencil) 46977ec681f3Smrg { 46987ec681f3Smrg cacheSizeLog2 = 8; 46997ec681f3Smrg } 47007ec681f3Smrg else 47017ec681f3Smrg { 47027ec681f3Smrg ADDR_ASSERT(dataType == Gfx10DataFmask); 47037ec681f3Smrg cacheSizeLog2 = 8; 47047ec681f3Smrg } 47057ec681f3Smrg return cacheSizeLog2; 47067ec681f3Smrg} 47077ec681f3Smrg 47087ec681f3Smrg/** 47097ec681f3Smrg************************************************************************************************************************ 47107ec681f3Smrg* Gfx10Lib::HwlComputeSurfaceInfoLinear 47117ec681f3Smrg* 47127ec681f3Smrg* @brief 47137ec681f3Smrg* Internal function to calculate alignment for linear surface 47147ec681f3Smrg* 47157ec681f3Smrg* @return 47167ec681f3Smrg* ADDR_E_RETURNCODE 47177ec681f3Smrg************************************************************************************************************************ 47187ec681f3Smrg*/ 
47197ec681f3SmrgADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear( 47207ec681f3Smrg const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure 47217ec681f3Smrg ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure 47227ec681f3Smrg ) const 47237ec681f3Smrg{ 47247ec681f3Smrg ADDR_E_RETURNCODE returnCode = ADDR_OK; 47257ec681f3Smrg 47267ec681f3Smrg if (IsTex1d(pIn->resourceType) && (pIn->height > 1)) 47277ec681f3Smrg { 47287ec681f3Smrg returnCode = ADDR_INVALIDPARAMS; 47297ec681f3Smrg } 47307ec681f3Smrg else 47317ec681f3Smrg { 47327ec681f3Smrg const UINT_32 elementBytes = pIn->bpp >> 3; 47337ec681f3Smrg const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes); 47347ec681f3Smrg const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1; 47357ec681f3Smrg UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign); 47367ec681f3Smrg UINT_32 actualHeight = pIn->height; 47377ec681f3Smrg UINT_64 sliceSize = 0; 47387ec681f3Smrg 47397ec681f3Smrg if (pIn->numMipLevels > 1) 47407ec681f3Smrg { 47417ec681f3Smrg for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--) 47427ec681f3Smrg { 47437ec681f3Smrg UINT_32 mipWidth, mipHeight; 47447ec681f3Smrg 47457ec681f3Smrg GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight); 47467ec681f3Smrg 47477ec681f3Smrg const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign); 47487ec681f3Smrg 47497ec681f3Smrg if (pOut->pMipInfo != NULL) 47507ec681f3Smrg { 47517ec681f3Smrg pOut->pMipInfo[i].pitch = mipActualWidth; 47527ec681f3Smrg pOut->pMipInfo[i].height = mipHeight; 47537ec681f3Smrg pOut->pMipInfo[i].depth = mipDepth; 47547ec681f3Smrg pOut->pMipInfo[i].offset = sliceSize; 47557ec681f3Smrg pOut->pMipInfo[i].mipTailOffset = 0; 47567ec681f3Smrg pOut->pMipInfo[i].macroBlockOffset = sliceSize; 47577ec681f3Smrg } 47587ec681f3Smrg 47597ec681f3Smrg sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes; 
47607ec681f3Smrg } 47617ec681f3Smrg } 47627ec681f3Smrg else 47637ec681f3Smrg { 47647ec681f3Smrg returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight); 47657ec681f3Smrg 47667ec681f3Smrg if (returnCode == ADDR_OK) 47677ec681f3Smrg { 47687ec681f3Smrg sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes; 47697ec681f3Smrg 47707ec681f3Smrg if (pOut->pMipInfo != NULL) 47717ec681f3Smrg { 47727ec681f3Smrg pOut->pMipInfo[0].pitch = pitch; 47737ec681f3Smrg pOut->pMipInfo[0].height = actualHeight; 47747ec681f3Smrg pOut->pMipInfo[0].depth = mipDepth; 47757ec681f3Smrg pOut->pMipInfo[0].offset = 0; 47767ec681f3Smrg pOut->pMipInfo[0].mipTailOffset = 0; 47777ec681f3Smrg pOut->pMipInfo[0].macroBlockOffset = 0; 47787ec681f3Smrg } 47797ec681f3Smrg } 47807ec681f3Smrg } 47817ec681f3Smrg 47827ec681f3Smrg if (returnCode == ADDR_OK) 47837ec681f3Smrg { 47847ec681f3Smrg pOut->pitch = pitch; 47857ec681f3Smrg pOut->height = actualHeight; 47867ec681f3Smrg pOut->numSlices = pIn->numSlices; 47877ec681f3Smrg pOut->sliceSize = sliceSize; 47887ec681f3Smrg pOut->surfSize = sliceSize * pOut->numSlices; 47897ec681f3Smrg pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256; 47907ec681f3Smrg pOut->blockWidth = pitchAlign; 47917ec681f3Smrg pOut->blockHeight = 1; 47927ec681f3Smrg pOut->blockSlices = 1; 47937ec681f3Smrg 47947ec681f3Smrg // Following members are useless on GFX10 47957ec681f3Smrg pOut->mipChainPitch = 0; 47967ec681f3Smrg pOut->mipChainHeight = 0; 47977ec681f3Smrg pOut->mipChainSlice = 0; 47987ec681f3Smrg pOut->epitchIsHeight = FALSE; 47997ec681f3Smrg 48007ec681f3Smrg // Post calculation validate 48017ec681f3Smrg ADDR_ASSERT(pOut->sliceSize > 0); 48027ec681f3Smrg } 48037ec681f3Smrg } 48047ec681f3Smrg 48057ec681f3Smrg return returnCode; 48067ec681f3Smrg} 48077ec681f3Smrg 48087ec681f3Smrg} // V2 48097ec681f3Smrg} // Addr 4810