1/*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27/**
28************************************************************************************************************************
29* @file  gfx10addrlib.cpp
* @brief Contains the implementation for the Gfx10Lib class.
31************************************************************************************************************************
32*/
33
34#include "gfx10addrlib.h"
35#include "gfx10_gb_reg.h"
36
37#include "amdgpu_asic_addr.h"
38
39////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42namespace Addr
43{
44/**
45************************************************************************************************************************
46*   Gfx10HwlInit
47*
48*   @brief
*       Creates a Gfx10Lib object.
50*
51*   @return
*       Returns a Gfx10Lib object pointer.
53************************************************************************************************************************
54*/
55Addr::Lib* Gfx10HwlInit(const Client* pClient)
56{
57    return V2::Gfx10Lib::CreateObj(pClient);
58}
59
60namespace V2
61{
62
63////////////////////////////////////////////////////////////////////////////////////////////////////
64//                               Static Const Member
65////////////////////////////////////////////////////////////////////////////////////////////////////
66
67const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
69    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
70    {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
71    {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
72    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
73
74    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
75    {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
76    {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
77    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
78
79    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
80    {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
81    {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
82    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
83
84    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
85    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
86    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
87    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
88
89    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
90    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
91    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
92    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
93
94    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
95    {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
96    {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
97    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
98
99    {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
100    {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
101    {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
102    {0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_64KB_R_X
103
104    {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_Z_X
105    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
106    {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
107    {0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_VAR_R_X
108    {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
109};
110
111const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116/**
117************************************************************************************************************************
118*   Gfx10Lib::Gfx10Lib
119*
120*   @brief
121*       Constructor
122*
123************************************************************************************************************************
124*/
125Gfx10Lib::Gfx10Lib(const Client* pClient)
126    :
127    Lib(pClient),
128    m_numPkrLog2(0),
129    m_numSaLog2(0),
130    m_colorBaseIndex(0),
131    m_xmaskBaseIndex(0),
132    m_dccBaseIndex(0)
133{
134    memset(&m_settings, 0, sizeof(m_settings));
135    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136}
137
138/**
139************************************************************************************************************************
140*   Gfx10Lib::~Gfx10Lib
141*
142*   @brief
143*       Destructor
144************************************************************************************************************************
145*/
146Gfx10Lib::~Gfx10Lib()
147{
148}
149
150/**
151************************************************************************************************************************
152*   Gfx10Lib::HwlComputeHtileInfo
153*
154*   @brief
*       Interface function stub of AddrComputeHtileInfo
156*
157*   @return
158*       ADDR_E_RETURNCODE
159************************************************************************************************************************
160*/
161ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
162    const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
163    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
164    ) const
165{
166    ADDR_E_RETURNCODE ret = ADDR_OK;
167
168    if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169         ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
170        (pIn->hTileFlags.pipeAligned != TRUE))
171    {
172        ret = ADDR_INVALIDPARAMS;
173    }
174    else
175    {
176        Dim3d         metaBlk     = {};
177        const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
178                                                   ADDR_RSRC_TEX_2D,
179                                                   pIn->swizzleMode,
180                                                   0,
181                                                   0,
182                                                   TRUE,
183                                                   &metaBlk);
184
185        pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
186        pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187        pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188        pOut->metaBlkWidth  = metaBlk.w;
189        pOut->metaBlkHeight = metaBlk.h;
190
191        if (pIn->numMipLevels > 1)
192        {
193            ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194
195            UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196
197            for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198            {
199                UINT_32 mipWidth, mipHeight;
200
201                GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202
203                mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
204                mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205
206                const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
207                const UINT_32 heightInM    = mipHeight / metaBlk.h;
208                const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209
210                if (pOut->pMipInfo != NULL)
211                {
212                    pOut->pMipInfo[i].inMiptail = FALSE;
213                    pOut->pMipInfo[i].offset    = offset;
214                    pOut->pMipInfo[i].sliceSize = mipSliceSize;
215                }
216
217                offset += mipSliceSize;
218            }
219
220            pOut->sliceSize          = offset;
221            pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222            pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
223
224            if (pOut->pMipInfo != NULL)
225            {
226                for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227                {
228                    pOut->pMipInfo[i].inMiptail = TRUE;
229                    pOut->pMipInfo[i].offset    = 0;
230                    pOut->pMipInfo[i].sliceSize = 0;
231                }
232
233                if (pIn->firstMipIdInTail != pIn->numMipLevels)
234                {
235                    pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236                }
237            }
238        }
239        else
240        {
241            const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
242            const UINT_32 heightInM = pOut->height / metaBlk.h;
243
244            pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
245            pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
246            pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
247
248            if (pOut->pMipInfo != NULL)
249            {
250                pOut->pMipInfo[0].inMiptail = FALSE;
251                pOut->pMipInfo[0].offset    = 0;
252                pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253            }
254        }
255
256        // Get the HTILE address equation (copied from HtileAddrFromCoord).
257        // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
258        const UINT_32 index = m_xmaskBaseIndex;
259        const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
260
261        ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
262        pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
263    }
264
265    return ret;
266}
267
268/**
269************************************************************************************************************************
270*   Gfx10Lib::HwlComputeCmaskInfo
271*
272*   @brief
273*       Interface function stub of AddrComputeCmaskInfo
274*
275*   @return
276*       ADDR_E_RETURNCODE
277************************************************************************************************************************
278*/
279ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
280    const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
281    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
282    ) const
283{
284    ADDR_E_RETURNCODE ret = ADDR_OK;
285
286    if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
287        (pIn->cMaskFlags.pipeAligned != TRUE)   ||
288        ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
289         ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
290    {
291        ret = ADDR_INVALIDPARAMS;
292    }
293    else
294    {
295        Dim3d         metaBlk     = {};
296        const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
297                                                   ADDR_RSRC_TEX_2D,
298                                                   pIn->swizzleMode,
299                                                   0,
300                                                   0,
301                                                   TRUE,
302                                                   &metaBlk);
303
304        pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
305        pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
306        pOut->baseAlign     = metaBlkSize;
307        pOut->metaBlkWidth  = metaBlk.w;
308        pOut->metaBlkHeight = metaBlk.h;
309
310        if (pIn->numMipLevels > 1)
311        {
312            ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
313
314            UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
315
316            for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
317            {
318                UINT_32 mipWidth, mipHeight;
319
320                GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
321
322                mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
323                mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
324
325                const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
326                const UINT_32 heightInM = mipHeight / metaBlk.h;
327
328                if (pOut->pMipInfo != NULL)
329                {
330                    pOut->pMipInfo[i].inMiptail = FALSE;
331                    pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
332                    pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
333                }
334
335                metaBlkPerSlice += pitchInM * heightInM;
336            }
337
338            pOut->metaBlkNumPerSlice = metaBlkPerSlice;
339
340            if (pOut->pMipInfo != NULL)
341            {
342                for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
343                {
344                    pOut->pMipInfo[i].inMiptail = TRUE;
345                    pOut->pMipInfo[i].offset    = 0;
346                    pOut->pMipInfo[i].sliceSize = 0;
347                }
348
349                if (pIn->firstMipIdInTail != pIn->numMipLevels)
350                {
351                    pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
352                }
353            }
354        }
355        else
356        {
357            const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
358            const UINT_32 heightInM = pOut->height / metaBlk.h;
359
360            pOut->metaBlkNumPerSlice = pitchInM * heightInM;
361
362            if (pOut->pMipInfo != NULL)
363            {
364                pOut->pMipInfo[0].inMiptail = FALSE;
365                pOut->pMipInfo[0].offset    = 0;
366                pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
367            }
368        }
369
370        pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
371        pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
372
373        // Get the CMASK address equation (copied from CmaskAddrFromCoord)
374        const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
375        const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
376        const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
377        const UINT_8*  patIdxTable   =
378            (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
379            (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
380
381        ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
382        pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
383    }
384
385    return ret;
386}
387
388/**
389************************************************************************************************************************
390*   Gfx10Lib::HwlComputeDccInfo
391*
392*   @brief
393*       Interface function to compute DCC key info
394*
395*   @return
396*       ADDR_E_RETURNCODE
397************************************************************************************************************************
398*/
399ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
400    const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
401    ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
402    ) const
403{
404    ADDR_E_RETURNCODE ret = ADDR_OK;
405
406    if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
407    {
408        // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
409        // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
410        ret = ADDR_INVALIDPARAMS;
411    }
412    else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
413    {
414        // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
415        ret = ADDR_INVALIDPARAMS;
416    }
417    else
418    {
419        const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
420
421        {
422            // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
423            ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
424
425            const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
426
427            pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
428            pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
429            pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
430        }
431
432        if (ret == ADDR_OK)
433        {
434            Dim3d         metaBlk     = {};
435            const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
436            const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
437                                                       pIn->resourceType,
438                                                       pIn->swizzleMode,
439                                                       elemLog2,
440                                                       numFragLog2,
441                                                       pIn->dccKeyFlags.pipeAligned,
442                                                       &metaBlk);
443
444            pOut->dccRamBaseAlign   = metaBlkSize;
445            pOut->metaBlkWidth      = metaBlk.w;
446            pOut->metaBlkHeight     = metaBlk.h;
447            pOut->metaBlkDepth      = metaBlk.d;
448            pOut->metaBlkSize       = metaBlkSize;
449
450            pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
451            pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
452            pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
453
454            if (pIn->numMipLevels > 1)
455            {
456                ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
457
458                UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
459
460                for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
461                {
462                    UINT_32 mipWidth, mipHeight;
463
464                    GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
465
466                    mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
467                    mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
468
469                    const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
470                    const UINT_32 heightInM    = mipHeight / metaBlk.h;
471                    const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
472
473                    if (pOut->pMipInfo != NULL)
474                    {
475                        pOut->pMipInfo[i].inMiptail = FALSE;
476                        pOut->pMipInfo[i].offset    = offset;
477                        pOut->pMipInfo[i].sliceSize = mipSliceSize;
478                    }
479
480                    offset += mipSliceSize;
481                }
482
483                pOut->dccRamSliceSize    = offset;
484                pOut->metaBlkNumPerSlice = offset / metaBlkSize;
485                pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
486
487                if (pOut->pMipInfo != NULL)
488                {
489                    for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
490                    {
491                        pOut->pMipInfo[i].inMiptail = TRUE;
492                        pOut->pMipInfo[i].offset    = 0;
493                        pOut->pMipInfo[i].sliceSize = 0;
494                    }
495
496                    if (pIn->firstMipIdInTail != pIn->numMipLevels)
497                    {
498                        pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
499                    }
500                }
501            }
502            else
503            {
504                const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
505                const UINT_32 heightInM = pOut->height / metaBlk.h;
506
507                pOut->metaBlkNumPerSlice = pitchInM * heightInM;
508                pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
509                pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
510
511                if (pOut->pMipInfo != NULL)
512                {
513                    pOut->pMipInfo[0].inMiptail = FALSE;
514                    pOut->pMipInfo[0].offset    = 0;
515                    pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
516                }
517            }
518
519            // Get the DCC address equation (copied from DccAddrFromCoord)
520            const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
521            const UINT_32 numPipeLog2 = m_pipesLog2;
522            UINT_32       index       = m_dccBaseIndex + elemLog2;
523            const UINT_8* patIdxTable;
524
525            if (m_settings.supportRbPlus)
526            {
527                patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
528
529                if (pIn->dccKeyFlags.pipeAligned)
530                {
531                    index += MaxNumOfBpp;
532
533                    if (m_numPkrLog2 < 2)
534                    {
535                        index += m_pipesLog2 * MaxNumOfBpp;
536                    }
537                    else
538                    {
539                        // 4 groups for "m_numPkrLog2 < 2" case
540                        index += 4 * MaxNumOfBpp;
541
542                        const UINT_32 dccPipePerPkr = 3;
543
544                        index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
545                                 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
546                    }
547                }
548            }
549            else
550            {
551                patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
552
553                if (pIn->dccKeyFlags.pipeAligned)
554                {
555                    index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
556                }
557                else
558                {
559                    index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
560                }
561            }
562
563            ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
564            pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
565        }
566    }
567
568    return ret;
569}
570
571/**
572************************************************************************************************************************
573*   Gfx10Lib::HwlComputeCmaskAddrFromCoord
574*
575*   @brief
576*       Interface function stub of AddrComputeCmaskAddrFromCoord
577*
578*   @return
579*       ADDR_E_RETURNCODE
580************************************************************************************************************************
581*/
582ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
583    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
584    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
585{
586    // Only support pipe aligned CMask
587    ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
588
589    ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
590    input.size            = sizeof(input);
591    input.cMaskFlags      = pIn->cMaskFlags;
592    input.colorFlags      = pIn->colorFlags;
593    input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
594    input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
595    input.numSlices       = Max(pIn->numSlices,       1u);
596    input.swizzleMode     = pIn->swizzleMode;
597    input.resourceType    = pIn->resourceType;
598
599    ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
600    output.size = sizeof(output);
601
602    ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
603
604    if (returnCode == ADDR_OK)
605    {
606        const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
607        const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
608        const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
609        const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
610        const UINT_8*  patIdxTable   =
611            (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
612            (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
613
614
615        const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
616        const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
617        const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
618                                                                      blkSizeLog2 + 1, // +1 for nibble offset
619                                                                      pIn->x,
620                                                                      pIn->y,
621                                                                      pIn->slice,
622                                                                      0);
623        const UINT_32 xb       = pIn->x / output.metaBlkWidth;
624        const UINT_32 yb       = pIn->y / output.metaBlkHeight;
625        const UINT_32 pb       = output.pitch / output.metaBlkWidth;
626        const UINT_32 blkIndex = (yb * pb) + xb;
627        const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
628
629        pOut->addr = (output.sliceSize * pIn->slice) +
630                     (blkIndex * (1 << blkSizeLog2)) +
631                     ((blkOffset >> 1) ^ pipeXor);
632        pOut->bitPosition = (blkOffset & 1) << 2;
633    }
634
635    return returnCode;
636}
637
638/**
639************************************************************************************************************************
640*   Gfx10Lib::HwlComputeHtileAddrFromCoord
641*
642*   @brief
643*       Interface function stub of AddrComputeHtileAddrFromCoord
644*
645*   @return
646*       ADDR_E_RETURNCODE
647************************************************************************************************************************
648*/
649ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
650    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
651    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
652{
653    ADDR_E_RETURNCODE returnCode = ADDR_OK;
654
655    if (pIn->numMipLevels > 1)
656    {
657        returnCode = ADDR_NOTIMPLEMENTED;
658    }
659    else
660    {
661        ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
662        input.size            = sizeof(input);
663        input.hTileFlags      = pIn->hTileFlags;
664        input.depthFlags      = pIn->depthflags;
665        input.swizzleMode     = pIn->swizzleMode;
666        input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
667        input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
668        input.numSlices       = Max(pIn->numSlices,       1u);
669        input.numMipLevels    = 1;
670
671        ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
672        output.size = sizeof(output);
673
674        returnCode = ComputeHtileInfo(&input, &output);
675
676        if (returnCode == ADDR_OK)
677        {
678            const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
679            const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
680            const UINT_32  index         = m_xmaskBaseIndex + numSampleLog2;
681            const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
682
683
684            const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
685            const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
686            const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
687                                                                           blkSizeLog2 + 1, // +1 for nibble offset
688                                                                           pIn->x,
689                                                                           pIn->y,
690                                                                           pIn->slice,
691                                                                           0);
692            const UINT_32 xb       = pIn->x / output.metaBlkWidth;
693            const UINT_32 yb       = pIn->y / output.metaBlkHeight;
694            const UINT_32 pb       = output.pitch / output.metaBlkWidth;
695            const UINT_32 blkIndex = (yb * pb) + xb;
696            const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
697
698            pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
699                         (blkIndex * (1 << blkSizeLog2)) +
700                         ((blkOffset >> 1) ^ pipeXor);
701        }
702    }
703
704    return returnCode;
705}
706
707/**
708************************************************************************************************************************
709*   Gfx10Lib::HwlComputeHtileCoordFromAddr
710*
711*   @brief
712*       Interface function stub of AddrComputeHtileCoordFromAddr
713*
714*   @return
715*       ADDR_E_RETURNCODE
716************************************************************************************************************************
717*/
718ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
719    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
720    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
721{
722    ADDR_NOT_IMPLEMENTED();
723
724    return ADDR_OK;
725}
726
727/**
728************************************************************************************************************************
729*   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
730*
731*   @brief
732*       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
733*
734*   @return
735*       ADDR_E_RETURNCODE
736************************************************************************************************************************
737*/
738ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
739    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
740{
741    ADDR_E_RETURNCODE returnCode = ADDR_OK;
742
743    if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
744        (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
745        (pIn->dccKeyFlags.linear == TRUE)             ||
746        (pIn->numFrags           >  1)                ||
747        (pIn->numMipLevels       >  1)                ||
748        (pIn->mipId              >  0))
749    {
750        returnCode = ADDR_NOTSUPPORTED;
751    }
752    else if ((pIn->pitch == 0)         ||
753             (pIn->metaBlkWidth == 0)  ||
754             (pIn->metaBlkHeight == 0) ||
755             (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
756    {
757        returnCode = ADDR_NOTSUPPORTED;
758    }
759
760    return returnCode;
761}
762
763/**
764************************************************************************************************************************
765*   Gfx10Lib::HwlComputeDccAddrFromCoord
766*
767*   @brief
768*       Interface function stub of AddrComputeDccAddrFromCoord
769*
770*   @return
771*       N/A
772************************************************************************************************************************
773*/
774VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
775    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
776    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
777{
778    const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
779    const UINT_32 numPipeLog2 = m_pipesLog2;
780    const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
781    UINT_32       index       = m_dccBaseIndex + elemLog2;
782    const UINT_8* patIdxTable;
783
784    if (m_settings.supportRbPlus)
785    {
786        patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
787
788        if (pIn->dccKeyFlags.pipeAligned)
789        {
790            index += MaxNumOfBpp;
791
792            if (m_numPkrLog2 < 2)
793            {
794                index += m_pipesLog2 * MaxNumOfBpp;
795            }
796            else
797            {
798                // 4 groups for "m_numPkrLog2 < 2" case
799                index += 4 * MaxNumOfBpp;
800
801                const UINT_32 dccPipePerPkr = 3;
802
803                index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
804                         (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
805            }
806        }
807    }
808    else
809    {
810        patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
811
812        if (pIn->dccKeyFlags.pipeAligned)
813        {
814            index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
815        }
816        else
817        {
818            index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
819        }
820    }
821
822    const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
823    const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
824    const UINT_32  blkOffset   =
825        ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
826                                        blkSizeLog2 + 1, // +1 for nibble offset
827                                        pIn->x,
828                                        pIn->y,
829                                        pIn->slice,
830                                        0);
831    const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
832    const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
833    const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
834    const UINT_32 blkIndex = (yb * pb) + xb;
835    const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
836
837    pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
838                 (blkIndex * (1 << blkSizeLog2)) +
839                 ((blkOffset >> 1) ^ pipeXor);
840}
841
842/**
843************************************************************************************************************************
844*   Gfx10Lib::HwlInitGlobalParams
845*
846*   @brief
847*       Initializes global parameters
848*
849*   @return
850*       TRUE if all settings are valid
851*
852************************************************************************************************************************
853*/
854BOOL_32 Gfx10Lib::HwlInitGlobalParams(
855    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
856{
857    BOOL_32              valid = TRUE;
858    GB_ADDR_CONFIG_GFX10 gbAddrConfig;
859
860    gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
861
862    // These values are copied from CModel code
863    switch (gbAddrConfig.bits.NUM_PIPES)
864    {
865        case ADDR_CONFIG_1_PIPE:
866            m_pipes     = 1;
867            m_pipesLog2 = 0;
868            break;
869        case ADDR_CONFIG_2_PIPE:
870            m_pipes     = 2;
871            m_pipesLog2 = 1;
872            break;
873        case ADDR_CONFIG_4_PIPE:
874            m_pipes     = 4;
875            m_pipesLog2 = 2;
876            break;
877        case ADDR_CONFIG_8_PIPE:
878            m_pipes     = 8;
879            m_pipesLog2 = 3;
880            break;
881        case ADDR_CONFIG_16_PIPE:
882            m_pipes     = 16;
883            m_pipesLog2 = 4;
884            break;
885        case ADDR_CONFIG_32_PIPE:
886            m_pipes     = 32;
887            m_pipesLog2 = 5;
888            break;
889        case ADDR_CONFIG_64_PIPE:
890            m_pipes     = 64;
891            m_pipesLog2 = 6;
892            break;
893        default:
894            ADDR_ASSERT_ALWAYS();
895            valid = FALSE;
896            break;
897    }
898
899    switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
900    {
901        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
902            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
903            m_pipeInterleaveLog2  = 8;
904            break;
905        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
906            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
907            m_pipeInterleaveLog2  = 9;
908            break;
909        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
910            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
911            m_pipeInterleaveLog2  = 10;
912            break;
913        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
914            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
915            m_pipeInterleaveLog2  = 11;
916            break;
917        default:
918            ADDR_ASSERT_ALWAYS();
919            valid = FALSE;
920            break;
921    }
922
923    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
924    // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
925    // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
926    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
927
928    switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
929    {
930        case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
931            m_maxCompFrag     = 1;
932            m_maxCompFragLog2 = 0;
933            break;
934        case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
935            m_maxCompFrag     = 2;
936            m_maxCompFragLog2 = 1;
937            break;
938        case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
939            m_maxCompFrag     = 4;
940            m_maxCompFragLog2 = 2;
941            break;
942        case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
943            m_maxCompFrag     = 8;
944            m_maxCompFragLog2 = 3;
945            break;
946        default:
947            ADDR_ASSERT_ALWAYS();
948            valid = FALSE;
949            break;
950    }
951
952    {
953        // Skip unaligned case
954        m_xmaskBaseIndex += MaxNumOfAA;
955
956        m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
957        m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
958
959        if (m_settings.supportRbPlus)
960        {
961            m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
962            m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
963
964            ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
965
966            ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
967                          sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
968
969            if (m_numPkrLog2 >= 2)
970            {
971                m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
972                m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
973            }
974        }
975        else
976        {
977            const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
978                                        static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
979                                        1;
980
981            ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
982
983            ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
984                          sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
985        }
986    }
987
988    if (m_settings.supportRbPlus)
989    {
990        // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
991        // corresponding SW_64KB_* mode
992        m_blockVarSizeLog2 = m_pipesLog2 + 14;
993    }
994
995
996    if (valid)
997    {
998        InitEquationTable();
999    }
1000
1001    return valid;
1002}
1003
1004/**
1005************************************************************************************************************************
1006*   Gfx10Lib::HwlConvertChipFamily
1007*
1008*   @brief
1009*       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1010*   @return
1011*       ChipFamily
1012************************************************************************************************************************
1013*/
1014ChipFamily Gfx10Lib::HwlConvertChipFamily(
1015    UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
1016    UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1017{
1018    ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1019
1020    m_settings.dccUnsup3DSwDis  = 1;
1021    m_settings.dsMipmapHtileFix = 1;
1022
1023    switch (chipFamily)
1024    {
1025        case FAMILY_NV:
1026            if (ASICREV_IS_NAVI10_P(chipRevision))
1027            {
1028                m_settings.dsMipmapHtileFix = 0;
1029                m_settings.isDcn20          = 1;
1030            }
1031
1032            if (ASICREV_IS_NAVI12_P(chipRevision))
1033            {
1034                m_settings.isDcn20 = 1;
1035            }
1036
1037            if (ASICREV_IS_NAVI14_M(chipRevision))
1038            {
1039                m_settings.isDcn20 = 1;
1040            }
1041
1042            if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
1043            {
1044                m_settings.supportRbPlus   = 1;
1045                m_settings.dccUnsup3DSwDis = 0;
1046            }
1047
1048            if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
1049            {
1050                m_settings.supportRbPlus   = 1;
1051                m_settings.dccUnsup3DSwDis = 0;
1052            }
1053
1054            if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
1055            {
1056                m_settings.supportRbPlus   = 1;
1057                m_settings.dccUnsup3DSwDis = 0;
1058            }
1059
1060            if (ASICREV_IS_BEIGE_GOBY(chipRevision))
1061            {
1062                m_settings.supportRbPlus   = 1;
1063                m_settings.dccUnsup3DSwDis = 0;
1064            }
1065            break;
1066
1067        case FAMILY_VGH:
1068            if (ASICREV_IS_VANGOGH(chipRevision))
1069            {
1070                m_settings.supportRbPlus   = 1;
1071                m_settings.dccUnsup3DSwDis = 0;
1072            }
1073            else
1074            {
1075                ADDR_ASSERT(!"Unknown chip revision");
1076            }
1077
1078            break;
1079
1080        case FAMILY_YC:
1081            if (ASICREV_IS_YELLOW_CARP(chipRevision))
1082            {
1083                m_settings.supportRbPlus   = 1;
1084                m_settings.dccUnsup3DSwDis = 0;
1085            }
1086            else
1087            {
1088                ADDR_ASSERT(!"Unknown chip revision");
1089            }
1090
1091            break;
1092
1093        default:
1094            ADDR_ASSERT(!"Unknown chip family");
1095            break;
1096    }
1097
1098    m_configFlags.use32bppFor422Fmt = TRUE;
1099
1100    return family;
1101}
1102
1103/**
1104************************************************************************************************************************
1105*   Gfx10Lib::GetBlk256SizeLog2
1106*
1107*   @brief
1108*       Get block 256 size
1109*
1110*   @return
1111*       N/A
1112************************************************************************************************************************
1113*/
1114void Gfx10Lib::GetBlk256SizeLog2(
1115    AddrResourceType resourceType,      ///< [in] Resource type
1116    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1117    UINT_32          elemLog2,          ///< [in] element size log2
1118    UINT_32          numSamplesLog2,    ///< [in] number of samples
1119    Dim3d*           pBlock             ///< [out] block size
1120    ) const
1121{
1122    if (IsThin(resourceType, swizzleMode))
1123    {
1124        UINT_32 blockBits = 8 - elemLog2;
1125
1126        if (IsZOrderSwizzle(swizzleMode))
1127        {
1128            blockBits -= numSamplesLog2;
1129        }
1130
1131        pBlock->w = (blockBits >> 1) + (blockBits & 1);
1132        pBlock->h = (blockBits >> 1);
1133        pBlock->d = 0;
1134    }
1135    else
1136    {
1137        ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1138
1139        UINT_32 blockBits = 8 - elemLog2;
1140
1141        pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1142        pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1143        pBlock->h = (blockBits / 3);
1144    }
1145}
1146
1147/**
1148************************************************************************************************************************
1149*   Gfx10Lib::GetCompressedBlockSizeLog2
1150*
1151*   @brief
*       Get compressed block size (log2 of each dimension)
1153*
1154*   @return
1155*       N/A
1156************************************************************************************************************************
1157*/
1158void Gfx10Lib::GetCompressedBlockSizeLog2(
1159    Gfx10DataType    dataType,          ///< [in] Data type
1160    AddrResourceType resourceType,      ///< [in] Resource type
1161    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1162    UINT_32          elemLog2,          ///< [in] element size log2
1163    UINT_32          numSamplesLog2,    ///< [in] number of samples
1164    Dim3d*           pBlock             ///< [out] block size
1165    ) const
1166{
1167    if (dataType == Gfx10DataColor)
1168    {
1169        GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1170    }
1171    else
1172    {
1173        ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1174        pBlock->w = 3;
1175        pBlock->h = 3;
1176        pBlock->d = 0;
1177    }
1178}
1179
1180/**
1181************************************************************************************************************************
1182*   Gfx10Lib::GetMetaOverlapLog2
1183*
1184*   @brief
1185*       Get meta block overlap
1186*
*   @return
*       Meta block overlap in log2, clamped to be non-negative
1189************************************************************************************************************************
1190*/
1191INT_32 Gfx10Lib::GetMetaOverlapLog2(
1192    Gfx10DataType    dataType,          ///< [in] Data type
1193    AddrResourceType resourceType,      ///< [in] Resource type
1194    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1195    UINT_32          elemLog2,          ///< [in] element size log2
1196    UINT_32          numSamplesLog2     ///< [in] number of samples
1197    ) const
1198{
1199    Dim3d compBlock;
1200    Dim3d microBlock;
1201
1202    GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1203    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1204
1205    const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1206    const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1207    const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1208    const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1209    INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1210
1211    if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1212    {
1213        overlap++;
1214    }
1215
1216    // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1217    if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1218    {
1219        overlap--;
1220    }
1221    overlap = Max(overlap, 0);
1222    return overlap;
1223}
1224
1225/**
1226************************************************************************************************************************
1227*   Gfx10Lib::Get3DMetaOverlapLog2
1228*
1229*   @brief
1230*       Get 3d meta block overlap
1231*
*   @return
*       3D meta block overlap in log2 (0 for standard swizzle modes or when the result would be negative)
1234************************************************************************************************************************
1235*/
1236INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1237    AddrResourceType resourceType,      ///< [in] Resource type
1238    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1239    UINT_32          elemLog2           ///< [in] element size log2
1240    ) const
1241{
1242    Dim3d microBlock;
1243    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1244
1245    INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1246
1247    if (m_settings.supportRbPlus)
1248    {
1249        overlap++;
1250    }
1251
1252    if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1253    {
1254        overlap = 0;
1255    }
1256    return overlap;
1257}
1258
1259/**
1260************************************************************************************************************************
1261*   Gfx10Lib::GetPipeRotateAmount
1262*
1263*   @brief
1264*       Get pipe rotate amount
1265*
1266*   @return
1267*       Pipe rotate amount
1268************************************************************************************************************************
1269*/
1270
1271INT_32 Gfx10Lib::GetPipeRotateAmount(
1272    AddrResourceType resourceType,      ///< [in] Resource type
1273    AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1274    ) const
1275{
1276    INT_32 amount = 0;
1277
1278    if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1279    {
1280        amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1281                 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1282    }
1283
1284    return amount;
1285}
1286
1287/**
1288************************************************************************************************************************
1289*   Gfx10Lib::GetMetaBlkSize
1290*
1291*   @brief
1292*       Get metadata block size
1293*
1294*   @return
1295*       Meta block size
1296************************************************************************************************************************
1297*/
1298UINT_32 Gfx10Lib::GetMetaBlkSize(
1299    Gfx10DataType    dataType,          ///< [in] Data type
1300    AddrResourceType resourceType,      ///< [in] Resource type
1301    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1302    UINT_32          elemLog2,          ///< [in] element size log2
1303    UINT_32          numSamplesLog2,    ///< [in] number of samples
1304    BOOL_32          pipeAlign,         ///< [in] pipe align
1305    Dim3d*           pBlock             ///< [out] block size
1306    ) const
1307{
1308    INT_32 metablkSizeLog2;
1309
1310    {
1311        const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1312        const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1313        const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1314        const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1315                                          numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1316        const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1317        INT_32       numPipesLog2       = m_pipesLog2;
1318
1319        if (IsThin(resourceType, swizzleMode))
1320        {
1321            if ((pipeAlign == FALSE) ||
1322                (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1323                (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1324            {
1325                if (pipeAlign)
1326                {
1327                    metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1328                    metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1329                }
1330                else
1331                {
1332                    metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1333                }
1334            }
1335            else
1336            {
1337                if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1338                {
1339                    numPipesLog2++;
1340                }
1341
1342                INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1343
1344                if (numPipesLog2 >= 4)
1345                {
1346                    INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1347
1348                    // In 16Bpe 8xaa, we have an extra overlap bit
1349                    if ((pipeRotateLog2 > 0)  &&
1350                        (elemLog2 == 4)       &&
1351                        (numSamplesLog2 == 3) &&
1352                        (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1353                    {
1354                        overlapLog2++;
1355                    }
1356
1357                    metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1358                    metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1359
1360                    if (m_settings.supportRbPlus    &&
1361                        IsRtOptSwizzle(swizzleMode) &&
1362                        (numPipesLog2 == 6)         &&
1363                        (numSamplesLog2 == 3)       &&
1364                        (m_maxCompFragLog2 == 3)    &&
1365                        (metablkSizeLog2 < 15))
1366                    {
1367                        metablkSizeLog2 = 15;
1368                    }
1369                }
1370                else
1371                {
1372                    metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1373                }
1374
1375                if (dataType == Gfx10DataDepthStencil)
1376                {
1377                    // For htile surfaces, pad meta block size to 2K * num_pipes
1378                    metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1379                }
1380
1381                const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1382
1383                if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1384                {
1385                    const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1386
1387                    metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1388                }
1389            }
1390
1391            const INT_32 metablkBitsLog2 =
1392                metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1393            pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1394            pBlock->h = 1 << (metablkBitsLog2 >> 1);
1395            pBlock->d = 1;
1396        }
1397        else
1398        {
1399            ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1400
1401            if (pipeAlign)
1402            {
1403                if (m_settings.supportRbPlus         &&
1404                    (m_pipesLog2 == m_numSaLog2 + 1) &&
1405                    (m_pipesLog2 > 1)                &&
1406                    IsRbAligned(resourceType, swizzleMode))
1407                {
1408                    numPipesLog2++;
1409                }
1410
1411                const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1412
1413                metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1414                metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1415                metablkSizeLog2 = Max(metablkSizeLog2, 12);
1416            }
1417            else
1418            {
1419                metablkSizeLog2 = 12;
1420            }
1421
1422            const INT_32 metablkBitsLog2 =
1423                metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1424            pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1425            pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1426            pBlock->d = 1 << (metablkBitsLog2 / 3);
1427        }
1428    }
1429
1430    return (1 << static_cast<UINT_32>(metablkSizeLog2));
1431}
1432
1433/**
1434************************************************************************************************************************
1435*   Gfx10Lib::ConvertSwizzlePatternToEquation
1436*
1437*   @brief
1438*       Convert swizzle pattern to equation.
1439*
1440*   @return
1441*       N/A
1442************************************************************************************************************************
1443*/
1444VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1445    UINT_32                elemLog2,  ///< [in] element bytes log2
1446    AddrResourceType       rsrcType,  ///< [in] resource type
1447    AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1448    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1449    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1450    const
1451{
1452    ADDR_BIT_SETTING fullSwizzlePattern[20];
1453    GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1454
1455    const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1456    const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1457
1458    pEquation->numBits            = blockSizeLog2;
1459    pEquation->stackedDepthSlices = FALSE;
1460
1461    for (UINT_32 i = 0; i < elemLog2; i++)
1462    {
1463        pEquation->addr[i].channel = 0;
1464        pEquation->addr[i].valid   = 1;
1465        pEquation->addr[i].index   = i;
1466    }
1467
1468    if (IsXor(swMode) == FALSE)
1469    {
1470        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1471        {
1472            ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1473
1474            if (pSwizzle[i].x != 0)
1475            {
1476                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1477
1478                pEquation->addr[i].channel = 0;
1479                pEquation->addr[i].valid   = 1;
1480                pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1481            }
1482            else if (pSwizzle[i].y != 0)
1483            {
1484                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1485
1486                pEquation->addr[i].channel = 1;
1487                pEquation->addr[i].valid   = 1;
1488                pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1489            }
1490            else
1491            {
1492                ADDR_ASSERT(pSwizzle[i].z != 0);
1493                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1494
1495                pEquation->addr[i].channel = 2;
1496                pEquation->addr[i].valid   = 1;
1497                pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1498            }
1499
1500            pEquation->xor1[i].value = 0;
1501            pEquation->xor2[i].value = 0;
1502        }
1503    }
1504    else if (IsThin(rsrcType, swMode))
1505    {
1506        Dim3d dim;
1507        ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1508
1509        const UINT_32 blkXLog2 = Log2(dim.w);
1510        const UINT_32 blkYLog2 = Log2(dim.h);
1511        const UINT_32 blkXMask = dim.w - 1;
1512        const UINT_32 blkYMask = dim.h - 1;
1513
1514        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1515        UINT_32          xMask = 0;
1516        UINT_32          yMask = 0;
1517        UINT_32          bMask = (1 << elemLog2) - 1;
1518
1519        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1520        {
1521            if (IsPow2(pSwizzle[i].value))
1522            {
1523                if (pSwizzle[i].x != 0)
1524                {
1525                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1526                    xMask |= pSwizzle[i].x;
1527
1528                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1529
1530                    ADDR_ASSERT(xLog2 < blkXLog2);
1531
1532                    pEquation->addr[i].channel = 0;
1533                    pEquation->addr[i].valid   = 1;
1534                    pEquation->addr[i].index   = xLog2 + elemLog2;
1535                }
1536                else
1537                {
1538                    ADDR_ASSERT(pSwizzle[i].y != 0);
1539                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1540                    yMask |= pSwizzle[i].y;
1541
1542                    pEquation->addr[i].channel = 1;
1543                    pEquation->addr[i].valid   = 1;
1544                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1545
1546                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1547                }
1548
1549                swizzle[i].value = 0;
1550                bMask |= 1 << i;
1551            }
1552            else
1553            {
1554                if (pSwizzle[i].z != 0)
1555                {
1556                    ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1557
1558                    pEquation->xor2[i].channel = 2;
1559                    pEquation->xor2[i].valid   = 1;
1560                    pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1561                }
1562
1563                swizzle[i].x = pSwizzle[i].x;
1564                swizzle[i].y = pSwizzle[i].y;
1565                swizzle[i].z = swizzle[i].s = 0;
1566
1567                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1568
1569                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1570
1571                if (xHi != 0)
1572                {
1573                    ADDR_ASSERT(IsPow2(xHi));
1574                    ADDR_ASSERT(pEquation->xor1[i].value == 0);
1575
1576                    pEquation->xor1[i].channel = 0;
1577                    pEquation->xor1[i].valid   = 1;
1578                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1579
1580                    swizzle[i].x &= blkXMask;
1581                }
1582
1583                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1584
1585                if (yHi != 0)
1586                {
1587                    ADDR_ASSERT(IsPow2(yHi));
1588
1589                    if (xHi == 0)
1590                    {
1591                        ADDR_ASSERT(pEquation->xor1[i].value == 0);
1592                        pEquation->xor1[i].channel = 1;
1593                        pEquation->xor1[i].valid   = 1;
1594                        pEquation->xor1[i].index   = Log2(yHi);
1595                    }
1596                    else
1597                    {
1598                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
1599                        pEquation->xor2[i].channel = 1;
1600                        pEquation->xor2[i].valid   = 1;
1601                        pEquation->xor2[i].index   = Log2(yHi);
1602                    }
1603
1604                    swizzle[i].y &= blkYMask;
1605                }
1606
1607                if (swizzle[i].value == 0)
1608                {
1609                    bMask |= 1 << i;
1610                }
1611            }
1612        }
1613
1614        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1615        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1616
1617        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1618
1619        while (bMask != blockMask)
1620        {
1621            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1622            {
1623                if ((bMask & (1 << i)) == 0)
1624                {
1625                    if (IsPow2(swizzle[i].value))
1626                    {
1627                        if (swizzle[i].x != 0)
1628                        {
1629                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1630                            xMask |= swizzle[i].x;
1631
1632                            const UINT_32 xLog2 = Log2(swizzle[i].x);
1633
1634                            ADDR_ASSERT(xLog2 < blkXLog2);
1635
1636                            pEquation->addr[i].channel = 0;
1637                            pEquation->addr[i].valid   = 1;
1638                            pEquation->addr[i].index   = xLog2 + elemLog2;
1639                        }
1640                        else
1641                        {
1642                            ADDR_ASSERT(swizzle[i].y != 0);
1643                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1644                            yMask |= swizzle[i].y;
1645
1646                            pEquation->addr[i].channel = 1;
1647                            pEquation->addr[i].valid   = 1;
1648                            pEquation->addr[i].index   = Log2(swizzle[i].y);
1649
1650                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1651                        }
1652
1653                        swizzle[i].value = 0;
1654                        bMask |= 1 << i;
1655                    }
1656                    else
1657                    {
1658                        const UINT_32 x = swizzle[i].x & xMask;
1659                        const UINT_32 y = swizzle[i].y & yMask;
1660
1661                        if (x != 0)
1662                        {
1663                            ADDR_ASSERT(IsPow2(x));
1664
1665                            if (pEquation->xor1[i].value == 0)
1666                            {
1667                                pEquation->xor1[i].channel = 0;
1668                                pEquation->xor1[i].valid   = 1;
1669                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
1670                            }
1671                            else
1672                            {
1673                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
1674                                pEquation->xor2[i].channel = 0;
1675                                pEquation->xor2[i].valid   = 1;
1676                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
1677                            }
1678                        }
1679
1680                        if (y != 0)
1681                        {
1682                            ADDR_ASSERT(IsPow2(y));
1683
1684                            if (pEquation->xor1[i].value == 0)
1685                            {
1686                                pEquation->xor1[i].channel = 1;
1687                                pEquation->xor1[i].valid   = 1;
1688                                pEquation->xor1[i].index   = Log2(y);
1689                            }
1690                            else
1691                            {
1692                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
1693                                pEquation->xor2[i].channel = 1;
1694                                pEquation->xor2[i].valid   = 1;
1695                                pEquation->xor2[i].index   = Log2(y);
1696                            }
1697                        }
1698
1699                        swizzle[i].x &= ~x;
1700                        swizzle[i].y &= ~y;
1701                    }
1702                }
1703            }
1704        }
1705
1706        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1707    }
1708    else
1709    {
1710        const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1711        const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1712        const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1713        const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1714        const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1715        const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1716
1717        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1718        UINT_32          xMask = 0;
1719        UINT_32          yMask = 0;
1720        UINT_32          zMask = 0;
1721        UINT_32          bMask = (1 << elemLog2) - 1;
1722
1723        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1724        {
1725            if (IsPow2(pSwizzle[i].value))
1726            {
1727                if (pSwizzle[i].x != 0)
1728                {
1729                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1730                    xMask |= pSwizzle[i].x;
1731
1732                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1733
1734                    ADDR_ASSERT(xLog2 < blkXLog2);
1735
1736                    pEquation->addr[i].channel = 0;
1737                    pEquation->addr[i].valid   = 1;
1738                    pEquation->addr[i].index   = xLog2 + elemLog2;
1739                }
1740                else if (pSwizzle[i].y != 0)
1741                {
1742                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1743                    yMask |= pSwizzle[i].y;
1744
1745                    pEquation->addr[i].channel = 1;
1746                    pEquation->addr[i].valid   = 1;
1747                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1748
1749                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1750                }
1751                else
1752                {
1753                    ADDR_ASSERT(pSwizzle[i].z != 0);
1754                    ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1755                    zMask |= pSwizzle[i].z;
1756
1757                    pEquation->addr[i].channel = 2;
1758                    pEquation->addr[i].valid   = 1;
1759                    pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1760
1761                    ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1762                }
1763
1764                swizzle[i].value = 0;
1765                bMask |= 1 << i;
1766            }
1767            else
1768            {
1769                swizzle[i].x = pSwizzle[i].x;
1770                swizzle[i].y = pSwizzle[i].y;
1771                swizzle[i].z = pSwizzle[i].z;
1772                swizzle[i].s = 0;
1773
1774                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1775
1776                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1777                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1778                const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1779
1780                ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1781
1782                if (xHi != 0)
1783                {
1784                    ADDR_ASSERT(IsPow2(xHi));
1785                    ADDR_ASSERT(pEquation->xor1[i].value == 0);
1786
1787                    pEquation->xor1[i].channel = 0;
1788                    pEquation->xor1[i].valid   = 1;
1789                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1790
1791                    swizzle[i].x &= blkXMask;
1792                }
1793
1794                if (yHi != 0)
1795                {
1796                    ADDR_ASSERT(IsPow2(yHi));
1797
1798                    if (pEquation->xor1[i].value == 0)
1799                    {
1800                        pEquation->xor1[i].channel = 1;
1801                        pEquation->xor1[i].valid   = 1;
1802                        pEquation->xor1[i].index   = Log2(yHi);
1803                    }
1804                    else
1805                    {
1806                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
1807                        pEquation->xor2[i].channel = 1;
1808                        pEquation->xor2[i].valid   = 1;
1809                        pEquation->xor2[i].index   = Log2(yHi);
1810                    }
1811
1812                    swizzle[i].y &= blkYMask;
1813                }
1814
1815                if (zHi != 0)
1816                {
1817                    ADDR_ASSERT(IsPow2(zHi));
1818
1819                    if (pEquation->xor1[i].value == 0)
1820                    {
1821                        pEquation->xor1[i].channel = 2;
1822                        pEquation->xor1[i].valid   = 1;
1823                        pEquation->xor1[i].index   = Log2(zHi);
1824                    }
1825                    else
1826                    {
1827                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
1828                        pEquation->xor2[i].channel = 2;
1829                        pEquation->xor2[i].valid   = 1;
1830                        pEquation->xor2[i].index   = Log2(zHi);
1831                    }
1832
1833                    swizzle[i].z &= blkZMask;
1834                }
1835
1836                if (swizzle[i].value == 0)
1837                {
1838                    bMask |= 1 << i;
1839                }
1840            }
1841        }
1842
1843        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1844        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1845
1846        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1847
1848        while (bMask != blockMask)
1849        {
1850            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1851            {
1852                if ((bMask & (1 << i)) == 0)
1853                {
1854                    if (IsPow2(swizzle[i].value))
1855                    {
1856                        if (swizzle[i].x != 0)
1857                        {
1858                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1859                            xMask |= swizzle[i].x;
1860
1861                            const UINT_32 xLog2 = Log2(swizzle[i].x);
1862
1863                            ADDR_ASSERT(xLog2 < blkXLog2);
1864
1865                            pEquation->addr[i].channel = 0;
1866                            pEquation->addr[i].valid   = 1;
1867                            pEquation->addr[i].index   = xLog2 + elemLog2;
1868                        }
1869                        else if (swizzle[i].y != 0)
1870                        {
1871                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1872                            yMask |= swizzle[i].y;
1873
1874                            pEquation->addr[i].channel = 1;
1875                            pEquation->addr[i].valid   = 1;
1876                            pEquation->addr[i].index   = Log2(swizzle[i].y);
1877
1878                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1879                        }
1880                        else
1881                        {
1882                            ADDR_ASSERT(swizzle[i].z != 0);
1883                            ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1884                            zMask |= swizzle[i].z;
1885
1886                            pEquation->addr[i].channel = 2;
1887                            pEquation->addr[i].valid   = 1;
1888                            pEquation->addr[i].index   = Log2(swizzle[i].z);
1889
1890                            ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1891                        }
1892
1893                        swizzle[i].value = 0;
1894                        bMask |= 1 << i;
1895                    }
1896                    else
1897                    {
1898                        const UINT_32 x = swizzle[i].x & xMask;
1899                        const UINT_32 y = swizzle[i].y & yMask;
1900                        const UINT_32 z = swizzle[i].z & zMask;
1901
1902                        if (x != 0)
1903                        {
1904                            ADDR_ASSERT(IsPow2(x));
1905
1906                            if (pEquation->xor1[i].value == 0)
1907                            {
1908                                pEquation->xor1[i].channel = 0;
1909                                pEquation->xor1[i].valid   = 1;
1910                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
1911                            }
1912                            else
1913                            {
1914                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
1915                                pEquation->xor2[i].channel = 0;
1916                                pEquation->xor2[i].valid   = 1;
1917                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
1918                            }
1919                        }
1920
1921                        if (y != 0)
1922                        {
1923                            ADDR_ASSERT(IsPow2(y));
1924
1925                            if (pEquation->xor1[i].value == 0)
1926                            {
1927                                pEquation->xor1[i].channel = 1;
1928                                pEquation->xor1[i].valid   = 1;
1929                                pEquation->xor1[i].index   = Log2(y);
1930                            }
1931                            else
1932                            {
1933                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
1934                                pEquation->xor2[i].channel = 1;
1935                                pEquation->xor2[i].valid   = 1;
1936                                pEquation->xor2[i].index   = Log2(y);
1937                            }
1938                        }
1939
1940                        if (z != 0)
1941                        {
1942                            ADDR_ASSERT(IsPow2(z));
1943
1944                            if (pEquation->xor1[i].value == 0)
1945                            {
1946                                pEquation->xor1[i].channel = 2;
1947                                pEquation->xor1[i].valid   = 1;
1948                                pEquation->xor1[i].index   = Log2(z);
1949                            }
1950                            else
1951                            {
1952                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
1953                                pEquation->xor2[i].channel = 2;
1954                                pEquation->xor2[i].valid   = 1;
1955                                pEquation->xor2[i].index   = Log2(z);
1956                            }
1957                        }
1958
1959                        swizzle[i].x &= ~x;
1960                        swizzle[i].y &= ~y;
1961                        swizzle[i].z &= ~z;
1962                    }
1963                }
1964            }
1965        }
1966
1967        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1968    }
1969}
1970
1971/**
1972************************************************************************************************************************
1973*   Gfx10Lib::InitEquationTable
1974*
1975*   @brief
1976*       Initialize Equation table.
1977*
1978*   @return
1979*       N/A
1980************************************************************************************************************************
1981*/
1982VOID Gfx10Lib::InitEquationTable()
1983{
1984    memset(m_equationTable, 0, sizeof(m_equationTable));
1985
1986    for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1987    {
1988        const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1989
1990        for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1991        {
1992            const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1993
1994            for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1995            {
1996                UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1997                const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1998
1999                if (pPatInfo != NULL)
2000                {
2001                    ADDR_ASSERT(IsValidSwMode(swMode));
2002
2003                    if (pPatInfo->maxItemCount <= 3)
2004                    {
2005                        ADDR_EQUATION equation = {};
2006
2007                        ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2008
2009                        equationIndex = m_numEquations;
2010                        ADDR_ASSERT(equationIndex < EquationTableSize);
2011
2012                        m_equationTable[equationIndex] = equation;
2013
2014                        m_numEquations++;
2015                    }
2016                    else
2017                    {
2018                        // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2019                        ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2020                        ADDR_ASSERT(rsrcTypeIdx == 1);
2021                        ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2022                        ADDR_ASSERT(m_settings.supportRbPlus == 1);
2023                    }
2024                }
2025
2026                m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2027            }
2028        }
2029    }
2030}
2031
2032/**
2033************************************************************************************************************************
2034*   Gfx10Lib::HwlGetEquationIndex
2035*
2036*   @brief
2037*       Interface function stub of GetEquationIndex
2038*
2039*   @return
2040*       ADDR_E_RETURNCODE
2041************************************************************************************************************************
2042*/
2043UINT_32 Gfx10Lib::HwlGetEquationIndex(
2044    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
2045    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
2046    ) const
2047{
2048    UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2049
2050    if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2051        (pIn->resourceType == ADDR_RSRC_TEX_3D))
2052    {
2053        const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2054        const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
2055        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2056
2057        equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2058    }
2059
2060    if (pOut->pMipInfo != NULL)
2061    {
2062        for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2063        {
2064            pOut->pMipInfo[i].equationIndex = equationIdx;
2065        }
2066    }
2067
2068    return equationIdx;
2069}
2070
2071/**
2072************************************************************************************************************************
2073*   Gfx10Lib::GetValidDisplaySwizzleModes
2074*
2075*   @brief
2076*       Get valid swizzle modes mask for displayable surface
2077*
2078*   @return
2079*       Valid swizzle modes mask for displayable surface
2080************************************************************************************************************************
2081*/
2082UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2083    UINT_32 bpp
2084    ) const
2085{
2086    UINT_32 swModeMask = 0;
2087
2088    if (bpp <= 64)
2089    {
2090        if (m_settings.isDcn20)
2091        {
2092            swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2093        }
2094        else
2095        {
2096            swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2097        }
2098    }
2099
2100    return swModeMask;
2101}
2102
2103/**
2104************************************************************************************************************************
2105*   Gfx10Lib::IsValidDisplaySwizzleMode
2106*
2107*   @brief
2108*       Check if a swizzle mode is supported by display engine
2109*
2110*   @return
2111*       TRUE is swizzle mode is supported by display engine
2112************************************************************************************************************************
2113*/
2114BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2115    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2116    ) const
2117{
2118    ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2119
2120    return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2121}
2122
2123/**
2124************************************************************************************************************************
2125*   Gfx10Lib::GetMaxNumMipsInTail
2126*
2127*   @brief
2128*       Return max number of mips in tails
2129*
2130*   @return
2131*       Max number of mips in tails
2132************************************************************************************************************************
2133*/
2134UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2135    UINT_32 blockSizeLog2,     ///< block size log2
2136    BOOL_32 isThin             ///< is thin or thick
2137    ) const
2138{
2139    UINT_32 effectiveLog2 = blockSizeLog2;
2140
2141    if (isThin == FALSE)
2142    {
2143        effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2144    }
2145
2146    return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2147}
2148
2149/**
2150************************************************************************************************************************
2151*   Gfx10Lib::HwlComputePipeBankXor
2152*
2153*   @brief
2154*       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2155*
2156*   @return
2157*       PipeBankXor value
2158************************************************************************************************************************
2159*/
2160ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2161    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2162    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2163    ) const
2164{
2165    if (IsNonPrtXor(pIn->swizzleMode))
2166    {
2167        const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2168
2169        // No pipe xor...
2170        const UINT_32 pipeXor = 0;
2171        UINT_32       bankXor = 0;
2172
2173        const UINT_32         XorPatternLen = 8;
2174        static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2175        static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2176        static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2177        static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2178        static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2179
2180        switch (bankBits)
2181        {
2182            case 1:
2183            case 2:
2184            case 3:
2185            case 4:
2186                bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2187                break;
2188            default:
2189                // valid bank bits should be 0~4
2190                ADDR_ASSERT_ALWAYS();
2191            case 0:
2192                break;
2193        }
2194
2195        pOut->pipeBankXor = bankXor | pipeXor;
2196    }
2197    else
2198    {
2199        pOut->pipeBankXor = 0;
2200    }
2201
2202    return ADDR_OK;
2203}
2204
2205/**
2206************************************************************************************************************************
2207*   Gfx10Lib::HwlComputeSlicePipeBankXor
2208*
2209*   @brief
2210*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2211*
2212*   @return
2213*       PipeBankXor value
2214************************************************************************************************************************
2215*/
2216ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2217    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2218    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2219    ) const
2220{
2221    if (IsNonPrtXor(pIn->swizzleMode))
2222    {
2223        const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2224        const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2225        const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2226
2227        pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2228
2229        if (pIn->bpe != 0)
2230        {
2231            const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2232                                                                    pIn->resourceType,
2233                                                                    Log2(pIn->bpe >> 3),
2234                                                                    1);
2235
2236            if (pPatInfo != NULL)
2237            {
2238                ADDR_BIT_SETTING fullSwizzlePattern[20];
2239                GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2240
2241                const UINT_32 pipeBankXorOffset =
2242                    ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2243                                                    blockBits,
2244                                                    0,
2245                                                    0,
2246                                                    pIn->slice,
2247                                                    0);
2248
2249                const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2250
2251                // Should have no bit set under pipe interleave
2252                ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2253
2254                // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2255                ADDR_ASSERT(pipeBankXor == pipeXor);
2256
2257                pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2258            }
2259        }
2260    }
2261    else
2262    {
2263        pOut->pipeBankXor = 0;
2264    }
2265
2266    return ADDR_OK;
2267}
2268
2269/**
2270************************************************************************************************************************
2271*   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2272*
2273*   @brief
2274*       Compute sub resource offset to support swizzle pattern
2275*
2276*   @return
2277*       Offset
2278************************************************************************************************************************
2279*/
2280ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2281    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2282    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2283    ) const
2284{
2285    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2286
2287    pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2288
2289    return ADDR_OK;
2290}
2291
2292/**
2293************************************************************************************************************************
2294*   Gfx10Lib::HwlComputeNonBlockCompressedView
2295*
2296*   @brief
2297*       Compute non-block-compressed view for a given mipmap level/slice.
2298*
2299*   @return
2300*       ADDR_E_RETURNCODE
2301************************************************************************************************************************
2302*/
2303ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2304    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
2305    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
2306    ) const
2307{
2308    ADDR_E_RETURNCODE returnCode = ADDR_OK;
2309
2310    if (pIn->resourceType != ADDR_RSRC_TEX_2D)
2311    {
2312        // Only 2D resource can have a NonBC view...
2313        returnCode = ADDR_INVALIDPARAMS;
2314    }
2315    else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
2316             ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2317    {
2318        // Only support BC1~BC7 or ASTC_8x8 for now...
2319        returnCode = ADDR_NOTSUPPORTED;
2320    }
2321    else
2322    {
2323        UINT_32 bcWidth, bcHeight;
2324        UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2325
2326        ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2327        infoIn.flags        = pIn->flags;
2328        infoIn.swizzleMode  = pIn->swizzleMode;
2329        infoIn.resourceType = pIn->resourceType;
2330        infoIn.bpp          = bpp;
2331        infoIn.width        = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
2332        infoIn.height       = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
2333        infoIn.numSlices    = pIn->numSlices;
2334        infoIn.numMipLevels = pIn->numMipLevels;
2335        infoIn.numSamples   = 1;
2336        infoIn.numFrags     = 1;
2337
2338        ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2339
2340        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2341        infoOut.pMipInfo = mipInfo;
2342
2343        const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2344
2345        if (tiled)
2346        {
2347            returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2348        }
2349        else
2350        {
2351            returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2352        }
2353
2354        if (returnCode == ADDR_OK)
2355        {
2356            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2357            subOffIn.swizzleMode      = infoIn.swizzleMode;
2358            subOffIn.resourceType     = infoIn.resourceType;
2359            subOffIn.slice            = pIn->slice;
2360            subOffIn.sliceSize        = infoOut.sliceSize;
2361            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2362            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2363
2364            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2365
2366            // For any mipmap level, move nonBc view base address by offset
2367            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2368            pOut->offset = subOffOut.offset;
2369
2370            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2371            slicePbXorIn.bpe             = infoIn.bpp;
2372            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2373            slicePbXorIn.resourceType    = infoIn.resourceType;
2374            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2375            slicePbXorIn.slice           = pIn->slice;
2376
2377            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2378
2379            // For any mipmap level, nonBc view should use computed pbXor
2380            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2381            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2382
2383            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2384            const UINT_32 requestMipWidth  = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
2385            const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;
2386
2387            if (inTail)
2388            {
2389                // For mipmap level that is in mip tail block, hack a lot of things...
2390                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2391                // are fit in tail block:
2392
2393                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2394                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2395
2396                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2397                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2398
2399                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2400                pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2401
2402                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2403                pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2404            }
2405            // This check should cover at least mipId == 0
2406            else if (requestMipWidth << pIn->mipId == infoIn.width)
2407            {
2408                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2409                // - only one mipmap level and mipId = 0
2410                pOut->mipId        = 0;
2411                pOut->numMipLevels = 1;
2412
2413                // (mip0) width = requestMipWidth
2414                pOut->unalignedWidth = requestMipWidth;
2415
2416                // (mip0) height = requestMipHeight
2417                pOut->unalignedHeight = requestMipHeight;
2418            }
2419            else
2420            {
2421                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2422                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2423                // because single mip view may have different pitch value than original (multiple) mip view...
2424                // A simple case would be:
2425                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2426                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2427                //   mip0 width = 0x101/mip1 width = 0x80
2428                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2429                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2430
2431                // - 2 levels and mipId = 1
2432                pOut->mipId        = 1;
2433                pOut->numMipLevels = 2;
2434
2435                const UINT_32 upperMipWidth  =
2436                    PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
2437                const UINT_32 upperMipHeight =
2438                    PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;
2439
2440                const BOOL_32 needToAvoidInTail =
2441                    tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2442                    TRUE : FALSE;
2443
2444                const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2445                const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2446
2447                const BOOL_32 needExtraWidth =
2448                    ((upperMipWidth < requestMipWidth * 2) ||
2449                     ((upperMipWidth == requestMipWidth * 2) &&
2450                      ((needToAvoidInTail == TRUE) ||
2451                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2452
2453                const BOOL_32 needExtraHeight =
2454                    ((upperMipHeight < requestMipHeight * 2) ||
2455                     ((upperMipHeight == requestMipHeight * 2) &&
2456                      ((needToAvoidInTail == TRUE) ||
2457                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2458
2459                // (mip0) width = requestLastMipLevelWidth
2460                pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2461
2462                // (mip0) height = requestLastMipLevelHeight
2463                pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2464            }
2465
2466            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2467            ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2468            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2469            ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2470        }
2471    }
2472
2473    return returnCode;
2474}
2475
2476/**
2477************************************************************************************************************************
2478*   Gfx10Lib::ValidateNonSwModeParams
2479*
2480*   @brief
2481*       Validate compute surface info params except swizzle mode
2482*
2483*   @return
2484*       TRUE if parameters are valid, FALSE otherwise
2485************************************************************************************************************************
2486*/
2487BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2488    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2489{
2490    BOOL_32 valid = TRUE;
2491
2492    if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2493    {
2494        ADDR_ASSERT_ALWAYS();
2495        valid = FALSE;
2496    }
2497
2498    if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2499    {
2500        ADDR_ASSERT_ALWAYS();
2501        valid = FALSE;
2502    }
2503
2504    const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2505    const AddrResourceType    rsrcType = pIn->resourceType;
2506    const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2507    const BOOL_32             msaa     = (pIn->numFrags > 1);
2508    const BOOL_32             display  = flags.display;
2509    const BOOL_32             tex3d    = IsTex3d(rsrcType);
2510    const BOOL_32             tex2d    = IsTex2d(rsrcType);
2511    const BOOL_32             tex1d    = IsTex1d(rsrcType);
2512    const BOOL_32             stereo   = flags.qbStereo;
2513
2514
2515    // Resource type check
2516    if (tex1d)
2517    {
2518        if (msaa || display || stereo)
2519        {
2520            ADDR_ASSERT_ALWAYS();
2521            valid = FALSE;
2522        }
2523    }
2524    else if (tex2d)
2525    {
2526        if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2527        {
2528            ADDR_ASSERT_ALWAYS();
2529            valid = FALSE;
2530        }
2531    }
2532    else if (tex3d)
2533    {
2534        if (msaa || display || stereo)
2535        {
2536            ADDR_ASSERT_ALWAYS();
2537            valid = FALSE;
2538        }
2539    }
2540    else
2541    {
2542        ADDR_ASSERT_ALWAYS();
2543        valid = FALSE;
2544    }
2545
2546    return valid;
2547}
2548
2549/**
2550************************************************************************************************************************
2551*   Gfx10Lib::ValidateSwModeParams
2552*
2553*   @brief
2554*       Validate compute surface info related to swizzle mode
2555*
2556*   @return
2557*       TRUE if parameters are valid, FALSE otherwise
2558************************************************************************************************************************
2559*/
2560BOOL_32 Gfx10Lib::ValidateSwModeParams(
2561    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2562{
2563    BOOL_32 valid = TRUE;
2564
2565    if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2566    {
2567        ADDR_ASSERT_ALWAYS();
2568        valid = FALSE;
2569    }
2570    else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2571    {
2572        {
2573            ADDR_ASSERT_ALWAYS();
2574            valid = FALSE;
2575        }
2576    }
2577
2578    const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2579    const AddrResourceType    rsrcType    = pIn->resourceType;
2580    const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2581    const BOOL_32             msaa        = (pIn->numFrags > 1);
2582    const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2583    const BOOL_32             color       = flags.color;
2584    const BOOL_32             display     = flags.display;
2585    const BOOL_32             tex3d       = IsTex3d(rsrcType);
2586    const BOOL_32             tex2d       = IsTex2d(rsrcType);
2587    const BOOL_32             tex1d       = IsTex1d(rsrcType);
2588    const BOOL_32             thin3d      = flags.view3dAs2dArray;
2589    const BOOL_32             linear      = IsLinear(swizzle);
2590    const BOOL_32             blk256B     = IsBlock256b(swizzle);
2591    const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2592    const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2593    const BOOL_32             prt         = flags.prt;
2594    const BOOL_32             fmask       = flags.fmask;
2595
2596    // Misc check
2597    if ((pIn->numFrags > 1) &&
2598        (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2599    {
2600        // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2601        ADDR_ASSERT_ALWAYS();
2602        valid = FALSE;
2603    }
2604
2605    if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2606    {
2607        ADDR_ASSERT_ALWAYS();
2608        valid = FALSE;
2609    }
2610
2611    if ((pIn->bpp == 96) && (linear == FALSE))
2612    {
2613        ADDR_ASSERT_ALWAYS();
2614        valid = FALSE;
2615    }
2616
2617    const UINT_32 swizzleMask = 1 << swizzle;
2618
2619    // Resource type check
2620    if (tex1d)
2621    {
2622        if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2623        {
2624            ADDR_ASSERT_ALWAYS();
2625            valid = FALSE;
2626        }
2627    }
2628    else if (tex2d)
2629    {
2630        if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2631        {
2632            {
2633                ADDR_ASSERT_ALWAYS();
2634                valid = FALSE;
2635            }
2636        }
2637        else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2638                 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2639        {
2640            ADDR_ASSERT_ALWAYS();
2641            valid = FALSE;
2642        }
2643
2644    }
2645    else if (tex3d)
2646    {
2647        if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2648            (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2649            (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2650        {
2651            ADDR_ASSERT_ALWAYS();
2652            valid = FALSE;
2653        }
2654    }
2655
2656    // Swizzle type check
2657    if (linear)
2658    {
2659        if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2660        {
2661            ADDR_ASSERT_ALWAYS();
2662            valid = FALSE;
2663        }
2664    }
2665    else if (IsZOrderSwizzle(swizzle))
2666    {
2667        if ((pIn->bpp > 64)                         ||
2668            (msaa && (color || (pIn->bpp > 32)))    ||
2669            ElemLib::IsBlockCompressed(pIn->format) ||
2670            ElemLib::IsMacroPixelPacked(pIn->format))
2671        {
2672            ADDR_ASSERT_ALWAYS();
2673            valid = FALSE;
2674        }
2675    }
2676    else if (IsStandardSwizzle(rsrcType, swizzle))
2677    {
2678        if (zbuffer || msaa)
2679        {
2680            ADDR_ASSERT_ALWAYS();
2681            valid = FALSE;
2682        }
2683    }
2684    else if (IsDisplaySwizzle(rsrcType, swizzle))
2685    {
2686        if (zbuffer || msaa)
2687        {
2688            ADDR_ASSERT_ALWAYS();
2689            valid = FALSE;
2690        }
2691    }
2692    else if (IsRtOptSwizzle(swizzle))
2693    {
2694        if (zbuffer)
2695        {
2696            ADDR_ASSERT_ALWAYS();
2697            valid = FALSE;
2698        }
2699    }
2700    else
2701    {
2702        {
2703            ADDR_ASSERT_ALWAYS();
2704            valid = FALSE;
2705        }
2706    }
2707
2708    // Block type check
2709    if (blk256B)
2710    {
2711        if (zbuffer || tex3d || msaa)
2712        {
2713            ADDR_ASSERT_ALWAYS();
2714            valid = FALSE;
2715        }
2716    }
2717    else if (blkVar)
2718    {
2719        if (m_blockVarSizeLog2 == 0)
2720        {
2721            ADDR_ASSERT_ALWAYS();
2722            valid = FALSE;
2723        }
2724    }
2725
2726    return valid;
2727}
2728
2729/**
2730************************************************************************************************************************
2731*   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2732*
2733*   @brief
2734*       Compute surface info sanity check
2735*
2736*   @return
2737*       Offset
2738************************************************************************************************************************
2739*/
2740ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2741    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2742    ) const
2743{
2744    return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2745}
2746
2747/**
2748************************************************************************************************************************
2749*   Gfx10Lib::HwlGetPreferredSurfaceSetting
2750*
2751*   @brief
*       Internal function to get suggested surface information for client to use
2753*
2754*   @return
2755*       ADDR_E_RETURNCODE
2756************************************************************************************************************************
2757*/
2758ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2759    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2760    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2761    ) const
2762{
2763    ADDR_E_RETURNCODE returnCode = ADDR_OK;
2764
2765    if (pIn->flags.fmask)
2766    {
2767        const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2768        const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2769
2770        if (forbid64KbBlockType && forbidVarBlockType)
2771        {
2772            // Invalid combination...
2773            ADDR_ASSERT_ALWAYS();
2774            returnCode = ADDR_INVALIDPARAMS;
2775        }
2776        else
2777        {
2778            pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2779            pOut->validBlockSet.value            = 0;
2780            pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2781            pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2782            pOut->validSwModeSet.value           = 0;
2783            pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2784            pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2785            pOut->canXor                         = TRUE;
2786            pOut->validSwTypeSet.value           = AddrSwSetZ;
2787            pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2788
2789            BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2790
2791            if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2792            {
2793                const UINT_8  maxFmaskSwizzleModeType = 2;
2794                const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2795                const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2796                const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2797                const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2798                const UINT_32 width                   = Max(pIn->width, 1u);
2799                const UINT_32 height                  = Max(pIn->height, 1u);
2800                const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2801
2802                AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2803                Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2804                Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2805                UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2806
2807                for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2808                {
2809                    ComputeBlockDimensionForSurf(&blkDim[i].w,
2810                                                 &blkDim[i].h,
2811                                                 &blkDim[i].d,
2812                                                 fmaskBpp,
2813                                                 1,
2814                                                 pOut->resourceType,
2815                                                 swMode[i]);
2816
2817                    padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2818                    padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2819                }
2820
2821                if (BlockTypeWithinMemoryBudget(padSize[0],
2822                                                padSize[1],
2823                                                ratioLow,
2824                                                ratioHi,
2825                                                pIn->memoryBudget,
2826                                                GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2827                {
2828                    use64KbBlockType = FALSE;
2829                }
2830            }
2831            else if (forbidVarBlockType)
2832            {
2833                use64KbBlockType = TRUE;
2834            }
2835
2836            if (use64KbBlockType)
2837            {
2838                pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2839            }
2840            else
2841            {
2842                pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2843            }
2844        }
2845    }
2846    else
2847    {
2848        UINT_32 bpp    = pIn->bpp;
2849        UINT_32 width  = Max(pIn->width, 1u);
2850        UINT_32 height = Max(pIn->height, 1u);
2851
2852        // Set format to INVALID will skip this conversion
2853        if (pIn->format != ADDR_FMT_INVALID)
2854        {
2855            ElemMode elemMode = ADDR_UNCOMPRESSED;
2856            UINT_32 expandX, expandY;
2857
2858            // Get compression/expansion factors and element mode which indicates compression/expansion
2859            bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2860                                                &elemMode,
2861                                                &expandX,
2862                                                &expandY);
2863
2864            UINT_32 basePitch = 0;
2865            GetElemLib()->AdjustSurfaceInfo(elemMode,
2866                                            expandX,
2867                                            expandY,
2868                                            &bpp,
2869                                            &basePitch,
2870                                            &width,
2871                                            &height);
2872        }
2873
2874        const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2875        const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2876        const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2877        const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2878        const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2879
2880        // Pre sanity check on non swizzle mode parameters
2881        ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2882        localIn.flags        = pIn->flags;
2883        localIn.resourceType = pIn->resourceType;
2884        localIn.format       = pIn->format;
2885        localIn.bpp          = bpp;
2886        localIn.width        = width;
2887        localIn.height       = height;
2888        localIn.numSlices    = numSlices;
2889        localIn.numMipLevels = numMipLevels;
2890        localIn.numSamples   = numSamples;
2891        localIn.numFrags     = numFrags;
2892
2893        if (ValidateNonSwModeParams(&localIn))
2894        {
2895            // Forbid swizzle mode(s) by client setting
2896            ADDR2_SWMODE_SET allowedSwModeSet = {};
2897            allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2898            allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2899            allowedSwModeSet.value |=
2900                pIn->forbiddenBlock.macroThin4KB ? 0 :
2901                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2902            allowedSwModeSet.value |=
2903                pIn->forbiddenBlock.macroThick4KB ? 0 :
2904                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2905            allowedSwModeSet.value |=
2906                pIn->forbiddenBlock.macroThin64KB ? 0 :
2907                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2908            allowedSwModeSet.value |=
2909                pIn->forbiddenBlock.macroThick64KB ? 0 :
2910                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2911            allowedSwModeSet.value |=
2912                pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2913
2914            if (pIn->preferredSwSet.value != 0)
2915            {
2916                allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2917                allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2918                allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2919                allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2920            }
2921
2922            if (pIn->noXor)
2923            {
2924                allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2925            }
2926
2927            if (pIn->maxAlign > 0)
2928            {
2929                if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2930                {
2931                    allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2932                }
2933
2934                if (pIn->maxAlign < Size64K)
2935                {
2936                    allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2937                }
2938
2939                if (pIn->maxAlign < Size4K)
2940                {
2941                    allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2942                }
2943
2944                if (pIn->maxAlign < Size256)
2945                {
2946                    allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2947                }
2948            }
2949
2950            // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2951            switch (pIn->resourceType)
2952            {
2953                case ADDR_RSRC_TEX_1D:
2954                    allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2955                    break;
2956
2957                case ADDR_RSRC_TEX_2D:
2958                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2959
2960                    break;
2961
2962                case ADDR_RSRC_TEX_3D:
2963                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2964
2965                    if (pIn->flags.view3dAs2dArray)
2966                    {
2967                        allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2968                    }
2969                    break;
2970
2971                default:
2972                    ADDR_ASSERT_ALWAYS();
2973                    allowedSwModeSet.value = 0;
2974                    break;
2975            }
2976
2977            if (ElemLib::IsBlockCompressed(pIn->format)  ||
2978                ElemLib::IsMacroPixelPacked(pIn->format) ||
2979                (bpp > 64)                               ||
2980                (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2981            {
2982                allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2983            }
2984
2985            if (pIn->format == ADDR_FMT_32_32_32)
2986            {
2987                allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2988            }
2989
2990            if (msaa)
2991            {
2992                allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2993            }
2994
2995            if (pIn->flags.depth || pIn->flags.stencil)
2996            {
2997                allowedSwModeSet.value &= Gfx10ZSwModeMask;
2998            }
2999
3000            if (pIn->flags.display)
3001            {
3002                allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3003            }
3004
3005            if (allowedSwModeSet.value != 0)
3006            {
3007#if DEBUG
3008                // Post sanity check, at least AddrLib should accept the output generated by its own
3009                UINT_32 validateSwModeSet = allowedSwModeSet.value;
3010
3011                for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3012                {
3013                    if (validateSwModeSet & 1)
3014                    {
3015                        localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3016                        ADDR_ASSERT(ValidateSwModeParams(&localIn));
3017                    }
3018
3019                    validateSwModeSet >>= 1;
3020                }
3021#endif
3022
3023                pOut->resourceType   = pIn->resourceType;
3024                pOut->validSwModeSet = allowedSwModeSet;
3025                pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3026                pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3027                pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3028
3029                pOut->clientPreferredSwSet = pIn->preferredSwSet;
3030
3031                if (pOut->clientPreferredSwSet.value == 0)
3032                {
3033                    pOut->clientPreferredSwSet.value = AddrSwSetAll;
3034                }
3035
3036                // Apply optional restrictions
3037                if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3038                {
3039                    if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3040                    {
3041                        // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3042                        // the GL2 in VAR mode, so it should be avoided.
3043                        allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3044                    }
3045                    else
3046                    {
3047                        // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3048                        // But we have to suffer from low performance because there is no other choice...
3049                        ADDR_ASSERT_ALWAYS();
3050                    }
3051                }
3052
3053                if (pIn->flags.needEquation)
3054                {
3055                    FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3056                }
3057
3058                if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3059                {
3060                    pOut->swizzleMode = ADDR_SW_LINEAR;
3061                }
3062                else
3063                {
3064                    const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3065
3066                    if ((height > 1) && (computeMinSize == FALSE))
3067                    {
3068                        // Always ignore linear swizzle mode if:
3069                        // 1. This is a (2D/3D) resource with height > 1
3070                        // 2. Client doesn't require computing minimize size
3071                        allowedSwModeSet.swLinear = 0;
3072                    }
3073
3074                    ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3075
3076                    // Determine block size if there are 2 or more block type candidates
3077                    if (IsPow2(allowedBlockSet.value) == FALSE)
3078                    {
3079                        AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3080
3081                        swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3082
3083                        if (m_blockVarSizeLog2 != 0)
3084                        {
3085                            swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3086                        }
3087
3088                        if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3089                        {
3090                            swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3091                            swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
3092                            swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3093                        }
3094                        else
3095                        {
3096                            swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
3097                            swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
3098                            swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3099                        }
3100
3101                        UINT_64 padSize[AddrBlockMaxTiledType] = {};
3102
3103                        const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3104                        const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3105                        const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3106                        UINT_32       minSizeBlk         = AddrBlockMicro;
3107                        UINT_64       minSize            = 0;
3108
3109                        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3110
3111                        for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3112                        {
3113                            if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3114                            {
3115                                localIn.swizzleMode = swMode[i];
3116
3117                                if (localIn.swizzleMode == ADDR_SW_LINEAR)
3118                                {
3119                                    returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3120                                }
3121                                else
3122                                {
3123                                    returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3124                                }
3125
3126                                if (returnCode == ADDR_OK)
3127                                {
3128                                    padSize[i] = localOut.surfSize;
3129
3130                                    if (minSize == 0)
3131                                    {
3132                                        minSize    = padSize[i];
3133                                        minSizeBlk = i;
3134                                    }
3135                                    else
3136                                    {
3137                                        if (BlockTypeWithinMemoryBudget(
3138                                                minSize,
3139                                                padSize[i],
3140                                                ratioLow,
3141                                                ratioHi,
3142                                                0.0,
3143                                                GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3144                                        {
3145                                            minSize    = padSize[i];
3146                                            minSizeBlk = i;
3147                                        }
3148                                    }
3149                                }
3150                                else
3151                                {
3152                                    ADDR_ASSERT_ALWAYS();
3153                                    break;
3154                                }
3155                            }
3156                        }
3157
3158                        if (pIn->memoryBudget > 1.0)
3159                        {
3160                            // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3161                            // smaller-block type again in coming loop
3162                            switch (minSizeBlk)
3163                            {
3164                                case AddrBlockThick64KB:
3165                                    allowedBlockSet.macroThin64KB = 0;
3166                                case AddrBlockThinVar:
3167                                case AddrBlockThin64KB:
3168                                    allowedBlockSet.macroThick4KB = 0;
3169                                case AddrBlockThick4KB:
3170                                    allowedBlockSet.macroThin4KB = 0;
3171                                case AddrBlockThin4KB:
3172                                    allowedBlockSet.micro  = 0;
3173                                case AddrBlockMicro:
3174                                    allowedBlockSet.linear = 0;
3175                                case AddrBlockLinear:
3176                                    break;
3177
3178                                default:
3179                                    ADDR_ASSERT_ALWAYS();
3180                                    break;
3181                            }
3182
3183                            for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3184                            {
3185                                if ((i != minSizeBlk) &&
3186                                    IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3187                                {
3188                                    if (BlockTypeWithinMemoryBudget(
3189                                            minSize,
3190                                            padSize[i],
3191                                            0,
3192                                            0,
3193                                            pIn->memoryBudget,
3194                                            GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3195                                    {
3196                                        // Clear the block type if the memory waste is unacceptable
3197                                        allowedBlockSet.value &= ~(1u << (i - 1));
3198                                    }
3199                                }
3200                            }
3201
3202                            // Remove VAR block type if bigger block type is allowed
3203                            if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3204                            {
3205                                if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3206                                {
3207                                    allowedBlockSet.var = 0;
3208                                }
3209                            }
3210
3211                            // Remove linear block type if 2 or more block types are allowed
3212                            if (IsPow2(allowedBlockSet.value) == FALSE)
3213                            {
3214                                allowedBlockSet.linear = 0;
3215                            }
3216
3217                            // Select the biggest allowed block type
3218                            minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3219
3220                            if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3221                            {
3222                                minSizeBlk = AddrBlockLinear;
3223                            }
3224                        }
3225
3226                        switch (minSizeBlk)
3227                        {
3228                            case AddrBlockLinear:
3229                                allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3230                                break;
3231
3232                            case AddrBlockMicro:
3233                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3234                                allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3235                                break;
3236
3237                            case AddrBlockThin4KB:
3238                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3239                                allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3240                                break;
3241
3242                            case AddrBlockThick4KB:
3243                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3244                                allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3245                                break;
3246
3247                            case AddrBlockThin64KB:
3248                                allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3249                                                          Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3250                                break;
3251
3252                            case AddrBlockThick64KB:
3253                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3254                                allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3255                                break;
3256
3257                            case AddrBlockThinVar:
3258                                allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3259                                break;
3260
3261                            default:
3262                                ADDR_ASSERT_ALWAYS();
3263                                allowedSwModeSet.value = 0;
3264                                break;
3265                        }
3266                    }
3267
3268                    // Block type should be determined.
3269                    ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3270
3271                    ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3272
3273                    // Determine swizzle type if there are 2 or more swizzle type candidates
3274                    if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3275                    {
3276                        if (ElemLib::IsBlockCompressed(pIn->format))
3277                        {
3278                            if (allowedSwSet.sw_D)
3279                            {
3280                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3281                            }
3282                            else if (allowedSwSet.sw_S)
3283                            {
3284                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3285                            }
3286                            else
3287                            {
3288                                ADDR_ASSERT(allowedSwSet.sw_R);
3289                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3290                            }
3291                        }
3292                        else if (ElemLib::IsMacroPixelPacked(pIn->format))
3293                        {
3294                            if (allowedSwSet.sw_S)
3295                            {
3296                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3297                            }
3298                            else if (allowedSwSet.sw_D)
3299                            {
3300                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3301                            }
3302                            else
3303                            {
3304                                ADDR_ASSERT(allowedSwSet.sw_R);
3305                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3306                            }
3307                        }
3308                        else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3309                        {
3310                            if (pIn->flags.color &&
3311                                GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3312                                allowedSwSet.sw_D)
3313                            {
3314                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3315                            }
3316                            else if (allowedSwSet.sw_S)
3317                            {
3318                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3319                            }
3320                            else if (allowedSwSet.sw_R)
3321                            {
3322                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3323                            }
3324                            else
3325                            {
3326                                ADDR_ASSERT(allowedSwSet.sw_Z);
3327                                allowedSwModeSet.value &= Gfx10ZSwModeMask;
3328                            }
3329                        }
3330                        else
3331                        {
3332                            if (allowedSwSet.sw_R)
3333                            {
3334                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3335                            }
3336                            else if (allowedSwSet.sw_D)
3337                            {
3338                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3339                            }
3340                            else if (allowedSwSet.sw_S)
3341                            {
3342                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3343                            }
3344                            else
3345                            {
3346                                ADDR_ASSERT(allowedSwSet.sw_Z);
3347                                allowedSwModeSet.value &= Gfx10ZSwModeMask;
3348                            }
3349                        }
3350
3351                        // Swizzle type should be determined.
3352                        ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3353                    }
3354
3355                    // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3356                    // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3357                    // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3358                    pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3359                }
3360            }
3361            else
3362            {
3363                // Invalid combination...
3364                ADDR_ASSERT_ALWAYS();
3365                returnCode = ADDR_INVALIDPARAMS;
3366            }
3367        }
3368        else
3369        {
3370            // Invalid combination...
3371            ADDR_ASSERT_ALWAYS();
3372            returnCode = ADDR_INVALIDPARAMS;
3373        }
3374    }
3375
3376    return returnCode;
3377}
3378
3379/**
3380************************************************************************************************************************
3381*   Gfx10Lib::ComputeStereoInfo
3382*
3383*   @brief
3384*       Compute height alignment and right eye pipeBankXor for stereo surface
3385*
3386*   @return
3387*       Error code
3388*
3389************************************************************************************************************************
3390*/
3391ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3392    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3393    UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3394    UINT_32*                                pRightXor   ///< Right eye xor
3395    ) const
3396{
3397    ADDR_E_RETURNCODE ret = ADDR_OK;
3398
3399    *pRightXor = 0;
3400
3401    if (IsNonPrtXor(pIn->swizzleMode))
3402    {
3403        const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3404        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3405        const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3406        const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3407        const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3408
3409        if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3410        {
3411            UINT_32 yMax     = 0;
3412            UINT_32 yPosMask = 0;
3413
3414            // First get "max y bit"
3415            for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3416            {
3417                ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3418
3419                if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3420                    (m_equationTable[eqIndex].addr[i].index > yMax))
3421                {
3422                    yMax = m_equationTable[eqIndex].addr[i].index;
3423                }
3424
3425                if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3426                    (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3427                    (m_equationTable[eqIndex].xor1[i].index > yMax))
3428                {
3429                    yMax = m_equationTable[eqIndex].xor1[i].index;
3430                }
3431
3432                if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3433                    (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3434                    (m_equationTable[eqIndex].xor2[i].index > yMax))
3435                {
3436                    yMax = m_equationTable[eqIndex].xor2[i].index;
3437                }
3438            }
3439
3440            // Then loop again for populating a position mask of "max Y bit"
3441            for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3442            {
3443                if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3444                    (m_equationTable[eqIndex].addr[i].index == yMax))
3445                {
3446                    yPosMask |= 1u << i;
3447                }
3448                else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3449                         (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3450                         (m_equationTable[eqIndex].xor1[i].index == yMax))
3451                {
3452                    yPosMask |= 1u << i;
3453                }
3454                else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3455                         (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3456                         (m_equationTable[eqIndex].xor2[i].index == yMax))
3457                {
3458                    yPosMask |= 1u << i;
3459                }
3460            }
3461
3462            const UINT_32 additionalAlign = 1 << yMax;
3463
3464            if (additionalAlign >= *pAlignY)
3465            {
3466                *pAlignY = additionalAlign;
3467
3468                const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3469
3470                if ((alignedHeight >> yMax) & 1)
3471                {
3472                    *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3473                }
3474            }
3475        }
3476        else
3477        {
3478            ret = ADDR_INVALIDPARAMS;
3479        }
3480    }
3481
3482    return ret;
3483}
3484
3485/**
3486************************************************************************************************************************
3487*   Gfx10Lib::HwlComputeSurfaceInfoTiled
3488*
3489*   @brief
3490*       Internal function to calculate alignment for tiled surface
3491*
3492*   @return
3493*       ADDR_E_RETURNCODE
3494************************************************************************************************************************
3495*/
3496ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3497     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3498     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3499     ) const
3500{
3501    ADDR_E_RETURNCODE ret;
3502
3503    // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3504    pOut->mipChainPitch    = 0;
3505    pOut->mipChainHeight   = 0;
3506    pOut->mipChainSlice    = 0;
3507    pOut->epitchIsHeight   = FALSE;
3508
3509    // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3510    pOut->mipChainInTail   = FALSE;
3511    pOut->firstMipIdInTail = pIn->numMipLevels;
3512
3513    if (IsBlock256b(pIn->swizzleMode))
3514    {
3515        ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3516    }
3517    else
3518    {
3519        ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3520    }
3521
3522    return ret;
3523}
3524
3525
3526/**
3527************************************************************************************************************************
3528*   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3529*
3530*   @brief
3531*       Internal function to calculate alignment for micro tiled surface
3532*
3533*   @return
3534*       ADDR_E_RETURNCODE
3535************************************************************************************************************************
3536*/
3537ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3538     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3539     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3540     ) const
3541{
3542    ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3543                                                         &pOut->blockHeight,
3544                                                         &pOut->blockSlices,
3545                                                         pIn->bpp,
3546                                                         pIn->numFrags,
3547                                                         pIn->resourceType,
3548                                                         pIn->swizzleMode);
3549
3550    if (ret == ADDR_OK)
3551    {
3552        const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3553
3554        pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3555        pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3556        pOut->numSlices = pIn->numSlices;
3557        pOut->baseAlign = blockSize;
3558
3559        if (pIn->numMipLevels > 1)
3560        {
3561            const UINT_32 mip0Width    = pIn->width;
3562            const UINT_32 mip0Height   = pIn->height;
3563            UINT_64       mipSliceSize = 0;
3564
3565            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3566            {
3567                UINT_32 mipWidth, mipHeight;
3568
3569                GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3570
3571                const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3572                const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3573
3574                if (pOut->pMipInfo != NULL)
3575                {
3576                    pOut->pMipInfo[i].pitch            = mipActualWidth;
3577                    pOut->pMipInfo[i].height           = mipActualHeight;
3578                    pOut->pMipInfo[i].depth            = 1;
3579                    pOut->pMipInfo[i].offset           = mipSliceSize;
3580                    pOut->pMipInfo[i].mipTailOffset    = 0;
3581                    pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3582                }
3583
3584                mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3585            }
3586
3587            pOut->sliceSize = mipSliceSize;
3588            pOut->surfSize  = mipSliceSize * pOut->numSlices;
3589        }
3590        else
3591        {
3592            pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3593            pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3594
3595            if (pOut->pMipInfo != NULL)
3596            {
3597                pOut->pMipInfo[0].pitch            = pOut->pitch;
3598                pOut->pMipInfo[0].height           = pOut->height;
3599                pOut->pMipInfo[0].depth            = 1;
3600                pOut->pMipInfo[0].offset           = 0;
3601                pOut->pMipInfo[0].mipTailOffset    = 0;
3602                pOut->pMipInfo[0].macroBlockOffset = 0;
3603            }
3604        }
3605
3606    }
3607
3608    return ret;
3609}
3610
3611/**
3612************************************************************************************************************************
3613*   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3614*
3615*   @brief
3616*       Internal function to calculate alignment for macro tiled surface
3617*
3618*   @return
3619*       ADDR_E_RETURNCODE
3620************************************************************************************************************************
3621*/
3622ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3623     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3624     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3625     ) const
3626{
3627    ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3628                                                                &pOut->blockHeight,
3629                                                                &pOut->blockSlices,
3630                                                                pIn->bpp,
3631                                                                pIn->numFrags,
3632                                                                pIn->resourceType,
3633                                                                pIn->swizzleMode);
3634
3635    if (returnCode == ADDR_OK)
3636    {
3637        UINT_32 heightAlign = pOut->blockHeight;
3638
3639        if (pIn->flags.qbStereo)
3640        {
3641            UINT_32 rightXor = 0;
3642
3643            returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3644
3645            if (returnCode == ADDR_OK)
3646            {
3647                pOut->pStereoInfo->rightSwizzle = rightXor;
3648            }
3649        }
3650
3651        if (returnCode == ADDR_OK)
3652        {
3653            const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3654            const UINT_32 blockSize     = 1 << blockSizeLog2;
3655
3656            pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3657            pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3658            pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3659            pOut->baseAlign = blockSize;
3660
3661            if (pIn->numMipLevels > 1)
3662            {
3663                const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3664                                                                pIn->swizzleMode,
3665                                                                pOut->blockWidth,
3666                                                                pOut->blockHeight,
3667                                                                pOut->blockSlices);
3668                const UINT_32 mip0Width         = pIn->width;
3669                const UINT_32 mip0Height        = pIn->height;
3670                const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3671                const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3672                const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3673                const UINT_32 index             = Log2(pIn->bpp >> 3);
3674                UINT_32       firstMipInTail    = pIn->numMipLevels;
3675                UINT_64       mipChainSliceSize = 0;
3676                UINT_64       mipSize[MaxMipLevels];
3677                UINT_64       mipSliceSize[MaxMipLevels];
3678
3679                Dim3d fixedTailMaxDim = tailMaxDim;
3680
3681                if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3682                {
3683                    fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3684                    fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3685                }
3686
3687                for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3688                {
3689                    UINT_32 mipWidth, mipHeight, mipDepth;
3690
3691                    GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3692
3693                    if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3694                    {
3695                        firstMipInTail     = i;
3696                        mipChainSliceSize += blockSize / pOut->blockSlices;
3697                        break;
3698                    }
3699                    else
3700                    {
3701                        const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3702                        const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3703                        const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3704                        const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3705
3706                        mipSize[i]         = sliceSize * depth;
3707                        mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3708                        mipChainSliceSize += sliceSize;
3709
3710                        if (pOut->pMipInfo != NULL)
3711                        {
3712                            pOut->pMipInfo[i].pitch  = pitch;
3713                            pOut->pMipInfo[i].height = height;
3714                            pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3715                        }
3716                    }
3717                }
3718
3719                pOut->sliceSize        = mipChainSliceSize;
3720                pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3721                pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3722                pOut->firstMipIdInTail = firstMipInTail;
3723
3724                if (pOut->pMipInfo != NULL)
3725                {
3726                    UINT_64 offset         = 0;
3727                    UINT_64 macroBlkOffset = 0;
3728                    UINT_32 tailMaxDepth   = 0;
3729
3730                    if (firstMipInTail != pIn->numMipLevels)
3731                    {
3732                        UINT_32 mipWidth, mipHeight;
3733
3734                        GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3735                                   &mipWidth, &mipHeight, &tailMaxDepth);
3736
3737                        offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3738                        macroBlkOffset = blockSize;
3739                    }
3740
3741                    for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3742                    {
3743                        pOut->pMipInfo[i].offset           = offset;
3744                        pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3745                        pOut->pMipInfo[i].mipTailOffset    = 0;
3746
3747                        offset         += mipSize[i];
3748                        macroBlkOffset += mipSliceSize[i];
3749                    }
3750
3751                    UINT_32 pitch  = tailMaxDim.w;
3752                    UINT_32 height = tailMaxDim.h;
3753                    UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3754
3755                    tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3756
3757                    for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3758                    {
3759                        const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3760                        const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3761
3762                        pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3763                        pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3764                        pOut->pMipInfo[i].macroBlockOffset = 0;
3765
3766                        pOut->pMipInfo[i].pitch  = pitch;
3767                        pOut->pMipInfo[i].height = height;
3768                        pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3769
3770                        UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3771                                       ((mipOffset >> 10) & 2)  |
3772                                       ((mipOffset >> 11) & 4)  |
3773                                       ((mipOffset >> 12) & 8)  |
3774                                       ((mipOffset >> 13) & 16) |
3775                                       ((mipOffset >> 14) & 32);
3776                        UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3777                                       ((mipOffset >> 9)  & 2)  |
3778                                       ((mipOffset >> 10) & 4)  |
3779                                       ((mipOffset >> 11) & 8)  |
3780                                       ((mipOffset >> 12) & 16) |
3781                                       ((mipOffset >> 13) & 32);
3782
3783                        if (blockSizeLog2 & 1)
3784                        {
3785                            const UINT_32 temp = mipX;
3786                            mipX = mipY;
3787                            mipY = temp;
3788
3789                            if (index & 1)
3790                            {
3791                                mipY = (mipY << 1) | (mipX & 1);
3792                                mipX = mipX >> 1;
3793                            }
3794                        }
3795
3796                        if (isThin)
3797                        {
3798                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3799                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3800                            pOut->pMipInfo[i].mipTailCoordZ = 0;
3801
3802                            pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3803                            height = Max(height >> 1, Block256_2d[index].h);
3804                        }
3805                        else
3806                        {
3807                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3808                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3809                            pOut->pMipInfo[i].mipTailCoordZ = 0;
3810
3811                            pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3812                            height = Max(height >> 1, Block256_3d[index].h);
3813                        }
3814                    }
3815                }
3816            }
3817            else
3818            {
3819                pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3820                pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3821
3822                if (pOut->pMipInfo != NULL)
3823                {
3824                    pOut->pMipInfo[0].pitch            = pOut->pitch;
3825                    pOut->pMipInfo[0].height           = pOut->height;
3826                    pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3827                    pOut->pMipInfo[0].offset           = 0;
3828                    pOut->pMipInfo[0].mipTailOffset    = 0;
3829                    pOut->pMipInfo[0].macroBlockOffset = 0;
3830                    pOut->pMipInfo[0].mipTailCoordX    = 0;
3831                    pOut->pMipInfo[0].mipTailCoordY    = 0;
3832                    pOut->pMipInfo[0].mipTailCoordZ    = 0;
3833                }
3834            }
3835        }
3836    }
3837
3838    return returnCode;
3839}
3840
3841/**
3842************************************************************************************************************************
3843*   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3844*
3845*   @brief
3846*       Internal function to calculate address from coord for tiled swizzle surface
3847*
3848*   @return
3849*       ADDR_E_RETURNCODE
3850************************************************************************************************************************
3851*/
3852ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3853     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3854     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3855     ) const
3856{
3857    ADDR_E_RETURNCODE ret;
3858
3859    if (IsBlock256b(pIn->swizzleMode))
3860    {
3861        ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3862    }
3863    else
3864    {
3865        ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3866    }
3867
3868    return ret;
3869}
3870
3871/**
3872************************************************************************************************************************
3873*   Gfx10Lib::ComputeOffsetFromEquation
3874*
3875*   @brief
3876*       Compute offset from equation
3877*
3878*   @return
3879*       Offset
3880************************************************************************************************************************
3881*/
3882UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3883    const ADDR_EQUATION* pEq,   ///< Equation
3884    UINT_32              x,     ///< x coord in bytes
3885    UINT_32              y,     ///< y coord in pixel
3886    UINT_32              z      ///< z coord in slice
3887    ) const
3888{
3889    UINT_32 offset = 0;
3890
3891    for (UINT_32 i = 0; i < pEq->numBits; i++)
3892    {
3893        UINT_32 v = 0;
3894
3895        if (pEq->addr[i].valid)
3896        {
3897            if (pEq->addr[i].channel == 0)
3898            {
3899                v ^= (x >> pEq->addr[i].index) & 1;
3900            }
3901            else if (pEq->addr[i].channel == 1)
3902            {
3903                v ^= (y >> pEq->addr[i].index) & 1;
3904            }
3905            else
3906            {
3907                ADDR_ASSERT(pEq->addr[i].channel == 2);
3908                v ^= (z >> pEq->addr[i].index) & 1;
3909            }
3910        }
3911
3912        if (pEq->xor1[i].valid)
3913        {
3914            if (pEq->xor1[i].channel == 0)
3915            {
3916                v ^= (x >> pEq->xor1[i].index) & 1;
3917            }
3918            else if (pEq->xor1[i].channel == 1)
3919            {
3920                v ^= (y >> pEq->xor1[i].index) & 1;
3921            }
3922            else
3923            {
3924                ADDR_ASSERT(pEq->xor1[i].channel == 2);
3925                v ^= (z >> pEq->xor1[i].index) & 1;
3926            }
3927        }
3928
3929        if (pEq->xor2[i].valid)
3930        {
3931            if (pEq->xor2[i].channel == 0)
3932            {
3933                v ^= (x >> pEq->xor2[i].index) & 1;
3934            }
3935            else if (pEq->xor2[i].channel == 1)
3936            {
3937                v ^= (y >> pEq->xor2[i].index) & 1;
3938            }
3939            else
3940            {
3941                ADDR_ASSERT(pEq->xor2[i].channel == 2);
3942                v ^= (z >> pEq->xor2[i].index) & 1;
3943            }
3944        }
3945
3946        offset |= (v << i);
3947    }
3948
3949    return offset;
3950}
3951
3952/**
3953************************************************************************************************************************
3954*   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3955*
3956*   @brief
3957*       Compute offset from swizzle pattern
3958*
3959*   @return
3960*       Offset
3961************************************************************************************************************************
3962*/
3963UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3964    const UINT_64* pPattern,    ///< Swizzle pattern
3965    UINT_32        numBits,     ///< Number of bits in pattern
3966    UINT_32        x,           ///< x coord in pixel
3967    UINT_32        y,           ///< y coord in pixel
3968    UINT_32        z,           ///< z coord in slice
3969    UINT_32        s            ///< sample id
3970    ) const
3971{
3972    UINT_32                 offset          = 0;
3973    const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3974
3975    for (UINT_32 i = 0; i < numBits; i++)
3976    {
3977        UINT_32 v = 0;
3978
3979        if (pSwizzlePattern[i].x != 0)
3980        {
3981            UINT_16 mask  = pSwizzlePattern[i].x;
3982            UINT_32 xBits = x;
3983
3984            while (mask != 0)
3985            {
3986                if (mask & 1)
3987                {
3988                    v ^= xBits & 1;
3989                }
3990
3991                xBits >>= 1;
3992                mask  >>= 1;
3993            }
3994        }
3995
3996        if (pSwizzlePattern[i].y != 0)
3997        {
3998            UINT_16 mask  = pSwizzlePattern[i].y;
3999            UINT_32 yBits = y;
4000
4001            while (mask != 0)
4002            {
4003                if (mask & 1)
4004                {
4005                    v ^= yBits & 1;
4006                }
4007
4008                yBits >>= 1;
4009                mask  >>= 1;
4010            }
4011        }
4012
4013        if (pSwizzlePattern[i].z != 0)
4014        {
4015            UINT_16 mask  = pSwizzlePattern[i].z;
4016            UINT_32 zBits = z;
4017
4018            while (mask != 0)
4019            {
4020                if (mask & 1)
4021                {
4022                    v ^= zBits & 1;
4023                }
4024
4025                zBits >>= 1;
4026                mask  >>= 1;
4027            }
4028        }
4029
4030        if (pSwizzlePattern[i].s != 0)
4031        {
4032            UINT_16 mask  = pSwizzlePattern[i].s;
4033            UINT_32 sBits = s;
4034
4035            while (mask != 0)
4036            {
4037                if (mask & 1)
4038                {
4039                    v ^= sBits & 1;
4040                }
4041
4042                sBits >>= 1;
4043                mask  >>= 1;
4044            }
4045        }
4046
4047        offset |= (v << i);
4048    }
4049
4050    return offset;
4051}
4052
4053/**
4054************************************************************************************************************************
4055*   Gfx10Lib::GetSwizzlePatternInfo
4056*
4057*   @brief
4058*       Get swizzle pattern
4059*
4060*   @return
4061*       Swizzle pattern information
4062************************************************************************************************************************
4063*/
4064const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4065    AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
4066    AddrResourceType resourceType,      ///< Resource type
4067    UINT_32          elemLog2,          ///< Element size in bytes log2
4068    UINT_32          numFrag            ///< Number of fragment
4069    ) const
4070{
4071    const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4072    const ADDR_SW_PATINFO* patInfo     = NULL;
4073    const UINT_32          swizzleMask = 1 << swizzleMode;
4074
4075    if (IsBlockVariable(swizzleMode))
4076    {
4077        if (m_blockVarSizeLog2 != 0)
4078        {
4079            ADDR_ASSERT(m_settings.supportRbPlus);
4080
4081            if (IsRtOptSwizzle(swizzleMode))
4082            {
4083                if (numFrag == 1)
4084                {
4085                    patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4086                }
4087                else if (numFrag == 2)
4088                {
4089                    patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4090                }
4091                else if (numFrag == 4)
4092                {
4093                    patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4094                }
4095                else
4096                {
4097                    ADDR_ASSERT(numFrag == 8);
4098                    patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4099                }
4100            }
4101            else if (IsZOrderSwizzle(swizzleMode))
4102            {
4103                if (numFrag == 1)
4104                {
4105                    patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4106                }
4107                else if (numFrag == 2)
4108                {
4109                    patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4110                }
4111                else if (numFrag == 4)
4112                {
4113                    patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4114                }
4115                else
4116                {
4117                    ADDR_ASSERT(numFrag == 8);
4118                    patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4119                }
4120            }
4121        }
4122    }
4123    else if (IsLinear(swizzleMode) == FALSE)
4124    {
4125        if (resourceType == ADDR_RSRC_TEX_3D)
4126        {
4127            ADDR_ASSERT(numFrag == 1);
4128
4129            if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4130            {
4131                if (IsRtOptSwizzle(swizzleMode))
4132                {
4133                    patInfo = m_settings.supportRbPlus ?
4134                              GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4135                }
4136                else if (IsZOrderSwizzle(swizzleMode))
4137                {
4138                    patInfo = m_settings.supportRbPlus ?
4139                              GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4140                }
4141                else if (IsDisplaySwizzle(resourceType, swizzleMode))
4142                {
4143                    ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4144                    patInfo = m_settings.supportRbPlus ?
4145                              GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4146                }
4147                else
4148                {
4149                    ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4150
4151                    if (IsBlock4kb(swizzleMode))
4152                    {
4153                        if (swizzleMode == ADDR_SW_4KB_S)
4154                        {
4155                            patInfo = m_settings.supportRbPlus ?
4156                                      GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4157                        }
4158                        else
4159                        {
4160                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4161                            patInfo = m_settings.supportRbPlus ?
4162                                      GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4163                        }
4164                    }
4165                    else
4166                    {
4167                        if (swizzleMode == ADDR_SW_64KB_S)
4168                        {
4169                            patInfo = m_settings.supportRbPlus ?
4170                                      GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4171                        }
4172                        else if (swizzleMode == ADDR_SW_64KB_S_X)
4173                        {
4174                            patInfo = m_settings.supportRbPlus ?
4175                                      GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4176                        }
4177                        else
4178                        {
4179                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4180                            patInfo = m_settings.supportRbPlus ?
4181                                      GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4182                        }
4183                    }
4184                }
4185            }
4186        }
4187        else
4188        {
4189            if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4190            {
4191                if (IsBlock256b(swizzleMode))
4192                {
4193                    if (swizzleMode == ADDR_SW_256B_S)
4194                    {
4195                        patInfo = m_settings.supportRbPlus ?
4196                                  GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4197                    }
4198                    else
4199                    {
4200                        ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4201                        patInfo = m_settings.supportRbPlus ?
4202                                  GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4203                    }
4204                }
4205                else if (IsBlock4kb(swizzleMode))
4206                {
4207                    if (IsStandardSwizzle(resourceType, swizzleMode))
4208                    {
4209                        if (swizzleMode == ADDR_SW_4KB_S)
4210                        {
4211                            patInfo = m_settings.supportRbPlus ?
4212                                      GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4213                        }
4214                        else
4215                        {
4216                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4217                            patInfo = m_settings.supportRbPlus ?
4218                                      GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4219                        }
4220                    }
4221                    else
4222                    {
4223                        if (swizzleMode == ADDR_SW_4KB_D)
4224                        {
4225                            patInfo = m_settings.supportRbPlus ?
4226                                      GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4227                        }
4228                        else
4229                        {
4230                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4231                            patInfo = m_settings.supportRbPlus ?
4232                                      GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4233                        }
4234                    }
4235                }
4236                else
4237                {
4238                    if (IsRtOptSwizzle(swizzleMode))
4239                    {
4240                        if (numFrag == 1)
4241                        {
4242                            patInfo = m_settings.supportRbPlus ?
4243                                      GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4244                        }
4245                        else if (numFrag == 2)
4246                        {
4247                            patInfo = m_settings.supportRbPlus ?
4248                                      GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4249                        }
4250                        else if (numFrag == 4)
4251                        {
4252                            patInfo = m_settings.supportRbPlus ?
4253                                      GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4254                        }
4255                        else
4256                        {
4257                            ADDR_ASSERT(numFrag == 8);
4258                            patInfo = m_settings.supportRbPlus ?
4259                                      GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4260                        }
4261                    }
4262                    else if (IsZOrderSwizzle(swizzleMode))
4263                    {
4264                        if (numFrag == 1)
4265                        {
4266                            patInfo = m_settings.supportRbPlus ?
4267                                      GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4268                        }
4269                        else if (numFrag == 2)
4270                        {
4271                            patInfo = m_settings.supportRbPlus ?
4272                                      GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4273                        }
4274                        else if (numFrag == 4)
4275                        {
4276                            patInfo = m_settings.supportRbPlus ?
4277                                      GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4278                        }
4279                        else
4280                        {
4281                            ADDR_ASSERT(numFrag == 8);
4282                            patInfo = m_settings.supportRbPlus ?
4283                                      GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4284                        }
4285                    }
4286                    else if (IsDisplaySwizzle(resourceType, swizzleMode))
4287                    {
4288                        if (swizzleMode == ADDR_SW_64KB_D)
4289                        {
4290                            patInfo = m_settings.supportRbPlus ?
4291                                      GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4292                        }
4293                        else if (swizzleMode == ADDR_SW_64KB_D_X)
4294                        {
4295                            patInfo = m_settings.supportRbPlus ?
4296                                      GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4297                        }
4298                        else
4299                        {
4300                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4301                            patInfo = m_settings.supportRbPlus ?
4302                                      GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4303                        }
4304                    }
4305                    else
4306                    {
4307                        if (swizzleMode == ADDR_SW_64KB_S)
4308                        {
4309                            patInfo = m_settings.supportRbPlus ?
4310                                      GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4311                        }
4312                        else if (swizzleMode == ADDR_SW_64KB_S_X)
4313                        {
4314                            patInfo = m_settings.supportRbPlus ?
4315                                      GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4316                        }
4317                        else
4318                        {
4319                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4320                            patInfo = m_settings.supportRbPlus ?
4321                                      GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4322                        }
4323                    }
4324                }
4325            }
4326        }
4327    }
4328
4329    return (patInfo != NULL) ? &patInfo[index] : NULL;
4330}
4331
4332
4333/**
4334************************************************************************************************************************
4335*   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4336*
4337*   @brief
4338*       Internal function to calculate address from coord for micro tiled swizzle surface
4339*
4340*   @return
4341*       ADDR_E_RETURNCODE
4342************************************************************************************************************************
4343*/
4344ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4345     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4346     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4347     ) const
4348{
4349    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4350    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4351    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4352
4353    localIn.swizzleMode  = pIn->swizzleMode;
4354    localIn.flags        = pIn->flags;
4355    localIn.resourceType = pIn->resourceType;
4356    localIn.bpp          = pIn->bpp;
4357    localIn.width        = Max(pIn->unalignedWidth,  1u);
4358    localIn.height       = Max(pIn->unalignedHeight, 1u);
4359    localIn.numSlices    = Max(pIn->numSlices,       1u);
4360    localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4361    localIn.numSamples   = Max(pIn->numSamples,      1u);
4362    localIn.numFrags     = Max(pIn->numFrags,        1u);
4363    localOut.pMipInfo    = mipInfo;
4364
4365    ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4366
4367    if (ret == ADDR_OK)
4368    {
4369        const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4370        const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4371        const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4372        const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4373
4374        if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4375        {
4376            const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4377            const UINT_32 yb           = pIn->y / localOut.blockHeight;
4378            const UINT_32 xb           = pIn->x / localOut.blockWidth;
4379            const UINT_32 blockIndex   = yb * pb + xb;
4380            const UINT_32 blockSize    = 256;
4381            const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4382                                                                   pIn->x << elemLog2,
4383                                                                   pIn->y,
4384                                                                   0);
4385            pOut->addr = localOut.sliceSize * pIn->slice +
4386                         mipInfo[pIn->mipId].macroBlockOffset +
4387                         (blockIndex * blockSize) +
4388                         blk256Offset;
4389        }
4390        else
4391        {
4392            ret = ADDR_INVALIDPARAMS;
4393        }
4394    }
4395
4396    return ret;
4397}
4398
4399/**
4400************************************************************************************************************************
4401*   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4402*
4403*   @brief
4404*       Internal function to calculate address from coord for macro tiled swizzle surface
4405*
4406*   @return
4407*       ADDR_E_RETURNCODE
4408************************************************************************************************************************
4409*/
4410ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4411     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4412     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4413     ) const
4414{
4415    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4416    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4417    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4418
4419    localIn.swizzleMode  = pIn->swizzleMode;
4420    localIn.flags        = pIn->flags;
4421    localIn.resourceType = pIn->resourceType;
4422    localIn.bpp          = pIn->bpp;
4423    localIn.width        = Max(pIn->unalignedWidth,  1u);
4424    localIn.height       = Max(pIn->unalignedHeight, 1u);
4425    localIn.numSlices    = Max(pIn->numSlices,       1u);
4426    localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4427    localIn.numSamples   = Max(pIn->numSamples,      1u);
4428    localIn.numFrags     = Max(pIn->numFrags,        1u);
4429    localOut.pMipInfo    = mipInfo;
4430
4431    ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4432
4433    if (ret == ADDR_OK)
4434    {
4435        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4436        const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4437        const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4438        const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4439        const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4440        const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4441                                    (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4442
4443        if (localIn.numFrags > 1)
4444        {
4445            const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4446                                                                    pIn->resourceType,
4447                                                                    elemLog2,
4448                                                                    localIn.numFrags);
4449
4450            if (pPatInfo != NULL)
4451            {
4452                const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4453                const UINT_32 yb        = pIn->y / localOut.blockHeight;
4454                const UINT_32 xb        = pIn->x / localOut.blockWidth;
4455                const UINT_64 blkIdx    = yb * pb + xb;
4456
4457                ADDR_BIT_SETTING fullSwizzlePattern[20];
4458                GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4459
4460                const UINT_32 blkOffset =
4461                    ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4462                                                    blkSizeLog2,
4463                                                    pIn->x,
4464                                                    pIn->y,
4465                                                    pIn->slice,
4466                                                    pIn->sample);
4467
4468                pOut->addr = (localOut.sliceSize * pIn->slice) +
4469                             (blkIdx << blkSizeLog2) +
4470                             (blkOffset ^ pipeBankXor);
4471            }
4472            else
4473            {
4474                ret = ADDR_INVALIDPARAMS;
4475            }
4476        }
4477        else
4478        {
4479            const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4480            const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4481            const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4482
4483            if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4484            {
4485                const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4486                const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4487                const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4488                const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4489                const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4490                const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4491                const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4492                const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4493                const UINT_32 yb        = pIn->y / localOut.blockHeight;
4494                const UINT_32 xb        = pIn->x / localOut.blockWidth;
4495                const UINT_64 blkIdx    = yb * pb + xb;
4496                const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4497                                                                    x << elemLog2,
4498                                                                    y,
4499                                                                    z);
4500                pOut->addr = sliceSize * sliceId +
4501                             mipInfo[pIn->mipId].macroBlockOffset +
4502                             (blkIdx << blkSizeLog2) +
4503                             (blkOffset ^ pipeBankXor);
4504            }
4505            else
4506            {
4507                ret = ADDR_INVALIDPARAMS;
4508            }
4509        }
4510    }
4511
4512    return ret;
4513}
4514
4515/**
4516************************************************************************************************************************
4517*   Gfx10Lib::HwlComputeMaxBaseAlignments
4518*
4519*   @brief
4520*       Gets maximum alignments
4521*   @return
4522*       maximum alignments
4523************************************************************************************************************************
4524*/
4525UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4526{
4527    return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4528}
4529
4530/**
4531************************************************************************************************************************
4532*   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4533*
4534*   @brief
4535*       Gets maximum alignments for metadata
4536*   @return
4537*       maximum alignments for metadata
4538************************************************************************************************************************
4539*/
4540UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4541{
4542    Dim3d metaBlk;
4543
4544    const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4545    {
4546        ADDR_SW_64KB_Z_X,
4547        m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4548    };
4549
4550    UINT_32 maxBaseAlignHtile = 0;
4551    UINT_32 maxBaseAlignCmask = 0;
4552
4553    for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4554    {
4555        for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4556        {
4557            for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4558            {
4559                // Max base alignment for Htile
4560                const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4561                                                                ADDR_RSRC_TEX_2D,
4562                                                                ValidSwizzleModeForXmask[swIdx],
4563                                                                bppLog2,
4564                                                                numFragLog2,
4565                                                                TRUE,
4566                                                                &metaBlk);
4567
4568                maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4569            }
4570        }
4571
4572        // Max base alignment for Cmask
4573        const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4574                                                        ADDR_RSRC_TEX_2D,
4575                                                        ValidSwizzleModeForXmask[swIdx],
4576                                                        0,
4577                                                        0,
4578                                                        TRUE,
4579                                                        &metaBlk);
4580
4581        maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4582    }
4583
4584    // Max base alignment for 2D Dcc
4585    const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4586    {
4587        ADDR_SW_64KB_S_X,
4588        ADDR_SW_64KB_D_X,
4589        ADDR_SW_64KB_R_X,
4590        m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4591    };
4592
4593    UINT_32 maxBaseAlignDcc2D = 0;
4594
4595    for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4596    {
4597        for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4598        {
4599            for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4600            {
4601                const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4602                                                             ADDR_RSRC_TEX_2D,
4603                                                             ValidSwizzleModeForDcc2D[swIdx],
4604                                                             bppLog2,
4605                                                             numFragLog2,
4606                                                             TRUE,
4607                                                             &metaBlk);
4608
4609                maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4610            }
4611        }
4612    }
4613
4614    // Max base alignment for 3D Dcc
4615    const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4616    {
4617        ADDR_SW_64KB_Z_X,
4618        ADDR_SW_64KB_S_X,
4619        ADDR_SW_64KB_D_X,
4620        ADDR_SW_64KB_R_X,
4621        m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4622    };
4623
4624    UINT_32 maxBaseAlignDcc3D = 0;
4625
4626    for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4627    {
4628        for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4629        {
4630            const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4631                                                         ADDR_RSRC_TEX_3D,
4632                                                         ValidSwizzleModeForDcc3D[swIdx],
4633                                                         bppLog2,
4634                                                         0,
4635                                                         TRUE,
4636                                                         &metaBlk);
4637
4638            maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4639        }
4640    }
4641
4642    return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4643}
4644
4645/**
4646************************************************************************************************************************
4647*   Gfx10Lib::GetMetaElementSizeLog2
4648*
4649*   @brief
4650*       Gets meta data element size log2
4651*   @return
4652*       Meta data element size log2
4653************************************************************************************************************************
4654*/
4655INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4656    Gfx10DataType dataType) ///< Data surface type
4657{
4658    INT_32 elemSizeLog2 = 0;
4659
4660    if (dataType == Gfx10DataColor)
4661    {
4662        elemSizeLog2 = 0;
4663    }
4664    else if (dataType == Gfx10DataDepthStencil)
4665    {
4666        elemSizeLog2 = 2;
4667    }
4668    else
4669    {
4670        ADDR_ASSERT(dataType == Gfx10DataFmask);
4671        elemSizeLog2 = -1;
4672    }
4673
4674    return elemSizeLog2;
4675}
4676
4677/**
4678************************************************************************************************************************
4679*   Gfx10Lib::GetMetaCacheSizeLog2
4680*
4681*   @brief
4682*       Gets meta data cache line size log2
4683*   @return
4684*       Meta data cache line size log2
4685************************************************************************************************************************
4686*/
4687INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4688    Gfx10DataType dataType) ///< Data surface type
4689{
4690    INT_32 cacheSizeLog2 = 0;
4691
4692    if (dataType == Gfx10DataColor)
4693    {
4694        cacheSizeLog2 = 6;
4695    }
4696    else if (dataType == Gfx10DataDepthStencil)
4697    {
4698        cacheSizeLog2 = 8;
4699    }
4700    else
4701    {
4702        ADDR_ASSERT(dataType == Gfx10DataFmask);
4703        cacheSizeLog2 = 8;
4704    }
4705    return cacheSizeLog2;
4706}
4707
4708/**
4709************************************************************************************************************************
4710*   Gfx10Lib::HwlComputeSurfaceInfoLinear
4711*
4712*   @brief
4713*       Internal function to calculate alignment for linear surface
4714*
4715*   @return
4716*       ADDR_E_RETURNCODE
4717************************************************************************************************************************
4718*/
4719ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4720     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4721     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4722     ) const
4723{
4724    ADDR_E_RETURNCODE returnCode = ADDR_OK;
4725
4726    if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4727    {
4728        returnCode = ADDR_INVALIDPARAMS;
4729    }
4730    else
4731    {
4732        const UINT_32 elementBytes = pIn->bpp >> 3;
4733        const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4734        const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4735        UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4736        UINT_32       actualHeight = pIn->height;
4737        UINT_64       sliceSize    = 0;
4738
4739        if (pIn->numMipLevels > 1)
4740        {
4741            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4742            {
4743                UINT_32 mipWidth, mipHeight;
4744
4745                GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4746
4747                const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4748
4749                if (pOut->pMipInfo != NULL)
4750                {
4751                    pOut->pMipInfo[i].pitch            = mipActualWidth;
4752                    pOut->pMipInfo[i].height           = mipHeight;
4753                    pOut->pMipInfo[i].depth            = mipDepth;
4754                    pOut->pMipInfo[i].offset           = sliceSize;
4755                    pOut->pMipInfo[i].mipTailOffset    = 0;
4756                    pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4757                }
4758
4759                sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4760            }
4761        }
4762        else
4763        {
4764            returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4765
4766            if (returnCode == ADDR_OK)
4767            {
4768                sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4769
4770                if (pOut->pMipInfo != NULL)
4771                {
4772                    pOut->pMipInfo[0].pitch            = pitch;
4773                    pOut->pMipInfo[0].height           = actualHeight;
4774                    pOut->pMipInfo[0].depth            = mipDepth;
4775                    pOut->pMipInfo[0].offset           = 0;
4776                    pOut->pMipInfo[0].mipTailOffset    = 0;
4777                    pOut->pMipInfo[0].macroBlockOffset = 0;
4778                }
4779            }
4780        }
4781
4782        if (returnCode == ADDR_OK)
4783        {
4784            pOut->pitch          = pitch;
4785            pOut->height         = actualHeight;
4786            pOut->numSlices      = pIn->numSlices;
4787            pOut->sliceSize      = sliceSize;
4788            pOut->surfSize       = sliceSize * pOut->numSlices;
4789            pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4790            pOut->blockWidth     = pitchAlign;
4791            pOut->blockHeight    = 1;
4792            pOut->blockSlices    = 1;
4793
4794            // Following members are useless on GFX10
4795            pOut->mipChainPitch  = 0;
4796            pOut->mipChainHeight = 0;
4797            pOut->mipChainSlice  = 0;
4798            pOut->epitchIsHeight = FALSE;
4799
4800            // Post calculation validate
4801            ADDR_ASSERT(pOut->sliceSize > 0);
4802        }
4803    }
4804
4805    return returnCode;
4806}
4807
4808} // V2
4809} // Addr
4810