1/****************************************************************************
2* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3*
4* Permission is hereby granted, free of charge, to any person obtaining a
5* copy of this software and associated documentation files (the "Software"),
6* to deal in the Software without restriction, including without limitation
7* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8* and/or sell copies of the Software, and to permit persons to whom the
9* Software is furnished to do so, subject to the following conditions:
10*
11* The above copyright notice and this permission notice (including the next
12* paragraph) shall be included in all copies or substantial portions of the
13* Software.
14*
15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21* IN THE SOFTWARE.
22*
23* @file TilingFunctions.h
24*
25* @brief Tiling functions.
26*
27******************************************************************************/
28#pragma once
29
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"
#include "memory/SurfaceState.h"

#include <algorithm>
#include <cstring>
36
// Maximum number of LODs (mip levels); not referenced in this header,
// presumably used to size per-LOD tables elsewhere.
#define MAX_NUM_LOD 15

// Rounds x up to a multiple of a (a is expected to be a power of two).
// The remainder bits are subtracted instead of masking with ~((a) - 1):
// the bitwise-not form has an issue when a uint32 alignment is used with a
// 64-bit value, since the ~'ed value will remain 32-bit.
// Note: both macro arguments are evaluated more than once.
#define GFX_ALIGN(x, a) \
    (((a) - 1) + (x) - ((((a) - 1) + (x)) & ((a) - 1)))
40
41//////////////////////////////////////////////////////////////////////////
42/// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
43//////////////////////////////////////////////////////////////////////////
44template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
45struct SimdTile
46{
47    // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
48    float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
49
50    //////////////////////////////////////////////////////////////////////////
51    /// @brief Retrieve color from simd.
52    /// @param index - linear index to color within simd.
53    /// @param outputColor - output color
54    INLINE void GetSwizzledColor(
55        uint32_t index,
56        float outputColor[4])
57    {
58        // SOA pattern for 2x2 is a subset of 4x2.
59        //   0 1 4 5
60        //   2 3 6 7
61        // The offset converts pattern to linear
62#if (SIMD_TILE_X_DIM == 4)
63        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
64#elif (SIMD_TILE_X_DIM == 2)
65        static const uint32_t offset[] = { 0, 1, 2, 3 };
66#endif
67
68        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
69        {
70            outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
71        }
72    }
73
74    //////////////////////////////////////////////////////////////////////////
75    /// @brief Retrieve color from simd.
76    /// @param index - linear index to color within simd.
77    /// @param outputColor - output color
78    INLINE void SetSwizzledColor(
79        uint32_t index,
80        const float src[4])
81    {
82        // SOA pattern for 2x2 is a subset of 4x2.
83        //   0 1 4 5
84        //   2 3 6 7
85        // The offset converts pattern to linear
86#if (SIMD_TILE_X_DIM == 4)
87        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
88#elif (SIMD_TILE_X_DIM == 2)
89        static const uint32_t offset[] = { 0, 1, 2, 3 };
90#endif
91
92        // Only loop over the components needed for destination.
93        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
94        {
95            this->color[i][offset[index]] = src[i];
96        }
97    }
98};
99
100template<>
101struct SimdTile <R8_UINT,R8_UINT>
102{
103    // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
104    uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
105
106    //////////////////////////////////////////////////////////////////////////
107    /// @brief Retrieve color from simd.
108    /// @param index - linear index to color within simd.
109    /// @param outputColor - output color
110    INLINE void GetSwizzledColor(
111        uint32_t index,
112        float outputColor[4])
113    {
114        // SOA pattern for 2x2 is a subset of 4x2.
115        //   0 1 4 5
116        //   2 3 6 7
117        // The offset converts pattern to linear
118#if (SIMD_TILE_X_DIM == 4)
119        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
120#elif (SIMD_TILE_X_DIM == 2)
121        static const uint32_t offset[] = { 0, 1, 2, 3 };
122#endif
123
124        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
125        {
126            uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
127            outputColor[i] = *(float*)&src;
128        }
129    }
130
131    //////////////////////////////////////////////////////////////////////////
132    /// @brief Retrieve color from simd.
133    /// @param index - linear index to color within simd.
134    /// @param outputColor - output color
135    INLINE void SetSwizzledColor(
136        uint32_t index,
137        const float src[4])
138    {
139        // SOA pattern for 2x2 is a subset of 4x2.
140        //   0 1 4 5
141        //   2 3 6 7
142        // The offset converts pattern to linear
143#if (SIMD_TILE_X_DIM == 4)
144        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
145#elif (SIMD_TILE_X_DIM == 2)
146        static const uint32_t offset[] = { 0, 1, 2, 3 };
147#endif
148
149        // Only loop over the components needed for destination.
150        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
151        {
152            this->color[i][offset[index]] = *(uint8_t*)&src[i];
153        }
154    }
155};
156
157//////////////////////////////////////////////////////////////////////////
158/// SimdTile 8x2 for AVX-512
159//////////////////////////////////////////////////////////////////////////
160
161template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
162struct SimdTile_16
163{
164    // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
165    float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
166
167    //////////////////////////////////////////////////////////////////////////
168    /// @brief Retrieve color from simd.
169    /// @param index - linear index to color within simd.
170    /// @param outputColor - output color
171    INLINE void GetSwizzledColor(
172        uint32_t index,
173        float outputColor[4])
174    {
175        // SOA pattern for 8x2..
176        //   0 1 4 5 8 9 C D
177        //   2 3 6 7 A B E F
178        // The offset converts pattern to linear
179        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
180
181        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
182        {
183            outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
184        }
185    }
186
187    //////////////////////////////////////////////////////////////////////////
188    /// @brief Retrieve color from simd.
189    /// @param index - linear index to color within simd.
190    /// @param outputColor - output color
191    INLINE void SetSwizzledColor(
192        uint32_t index,
193        const float src[4])
194    {
195        // SOA pattern for 8x2..
196        //   0 1 4 5 8 9 C D
197        //   2 3 6 7 A B E F
198        // The offset converts pattern to linear
199        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
200
201        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
202        {
203            this->color[i][offset[index]] = src[i];
204        }
205    }
206};
207
208template<>
209struct SimdTile_16 <R8_UINT, R8_UINT>
210{
211    // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
212    uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
213
214    //////////////////////////////////////////////////////////////////////////
215    /// @brief Retrieve color from simd.
216    /// @param index - linear index to color within simd.
217    /// @param outputColor - output color
218    INLINE void GetSwizzledColor(
219        uint32_t index,
220        float outputColor[4])
221    {
222        // SOA pattern for 8x2..
223        //   0 1 4 5 8 9 C D
224        //   2 3 6 7 A B E F
225        // The offset converts pattern to linear
226        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
227
228        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
229        {
230            uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
231            outputColor[i] = *(float*)&src;
232        }
233    }
234
235    //////////////////////////////////////////////////////////////////////////
236    /// @brief Retrieve color from simd.
237    /// @param index - linear index to color within simd.
238    /// @param outputColor - output color
239    INLINE void SetSwizzledColor(
240        uint32_t index,
241        const float src[4])
242    {
243        // SOA pattern for 8x2..
244        //   0 1 4 5 8 9 C D
245        //   2 3 6 7 A B E F
246        // The offset converts pattern to linear
247        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
248
249        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
250        {
251            this->color[i][offset[index]] = *(uint8_t*)&src[i];
252        }
253    }
254};
255
256//////////////////////////////////////////////////////////////////////////
257/// @brief Computes lod offset for 1D surface at specified lod.
258/// @param baseWidth - width of basemip (mip 0).
259/// @param hAlign - horizontal alignment per miip, in texels
260/// @param lod - lod index
261/// @param offset - output offset.
262INLINE void ComputeLODOffset1D(
263    const SWR_FORMAT_INFO& info,
264    uint32_t baseWidth,
265    uint32_t hAlign,
266    uint32_t lod,
267    uint32_t &offset)
268{
269    if (lod == 0)
270    {
271        offset = 0;
272    }
273    else
274    {
275        uint32_t curWidth = baseWidth;
276        // @note hAlign is already in blocks for compressed formats so upconvert
277        //       so that we have the desired alignment post-divide.
278        if (info.isBC)
279        {
280            hAlign *= info.bcWidth;
281        }
282
283        offset = GFX_ALIGN(curWidth, hAlign);
284        for (uint32_t l = 1; l < lod; ++l)
285        {
286            curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
287            offset += GFX_ALIGN(curWidth, hAlign);
288        }
289
290        if (info.isSubsampled || info.isBC)
291        {
292            offset /= info.bcWidth;
293        }
294    }
295}
296
297//////////////////////////////////////////////////////////////////////////
298/// @brief Computes x lod offset for 2D surface at specified lod.
299/// @param baseWidth - width of basemip (mip 0).
300/// @param hAlign - horizontal alignment per mip, in texels
301/// @param lod - lod index
302/// @param offset - output offset.
303INLINE void ComputeLODOffsetX(
304    const SWR_FORMAT_INFO& info,
305    uint32_t baseWidth,
306    uint32_t hAlign,
307    uint32_t lod,
308    uint32_t &offset)
309{
310    if (lod < 2)
311    {
312        offset = 0;
313    }
314    else
315    {
316        uint32_t curWidth = baseWidth;
317        // @note hAlign is already in blocks for compressed formats so upconvert
318        //       so that we have the desired alignment post-divide.
319        if (info.isBC)
320        {
321            hAlign *= info.bcWidth;
322        }
323
324        curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
325        curWidth = GFX_ALIGN(curWidth, hAlign);
326
327        if (info.isSubsampled || info.isBC)
328        {
329            curWidth /= info.bcWidth;
330        }
331
332        offset = curWidth;
333    }
334}
335
336//////////////////////////////////////////////////////////////////////////
337/// @brief Computes y lod offset for 2D surface at specified lod.
338/// @param baseWidth - width of basemip (mip 0).
339/// @param vAlign - vertical alignment per mip, in rows
340/// @param lod - lod index
341/// @param offset - output offset.
342INLINE void ComputeLODOffsetY(
343    const SWR_FORMAT_INFO& info,
344    uint32_t baseHeight,
345    uint32_t vAlign,
346    uint32_t lod,
347    uint32_t &offset)
348{
349    if (lod == 0)
350    {
351        offset = 0;
352    }
353    else
354    {
355        offset = 0;
356        uint32_t mipHeight = baseHeight;
357
358        // @note vAlign is already in blocks for compressed formats so upconvert
359        //       so that we have the desired alignment post-divide.
360        if (info.isBC)
361        {
362            vAlign *= info.bcHeight;
363        }
364
365        for (uint32_t l = 1; l <= lod; ++l)
366        {
367            uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
368            offset += ((l != 2) ? alignedMipHeight : 0);
369            mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
370        }
371
372        if (info.isBC)
373        {
374            offset /= info.bcHeight;
375        }
376    }
377}
378
379//////////////////////////////////////////////////////////////////////////
380/// @brief Computes 1D surface offset
381/// @param x - offset from start of array slice at given lod.
382/// @param array - array slice index
383/// @param lod - lod index
384/// @param pState - surface state
385/// @param xOffsetBytes - output offset in bytes.
386template<bool UseCachedOffsets>
387INLINE void ComputeSurfaceOffset1D(
388    uint32_t x,
389    uint32_t array,
390    uint32_t lod,
391    const SWR_SURFACE_STATE *pState,
392    uint32_t &xOffsetBytes)
393{
394    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
395    uint32_t lodOffset;
396
397    if (UseCachedOffsets)
398    {
399        lodOffset = pState->lodOffsets[0][lod];
400    }
401    else
402    {
403        ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
404    }
405
406    xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
407}
408
409//////////////////////////////////////////////////////////////////////////
410/// @brief Adjusts the array slice for legacy TileY MSAA
411/// @param pState - surface state
412/// @param array - array slice index
413/// @param sampleNum - requested sample
414INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
415{
416    /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
417    if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
418        pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
419       pState->bInterleavedSamples)
420    {
421        uint32_t newX, newY, newSampleX, newSampleY;
422        switch(pState->numSamples)
423        {
424        case 1:
425            newX = x;
426            newY = y;
427            newSampleX = newSampleY = 0;
428            break;
429        case 2:
430        {
431            assert(pState->type == SURFACE_2D);
432            static const uint32_t xMask = 0xFFFFFFFD;
433            static const uint32_t sampleMaskX = 0x1;
434            newX = pdep_u32(x, xMask);
435            newY = y;
436            newSampleX = pext_u32(sampleNum, sampleMaskX);
437            newSampleY = 0;
438        }
439            break;
440        case 4:
441        {
442            assert(pState->type == SURFACE_2D);
443            static const uint32_t mask = 0xFFFFFFFD;
444            static const uint32_t sampleMaskX = 0x1;
445            static const uint32_t sampleMaskY = 0x2;
446            newX = pdep_u32(x, mask);
447            newY = pdep_u32(y, mask);
448            newSampleX = pext_u32(sampleNum, sampleMaskX);
449            newSampleY = pext_u32(sampleNum, sampleMaskY);
450        }
451            break;
452        case 8:
453        {
454            assert(pState->type == SURFACE_2D);
455            static const uint32_t xMask = 0xFFFFFFF9;
456            static const uint32_t yMask = 0xFFFFFFFD;
457            static const uint32_t sampleMaskX = 0x5;
458            static const uint32_t sampleMaskY = 0x2;
459            newX = pdep_u32(x, xMask);
460            newY = pdep_u32(y, yMask);
461            newSampleX = pext_u32(sampleNum, sampleMaskX);
462            newSampleY = pext_u32(sampleNum, sampleMaskY);
463        }
464            break;
465        case 16:
466        {
467            assert(pState->type == SURFACE_2D);
468            static const uint32_t mask = 0xFFFFFFF9;
469            static const uint32_t sampleMaskX = 0x5;
470            static const uint32_t sampleMaskY = 0xA;
471            newX = pdep_u32(x, mask);
472            newY = pdep_u32(y, mask);
473            newSampleX = pext_u32(sampleNum, sampleMaskX);
474            newSampleY = pext_u32(sampleNum, sampleMaskY);
475        }
476            break;
477        default:
478            assert(0 && "Unsupported sample count");
479            newX = newY = 0;
480            newSampleX = newSampleY = 0;
481            break;
482        }
483        x = newX | (newSampleX << 1);
484        y = newY | (newSampleY << 1);
485    }
486    else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
487            pState->tileMode == SWR_TILE_NONE)
488    {
489        uint32_t sampleShift;
490        switch(pState->numSamples)
491        {
492        case 1:
493            assert(sampleNum == 0);
494            sampleShift = 0;
495            break;
496        case 2:
497            assert(pState->type == SURFACE_2D);
498            sampleShift = 1;
499            break;
500        case 4:
501            assert(pState->type == SURFACE_2D);
502            sampleShift = 2;
503            break;
504        case 8:
505            assert(pState->type == SURFACE_2D);
506            sampleShift = 3;
507            break;
508        case 16:
509            assert(pState->type == SURFACE_2D);
510            sampleShift = 4;
511            break;
512        default:
513            assert(0 && "Unsupported sample count");
514            sampleShift = 0;
515            break;
516        }
517        arrayIndex = (arrayIndex << sampleShift) | sampleNum;
518    }
519}
520
521//////////////////////////////////////////////////////////////////////////
522/// @brief Computes 2D surface offset
523/// @param x - horizontal offset from start of array slice and lod.
524/// @param y - vertical offset from start of array slice and lod.
525/// @param array - array slice index
526/// @param lod - lod index
527/// @param pState - surface state
528/// @param xOffsetBytes - output x offset in bytes.
529/// @param yOffsetRows - output y offset in bytes.
530template<bool UseCachedOffsets>
531INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
532{
533    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
534    uint32_t lodOffsetX, lodOffsetY;
535
536    if (UseCachedOffsets)
537    {
538        lodOffsetX = pState->lodOffsets[0][lod];
539        lodOffsetY = pState->lodOffsets[1][lod];
540    }
541    else
542    {
543        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
544        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
545    }
546
547    AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
548    xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
549    yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
550}
551
552//////////////////////////////////////////////////////////////////////////
553/// @brief Computes 3D surface offset
554/// @param x - horizontal offset from start of array slice and lod.
555/// @param y - vertical offset from start of array slice and lod.
556/// @param z - depth offset from start of array slice and lod.
557/// @param lod - lod index
558/// @param pState - surface state
559/// @param xOffsetBytes - output x offset in bytes.
560/// @param yOffsetRows - output y offset in rows.
561/// @param zOffsetSlices - output y offset in slices.
562template<bool UseCachedOffsets>
563INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
564{
565    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
566    uint32_t lodOffsetX, lodOffsetY;
567
568    if (UseCachedOffsets)
569    {
570        lodOffsetX = pState->lodOffsets[0][lod];
571        lodOffsetY = pState->lodOffsets[1][lod];
572    }
573    else
574    {
575        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
576        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
577    }
578
579    xOffsetBytes = (x + lodOffsetX) * info.Bpp;
580    yOffsetRows = lodOffsetY + y;
581    zOffsetSlices = z;
582}
583
584//////////////////////////////////////////////////////////////////////////
585/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
586///        and returns final surface address
587/// @param xOffsetBytes - x offset from base of surface in bytes
588/// @param yOffsetRows - y offset from base of surface in rows
589/// @param pState - pointer to the surface state
590template<typename TTraits>
591INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
592{
593    return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
594}
595
596//////////////////////////////////////////////////////////////////////////
597/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
598///        and returns final surface address
599/// @param xOffsetBytes - x offset from base of surface in bytes
600/// @param yOffsetRows - y offset from base of surface in rows
601/// @param pState - pointer to the surface state
602template<typename TTraits>
603INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
604{
605    return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
606}
607
608//////////////////////////////////////////////////////////////////////////
609/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
610///        and returns final surface address
611/// @param xOffsetBytes - x offset from base of surface in bytes
612/// @param yOffsetRows - y offset from base of surface in rows
613/// @param pState - pointer to the surface state
614INLINE
615uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
616{
617    switch (pState->tileMode)
618    {
619    case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
620    case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
621    case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
622    case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
623    case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
624    default: SWR_INVALID("Unsupported tiling mode");
625    }
626    return 0;
627}
628
629//////////////////////////////////////////////////////////////////////////
630/// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
631///        and returns final surface address
632/// @param xOffsetBytes - x offset from base of surface in bytes
633/// @param yOffsetRows - y offset from base of surface in rows
634/// @param zOffsetSlices - z offset from base of surface in slices
635/// @param pState - pointer to the surface state
636INLINE
637uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
638{
639    switch (pState->tileMode)
640    {
641    case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
642    case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
643    case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
644    default: SWR_INVALID("Unsupported tiling mode");
645    }
646    return 0;
647}
648
649template<bool UseCachedOffsets>
650INLINE
651uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
652{
653    uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
654    switch (pState->type)
655    {
656    case SURFACE_BUFFER:
657    case SURFACE_STRUCTURED_BUFFER:
658        offsetX = x * pState->pitch;
659        return offsetX;
660        break;
661    case SURFACE_1D:
662        ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
663        return TileSwizzle2D(offsetX, 0, pState);
664        break;
665    case SURFACE_2D:
666        ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
667        return TileSwizzle2D(offsetX, offsetY, pState);
668    case SURFACE_3D:
669        ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
670        return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
671        break;
672    case SURFACE_CUBE:
673        ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
674        return TileSwizzle2D(offsetX, offsetY, pState);
675        break;
676    default: SWR_INVALID("Unsupported format");
677    }
678
679    return 0;
680}
681
682typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
683
684//////////////////////////////////////////////////////////////////////////
685/// @brief Computes surface address at the given location and lod
686/// @param x - x location in pixels
687/// @param y - y location in rows
688/// @param z - z location for 3D surfaces
689/// @param array - array slice for 1D and 2D surfaces
690/// @param lod - level of detail
691/// @param pState - pointer to the surface state
692template<bool UseCachedOffsets, bool IsRead>
693INLINE
694void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
695{
696    return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
697}
698