1/****************************************************************************
2* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3*
4* Permission is hereby granted, free of charge, to any person obtaining a
5* copy of this software and associated documentation files (the "Software"),
6* to deal in the Software without restriction, including without limitation
7* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8* and/or sell copies of the Software, and to permit persons to whom the
9* Software is furnished to do so, subject to the following conditions:
10*
11* The above copyright notice and this permission notice (including the next
12* paragraph) shall be included in all copies or substantial portions of the
13* Software.
14*
15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21* IN THE SOFTWARE.
22*
23* @file TilingFunctions.h
24*
25* @brief Tiling functions.
26*
27******************************************************************************/
28#pragma once
29
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"

#include <algorithm>
#include <cstring>
35
// Maximum LOD count (per the name; not referenced in the visible part of this header).
#define MAX_NUM_LOD 15

// Rounds x up to a multiple of a by adding (a - 1) and then subtracting the
// low bits selected by the mask (a - 1); the result is only a true multiple
// when a is a power of two. Both arguments are evaluated more than once, so
// do not pass expressions with side effects.
// Alt implementation with bitwise not (~) has issue with uint32 align used
// with 64-bit value, since ~'ed value will remain 32-bit.
#define GFX_ALIGN(x, a) \
    (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1)))
39
40//////////////////////////////////////////////////////////////////////////
41/// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
42//////////////////////////////////////////////////////////////////////////
43template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
44struct SimdTile
45{
46    // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
47    float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
48
49    //////////////////////////////////////////////////////////////////////////
50    /// @brief Retrieve color from simd.
51    /// @param index - linear index to color within simd.
52    /// @param outputColor - output color
53    INLINE void GetSwizzledColor(
54        uint32_t index,
55        float outputColor[4])
56    {
57        // SOA pattern for 2x2 is a subset of 4x2.
58        //   0 1 4 5
59        //   2 3 6 7
60        // The offset converts pattern to linear
61#if (SIMD_TILE_X_DIM == 4)
62        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
63#elif (SIMD_TILE_X_DIM == 2)
64        static const uint32_t offset[] = { 0, 1, 2, 3 };
65#endif
66
67        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
68        {
69            outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
70        }
71    }
72
73    //////////////////////////////////////////////////////////////////////////
74    /// @brief Retrieve color from simd.
75    /// @param index - linear index to color within simd.
76    /// @param outputColor - output color
77    INLINE void SetSwizzledColor(
78        uint32_t index,
79        const float src[4])
80    {
81        // SOA pattern for 2x2 is a subset of 4x2.
82        //   0 1 4 5
83        //   2 3 6 7
84        // The offset converts pattern to linear
85#if (SIMD_TILE_X_DIM == 4)
86        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
87#elif (SIMD_TILE_X_DIM == 2)
88        static const uint32_t offset[] = { 0, 1, 2, 3 };
89#endif
90
91        // Only loop over the components needed for destination.
92        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
93        {
94            this->color[i][offset[index]] = src[i];
95        }
96    }
97};
98
99template<>
100struct SimdTile <R8_UINT,R8_UINT>
101{
102    // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
103    uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
104
105    //////////////////////////////////////////////////////////////////////////
106    /// @brief Retrieve color from simd.
107    /// @param index - linear index to color within simd.
108    /// @param outputColor - output color
109    INLINE void GetSwizzledColor(
110        uint32_t index,
111        float outputColor[4])
112    {
113        // SOA pattern for 2x2 is a subset of 4x2.
114        //   0 1 4 5
115        //   2 3 6 7
116        // The offset converts pattern to linear
117#if (SIMD_TILE_X_DIM == 4)
118        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
119#elif (SIMD_TILE_X_DIM == 2)
120        static const uint32_t offset[] = { 0, 1, 2, 3 };
121#endif
122
123        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
124        {
125            uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
126            outputColor[i] = *(float*)&src;
127        }
128    }
129
130    //////////////////////////////////////////////////////////////////////////
131    /// @brief Retrieve color from simd.
132    /// @param index - linear index to color within simd.
133    /// @param outputColor - output color
134    INLINE void SetSwizzledColor(
135        uint32_t index,
136        const float src[4])
137    {
138        // SOA pattern for 2x2 is a subset of 4x2.
139        //   0 1 4 5
140        //   2 3 6 7
141        // The offset converts pattern to linear
142#if (SIMD_TILE_X_DIM == 4)
143        static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
144#elif (SIMD_TILE_X_DIM == 2)
145        static const uint32_t offset[] = { 0, 1, 2, 3 };
146#endif
147
148        // Only loop over the components needed for destination.
149        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
150        {
151            this->color[i][offset[index]] = *(uint8_t*)&src[i];
152        }
153    }
154};
155
156//////////////////////////////////////////////////////////////////////////
157/// SimdTile 8x2 for AVX-512
158//////////////////////////////////////////////////////////////////////////
159
160template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
161struct SimdTile_16
162{
163    // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
164    float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
165
166    //////////////////////////////////////////////////////////////////////////
167    /// @brief Retrieve color from simd.
168    /// @param index - linear index to color within simd.
169    /// @param outputColor - output color
170    INLINE void GetSwizzledColor(
171        uint32_t index,
172        float outputColor[4])
173    {
174        // SOA pattern for 8x2..
175        //   0 1 4 5 8 9 C D
176        //   2 3 6 7 A B E F
177        // The offset converts pattern to linear
178        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
179
180        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
181        {
182            outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
183        }
184    }
185
186    //////////////////////////////////////////////////////////////////////////
187    /// @brief Retrieve color from simd.
188    /// @param index - linear index to color within simd.
189    /// @param outputColor - output color
190    INLINE void SetSwizzledColor(
191        uint32_t index,
192        const float src[4])
193    {
194        // SOA pattern for 8x2..
195        //   0 1 4 5 8 9 C D
196        //   2 3 6 7 A B E F
197        // The offset converts pattern to linear
198        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
199
200        for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
201        {
202            this->color[i][offset[index]] = src[i];
203        }
204    }
205};
206
207template<>
208struct SimdTile_16 <R8_UINT, R8_UINT>
209{
210    // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
211    uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
212
213    //////////////////////////////////////////////////////////////////////////
214    /// @brief Retrieve color from simd.
215    /// @param index - linear index to color within simd.
216    /// @param outputColor - output color
217    INLINE void GetSwizzledColor(
218        uint32_t index,
219        float outputColor[4])
220    {
221        // SOA pattern for 8x2..
222        //   0 1 4 5 8 9 C D
223        //   2 3 6 7 A B E F
224        // The offset converts pattern to linear
225        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
226
227        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
228        {
229            uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
230            outputColor[i] = *(float*)&src;
231        }
232    }
233
234    //////////////////////////////////////////////////////////////////////////
235    /// @brief Retrieve color from simd.
236    /// @param index - linear index to color within simd.
237    /// @param outputColor - output color
238    INLINE void SetSwizzledColor(
239        uint32_t index,
240        const float src[4])
241    {
242        // SOA pattern for 8x2..
243        //   0 1 4 5 8 9 C D
244        //   2 3 6 7 A B E F
245        // The offset converts pattern to linear
246        static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
247
248        for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
249        {
250            this->color[i][offset[index]] = *(uint8_t*)&src[i];
251        }
252    }
253};
254
255//////////////////////////////////////////////////////////////////////////
256/// @brief Computes lod offset for 1D surface at specified lod.
257/// @param baseWidth - width of basemip (mip 0).
258/// @param hAlign - horizontal alignment per miip, in texels
259/// @param lod - lod index
260/// @param offset - output offset.
261INLINE void ComputeLODOffset1D(
262    const SWR_FORMAT_INFO& info,
263    uint32_t baseWidth,
264    uint32_t hAlign,
265    uint32_t lod,
266    uint32_t &offset)
267{
268    if (lod == 0)
269    {
270        offset = 0;
271    }
272    else
273    {
274        uint32_t curWidth = baseWidth;
275        // @note hAlign is already in blocks for compressed formats so upconvert
276        //       so that we have the desired alignment post-divide.
277        if (info.isBC)
278        {
279            hAlign *= info.bcWidth;
280        }
281
282        offset = GFX_ALIGN(curWidth, hAlign);
283        for (uint32_t l = 1; l < lod; ++l)
284        {
285            curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
286            offset += GFX_ALIGN(curWidth, hAlign);
287        }
288
289        if (info.isSubsampled || info.isBC)
290        {
291            offset /= info.bcWidth;
292        }
293    }
294}
295
296//////////////////////////////////////////////////////////////////////////
297/// @brief Computes x lod offset for 2D surface at specified lod.
298/// @param baseWidth - width of basemip (mip 0).
299/// @param hAlign - horizontal alignment per mip, in texels
300/// @param lod - lod index
301/// @param offset - output offset.
302INLINE void ComputeLODOffsetX(
303    const SWR_FORMAT_INFO& info,
304    uint32_t baseWidth,
305    uint32_t hAlign,
306    uint32_t lod,
307    uint32_t &offset)
308{
309    if (lod < 2)
310    {
311        offset = 0;
312    }
313    else
314    {
315        uint32_t curWidth = baseWidth;
316        // @note hAlign is already in blocks for compressed formats so upconvert
317        //       so that we have the desired alignment post-divide.
318        if (info.isBC)
319        {
320            hAlign *= info.bcWidth;
321        }
322
323        curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
324        curWidth = GFX_ALIGN(curWidth, hAlign);
325
326        if (info.isSubsampled || info.isBC)
327        {
328            curWidth /= info.bcWidth;
329        }
330
331        offset = curWidth;
332    }
333}
334
335//////////////////////////////////////////////////////////////////////////
336/// @brief Computes y lod offset for 2D surface at specified lod.
337/// @param baseWidth - width of basemip (mip 0).
338/// @param vAlign - vertical alignment per mip, in rows
339/// @param lod - lod index
340/// @param offset - output offset.
341INLINE void ComputeLODOffsetY(
342    const SWR_FORMAT_INFO& info,
343    uint32_t baseHeight,
344    uint32_t vAlign,
345    uint32_t lod,
346    uint32_t &offset)
347{
348    if (lod == 0)
349    {
350        offset = 0;
351    }
352    else
353    {
354        offset = 0;
355        uint32_t mipHeight = baseHeight;
356
357        // @note vAlign is already in blocks for compressed formats so upconvert
358        //       so that we have the desired alignment post-divide.
359        if (info.isBC)
360        {
361            vAlign *= info.bcHeight;
362        }
363
364        for (uint32_t l = 1; l <= lod; ++l)
365        {
366            uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
367            offset += ((l != 2) ? alignedMipHeight : 0);
368            mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
369        }
370
371        if (info.isBC)
372        {
373            offset /= info.bcHeight;
374        }
375    }
376}
377
378//////////////////////////////////////////////////////////////////////////
379/// @brief Computes 1D surface offset
380/// @param x - offset from start of array slice at given lod.
381/// @param array - array slice index
382/// @param lod - lod index
383/// @param pState - surface state
384/// @param xOffsetBytes - output offset in bytes.
385template<bool UseCachedOffsets>
386INLINE void ComputeSurfaceOffset1D(
387    uint32_t x,
388    uint32_t array,
389    uint32_t lod,
390    const SWR_SURFACE_STATE *pState,
391    uint32_t &xOffsetBytes)
392{
393    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
394    uint32_t lodOffset;
395
396    if (UseCachedOffsets)
397    {
398        lodOffset = pState->lodOffsets[0][lod];
399    }
400    else
401    {
402        ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
403    }
404
405    xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
406}
407
408//////////////////////////////////////////////////////////////////////////
409/// @brief Adjusts the array slice for legacy TileY MSAA
410/// @param pState - surface state
411/// @param array - array slice index
412/// @param sampleNum - requested sample
413INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
414{
415    /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
416    if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
417        pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
418       pState->bInterleavedSamples)
419    {
420        uint32_t newX, newY, newSampleX, newSampleY;
421        switch(pState->numSamples)
422        {
423        case 1:
424            newX = x;
425            newY = y;
426            newSampleX = newSampleY = 0;
427            break;
428        case 2:
429        {
430            assert(pState->type == SURFACE_2D);
431            static const uint32_t xMask = 0xFFFFFFFD;
432            static const uint32_t sampleMaskX = 0x1;
433            newX = pdep_u32(x, xMask);
434            newY = y;
435            newSampleX = pext_u32(sampleNum, sampleMaskX);
436            newSampleY = 0;
437        }
438            break;
439        case 4:
440        {
441            assert(pState->type == SURFACE_2D);
442            static const uint32_t mask = 0xFFFFFFFD;
443            static const uint32_t sampleMaskX = 0x1;
444            static const uint32_t sampleMaskY = 0x2;
445            newX = pdep_u32(x, mask);
446            newY = pdep_u32(y, mask);
447            newSampleX = pext_u32(sampleNum, sampleMaskX);
448            newSampleY = pext_u32(sampleNum, sampleMaskY);
449        }
450            break;
451        case 8:
452        {
453            assert(pState->type == SURFACE_2D);
454            static const uint32_t xMask = 0xFFFFFFF9;
455            static const uint32_t yMask = 0xFFFFFFFD;
456            static const uint32_t sampleMaskX = 0x5;
457            static const uint32_t sampleMaskY = 0x2;
458            newX = pdep_u32(x, xMask);
459            newY = pdep_u32(y, yMask);
460            newSampleX = pext_u32(sampleNum, sampleMaskX);
461            newSampleY = pext_u32(sampleNum, sampleMaskY);
462        }
463            break;
464        case 16:
465        {
466            assert(pState->type == SURFACE_2D);
467            static const uint32_t mask = 0xFFFFFFF9;
468            static const uint32_t sampleMaskX = 0x5;
469            static const uint32_t sampleMaskY = 0xA;
470            newX = pdep_u32(x, mask);
471            newY = pdep_u32(y, mask);
472            newSampleX = pext_u32(sampleNum, sampleMaskX);
473            newSampleY = pext_u32(sampleNum, sampleMaskY);
474        }
475            break;
476        default:
477            assert(0 && "Unsupported sample count");
478            newX = newY = 0;
479            newSampleX = newSampleY = 0;
480            break;
481        }
482        x = newX | (newSampleX << 1);
483        y = newY | (newSampleY << 1);
484    }
485    else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
486            pState->tileMode == SWR_TILE_NONE)
487    {
488        uint32_t sampleShift;
489        switch(pState->numSamples)
490        {
491        case 1:
492            assert(sampleNum == 0);
493            sampleShift = 0;
494            break;
495        case 2:
496            assert(pState->type == SURFACE_2D);
497            sampleShift = 1;
498            break;
499        case 4:
500            assert(pState->type == SURFACE_2D);
501            sampleShift = 2;
502            break;
503        case 8:
504            assert(pState->type == SURFACE_2D);
505            sampleShift = 3;
506            break;
507        case 16:
508            assert(pState->type == SURFACE_2D);
509            sampleShift = 4;
510            break;
511        default:
512            assert(0 && "Unsupported sample count");
513            sampleShift = 0;
514            break;
515        }
516        arrayIndex = (arrayIndex << sampleShift) | sampleNum;
517    }
518}
519
520//////////////////////////////////////////////////////////////////////////
521/// @brief Computes 2D surface offset
522/// @param x - horizontal offset from start of array slice and lod.
523/// @param y - vertical offset from start of array slice and lod.
524/// @param array - array slice index
525/// @param lod - lod index
526/// @param pState - surface state
527/// @param xOffsetBytes - output x offset in bytes.
528/// @param yOffsetRows - output y offset in bytes.
529template<bool UseCachedOffsets>
530INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
531{
532    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
533    uint32_t lodOffsetX, lodOffsetY;
534
535    if (UseCachedOffsets)
536    {
537        lodOffsetX = pState->lodOffsets[0][lod];
538        lodOffsetY = pState->lodOffsets[1][lod];
539    }
540    else
541    {
542        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
543        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
544    }
545
546    AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
547    xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
548    yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
549}
550
551//////////////////////////////////////////////////////////////////////////
552/// @brief Computes 3D surface offset
553/// @param x - horizontal offset from start of array slice and lod.
554/// @param y - vertical offset from start of array slice and lod.
555/// @param z - depth offset from start of array slice and lod.
556/// @param lod - lod index
557/// @param pState - surface state
558/// @param xOffsetBytes - output x offset in bytes.
559/// @param yOffsetRows - output y offset in rows.
560/// @param zOffsetSlices - output y offset in slices.
561template<bool UseCachedOffsets>
562INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
563{
564    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
565    uint32_t lodOffsetX, lodOffsetY;
566
567    if (UseCachedOffsets)
568    {
569        lodOffsetX = pState->lodOffsets[0][lod];
570        lodOffsetY = pState->lodOffsets[1][lod];
571    }
572    else
573    {
574        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
575        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
576    }
577
578    xOffsetBytes = (x + lodOffsetX) * info.Bpp;
579    yOffsetRows = lodOffsetY + y;
580    zOffsetSlices = z;
581}
582
583//////////////////////////////////////////////////////////////////////////
584/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
585///        and returns final surface address
586/// @param xOffsetBytes - x offset from base of surface in bytes
587/// @param yOffsetRows - y offset from base of surface in rows
588/// @param pState - pointer to the surface state
589template<typename TTraits>
590INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
591{
592    return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
593}
594
595//////////////////////////////////////////////////////////////////////////
596/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
597///        and returns final surface address
598/// @param xOffsetBytes - x offset from base of surface in bytes
599/// @param yOffsetRows - y offset from base of surface in rows
600/// @param pState - pointer to the surface state
601template<typename TTraits>
602INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
603{
604    return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
605}
606
607//////////////////////////////////////////////////////////////////////////
608/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
609///        and returns final surface address
610/// @param xOffsetBytes - x offset from base of surface in bytes
611/// @param yOffsetRows - y offset from base of surface in rows
612/// @param pState - pointer to the surface state
613INLINE
614uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
615{
616    switch (pState->tileMode)
617    {
618    case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
619    case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
620    case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
621    case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
622    case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
623    default: SWR_INVALID("Unsupported tiling mode");
624    }
625    return 0;
626}
627
628//////////////////////////////////////////////////////////////////////////
629/// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
630///        and returns final surface address
631/// @param xOffsetBytes - x offset from base of surface in bytes
632/// @param yOffsetRows - y offset from base of surface in rows
633/// @param zOffsetSlices - z offset from base of surface in slices
634/// @param pState - pointer to the surface state
635INLINE
636uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
637{
638    switch (pState->tileMode)
639    {
640    case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
641    case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
642    case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
643    default: SWR_INVALID("Unsupported tiling mode");
644    }
645    return 0;
646}
647
648template<bool UseCachedOffsets>
649INLINE
650uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
651{
652    uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
653    switch (pState->type)
654    {
655    case SURFACE_BUFFER:
656    case SURFACE_STRUCTURED_BUFFER:
657        offsetX = x * pState->pitch;
658        return offsetX;
659        break;
660    case SURFACE_1D:
661        ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
662        return TileSwizzle2D(offsetX, 0, pState);
663        break;
664    case SURFACE_2D:
665        ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
666        return TileSwizzle2D(offsetX, offsetY, pState);
667    case SURFACE_3D:
668        ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
669        return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
670        break;
671    case SURFACE_CUBE:
672        ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
673        return TileSwizzle2D(offsetX, offsetY, pState);
674        break;
675    default: SWR_INVALID("Unsupported format");
676    }
677
678    return 0;
679}
680
681typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
682
683//////////////////////////////////////////////////////////////////////////
684/// @brief Computes surface address at the given location and lod
685/// @param x - x location in pixels
686/// @param y - y location in rows
687/// @param z - z location for 3D surfaces
688/// @param array - array slice for 1D and 2D surfaces
689/// @param lod - level of detail
690/// @param pState - pointer to the surface state
691template<bool UseCachedOffsets, bool IsRead>
692INLINE
693void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
694{
695    return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
696}
697