/*
 * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com>
 * Copyright (c) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
277ec681f3Smrg
287ec681f3Smrg#include "pan_tiling.h"
297ec681f3Smrg#include <stdbool.h>
307ec681f3Smrg#include "util/macros.h"
317ec681f3Smrg
/* This file implements software encode/decode of the tiling format used for
 * textures and framebuffers primarily on Utgard GPUs. Names for this format
 * include "Utgard-style tiling", "(Mali) swizzled textures", and
 * "U-interleaved" (the former two names being used in the community
 * Lima/Panfrost drivers; the latter name used internally at Arm).
 * Conceptually, like any tiling scheme, the pixel reordering attempts to
 * improve 2D spatial locality, improving cache locality in both horizontal
 * and vertical directions.
 *
 * This format is tiled: first, the image dimensions must be aligned to 16
 * pixels in each axis. Once aligned, the image is divided into 16x16 tiles.
 * This size harmonizes with other properties of the GPU; on Midgard,
 * framebuffer tiles are logically 16x16 (this is the tile size used in
 * Transaction Elimination and the minimum tile size used in Hierarchical
 * Tiling). Conversely, for a standard 4 bytes-per-pixel format (like
 * RGBA8888), 16 pixels * 4 bytes/pixel = 64 bytes, equal to the cache line
 * size.
 *
 * Within each 16x16 block, the bits are reordered according to this pattern:
 *
 * | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) |
 *
 * Basically, interleaving the X and Y bits, with XORs thrown in for every
 * adjacent bit pair.
 *
 * This is cheap to implement both encode/decode in both hardware and software.
 * In hardware, lines are simply rerouted to reorder and some XOR gates are
 * thrown in. Software has to be a bit more clever.
 *
 * In software, the trick is to divide the pattern into two lines:
 *
 *    | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 |
 *  ^ |  0 | x3 |  0 | x2 |  0 | x1 |  0 | x0 |
 *
 * That is, duplicate the bits of the Y and space out the bits of the X. The
 * top line is a function only of Y, so it can be calculated once per row and
 * stored in a register. The bottom line is simply X with the bits spaced out.
 * Spacing out the X is easy enough with a LUT, or by subtracting+ANDing the
 * mask pattern (abusing carry bits).
 *
 * This format is also supported on Midgard GPUs, where it *can* be used for
 * textures and framebuffers. That said, in practice it is usually used as a
 * fallback layout; Midgard introduces Arm FrameBuffer Compression, which is
 * significantly more efficient than Utgard-style tiling and preferred for both
 * textures and framebuffers, where possible. For unsupported texture types,
 * for instance sRGB textures and framebuffers, this tiling scheme is used at a
 * performance penalty, as AFBC is not compatible.
 */
807ec681f3Smrg
/* Given the lower 4 bits of the Y coordinate, we would like to duplicate
 * every bit over. So instead of 0b1010, we would like 0b11001100. The idea is
 * that the bits in the solely-Y places get a Y, and the bits in the XOR
 * places *also* get a Y, implementing the
 * | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 | row of the tiling pattern.
 *
 * Written in hexadecimal rather than 0b literals: binary integer constants
 * are a GNU extension and not portable C11 (each entry's binary form is
 * noted alongside). */

const uint32_t bit_duplication[16] = {
   0x00, /* 0b00000000 */
   0x03, /* 0b00000011 */
   0x0C, /* 0b00001100 */
   0x0F, /* 0b00001111 */
   0x30, /* 0b00110000 */
   0x33, /* 0b00110011 */
   0x3C, /* 0b00111100 */
   0x3F, /* 0b00111111 */
   0xC0, /* 0b11000000 */
   0xC3, /* 0b11000011 */
   0xCC, /* 0b11001100 */
   0xCF, /* 0b11001111 */
   0xF0, /* 0b11110000 */
   0xF3, /* 0b11110011 */
   0xFC, /* 0b11111100 */
   0xFF, /* 0b11111111 */
};
1047ec681f3Smrg
/* Space out the bits of a 4-bit nibble: x3 x2 x1 x0 becomes
 * 0 x3 0 x2 0 x1 0 x0, the X contribution to the in-tile index.
 *
 * Written in hexadecimal rather than 0b literals: binary integer constants
 * are a GNU extension and not portable C11 (each entry's binary form is
 * noted alongside). */

const unsigned space_4[16] = {
   0x00, /* 0b0000000 */
   0x01, /* 0b0000001 */
   0x04, /* 0b0000100 */
   0x05, /* 0b0000101 */
   0x10, /* 0b0010000 */
   0x11, /* 0b0010001 */
   0x14, /* 0b0010100 */
   0x15, /* 0b0010101 */
   0x40, /* 0b1000000 */
   0x41, /* 0b1000001 */
   0x44, /* 0b1000100 */
   0x45, /* 0b1000101 */
   0x50, /* 0b1010000 */
   0x51, /* 0b1010001 */
   0x54, /* 0b1010100 */
   0x55  /* 0b1010101 */
};
1257ec681f3Smrg
/* The scheme uses 16x16 tiles */

#define TILE_WIDTH 16
#define TILE_HEIGHT 16
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)

/* We need a 128-bit type for idiomatically tiling bpp128 formats. The type must
 * only support copies and sizeof, so emulating with a packed structure works
 * well enough, but if there's a native 128-bit type we may as well prefer
 * that. */

#ifdef __SIZEOF_INT128__
typedef __uint128_t pan_uint128_t;
#else
typedef struct {
  uint64_t lo;
  uint64_t hi;
} __attribute__((packed)) pan_uint128_t;
#endif

/* Likewise, a packed 3-byte type for copying bpp24 pixels as a unit. */
typedef struct {
  uint16_t lo;
  uint8_t hi;
} __attribute__((packed)) pan_uint24_t;
1507ec681f3Smrg
/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
 *
 * dest_start precomputes the offset to the beginning of the first horizontal
 * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are
 * stored linearly, so we get the X tile number by shifting and then multiply
 * by the bytes per tile.
 *
 * We iterate across the pixels we're trying to store in source-order. For each
 * row in the destination image, we figure out which row of 16x16 block we're
 * in, by slicing off the lower 4 bits (block_y).
 *
 * dest then precomputes the location of the top-left corner of the block the
 * row starts in. In pixel coordinates (where the origin is the top-left),
 * (block_y, 0) is the top-left corner of the leftmost tile in this row. While
 * pixels are reordered within a block, the blocks themselves are stored
 * linearly, so multiplying block_y by the pixel stride of the destination
 * image equals the byte offset of that top-left corner of the block this row
 * is in.
 *
 * On the other hand, the source is linear, so we compute the locations of the
 * start and end of the row in the source by simple linear addressing.
 *
 * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0
 * y0] value. Since this is constant across a row, we look it up per-row and
 * store it in expanded_y.
 *
 * Finally, we iterate each row in source order. In the outer loop, we iterate
 * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should
 * be unrolled), calculating the index within the tile and writing.
 */
1817ec681f3Smrg
1827ec681f3Smrg#define TILED_ACCESS_TYPE(pixel_t, shift) \
1837ec681f3Smrgstatic ALWAYS_INLINE void \
1847ec681f3Smrgpanfrost_access_tiled_image_##pixel_t \
1857ec681f3Smrg                              (void *dst, void *src, \
1867ec681f3Smrg                               uint16_t sx, uint16_t sy, \
1877ec681f3Smrg                               uint16_t w, uint16_t h, \
1887ec681f3Smrg                               uint32_t dst_stride, \
1897ec681f3Smrg                               uint32_t src_stride, \
1907ec681f3Smrg                               bool is_store) \
1917ec681f3Smrg{ \
1927ec681f3Smrg   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \
1937ec681f3Smrg   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
1947ec681f3Smrg      uint16_t block_y = y & ~0x0f; \
1957ec681f3Smrg      uint8_t *dest = (uint8_t *) (dest_start + (block_y * dst_stride)); \
1967ec681f3Smrg      pixel_t *source = src + (src_y * src_stride); \
1977ec681f3Smrg      pixel_t *source_end = source + w; \
1987ec681f3Smrg      unsigned expanded_y = bit_duplication[y & 0xF] << shift; \
1997ec681f3Smrg      for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \
2007ec681f3Smrg         for (uint8_t i = 0; i < 16; ++i) { \
2017ec681f3Smrg            unsigned index = expanded_y ^ (space_4[i] << shift); \
2027ec681f3Smrg            if (is_store) \
2037ec681f3Smrg                *((pixel_t *) (dest + index)) = *(source++); \
2047ec681f3Smrg            else \
2057ec681f3Smrg                *(source++) = *((pixel_t *) (dest + index)); \
2067ec681f3Smrg         } \
2077ec681f3Smrg      } \
2087ec681f3Smrg   } \
2097ec681f3Smrg} \
2107ec681f3Smrg
2117ec681f3SmrgTILED_ACCESS_TYPE(uint8_t, 0);
2127ec681f3SmrgTILED_ACCESS_TYPE(uint16_t, 1);
2137ec681f3SmrgTILED_ACCESS_TYPE(uint32_t, 2);
2147ec681f3SmrgTILED_ACCESS_TYPE(uint64_t, 3);
2157ec681f3SmrgTILED_ACCESS_TYPE(pan_uint128_t, 4);
2167ec681f3Smrg
/* Generic tiling/detiling loop for regions that are not 16-aligned, or for
 * pixel sizes with no fast path (e.g. 24-bit). Accesses one pixel (or
 * compressed block) at a time, at any alignment. tile_shift is log2 of the
 * tile dimension: 4 for 16x16 pixel tiles, 2 for 4x4 tiles of compressed
 * blocks. As a statement macro it captures sx, sy, w, h, src, dst,
 * src_stride and dst_stride from the enclosing scope. */
#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \
   const unsigned mask = (1 << tile_shift) - 1; \
   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
      unsigned block_y = y & ~mask; \
      /* Byte offset of the row of tiles this row falls in */ \
      unsigned block_start_s = block_y * dst_stride; \
      unsigned source_start = src_y * src_stride; \
      unsigned expanded_y = bit_duplication[y & mask]; \
 \
      for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \
         /* Tiles are stored linearly: tile index times pixels per tile */ \
         unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \
         unsigned index = expanded_y ^ space_4[x & mask]; \
         uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \
         uint8_t *dest = dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \
 \
         /* Select copy direction based on is_store */ \
         pixel_t *outp = (pixel_t *) (is_store ? dest : source); \
         pixel_t *inp = (pixel_t *) (is_store ? source : dest); \
         *outp = *inp; \
      } \
   } \
}
2377ec681f3Smrg
/* Dispatch to the TILED_UNALIGNED_TYPE instantiation matching the pixel (or
 * compressed block) size. Captures bpp from the enclosing scope; other bpp
 * values are silently ignored. */
#define TILED_UNALIGNED_TYPES(store, shift) { \
   if (bpp == 8) \
      TILED_UNALIGNED_TYPE(uint8_t, store, shift) \
   else if (bpp == 16) \
      TILED_UNALIGNED_TYPE(uint16_t, store, shift) \
   else if (bpp == 24) \
      TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \
   else if (bpp == 32) \
      TILED_UNALIGNED_TYPE(uint32_t, store, shift) \
   else if (bpp == 64) \
      TILED_UNALIGNED_TYPE(uint64_t, store, shift) \
   else if (bpp == 128) \
      TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \
}
2527ec681f3Smrg
/*
 * Perform a generic access to a tiled image with a given format. This works
 * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
 * specified in pixels, not blocks, but our internal routines work in blocks,
 * so we divide here. Alignment is assumed.
 *
 * Note: the TILED_UNALIGNED_TYPES expansions below capture sx, sy, w, h, bpp,
 * dst, src and the strides by name, so these locals must keep their names.
 */
static void
panfrost_access_tiled_image_generic(void *dst, void *src,
                               unsigned sx, unsigned sy,
                               unsigned w, unsigned h,
                               uint32_t dst_stride,
                               uint32_t src_stride,
                               const struct util_format_description *desc,
                               bool _is_store)
{
   unsigned bpp = desc->block.bits;

   /* Convert units from pixels to (compressed) blocks */
   sx /= desc->block.width;
   sy /= desc->block.height;
   w = DIV_ROUND_UP(w, desc->block.width);
   h = DIV_ROUND_UP(h, desc->block.height);

   /* Block-compressed formats tile as 4x4 blocks (shift 2); uncompressed
    * formats tile as 16x16 pixels (shift 4) */
   if (desc->block.width > 1) {
      if (_is_store)
         TILED_UNALIGNED_TYPES(true, 2)
      else
         TILED_UNALIGNED_TYPES(false, 2)
   } else {
      if (_is_store)
         TILED_UNALIGNED_TYPES(true, 4)
      else
         TILED_UNALIGNED_TYPES(false, 4)
   }
}
2887ec681f3Smrg
/* Address of linear-side pixel (_x, _y) relative to the original region
 * origin; captures orig_x, orig_y, src_stride and bpp from the enclosing
 * scope. */
#define OFFSET(src, _x, _y) (void *) ((uint8_t *) src + ((_y) - orig_y) * src_stride + (((_x) - orig_x) * (bpp / 8)))
2907ec681f3Smrg
/* Access (load or store) an arbitrary region of a tiled image. Strategy:
 * carve the unaligned edge strips (top, bottom, left, right) off the region
 * and handle them with the slow generic routine, shrinking x/y/w/h as we go,
 * so that the remaining interior is 16-aligned and can use the fast
 * per-tile routines. The linear pointer for each piece is recomputed with
 * OFFSET, which is relative to the original (orig_x, orig_y). */
static ALWAYS_INLINE void
panfrost_access_tiled_image(void *dst, void *src,
                           unsigned x, unsigned y,
                           unsigned w, unsigned h,
                           uint32_t dst_stride,
                           uint32_t src_stride,
                           enum pipe_format format,
                           bool is_store)
{
   const struct util_format_description *desc = util_format_description(format);

   /* Block-compressed and 24-bit formats have no fast path at all */
   if (desc->block.width > 1 || desc->block.bits == 24) {
      panfrost_access_tiled_image_generic(dst, (void *) src,
            x, y, w, h,
            dst_stride, src_stride, desc, is_store);

      return;
   }

   unsigned bpp = desc->block.bits;
   /* Bounds of the fully-tiled interior, in pixels */
   unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH;
   unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT;
   unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH;
   unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT;

   /* First, tile the top portion */

   unsigned orig_x = x, orig_y = y;

   if (first_full_tile_y != y) {
      /* MIN2 in case the whole region is shorter than the top strip */
      unsigned dist = MIN2(first_full_tile_y - y, h);

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
            x, y, w, dist,
            dst_stride, src_stride, desc, is_store);

      if (dist == h)
         return;

      y += dist;
      h -= dist;
   }

   /* Next, the bottom portion */
   if (last_full_tile_y != (y + h)) {
      unsigned dist = (y + h) - last_full_tile_y;

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y),
            x, last_full_tile_y, w, dist,
            dst_stride, src_stride, desc, is_store);

      h -= dist;
   }

   /* The left portion */
   if (first_full_tile_x != x) {
      unsigned dist = MIN2(first_full_tile_x - x, w);

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
            x, y, dist, h,
            dst_stride, src_stride, desc, is_store);

      if (dist == w)
         return;

      x += dist;
      w -= dist;
   }

   /* Finally, the right portion */
   if (last_full_tile_x != (x + w)) {
      unsigned dist = (x + w) - last_full_tile_x;

      panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y),
            last_full_tile_x, y, dist, h,
            dst_stride, src_stride, desc, is_store);

      w -= dist;
   }

   /* What remains is 16-aligned: dispatch to the fast path for this bpp */
   if (bpp == 8)
      panfrost_access_tiled_image_uint8_t(dst,  OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 16)
      panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 32)
      panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 64)
      panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 128)
      panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
}
3827ec681f3Smrg
3837ec681f3Smrg/**
3847ec681f3Smrg * Access a tiled image (load or store). Note: the region of interest (x, y, w,
3857ec681f3Smrg * h) is specified in pixels, not blocks. It is expected that these quantities
3867ec681f3Smrg * are aligned to the block size.
3877ec681f3Smrg */
3887ec681f3Smrgvoid
3897ec681f3Smrgpanfrost_store_tiled_image(void *dst, const void *src,
3907ec681f3Smrg                           unsigned x, unsigned y,
3917ec681f3Smrg                           unsigned w, unsigned h,
3927ec681f3Smrg                           uint32_t dst_stride,
3937ec681f3Smrg                           uint32_t src_stride,
3947ec681f3Smrg                           enum pipe_format format)
3957ec681f3Smrg{
3967ec681f3Smrg    panfrost_access_tiled_image(dst, (void *) src,
3977ec681f3Smrg        x, y, w, h,
3987ec681f3Smrg        dst_stride, src_stride, format, true);
3997ec681f3Smrg}
4007ec681f3Smrg
4017ec681f3Smrgvoid
4027ec681f3Smrgpanfrost_load_tiled_image(void *dst, const void *src,
4037ec681f3Smrg                           unsigned x, unsigned y,
4047ec681f3Smrg                           unsigned w, unsigned h,
4057ec681f3Smrg                           uint32_t dst_stride,
4067ec681f3Smrg                           uint32_t src_stride,
4077ec681f3Smrg                           enum pipe_format format)
4087ec681f3Smrg{
4097ec681f3Smrg   panfrost_access_tiled_image((void *) src, dst,
4107ec681f3Smrg       x, y, w, h,
4117ec681f3Smrg       src_stride, dst_stride, format, false);
4127ec681f3Smrg}
413