/*
 * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com>
 * Copyright (c) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
277ec681f3Smrg
287ec681f3Smrg#include "pan_tiling.h"
297ec681f3Smrg#include <stdbool.h>
307ec681f3Smrg#include "util/macros.h"
317ec681f3Smrg
/* This file implements software encode/decode of the tiling format used for
 * textures and framebuffers primarily on Utgard GPUs. Names for this format
 * include "Utgard-style tiling", "(Mali) swizzled textures", and
 * "U-interleaved" (the former two names being used in the community
 * Lima/Panfrost drivers; the latter name used internally at Arm).
 * Conceptually, like any tiling scheme, the pixel reordering attempts to
 * improve 2D spatial locality, improving cache locality in both horizontal
 * and vertical directions.
 *
 * This format is tiled: first, the image dimensions must be aligned to 16
 * pixels in each axis. Once aligned, the image is divided into 16x16 tiles.
 * This size harmonizes with other properties of the GPU; on Midgard,
 * framebuffer tiles are logically 16x16 (this is the tile size used in
 * Transaction Elimination and the minimum tile size used in Hierarchical
 * Tiling). Conversely, for a standard 4 bytes-per-pixel format (like
 * RGBA8888), 16 pixels * 4 bytes/pixel = 64 bytes, equal to the cache line
 * size.
 *
 * Within each 16x16 block, the bits are reordered according to this pattern:
 *
 * | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) |
 *
 * Basically, interleaving the X and Y bits, with XORs thrown in for every
 * adjacent bit pair.
 *
 * This is cheap to implement both encode/decode in both hardware and software.
 * In hardware, lines are simply rerouted to reorder and some XOR gates are
 * thrown in. Software has to be a bit more clever.
 *
 * In software, the trick is to divide the pattern into two lines:
 *
 *    | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 |
 *  ^ |  0 | x3 |  0 | x2 |  0 | x1 |  0 | x0 |
 *
 * That is, duplicate the bits of the Y and space out the bits of the X. The
 * top line is a function only of Y, so it can be calculated once per row and
 * stored in a register. The bottom line is simply X with the bits spaced out.
 * Spacing out the X is easy enough with a LUT, or by subtracting+ANDing the
 * mask pattern (abusing carry bits).
 *
 * This format is also supported on Midgard GPUs, where it *can* be used for
 * textures and framebuffers. That said, in practice it is usually used as a
 * fallback layout; Midgard introduces Arm FrameBuffer Compression, which is
 * significantly more efficient than Utgard-style tiling and preferred for both
 * textures and framebuffers, where possible. For unsupported texture types,
 * for instance sRGB textures and framebuffers, this tiling scheme is used at a
 * performance penalty, as AFBC is not compatible.
 */
807ec681f3Smrg
/* Given the lower 4 bits of the Y coordinate, we would like to duplicate
 * every bit over. So instead of 0b1010, we would like 0b11001100. The idea is
 * that the bits in the solely-Y places get a Y, and the bits in the XOR
 * places *also* get a Y, implementing the
 * | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 | row of the tiling pattern.
 *
 * Written in hexadecimal rather than 0b literals: binary integer constants
 * are a GNU extension and not portable C11 (each entry's binary form is
 * noted alongside). */

const uint32_t bit_duplication[16] = {
   0x00, /* 0b00000000 */
   0x03, /* 0b00000011 */
   0x0C, /* 0b00001100 */
   0x0F, /* 0b00001111 */
   0x30, /* 0b00110000 */
   0x33, /* 0b00110011 */
   0x3C, /* 0b00111100 */
   0x3F, /* 0b00111111 */
   0xC0, /* 0b11000000 */
   0xC3, /* 0b11000011 */
   0xCC, /* 0b11001100 */
   0xCF, /* 0b11001111 */
   0xF0, /* 0b11110000 */
   0xF3, /* 0b11110011 */
   0xFC, /* 0b11111100 */
   0xFF, /* 0b11111111 */
};
1047ec681f3Smrg
/* Space out the bits of a 4-bit nibble: x3 x2 x1 x0 becomes
 * 0 x3 0 x2 0 x1 0 x0, the X contribution to the in-tile index.
 *
 * Written in hexadecimal rather than 0b literals: binary integer constants
 * are a GNU extension and not portable C11 (each entry's binary form is
 * noted alongside). */

const unsigned space_4[16] = {
   0x00, /* 0b0000000 */
   0x01, /* 0b0000001 */
   0x04, /* 0b0000100 */
   0x05, /* 0b0000101 */
   0x10, /* 0b0010000 */
   0x11, /* 0b0010001 */
   0x14, /* 0b0010100 */
   0x15, /* 0b0010101 */
   0x40, /* 0b1000000 */
   0x41, /* 0b1000001 */
   0x44, /* 0b1000100 */
   0x45, /* 0b1000101 */
   0x50, /* 0b1010000 */
   0x51, /* 0b1010001 */
   0x54, /* 0b1010100 */
   0x55  /* 0b1010101 */
};
1257ec681f3Smrg
/* The scheme uses 16x16 tiles */

#define TILE_WIDTH 16
#define TILE_HEIGHT 16
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)

/* We need a 128-bit type for idiomatically tiling bpp128 formats. The type must
 * only support copies and sizeof, so emulating with a packed structure works
 * well enough, but if there's a native 128-bit type we may as well prefer
 * that. */

#ifdef __SIZEOF_INT128__
typedef __uint128_t pan_uint128_t;
#else
typedef struct {
  uint64_t lo;
  uint64_t hi;
} __attribute__((packed)) pan_uint128_t;
#endif

/* Likewise, a packed 3-byte type for copying bpp24 pixels as a unit. */
typedef struct {
  uint16_t lo;
  uint8_t hi;
} __attribute__((packed)) pan_uint24_t;
1507ec681f3Smrg
/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
 *
 * dest_start precomputes the offset to the beginning of the first horizontal
 * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are
 * stored linearly, so we get the X tile number by shifting and then multiply
 * by the bytes per tile.
 *
 * We iterate across the pixels we're trying to store in source-order. For each
 * row in the destination image, we figure out which row of 16x16 block we're
 * in, by slicing off the lower 4 bits (block_y).
 *
 * dest then precomputes the location of the top-left corner of the block the
 * row starts in. In pixel coordinates (where the origin is the top-left),
 * (block_y, 0) is the top-left corner of the leftmost tile in this row. While
 * pixels are reordered within a block, the blocks themselves are stored
 * linearly, so multiplying block_y by the pixel stride of the destination
 * image equals the byte offset of that top-left corner of the block this row
 * is in.
 *
 * On the other hand, the source is linear, so we compute the locations of the
 * start and end of the row in the source by simple linear addressing.
 *
 * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0
 * y0] value. Since this is constant across a row, we look it up per-row and
 * store it in expanded_y.
 *
 * Finally, we iterate each row in source order. In the outer loop, we iterate
 * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should
 * be unrolled), calculating the index within the tile and writing.
 */
1817ec681f3Smrg
1827ec681f3Smrg#define TILED_ACCESS_TYPE(pixel_t, shift) \
1837ec681f3Smrgstatic ALWAYS_INLINE void \
1847ec681f3Smrgpanfrost_access_tiled_image_##pixel_t \
1857ec681f3Smrg                              (void *dst, void *src, \
1867ec681f3Smrg                               uint16_t sx, uint16_t sy, \
1877ec681f3Smrg                               uint16_t w, uint16_t h, \
1887ec681f3Smrg                               uint32_t dst_stride, \
1897ec681f3Smrg                               uint32_t src_stride, \
1907ec681f3Smrg                               bool is_store) \
1917ec681f3Smrg{ \
1927ec681f3Smrg   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \
1937ec681f3Smrg   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
1947ec681f3Smrg      uint16_t block_y = y & ~0x0f; \
1957ec681f3Smrg      uint8_t *dest = (uint8_t *) (dest_start + (block_y * dst_stride)); \
1967ec681f3Smrg      pixel_t *source = src + (src_y * src_stride); \
1977ec681f3Smrg      pixel_t *source_end = source + w; \
1987ec681f3Smrg      unsigned expanded_y = bit_duplication[y & 0xF] << shift; \
1997ec681f3Smrg      for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \
2007ec681f3Smrg         for (uint8_t i = 0; i < 16; ++i) { \
2017ec681f3Smrg            unsigned index = expanded_y ^ (space_4[i] << shift); \
2027ec681f3Smrg            if (is_store) \
2037ec681f3Smrg                *((pixel_t *) (dest + index)) = *(source++); \
2047ec681f3Smrg            else \
2057ec681f3Smrg                *(source++) = *((pixel_t *) (dest + index)); \
2067ec681f3Smrg         } \
2077ec681f3Smrg      } \
2087ec681f3Smrg   } \
2097ec681f3Smrg} \
2107ec681f3Smrg
2117ec681f3SmrgTILED_ACCESS_TYPE(uint8_t, 0);
2127ec681f3SmrgTILED_ACCESS_TYPE(uint16_t, 1);
2137ec681f3SmrgTILED_ACCESS_TYPE(uint32_t, 2);
2147ec681f3SmrgTILED_ACCESS_TYPE(uint64_t, 3);
2157ec681f3SmrgTILED_ACCESS_TYPE(pan_uint128_t, 4);
2167ec681f3Smrg
/* Generic tiling/detiling loop for regions that are not 16-aligned, or for
 * pixel sizes with no fast path (e.g. 24-bit). Accesses one pixel (or
 * compressed block) at a time, at any alignment. tile_shift is log2 of the
 * tile dimension: 4 for 16x16 pixel tiles, 2 for 4x4 tiles of compressed
 * blocks. As a statement macro it captures sx, sy, w, h, src, dst,
 * src_stride and dst_stride from the enclosing scope. */
#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \
   const unsigned mask = (1 << tile_shift) - 1; \
   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
      unsigned block_y = y & ~mask; \
      /* Byte offset of the row of tiles this row falls in */ \
      unsigned block_start_s = block_y * dst_stride; \
      unsigned source_start = src_y * src_stride; \
      unsigned expanded_y = bit_duplication[y & mask]; \
 \
      for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \
         /* Tiles are stored linearly: tile index times pixels per tile */ \
         unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \
         unsigned index = expanded_y ^ space_4[x & mask]; \
         uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \
         uint8_t *dest = dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \
 \
         /* Select copy direction based on is_store */ \
         pixel_t *outp = (pixel_t *) (is_store ? dest : source); \
         pixel_t *inp = (pixel_t *) (is_store ? source : dest); \
         *outp = *inp; \
      } \
   } \
}
2377ec681f3Smrg
/* Dispatch to the TILED_UNALIGNED_TYPE instantiation matching the pixel (or
 * compressed block) size. Captures bpp from the enclosing scope; other bpp
 * values are silently ignored. */
#define TILED_UNALIGNED_TYPES(store, shift) { \
   if (bpp == 8) \
      TILED_UNALIGNED_TYPE(uint8_t, store, shift) \
   else if (bpp == 16) \
      TILED_UNALIGNED_TYPE(uint16_t, store, shift) \
   else if (bpp == 24) \
      TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \
   else if (bpp == 32) \
      TILED_UNALIGNED_TYPE(uint32_t, store, shift) \
   else if (bpp == 64) \
      TILED_UNALIGNED_TYPE(uint64_t, store, shift) \
   else if (bpp == 128) \
      TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \
}
2527ec681f3Smrg
/*
 * Perform a generic access to a tiled image with a given format. This works
 * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
 * specified in pixels, not blocks, but our internal routines work in blocks,
 * so we divide here. Alignment is assumed.
 *
 * Note: the TILED_UNALIGNED_TYPES expansions below capture sx, sy, w, h, bpp,
 * dst, src and the strides by name, so these locals must keep their names.
 */
static void
panfrost_access_tiled_image_generic(void *dst, void *src,
                               unsigned sx, unsigned sy,
                               unsigned w, unsigned h,
                               uint32_t dst_stride,
                               uint32_t src_stride,
                               const struct util_format_description *desc,
                               bool _is_store)
{
   unsigned bpp = desc->block.bits;

   /* Convert units from pixels to (compressed) blocks */
   sx /= desc->block.width;
   sy /= desc->block.height;
   w = DIV_ROUND_UP(w, desc->block.width);
   h = DIV_ROUND_UP(h, desc->block.height);

   /* Block-compressed formats tile as 4x4 blocks (shift 2); uncompressed
    * formats tile as 16x16 pixels (shift 4) */
   if (desc->block.width > 1) {
      if (_is_store)
         TILED_UNALIGNED_TYPES(true, 2)
      else
         TILED_UNALIGNED_TYPES(false, 2)
   } else {
      if (_is_store)
         TILED_UNALIGNED_TYPES(true, 4)
      else
         TILED_UNALIGNED_TYPES(false, 4)
   }
}
2887ec681f3Smrg
/* Address of linear-side pixel (_x, _y) relative to the original region
 * origin; captures orig_x, orig_y, src_stride and bpp from the enclosing
 * scope. */
#define OFFSET(src, _x, _y) (void *) ((uint8_t *) src + ((_y) - orig_y) * src_stride + (((_x) - orig_x) * (bpp / 8)))
2907ec681f3Smrg
/* Access (load or store) an arbitrary region of a tiled image. Strategy:
 * carve the unaligned edge strips (top, bottom, left, right) off the region
 * and handle them with the slow generic routine, shrinking x/y/w/h as we go,
 * so that the remaining interior is 16-aligned and can use the fast
 * per-tile routines. The linear pointer for each piece is recomputed with
 * OFFSET, which is relative to the original (orig_x, orig_y). */
static ALWAYS_INLINE void
panfrost_access_tiled_image(void *dst, void *src,
                           unsigned x, unsigned y,
                           unsigned w, unsigned h,
                           uint32_t dst_stride,
                           uint32_t src_stride,
                           enum pipe_format format,
                           bool is_store)
{
   const struct util_format_description *desc = util_format_description(format);

   /* Block-compressed and 24-bit formats have no fast path at all */
   if (desc->block.width > 1 || desc->block.bits == 24) {
      panfrost_access_tiled_image_generic(dst, (void *) src,
            x, y, w, h,
            dst_stride, src_stride, desc, is_store);

      return;
   }

   unsigned bpp = desc->block.bits;
   /* Bounds of the fully-tiled interior, in pixels */
   unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH;
   unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT;
   unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH;
   unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT;

   /* First, tile the top portion */

   unsigned orig_x = x, orig_y = y;

   if (first_full_tile_y != y) {
      /* MIN2 in case the whole region is shorter than the top strip */
      unsigned dist = MIN2(first_full_tile_y - y, h);

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
            x, y, w, dist,
            dst_stride, src_stride, desc, is_store);

      if (dist == h)
         return;

      y += dist;
      h -= dist;
   }

   /* Next, the bottom portion */
   if (last_full_tile_y != (y + h)) {
      unsigned dist = (y + h) - last_full_tile_y;

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y),
            x, last_full_tile_y, w, dist,
            dst_stride, src_stride, desc, is_store);

      h -= dist;
   }

   /* The left portion */
   if (first_full_tile_x != x) {
      unsigned dist = MIN2(first_full_tile_x - x, w);

      panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y),
            x, y, dist, h,
            dst_stride, src_stride, desc, is_store);

      if (dist == w)
         return;

      x += dist;
      w -= dist;
   }

   /* Finally, the right portion */
   if (last_full_tile_x != (x + w)) {
      unsigned dist = (x + w) - last_full_tile_x;

      panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y),
            last_full_tile_x, y, dist, h,
            dst_stride, src_stride, desc, is_store);

      w -= dist;
   }

   /* What remains is 16-aligned: dispatch to the fast path for this bpp */
   if (bpp == 8)
      panfrost_access_tiled_image_uint8_t(dst,  OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 16)
      panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 32)
      panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 64)
      panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
   else if (bpp == 128)
      panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store);
}
3827ec681f3Smrg
3837ec681f3Smrg/**
3847ec681f3Smrg * Access a tiled image (load or store). Note: the region of interest (x, y, w,
3857ec681f3Smrg * h) is specified in pixels, not blocks. It is expected that these quantities
3867ec681f3Smrg * are aligned to the block size.
3877ec681f3Smrg */
3887ec681f3Smrgvoid
3897ec681f3Smrgpanfrost_store_tiled_image(void *dst, const void *src,
3907ec681f3Smrg                           unsigned x, unsigned y,
3917ec681f3Smrg                           unsigned w, unsigned h,
3927ec681f3Smrg                           uint32_t dst_stride,
3937ec681f3Smrg                           uint32_t src_stride,
3947ec681f3Smrg                           enum pipe_format format)
3957ec681f3Smrg{
3967ec681f3Smrg    panfrost_access_tiled_image(dst, (void *) src,
3977ec681f3Smrg        x, y, w, h,
3987ec681f3Smrg        dst_stride, src_stride, format, true);
3997ec681f3Smrg}
4007ec681f3Smrg
4017ec681f3Smrgvoid
4027ec681f3Smrgpanfrost_load_tiled_image(void *dst, const void *src,
4037ec681f3Smrg                           unsigned x, unsigned y,
4047ec681f3Smrg                           unsigned w, unsigned h,
4057ec681f3Smrg                           uint32_t dst_stride,
4067ec681f3Smrg                           uint32_t src_stride,
4077ec681f3Smrg                           enum pipe_format format)
4087ec681f3Smrg{
4097ec681f3Smrg   panfrost_access_tiled_image((void *) src, dst,
4107ec681f3Smrg       x, y, w, h,
4117ec681f3Smrg       src_stride, dst_stride, format, false);
4127ec681f3Smrg}
413