17ec681f3Smrg/* 27ec681f3Smrg * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be> 37ec681f3Smrg * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io> 47ec681f3Smrg * Copyright (c) 2018 Vasily Khoruzhick <anarsoul@gmail.com> 57ec681f3Smrg * Copyright (c) 2019 Collabora, Ltd. 67ec681f3Smrg * 77ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 87ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 97ec681f3Smrg * to deal in the Software without restriction, including without limitation 107ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license, 117ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 127ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 137ec681f3Smrg * 147ec681f3Smrg * The above copyright notice and this permission notice (including the 157ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions 167ec681f3Smrg * of the Software. 177ec681f3Smrg * 187ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 197ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 207ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 217ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 227ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 237ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 247ec681f3Smrg * DEALINGS IN THE SOFTWARE. 257ec681f3Smrg * 267ec681f3Smrg */ 277ec681f3Smrg 287ec681f3Smrg#include "pan_tiling.h" 297ec681f3Smrg#include <stdbool.h> 307ec681f3Smrg#include "util/macros.h" 317ec681f3Smrg 327ec681f3Smrg/* This file implements software encode/decode of the tiling format used for 337ec681f3Smrg * textures and framebuffers primarily on Utgard GPUs. 
Names for this format
 * include "Utgard-style tiling", "(Mali) swizzled textures", and
 * "U-interleaved" (the former two names being used in the community
 * Lima/Panfrost drivers; the latter name used internally at Arm).
 * Conceptually, like any tiling scheme, the pixel reordering attempts to
 * exploit 2D spatial locality, to improve cache locality in both horizontal
 * and vertical directions.
 *
 * This format is tiled: first, the image dimensions must be aligned to 16
 * pixels in each axis. Once aligned, the image is divided into 16x16 tiles.
 * This size harmonizes with other properties of the GPU; on Midgard,
 * framebuffer tiles are logically 16x16 (this is the tile size used in
 * Transaction Elimination and the minimum tile size used in Hierarchical
 * Tiling). Conversely, for a standard 4 bytes-per-pixel format (like
 * RGBA8888), 16 pixels * 4 bytes/pixel = 64 bytes, equal to the cache line
 * size.
 *
 * Within each 16x16 block, the bits are reordered according to this pattern:
 *
 * | y3 | (x3 ^ y3) | y2 | (y2 ^ x2) | y1 | (y1 ^ x1) | y0 | (y0 ^ x0) |
 *
 * Basically, interleaving the X and Y bits, with XORs thrown in for every
 * adjacent bit pair.
 *
 * Both encode and decode are cheap to implement in both hardware and software.
 * In hardware, lines are simply rerouted to reorder and some XOR gates are
 * thrown in. Software has to be a bit more clever.
 *
 * In software, the trick is to divide the pattern into two lines:
 *
 *   | y3 | y3 | y2 | y2 | y1 | y1 | y0 | y0 |
 * ^ |  0 | x3 |  0 | x2 |  0 | x1 |  0 | x0 |
 *
 * That is, duplicate the bits of the Y and space out the bits of the X. The
 * top line is a function only of Y, so it can be calculated once per row and
 * stored in a register. The bottom line is simply X with the bits spaced out.
 * Spacing out the X is easy enough with a LUT, or by subtracting+ANDing the
 * mask pattern (abusing carry bits).
 *
 * This format is also supported on Midgard GPUs, where it *can* be used for
 * textures and framebuffers. That said, in practice it is usually used as a
 * fallback layout; Midgard introduces Arm FrameBuffer Compression, which is
 * significantly more efficient than Utgard-style tiling and preferred for both
 * textures and framebuffers, where possible. For unsupported texture types,
 * for instance sRGB textures and framebuffers, this tiling scheme is used at a
 * performance penalty, as AFBC is not compatible.
 */

/* Given the lower 4-bits of the Y coordinate, we would like to
 * duplicate every bit over. So instead of 0b1010, we would like
 * 0b11001100. The idea is that for the bits in the solely Y place, we
 * get a Y place, and the bits in the XOR place *also* get a Y.
 */

/* NOTE(review): these lookup tables appear to be file-local; consider
 * `static const` — confirm there are no external users before changing
 * linkage. */
const uint32_t bit_duplication[16] = {
   0b00000000,
   0b00000011,
   0b00001100,
   0b00001111,
   0b00110000,
   0b00110011,
   0b00111100,
   0b00111111,
   0b11000000,
   0b11000011,
   0b11001100,
   0b11001111,
   0b11110000,
   0b11110011,
   0b11111100,
   0b11111111,
};

/* Space the bits out of a 4-bit nibble: bit i of the index lands in bit 2i of
 * the entry, with zeroes in the odd (XOR) positions in between. */
const unsigned space_4[16] = {
   0b0000000,
   0b0000001,
   0b0000100,
   0b0000101,
   0b0010000,
   0b0010001,
   0b0010100,
   0b0010101,
   0b1000000,
   0b1000001,
   0b1000100,
   0b1000101,
   0b1010000,
   0b1010001,
   0b1010100,
   0b1010101
};

/* The scheme uses 16x16 tiles */

#define TILE_WIDTH 16
#define TILE_HEIGHT 16
#define PIXELS_PER_TILE (TILE_WIDTH * TILE_HEIGHT)

/* We need a 128-bit type for idiomatically tiling bpp128 formats. The type must
 * only support copies and sizeof, so emulating with a packed structure works
 * well enough, but if there's a native 128-bit type we may as well prefer
 * that.
 */

#ifdef __SIZEOF_INT128__
typedef __uint128_t pan_uint128_t;
#else
/* Fallback when the compiler has no native 128-bit integer: a packed pair of
 * 64-bit halves. Only copies and sizeof are ever performed on this type. */
typedef struct {
   uint64_t lo;
   uint64_t hi;
} __attribute__((packed)) pan_uint128_t;
#endif

/* Packed 24-bit pixel type (16 + 8 bits), used by the generic path for
 * bpp == 24 formats. */
typedef struct {
   uint16_t lo;
   uint8_t hi;
} __attribute__((packed)) pan_uint24_t;

/* Optimized routine to tile an aligned (w & 0xF == 0) texture. Explanation:
 *
 * dest_start precomputes the offset to the beginning of the first horizontal
 * tile we're writing to, knowing that x is 16-aligned. Tiles themselves are
 * stored linearly, so we get the X tile number by shifting and then multiply
 * by the bytes per tile.
 *
 * We iterate across the pixels we're trying to store in source-order. For each
 * row in the destination image, we figure out which row of 16x16 blocks we're
 * in, by slicing off the lower 4-bits (block_y).
 *
 * dest then precomputes the location of the top-left corner of the block the
 * row starts in. In pixel coordinates (where the origin is the top-left),
 * (block_y, 0) is the top-left corner of the leftmost tile in this row. While
 * pixels are reordered within a block, the blocks themselves are stored
 * linearly, so multiplying block_y by the pixel stride of the destination
 * image equals the byte offset of that top-left corner of the block this row
 * is in.
 *
 * On the other hand, the source is linear so we compute the locations of the
 * start and end of the row in the source by a simple linear addressing.
 *
 * For indexing within the tile, we need to XOR with the [y3 y3 y2 y2 y1 y1 y0
 * y0] value. Since this is constant across a row, we look it up per-row and
 * store in expanded_y.
 *
 * Finally, we iterate each row in source order. In the outer loop, we iterate
 * each 16 pixel tile. Within each tile, we iterate the 16 pixels (this should
 * be unrolled), calculating the index within the tile and writing.
 *
 * The is_store flag selects the copy direction at expansion time: store copies
 * linear source -> tiled dest, load copies tiled dest -> linear source. */

#define TILED_ACCESS_TYPE(pixel_t, shift) \
static ALWAYS_INLINE void \
panfrost_access_tiled_image_##pixel_t \
                        (void *dst, void *src, \
                         uint16_t sx, uint16_t sy, \
                         uint16_t w, uint16_t h, \
                         uint32_t dst_stride, \
                         uint32_t src_stride, \
                         bool is_store) \
{ \
   uint8_t *dest_start = dst + ((sx >> 4) * PIXELS_PER_TILE * sizeof(pixel_t)); \
   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
      uint16_t block_y = y & ~0x0f; \
      uint8_t *dest = (uint8_t *) (dest_start + (block_y * dst_stride)); \
      pixel_t *source = src + (src_y * src_stride); \
      pixel_t *source_end = source + w; \
      unsigned expanded_y = bit_duplication[y & 0xF] << shift; \
      for (; source < source_end; dest += (PIXELS_PER_TILE << shift)) { \
         for (uint8_t i = 0; i < 16; ++i) { \
            unsigned index = expanded_y ^ (space_4[i] << shift); \
            if (is_store) \
               *((pixel_t *) (dest + index)) = *(source++); \
            else \
               *(source++) = *((pixel_t *) (dest + index)); \
         } \
      } \
   } \
} \

/* Instantiate one specialized accessor per power-of-two pixel size; `shift`
 * is log2(sizeof(pixel_t)). */
TILED_ACCESS_TYPE(uint8_t, 0);
TILED_ACCESS_TYPE(uint16_t, 1);
TILED_ACCESS_TYPE(uint32_t, 2);
TILED_ACCESS_TYPE(uint64_t, 3);
TILED_ACCESS_TYPE(pan_uint128_t, 4);

/* Generic per-pixel access loop for regions that are not tile-aligned.
 * `tile_shift` is log2 of the tile dimension: 4 for 16x16 pixel tiles, 2 for
 * the 4x4 "tiles" of blocks used by block-compressed formats (see
 * panfrost_access_tiled_image_generic). Expects sx/sy/w/h, src/dst and the
 * strides to be in scope at the expansion site. */
#define TILED_UNALIGNED_TYPE(pixel_t, is_store, tile_shift) { \
   const unsigned mask = (1 << tile_shift) - 1; \
   for (int y = sy, src_y = 0; src_y < h; ++y, ++src_y) { \
      unsigned block_y = y & ~mask; \
      unsigned block_start_s = block_y * dst_stride; \
      unsigned source_start = src_y * src_stride; \
      unsigned expanded_y = bit_duplication[y & mask]; \
 \
      for (int x = sx, src_x = 0; src_x < w; ++x, ++src_x) { \
         unsigned block_x_s = (x >> tile_shift) * (1 << (tile_shift * 2)); \
         unsigned index = expanded_y ^ space_4[x & mask]; \
         uint8_t *source = src + source_start + sizeof(pixel_t) * src_x; \
         uint8_t *dest = dst + block_start_s + sizeof(pixel_t) * (block_x_s + index); \
 \
         pixel_t *outp = (pixel_t *) (is_store ? dest : source); \
         pixel_t *inp = (pixel_t *) (is_store ? source : dest); \
         *outp = *inp; \
      } \
   } \
}

/* Dispatch the generic loop on the pixel size in bits. Expects `bpp` in scope
 * at the expansion site. */
#define TILED_UNALIGNED_TYPES(store, shift) { \
   if (bpp == 8) \
      TILED_UNALIGNED_TYPE(uint8_t, store, shift) \
   else if (bpp == 16) \
      TILED_UNALIGNED_TYPE(uint16_t, store, shift) \
   else if (bpp == 24) \
      TILED_UNALIGNED_TYPE(pan_uint24_t, store, shift) \
   else if (bpp == 32) \
      TILED_UNALIGNED_TYPE(uint32_t, store, shift) \
   else if (bpp == 64) \
      TILED_UNALIGNED_TYPE(uint64_t, store, shift) \
   else if (bpp == 128) \
      TILED_UNALIGNED_TYPE(pan_uint128_t, store, shift) \
}

/*
 * Perform a generic access to a tiled image with a given format. This works
 * even for block-compressed images on entire blocks at a time. sx/sy/w/h are
 * specified in pixels, not blocks, but our internal routines work in blocks,
 * so we divide here. Alignment is assumed.
2587ec681f3Smrg */ 2597ec681f3Smrgstatic void 2607ec681f3Smrgpanfrost_access_tiled_image_generic(void *dst, void *src, 2617ec681f3Smrg unsigned sx, unsigned sy, 2627ec681f3Smrg unsigned w, unsigned h, 2637ec681f3Smrg uint32_t dst_stride, 2647ec681f3Smrg uint32_t src_stride, 2657ec681f3Smrg const struct util_format_description *desc, 2667ec681f3Smrg bool _is_store) 2677ec681f3Smrg{ 2687ec681f3Smrg unsigned bpp = desc->block.bits; 2697ec681f3Smrg 2707ec681f3Smrg /* Convert units */ 2717ec681f3Smrg sx /= desc->block.width; 2727ec681f3Smrg sy /= desc->block.height; 2737ec681f3Smrg w = DIV_ROUND_UP(w, desc->block.width); 2747ec681f3Smrg h = DIV_ROUND_UP(h, desc->block.height); 2757ec681f3Smrg 2767ec681f3Smrg if (desc->block.width > 1) { 2777ec681f3Smrg if (_is_store) 2787ec681f3Smrg TILED_UNALIGNED_TYPES(true, 2) 2797ec681f3Smrg else 2807ec681f3Smrg TILED_UNALIGNED_TYPES(false, 2) 2817ec681f3Smrg } else { 2827ec681f3Smrg if (_is_store) 2837ec681f3Smrg TILED_UNALIGNED_TYPES(true, 4) 2847ec681f3Smrg else 2857ec681f3Smrg TILED_UNALIGNED_TYPES(false, 4) 2867ec681f3Smrg } 2877ec681f3Smrg} 2887ec681f3Smrg 2897ec681f3Smrg#define OFFSET(src, _x, _y) (void *) ((uint8_t *) src + ((_y) - orig_y) * src_stride + (((_x) - orig_x) * (bpp / 8))) 2907ec681f3Smrg 2917ec681f3Smrgstatic ALWAYS_INLINE void 2927ec681f3Smrgpanfrost_access_tiled_image(void *dst, void *src, 2937ec681f3Smrg unsigned x, unsigned y, 2947ec681f3Smrg unsigned w, unsigned h, 2957ec681f3Smrg uint32_t dst_stride, 2967ec681f3Smrg uint32_t src_stride, 2977ec681f3Smrg enum pipe_format format, 2987ec681f3Smrg bool is_store) 2997ec681f3Smrg{ 3007ec681f3Smrg const struct util_format_description *desc = util_format_description(format); 3017ec681f3Smrg 3027ec681f3Smrg if (desc->block.width > 1 || desc->block.bits == 24) { 3037ec681f3Smrg panfrost_access_tiled_image_generic(dst, (void *) src, 3047ec681f3Smrg x, y, w, h, 3057ec681f3Smrg dst_stride, src_stride, desc, is_store); 3067ec681f3Smrg 3077ec681f3Smrg return; 
3087ec681f3Smrg } 3097ec681f3Smrg 3107ec681f3Smrg unsigned bpp = desc->block.bits; 3117ec681f3Smrg unsigned first_full_tile_x = DIV_ROUND_UP(x, TILE_WIDTH) * TILE_WIDTH; 3127ec681f3Smrg unsigned first_full_tile_y = DIV_ROUND_UP(y, TILE_HEIGHT) * TILE_HEIGHT; 3137ec681f3Smrg unsigned last_full_tile_x = ((x + w) / TILE_WIDTH) * TILE_WIDTH; 3147ec681f3Smrg unsigned last_full_tile_y = ((y + h) / TILE_HEIGHT) * TILE_HEIGHT; 3157ec681f3Smrg 3167ec681f3Smrg /* First, tile the top portion */ 3177ec681f3Smrg 3187ec681f3Smrg unsigned orig_x = x, orig_y = y; 3197ec681f3Smrg 3207ec681f3Smrg if (first_full_tile_y != y) { 3217ec681f3Smrg unsigned dist = MIN2(first_full_tile_y - y, h); 3227ec681f3Smrg 3237ec681f3Smrg panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), 3247ec681f3Smrg x, y, w, dist, 3257ec681f3Smrg dst_stride, src_stride, desc, is_store); 3267ec681f3Smrg 3277ec681f3Smrg if (dist == h) 3287ec681f3Smrg return; 3297ec681f3Smrg 3307ec681f3Smrg y += dist; 3317ec681f3Smrg h -= dist; 3327ec681f3Smrg } 3337ec681f3Smrg 3347ec681f3Smrg /* Next, the bottom portion */ 3357ec681f3Smrg if (last_full_tile_y != (y + h)) { 3367ec681f3Smrg unsigned dist = (y + h) - last_full_tile_y; 3377ec681f3Smrg 3387ec681f3Smrg panfrost_access_tiled_image_generic(dst, OFFSET(src, x, last_full_tile_y), 3397ec681f3Smrg x, last_full_tile_y, w, dist, 3407ec681f3Smrg dst_stride, src_stride, desc, is_store); 3417ec681f3Smrg 3427ec681f3Smrg h -= dist; 3437ec681f3Smrg } 3447ec681f3Smrg 3457ec681f3Smrg /* The left portion */ 3467ec681f3Smrg if (first_full_tile_x != x) { 3477ec681f3Smrg unsigned dist = MIN2(first_full_tile_x - x, w); 3487ec681f3Smrg 3497ec681f3Smrg panfrost_access_tiled_image_generic(dst, OFFSET(src, x, y), 3507ec681f3Smrg x, y, dist, h, 3517ec681f3Smrg dst_stride, src_stride, desc, is_store); 3527ec681f3Smrg 3537ec681f3Smrg if (dist == w) 3547ec681f3Smrg return; 3557ec681f3Smrg 3567ec681f3Smrg x += dist; 3577ec681f3Smrg w -= dist; 3587ec681f3Smrg } 3597ec681f3Smrg 3607ec681f3Smrg 
/* Finally, the right portion */ 3617ec681f3Smrg if (last_full_tile_x != (x + w)) { 3627ec681f3Smrg unsigned dist = (x + w) - last_full_tile_x; 3637ec681f3Smrg 3647ec681f3Smrg panfrost_access_tiled_image_generic(dst, OFFSET(src, last_full_tile_x, y), 3657ec681f3Smrg last_full_tile_x, y, dist, h, 3667ec681f3Smrg dst_stride, src_stride, desc, is_store); 3677ec681f3Smrg 3687ec681f3Smrg w -= dist; 3697ec681f3Smrg } 3707ec681f3Smrg 3717ec681f3Smrg if (bpp == 8) 3727ec681f3Smrg panfrost_access_tiled_image_uint8_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 3737ec681f3Smrg else if (bpp == 16) 3747ec681f3Smrg panfrost_access_tiled_image_uint16_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 3757ec681f3Smrg else if (bpp == 32) 3767ec681f3Smrg panfrost_access_tiled_image_uint32_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 3777ec681f3Smrg else if (bpp == 64) 3787ec681f3Smrg panfrost_access_tiled_image_uint64_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 3797ec681f3Smrg else if (bpp == 128) 3807ec681f3Smrg panfrost_access_tiled_image_pan_uint128_t(dst, OFFSET(src, x, y), x, y, w, h, dst_stride, src_stride, is_store); 3817ec681f3Smrg} 3827ec681f3Smrg 3837ec681f3Smrg/** 3847ec681f3Smrg * Access a tiled image (load or store). Note: the region of interest (x, y, w, 3857ec681f3Smrg * h) is specified in pixels, not blocks. It is expected that these quantities 3867ec681f3Smrg * are aligned to the block size. 
3877ec681f3Smrg */ 3887ec681f3Smrgvoid 3897ec681f3Smrgpanfrost_store_tiled_image(void *dst, const void *src, 3907ec681f3Smrg unsigned x, unsigned y, 3917ec681f3Smrg unsigned w, unsigned h, 3927ec681f3Smrg uint32_t dst_stride, 3937ec681f3Smrg uint32_t src_stride, 3947ec681f3Smrg enum pipe_format format) 3957ec681f3Smrg{ 3967ec681f3Smrg panfrost_access_tiled_image(dst, (void *) src, 3977ec681f3Smrg x, y, w, h, 3987ec681f3Smrg dst_stride, src_stride, format, true); 3997ec681f3Smrg} 4007ec681f3Smrg 4017ec681f3Smrgvoid 4027ec681f3Smrgpanfrost_load_tiled_image(void *dst, const void *src, 4037ec681f3Smrg unsigned x, unsigned y, 4047ec681f3Smrg unsigned w, unsigned h, 4057ec681f3Smrg uint32_t dst_stride, 4067ec681f3Smrg uint32_t src_stride, 4077ec681f3Smrg enum pipe_format format) 4087ec681f3Smrg{ 4097ec681f3Smrg panfrost_access_tiled_image((void *) src, dst, 4107ec681f3Smrg x, y, w, h, 4117ec681f3Smrg src_stride, dst_stride, format, false); 4127ec681f3Smrg} 413