17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2008 VMware, Inc.
37ec681f3Smrg * Copyright (C) 2014 Broadcom
47ec681f3Smrg * Copyright (C) 2018-2019 Alyssa Rosenzweig
57ec681f3Smrg * Copyright (C) 2019-2020 Collabora, Ltd.
67ec681f3Smrg *
77ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
87ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
97ec681f3Smrg * to deal in the Software without restriction, including without limitation
107ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
117ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
127ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
137ec681f3Smrg *
147ec681f3Smrg * The above copyright notice and this permission notice (including the next
157ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
167ec681f3Smrg * Software.
177ec681f3Smrg *
187ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
197ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
207ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
217ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
227ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
237ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
247ec681f3Smrg * SOFTWARE.
257ec681f3Smrg *
267ec681f3Smrg */
277ec681f3Smrg
287ec681f3Smrg#include "util/macros.h"
297ec681f3Smrg#include "util/u_math.h"
307ec681f3Smrg#include "pan_texture.h"
317ec681f3Smrg#include "panfrost-quirks.h"
327ec681f3Smrg
337ec681f3Smrg#ifndef PAN_ARCH
347ec681f3Smrg
357ec681f3Smrg/* Generates a texture descriptor. Ideally, descriptors are immutable after the
367ec681f3Smrg * texture is created, so we can keep these hanging around in GPU memory in a
377ec681f3Smrg * dedicated BO and not have to worry. In practice there are some minor gotchas
387ec681f3Smrg * with this (the driver sometimes will change the format of a texture on the
397ec681f3Smrg * fly for compression) but it's fast enough to just regenerate the descriptor
407ec681f3Smrg * in those cases, rather than monkeypatching at drawtime. A texture descriptor
417ec681f3Smrg * consists of a 32-byte header followed by pointers.
427ec681f3Smrg */
437ec681f3Smrg
447ec681f3Smrg/* List of supported modifiers, in descending order of preference. AFBC is
457ec681f3Smrg * faster than u-interleaved tiling which is faster than linear. Within AFBC,
467ec681f3Smrg * enabling the YUV-like transform is typically a win where possible. */
477ec681f3Smrg
487ec681f3Smrguint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
497ec681f3Smrg        DRM_FORMAT_MOD_ARM_AFBC(
507ec681f3Smrg                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
517ec681f3Smrg                AFBC_FORMAT_MOD_SPARSE |
527ec681f3Smrg                AFBC_FORMAT_MOD_YTR),
537ec681f3Smrg
547ec681f3Smrg        DRM_FORMAT_MOD_ARM_AFBC(
557ec681f3Smrg                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
567ec681f3Smrg                AFBC_FORMAT_MOD_SPARSE),
577ec681f3Smrg
587ec681f3Smrg        DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
597ec681f3Smrg        DRM_FORMAT_MOD_LINEAR
607ec681f3Smrg};
617ec681f3Smrg
627ec681f3Smrg/* If not explicitly, line stride is calculated for block-based formats as
637ec681f3Smrg * (ceil(width / block_width) * block_size). As a special case, this is left
647ec681f3Smrg * zero if there is only a single block vertically. So, we have a helper to
657ec681f3Smrg * extract the dimensions of a block-based format and use that to calculate the
667ec681f3Smrg * line stride as such.
677ec681f3Smrg */
687ec681f3Smrg
697ec681f3Smrgunsigned
707ec681f3Smrgpanfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
717ec681f3Smrg{
727ec681f3Smrg        if (!drm_is_afbc(modifier)) {
737ec681f3Smrg                assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
747ec681f3Smrg                return 16;
757ec681f3Smrg        }
767ec681f3Smrg
777ec681f3Smrg        switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
787ec681f3Smrg        case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
797ec681f3Smrg                return 16;
807ec681f3Smrg        case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
817ec681f3Smrg                return width ? 32 : 8;
827ec681f3Smrg        case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
837ec681f3Smrg                return width ? 64 : 4;
847ec681f3Smrg        case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
857ec681f3Smrg                return plane ? (width ? 64 : 4) : (width ? 32 : 8);
867ec681f3Smrg        default:
877ec681f3Smrg                unreachable("Invalid AFBC block size");
887ec681f3Smrg        }
897ec681f3Smrg}
907ec681f3Smrg
917ec681f3Smrg/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
927ec681f3Smrg * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
937ec681f3Smrg * This feature is also known as "transaction elimination". */
947ec681f3Smrg
957ec681f3Smrg#define CHECKSUM_TILE_WIDTH 16
967ec681f3Smrg#define CHECKSUM_TILE_HEIGHT 16
977ec681f3Smrg#define CHECKSUM_BYTES_PER_TILE 8
987ec681f3Smrg
997ec681f3Smrgunsigned
1007ec681f3Smrgpanfrost_compute_checksum_size(
1017ec681f3Smrg        struct pan_image_slice_layout *slice,
1027ec681f3Smrg        unsigned width,
1037ec681f3Smrg        unsigned height)
1047ec681f3Smrg{
1057ec681f3Smrg        unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
1067ec681f3Smrg        unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
1077ec681f3Smrg
1087ec681f3Smrg        slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
1097ec681f3Smrg
1107ec681f3Smrg        return slice->crc.stride * tile_count_y;
1117ec681f3Smrg}
1127ec681f3Smrg
1137ec681f3Smrgunsigned
1147ec681f3Smrgpanfrost_get_layer_stride(const struct pan_image_layout *layout,
1157ec681f3Smrg                          unsigned level)
1167ec681f3Smrg{
1177ec681f3Smrg        if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
1187ec681f3Smrg                return layout->array_stride;
1197ec681f3Smrg        else if (drm_is_afbc(layout->modifier))
1207ec681f3Smrg                return layout->slices[level].afbc.surface_stride;
1217ec681f3Smrg        else
1227ec681f3Smrg                return layout->slices[level].surface_stride;
1237ec681f3Smrg}
1247ec681f3Smrg
1257ec681f3Smrg/* Computes the offset into a texture at a particular level/face. Add to
1267ec681f3Smrg * the base address of a texture to get the address to that level/face */
1277ec681f3Smrg
1287ec681f3Smrgunsigned
1297ec681f3Smrgpanfrost_texture_offset(const struct pan_image_layout *layout,
1307ec681f3Smrg                        unsigned level, unsigned array_idx,
1317ec681f3Smrg                        unsigned surface_idx)
1327ec681f3Smrg{
1337ec681f3Smrg        return layout->slices[level].offset +
1347ec681f3Smrg               (array_idx * layout->array_stride) +
1357ec681f3Smrg               (surface_idx * layout->slices[level].surface_stride);
1367ec681f3Smrg}
1377ec681f3Smrg
1387ec681f3Smrgbool
1397ec681f3Smrgpan_image_layout_init(const struct panfrost_device *dev,
1407ec681f3Smrg                      struct pan_image_layout *layout,
1417ec681f3Smrg                      uint64_t modifier,
1427ec681f3Smrg                      enum pipe_format format,
1437ec681f3Smrg                      enum mali_texture_dimension dim,
1447ec681f3Smrg                      unsigned width, unsigned height, unsigned depth,
1457ec681f3Smrg                      unsigned array_size, unsigned nr_samples,
1467ec681f3Smrg                      unsigned nr_slices, enum pan_image_crc_mode crc_mode,
1477ec681f3Smrg                      const struct pan_image_explicit_layout *explicit_layout)
1487ec681f3Smrg{
1497ec681f3Smrg        /* Explicit stride only work with non-mipmap, non-array; single-sample
1507ec681f3Smrg         * 2D image, and in-band CRC can't be used.
1517ec681f3Smrg         */
1527ec681f3Smrg        if (explicit_layout &&
1537ec681f3Smrg	    (depth > 1 || nr_samples > 1 || array_size > 1 ||
1547ec681f3Smrg             dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 ||
1557ec681f3Smrg             crc_mode == PAN_IMAGE_CRC_INBAND))
1567ec681f3Smrg                return false;
1577ec681f3Smrg
1587ec681f3Smrg        /* Mandate 64 byte alignement */
1597ec681f3Smrg        if (explicit_layout && (explicit_layout->offset & 63))
1607ec681f3Smrg                return false;
1617ec681f3Smrg
1627ec681f3Smrg        layout->crc_mode = crc_mode;
1637ec681f3Smrg        layout->modifier = modifier;
1647ec681f3Smrg        layout->format = format;
1657ec681f3Smrg        layout->dim = dim;
1667ec681f3Smrg        layout->width = width;
1677ec681f3Smrg        layout->height = height;
1687ec681f3Smrg        layout->depth = depth;
1697ec681f3Smrg        layout->array_size = array_size;
1707ec681f3Smrg        layout->nr_samples = nr_samples;
1717ec681f3Smrg        layout->nr_slices = nr_slices;
1727ec681f3Smrg
1737ec681f3Smrg        unsigned bytes_per_pixel = util_format_get_blocksize(format);
1747ec681f3Smrg
1757ec681f3Smrg        /* MSAA is implemented as a 3D texture with z corresponding to the
1767ec681f3Smrg         * sample #, horrifyingly enough */
1777ec681f3Smrg
1787ec681f3Smrg        assert(depth == 1 || nr_samples == 1);
1797ec681f3Smrg
1807ec681f3Smrg        bool afbc = drm_is_afbc(layout->modifier);
1817ec681f3Smrg        bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED;
1827ec681f3Smrg        bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
1837ec681f3Smrg        bool should_align = tiled || afbc;
1847ec681f3Smrg        bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;
1857ec681f3Smrg
1867ec681f3Smrg        unsigned oob_crc_offset = 0;
1877ec681f3Smrg        unsigned offset = explicit_layout ? explicit_layout->offset : 0;
1887ec681f3Smrg        unsigned tile_h = 1, tile_w = 1, tile_shift = 0;
1897ec681f3Smrg
1907ec681f3Smrg        if (tiled || afbc) {
1917ec681f3Smrg                tile_w = panfrost_block_dim(layout->modifier, true, 0);
1927ec681f3Smrg                tile_h = panfrost_block_dim(layout->modifier, false, 0);
1937ec681f3Smrg                if (util_format_is_compressed(format))
1947ec681f3Smrg                        tile_shift = 2;
1957ec681f3Smrg        }
1967ec681f3Smrg
1977ec681f3Smrg        for (unsigned l = 0; l < nr_slices; ++l) {
1987ec681f3Smrg                struct pan_image_slice_layout *slice = &layout->slices[l];
1997ec681f3Smrg
2007ec681f3Smrg                unsigned effective_width = width;
2017ec681f3Smrg                unsigned effective_height = height;
2027ec681f3Smrg                unsigned effective_depth = depth;
2037ec681f3Smrg
2047ec681f3Smrg                if (should_align) {
2057ec681f3Smrg                        effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift;
2067ec681f3Smrg                        effective_height = ALIGN_POT(effective_height, tile_h);
2077ec681f3Smrg
2087ec681f3Smrg                        /* We don't need to align depth */
2097ec681f3Smrg                }
2107ec681f3Smrg
2117ec681f3Smrg                /* Align levels to cache-line as a performance improvement for
2127ec681f3Smrg                 * linear/tiled and as a requirement for AFBC */
2137ec681f3Smrg
2147ec681f3Smrg                offset = ALIGN_POT(offset, 64);
2157ec681f3Smrg
2167ec681f3Smrg                slice->offset = offset;
2177ec681f3Smrg
2187ec681f3Smrg                /* Compute the would-be stride */
2197ec681f3Smrg                unsigned stride = bytes_per_pixel * effective_width;
2207ec681f3Smrg
2217ec681f3Smrg                if (explicit_layout) {
2227ec681f3Smrg                        /* Make sure the explicit stride is valid */
2237ec681f3Smrg                        if (explicit_layout->line_stride < stride)
2247ec681f3Smrg                                return false;
2257ec681f3Smrg
2267ec681f3Smrg                        stride = explicit_layout->line_stride;
2277ec681f3Smrg                } else if (linear) {
2287ec681f3Smrg                        /* Keep lines alignment on 64 byte for performance */
2297ec681f3Smrg                        stride = ALIGN_POT(stride, 64);
2307ec681f3Smrg                }
2317ec681f3Smrg
2327ec681f3Smrg                slice->line_stride = stride;
2337ec681f3Smrg                slice->row_stride = stride * (tile_h >> tile_shift);
2347ec681f3Smrg
2357ec681f3Smrg                unsigned slice_one_size = slice->line_stride * effective_height;
2367ec681f3Smrg
2377ec681f3Smrg                /* Compute AFBC sizes if necessary */
2387ec681f3Smrg                if (afbc) {
2397ec681f3Smrg                        slice->afbc.header_size =
2407ec681f3Smrg                                panfrost_afbc_header_size(width, height);
2417ec681f3Smrg
2427ec681f3Smrg                        /* Stride between two rows of AFBC headers */
2437ec681f3Smrg                        slice->afbc.row_stride =
2447ec681f3Smrg                                (effective_width / tile_w) *
2457ec681f3Smrg                                AFBC_HEADER_BYTES_PER_TILE;
2467ec681f3Smrg
2477ec681f3Smrg                        /* AFBC body size */
2487ec681f3Smrg                        slice->afbc.body_size = slice_one_size;
2497ec681f3Smrg
2507ec681f3Smrg                        /* 3D AFBC resources have all headers placed at the
2517ec681f3Smrg                         * beginning instead of having them split per depth
2527ec681f3Smrg                         * level
2537ec681f3Smrg                         */
2547ec681f3Smrg                        if (is_3d) {
2557ec681f3Smrg                                slice->afbc.surface_stride =
2567ec681f3Smrg                                        slice->afbc.header_size;
2577ec681f3Smrg                                slice->afbc.header_size *= effective_depth;
2587ec681f3Smrg                                slice->afbc.body_size *= effective_depth;
2597ec681f3Smrg                                offset += slice->afbc.header_size;
2607ec681f3Smrg                        } else {
2617ec681f3Smrg                                slice_one_size += slice->afbc.header_size;
2627ec681f3Smrg                                slice->afbc.surface_stride = slice_one_size;
2637ec681f3Smrg                        }
2647ec681f3Smrg                }
2657ec681f3Smrg
2667ec681f3Smrg                unsigned slice_full_size =
2677ec681f3Smrg                        slice_one_size * effective_depth * nr_samples;
2687ec681f3Smrg
2697ec681f3Smrg                slice->surface_stride = slice_one_size;
2707ec681f3Smrg
2717ec681f3Smrg                /* Compute AFBC sizes if necessary */
2727ec681f3Smrg
2737ec681f3Smrg                offset += slice_full_size;
2747ec681f3Smrg                slice->size = slice_full_size;
2757ec681f3Smrg
2767ec681f3Smrg                /* Add a checksum region if necessary */
2777ec681f3Smrg                if (crc_mode != PAN_IMAGE_CRC_NONE) {
2787ec681f3Smrg                        slice->crc.size =
2797ec681f3Smrg                                panfrost_compute_checksum_size(slice, width, height);
2807ec681f3Smrg
2817ec681f3Smrg                        if (crc_mode == PAN_IMAGE_CRC_INBAND) {
2827ec681f3Smrg                                slice->crc.offset = offset;
2837ec681f3Smrg                                offset += slice->crc.size;
2847ec681f3Smrg                                slice->size += slice->crc.size;
2857ec681f3Smrg                        } else {
2867ec681f3Smrg                                slice->crc.offset = oob_crc_offset;
2877ec681f3Smrg                                oob_crc_offset += slice->crc.size;
2887ec681f3Smrg                        }
2897ec681f3Smrg                }
2907ec681f3Smrg
2917ec681f3Smrg                width = u_minify(width, 1);
2927ec681f3Smrg                height = u_minify(height, 1);
2937ec681f3Smrg                depth = u_minify(depth, 1);
2947ec681f3Smrg        }
2957ec681f3Smrg
2967ec681f3Smrg        /* Arrays and cubemaps have the entire miptree duplicated */
2977ec681f3Smrg        layout->array_stride = ALIGN_POT(offset, 64);
2987ec681f3Smrg        if (explicit_layout)
2997ec681f3Smrg                layout->data_size = offset;
3007ec681f3Smrg        else
3017ec681f3Smrg                layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096);
3027ec681f3Smrg        layout->crc_size = oob_crc_offset;
3037ec681f3Smrg
3047ec681f3Smrg        return true;
3057ec681f3Smrg}
3067ec681f3Smrg
3077ec681f3Smrgvoid
3087ec681f3Smrgpan_iview_get_surface(const struct pan_image_view *iview,
3097ec681f3Smrg                      unsigned level, unsigned layer, unsigned sample,
3107ec681f3Smrg                      struct pan_surface *surf)
3117ec681f3Smrg{
3127ec681f3Smrg        level += iview->first_level;
3137ec681f3Smrg        assert(level < iview->image->layout.nr_slices);
3147ec681f3Smrg
3157ec681f3Smrg       layer += iview->first_layer;
3167ec681f3Smrg
3177ec681f3Smrg        bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
3187ec681f3Smrg        const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
3197ec681f3Smrg        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
3207ec681f3Smrg
3217ec681f3Smrg        if (drm_is_afbc(iview->image->layout.modifier)) {
3227ec681f3Smrg                assert(!sample);
3237ec681f3Smrg
3247ec681f3Smrg                if (is_3d) {
3257ec681f3Smrg                        ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
3267ec681f3Smrg                        assert(layer < depth);
3277ec681f3Smrg                        surf->afbc.header = base + slice->offset +
3287ec681f3Smrg                                           (layer * slice->afbc.surface_stride);
3297ec681f3Smrg                        surf->afbc.body = base + slice->offset +
3307ec681f3Smrg                                          slice->afbc.header_size +
3317ec681f3Smrg                                          (slice->surface_stride * layer);
3327ec681f3Smrg                } else {
3337ec681f3Smrg                        assert(layer < iview->image->layout.array_size);
3347ec681f3Smrg                        surf->afbc.header = base +
3357ec681f3Smrg                                            panfrost_texture_offset(&iview->image->layout,
3367ec681f3Smrg                                                                    level, layer, 0);
3377ec681f3Smrg                        surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
3387ec681f3Smrg                }
3397ec681f3Smrg        } else {
3407ec681f3Smrg                unsigned array_idx = is_3d ? 0 : layer;
3417ec681f3Smrg                unsigned surface_idx = is_3d ? layer : sample;
3427ec681f3Smrg
3437ec681f3Smrg                surf->data = base +
3447ec681f3Smrg                             panfrost_texture_offset(&iview->image->layout, level,
3457ec681f3Smrg                                                     array_idx, surface_idx);
3467ec681f3Smrg        }
3477ec681f3Smrg}
3487ec681f3Smrg
3497ec681f3Smrg#else /* ifndef PAN_ARCH */
3507ec681f3Smrg
3517ec681f3Smrg#if PAN_ARCH >= 5
3527ec681f3Smrg/* Arm Scalable Texture Compression (ASTC) corresponds to just a few formats.
3537ec681f3Smrg * The block dimension is not part of the format. Instead, it is encoded as a
3547ec681f3Smrg * 6-bit tag on the payload pointer. Map the block size for a single dimension.
3557ec681f3Smrg */
3567ec681f3Smrg
3577ec681f3Smrgstatic inline enum mali_astc_2d_dimension
3587ec681f3Smrgpanfrost_astc_dim_2d(unsigned dim)
3597ec681f3Smrg{
3607ec681f3Smrg        switch (dim) {
3617ec681f3Smrg        case  4: return MALI_ASTC_2D_DIMENSION_4;
3627ec681f3Smrg        case  5: return MALI_ASTC_2D_DIMENSION_5;
3637ec681f3Smrg        case  6: return MALI_ASTC_2D_DIMENSION_6;
3647ec681f3Smrg        case  8: return MALI_ASTC_2D_DIMENSION_8;
3657ec681f3Smrg        case 10: return MALI_ASTC_2D_DIMENSION_10;
3667ec681f3Smrg        case 12: return MALI_ASTC_2D_DIMENSION_12;
3677ec681f3Smrg        default: unreachable("Invalid ASTC dimension");
3687ec681f3Smrg        }
3697ec681f3Smrg}
3707ec681f3Smrg
3717ec681f3Smrgstatic inline enum mali_astc_3d_dimension
3727ec681f3Smrgpanfrost_astc_dim_3d(unsigned dim)
3737ec681f3Smrg{
3747ec681f3Smrg        switch (dim) {
3757ec681f3Smrg        case  3: return MALI_ASTC_3D_DIMENSION_3;
3767ec681f3Smrg        case  4: return MALI_ASTC_3D_DIMENSION_4;
3777ec681f3Smrg        case  5: return MALI_ASTC_3D_DIMENSION_5;
3787ec681f3Smrg        case  6: return MALI_ASTC_3D_DIMENSION_6;
3797ec681f3Smrg        default: unreachable("Invalid ASTC dimension");
3807ec681f3Smrg        }
3817ec681f3Smrg}
3827ec681f3Smrg
3837ec681f3Smrg/* Texture addresses are tagged with information about compressed formats.
3847ec681f3Smrg * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
3857ec681f3Smrg * RGBA only).
3867ec681f3Smrg * For ASTC, this is a "stretch factor" encoding the block size. */
3877ec681f3Smrg
3887ec681f3Smrgstatic unsigned
3897ec681f3Smrgpanfrost_compression_tag(const struct util_format_description *desc,
3907ec681f3Smrg                         enum mali_texture_dimension dim,
3917ec681f3Smrg                         uint64_t modifier)
3927ec681f3Smrg{
3937ec681f3Smrg        if (drm_is_afbc(modifier)) {
3947ec681f3Smrg                unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ?
3957ec681f3Smrg                                 MALI_AFBC_SURFACE_FLAG_YTR : 0;
3967ec681f3Smrg
3977ec681f3Smrg#if PAN_ARCH >= 6
3987ec681f3Smrg                /* Prefetch enable */
3997ec681f3Smrg                flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH;
4007ec681f3Smrg
4017ec681f3Smrg                /* Wide blocks (> 16x16) */
4027ec681f3Smrg                if (panfrost_block_dim(modifier, true, 0) > 16)
4037ec681f3Smrg                        flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK;
4047ec681f3Smrg
4057ec681f3Smrg                /* Used to make sure AFBC headers don't point outside the AFBC
4067ec681f3Smrg                 * body. HW is using the AFBC surface stride to do this check,
4077ec681f3Smrg                 * which doesn't work for 3D textures because the surface
4087ec681f3Smrg                 * stride does not cover the body. Only supported on v7+.
4097ec681f3Smrg                 */
4107ec681f3Smrg#endif
4117ec681f3Smrg
4127ec681f3Smrg#if PAN_ARCH >= 7
4137ec681f3Smrg                if (dim != MALI_TEXTURE_DIMENSION_3D)
4147ec681f3Smrg                        flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE;
4157ec681f3Smrg#endif
4167ec681f3Smrg
4177ec681f3Smrg                return flags;
4187ec681f3Smrg        } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
4197ec681f3Smrg                if (desc->block.depth > 1) {
4207ec681f3Smrg                        return (panfrost_astc_dim_3d(desc->block.depth) << 4) |
4217ec681f3Smrg                               (panfrost_astc_dim_3d(desc->block.height) << 2) |
4227ec681f3Smrg                                panfrost_astc_dim_3d(desc->block.width);
4237ec681f3Smrg                } else {
4247ec681f3Smrg                        return (panfrost_astc_dim_2d(desc->block.height) << 3) |
4257ec681f3Smrg                                panfrost_astc_dim_2d(desc->block.width);
4267ec681f3Smrg                }
4277ec681f3Smrg        } else {
4287ec681f3Smrg                return 0;
4297ec681f3Smrg        }
4307ec681f3Smrg}
4317ec681f3Smrg#endif
4327ec681f3Smrg
/* Cubemaps store their 6 faces as consecutive "layers", so a layer range of
 * a cube view mixes faces and actual layers. Split it: on return the face
 * range holds the layer indices modulo 6 and the layer range holds them
 * divided by 6. TODO: the logic is wrong in the asserted-out cases ... can
 * they happen, perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned raw_first = *first_layer;
        const unsigned raw_last = *last_layer;

        *first_face = raw_first % 6;
        *last_face = raw_last % 6;
        *first_layer = raw_first / 6;
        *last_layer = raw_last / 6;

        /* Either the range stays within one cube, or it covers whole cubes */
        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}
4497ec681f3Smrg
/* A texture descriptor is followed by a number of pointers. This returns how
 * many: one per (level, layer, face, sample) combination in the given
 * inclusive ranges, doubled when each pointer is accompanied by a manual
 * stride. A sample count of 0 is counted as 1. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        /* For cubes, split the layer range into faces and real layers */
        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                                &first_layer, &last_layer);
        }

        /* All ranges are inclusive, hence the "1 +" */
        unsigned count = 1 + last_level - first_level;

        count *= 1 + last_layer - first_layer;
        count *= 1 + last_face - first_face;
        count *= MAX2(nr_samples, 1);

        return manual_stride ? count * 2 : count;
}
4767ec681f3Smrg
4777ec681f3Smrg/* Conservative estimate of the size of the texture payload a priori.
4787ec681f3Smrg * Average case, size equal to the actual size. Worst case, off by 2x (if
4797ec681f3Smrg * a manual stride is not needed on a linear texture). Returned value
4807ec681f3Smrg * must be greater than or equal to the actual size, so it's safe to use
4817ec681f3Smrg * as an allocation amount */
4827ec681f3Smrg
4837ec681f3Smrgunsigned
4847ec681f3SmrgGENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview)
4857ec681f3Smrg{
4867ec681f3Smrg        /* Assume worst case */
4877ec681f3Smrg        unsigned manual_stride = PAN_ARCH >= 6 ||
4887ec681f3Smrg                                 (iview->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
4897ec681f3Smrg
4907ec681f3Smrg        unsigned elements =
4917ec681f3Smrg                panfrost_texture_num_elements(iview->first_level, iview->last_level,
4927ec681f3Smrg                                              iview->first_layer, iview->last_layer,
4937ec681f3Smrg                                              iview->image->layout.nr_samples,
4947ec681f3Smrg                                              iview->dim == MALI_TEXTURE_DIMENSION_CUBE,
4957ec681f3Smrg                                              manual_stride);
4967ec681f3Smrg
4977ec681f3Smrg        return sizeof(mali_ptr) * elements;
4987ec681f3Smrg}
4997ec681f3Smrg
/* Cursor over the surfaces of a texture view. Each of level, face and sample
 * has a current value plus the inclusive [first, last] range it wraps
 * around in; layer only has a current value and an inclusive upper bound, as
 * the iteration is over once it goes past last_layer. */
struct panfrost_surface_iter {
        unsigned layer, last_layer;
        unsigned level, first_level, last_level;
        unsigned face, first_face, last_face;
        unsigned sample, first_sample, last_sample;
};

/* Positions the iterator on the first surface of the given inclusive
 * layer/level/face ranges. Samples always run from 0 to nr_samples - 1. */
static void
panfrost_surface_iter_begin(struct panfrost_surface_iter *iter,
                            unsigned first_layer, unsigned last_layer,
                            unsigned first_level, unsigned last_level,
                            unsigned first_face, unsigned last_face,
                            unsigned nr_samples)
{
        iter->layer = first_layer;
        iter->last_layer = last_layer;
        iter->level = iter->first_level = first_level;
        iter->last_level = last_level;
        iter->face = iter->first_face = first_face;
        iter->last_face = last_face;
        iter->sample = iter->first_sample = 0;

        /* Treat a sample count of 0 as single-sampled, the same way
         * panfrost_texture_num_elements() clamps it when sizing the payload.
         * Without the clamp, nr_samples - 1 would wrap around to UINT_MAX
         * and the iteration would run far past the sized payload. */
        iter->last_sample = nr_samples ? nr_samples - 1 : 0;
}
5237ec681f3Smrg
5247ec681f3Smrgstatic bool
5257ec681f3Smrgpanfrost_surface_iter_end(const struct panfrost_surface_iter *iter)
5267ec681f3Smrg{
5277ec681f3Smrg        return iter->layer > iter->last_layer;
5287ec681f3Smrg}
5297ec681f3Smrg
5307ec681f3Smrgstatic void
5317ec681f3Smrgpanfrost_surface_iter_next(struct panfrost_surface_iter *iter)
5327ec681f3Smrg{
5337ec681f3Smrg#define INC_TEST(field) \
5347ec681f3Smrg        do { \
5357ec681f3Smrg                if (iter->field++ < iter->last_ ## field) \
5367ec681f3Smrg                       return; \
5377ec681f3Smrg                iter->field = iter->first_ ## field; \
5387ec681f3Smrg        } while (0)
5397ec681f3Smrg
5407ec681f3Smrg        /* Ordering is different on v7: inner loop is iterating on levels */
5417ec681f3Smrg        if (PAN_ARCH >= 7)
5427ec681f3Smrg                INC_TEST(level);
5437ec681f3Smrg
5447ec681f3Smrg        INC_TEST(sample);
5457ec681f3Smrg        INC_TEST(face);
5467ec681f3Smrg
5477ec681f3Smrg        if (PAN_ARCH < 7)
5487ec681f3Smrg                INC_TEST(level);
5497ec681f3Smrg
5507ec681f3Smrg        iter->layer++;
5517ec681f3Smrg
5527ec681f3Smrg#undef INC_TEST
5537ec681f3Smrg}
5547ec681f3Smrg
5557ec681f3Smrgstatic void
5567ec681f3Smrgpanfrost_get_surface_strides(const struct pan_image_layout *layout,
5577ec681f3Smrg                             unsigned l,
5587ec681f3Smrg                             int32_t *row_stride, int32_t *surf_stride)
5597ec681f3Smrg{
5607ec681f3Smrg        const struct pan_image_slice_layout *slice = &layout->slices[l];
5617ec681f3Smrg
5627ec681f3Smrg        if (drm_is_afbc(layout->modifier)) {
5637ec681f3Smrg                /* Pre v7 don't have a row stride field. This field is
5647ec681f3Smrg                 * repurposed as a Y offset which we don't use */
5657ec681f3Smrg                *row_stride = PAN_ARCH < 7 ? 0 : slice->afbc.row_stride;
5667ec681f3Smrg                *surf_stride = slice->afbc.surface_stride;
5677ec681f3Smrg        } else {
5687ec681f3Smrg                *row_stride = slice->row_stride;
5697ec681f3Smrg                *surf_stride = slice->surface_stride;
5707ec681f3Smrg        }
5717ec681f3Smrg}
5727ec681f3Smrg
5737ec681f3Smrgstatic mali_ptr
5747ec681f3Smrgpanfrost_get_surface_pointer(const struct pan_image_layout *layout,
5757ec681f3Smrg                             enum mali_texture_dimension dim,
5767ec681f3Smrg                             mali_ptr base,
5777ec681f3Smrg                             unsigned l, unsigned w, unsigned f, unsigned s)
5787ec681f3Smrg{
5797ec681f3Smrg        unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1;
5807ec681f3Smrg        unsigned offset;
5817ec681f3Smrg
5827ec681f3Smrg        if (layout->dim == MALI_TEXTURE_DIMENSION_3D) {
5837ec681f3Smrg                assert(!f && !s);
5847ec681f3Smrg                offset = layout->slices[l].offset +
5857ec681f3Smrg                         (w * panfrost_get_layer_stride(layout, l));
5867ec681f3Smrg        } else {
5877ec681f3Smrg                offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s);
5887ec681f3Smrg        }
5897ec681f3Smrg
5907ec681f3Smrg        return base + offset;
5917ec681f3Smrg}
5927ec681f3Smrg
5937ec681f3Smrgstatic void
5947ec681f3Smrgpanfrost_emit_texture_payload(const struct pan_image_view *iview,
5957ec681f3Smrg                              enum pipe_format format,
5967ec681f3Smrg                              bool manual_stride,
5977ec681f3Smrg                              void *payload)
5987ec681f3Smrg{
5997ec681f3Smrg        const struct pan_image_layout *layout = &iview->image->layout;
6007ec681f3Smrg        ASSERTED const struct util_format_description *desc =
6017ec681f3Smrg                util_format_description(format);
6027ec681f3Smrg
6037ec681f3Smrg        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
6047ec681f3Smrg
6057ec681f3Smrg        if (iview->buf.size) {
6067ec681f3Smrg                assert (iview->dim == MALI_TEXTURE_DIMENSION_1D);
6077ec681f3Smrg                base += iview->buf.offset;
6087ec681f3Smrg        }
6097ec681f3Smrg
6107ec681f3Smrg#if PAN_ARCH >= 5
6117ec681f3Smrg        /* panfrost_compression_tag() wants the dimension of the resource, not the
6127ec681f3Smrg         * one of the image view (those might differ).
6137ec681f3Smrg         */
6147ec681f3Smrg        base |= panfrost_compression_tag(desc, layout->dim, layout->modifier);
6157ec681f3Smrg#else
6167ec681f3Smrg        assert(!drm_is_afbc(layout->modifier) && "no AFBC on v4");
6177ec681f3Smrg        assert(desc->layout != UTIL_FORMAT_LAYOUT_ASTC && "no ASTC on v4");
6187ec681f3Smrg#endif
6197ec681f3Smrg
6207ec681f3Smrg        /* Inject the addresses in, interleaving array indices, mip levels,
6217ec681f3Smrg         * cube faces, and strides in that order */
6227ec681f3Smrg
6237ec681f3Smrg        unsigned first_layer = iview->first_layer, last_layer = iview->last_layer;
6247ec681f3Smrg        unsigned nr_samples = layout->nr_samples;
6257ec681f3Smrg        unsigned first_face = 0, last_face = 0;
6267ec681f3Smrg
6277ec681f3Smrg        if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
6287ec681f3Smrg                panfrost_adjust_cube_dimensions(&first_face, &last_face,
6297ec681f3Smrg                                                &first_layer, &last_layer);
6307ec681f3Smrg        }
6317ec681f3Smrg
6327ec681f3Smrg        struct panfrost_surface_iter iter;
6337ec681f3Smrg
6347ec681f3Smrg        for (panfrost_surface_iter_begin(&iter, first_layer, last_layer,
6357ec681f3Smrg                                         iview->first_level, iview->last_level,
6367ec681f3Smrg                                         first_face, last_face, nr_samples);
6377ec681f3Smrg             !panfrost_surface_iter_end(&iter);
6387ec681f3Smrg             panfrost_surface_iter_next(&iter)) {
6397ec681f3Smrg                mali_ptr pointer =
6407ec681f3Smrg                        panfrost_get_surface_pointer(layout, iview->dim, base,
6417ec681f3Smrg                                                     iter.level, iter.layer,
6427ec681f3Smrg                                                     iter.face, iter.sample);
6437ec681f3Smrg
6447ec681f3Smrg                if (!manual_stride) {
6457ec681f3Smrg                        pan_pack(payload, SURFACE, cfg) {
6467ec681f3Smrg                                cfg.pointer = pointer;
6477ec681f3Smrg                        }
6487ec681f3Smrg                        payload += pan_size(SURFACE);
6497ec681f3Smrg                } else {
6507ec681f3Smrg                        pan_pack(payload, SURFACE_WITH_STRIDE, cfg) {
6517ec681f3Smrg                                cfg.pointer = pointer;
6527ec681f3Smrg                                panfrost_get_surface_strides(layout, iter.level,
6537ec681f3Smrg                                                             &cfg.row_stride,
6547ec681f3Smrg                                                             &cfg.surface_stride);
6557ec681f3Smrg                        }
6567ec681f3Smrg                        payload += pan_size(SURFACE_WITH_STRIDE);
6577ec681f3Smrg                }
6587ec681f3Smrg        }
6597ec681f3Smrg}
6607ec681f3Smrg
6617ec681f3Smrg/* Check if we need to set a custom stride by computing the "expected"
6627ec681f3Smrg * stride and comparing it to what the user actually wants. Only applies
6637ec681f3Smrg * to linear textures, since tiled/compressed textures have strict
6647ec681f3Smrg * alignment requirements for their strides as it is */
6657ec681f3Smrg
6667ec681f3Smrgstatic bool
6677ec681f3Smrgpanfrost_needs_explicit_stride(const struct pan_image_view *iview)
6687ec681f3Smrg{
6697ec681f3Smrg        /* Stride is explicit on Bifrost */
6707ec681f3Smrg        if (PAN_ARCH >= 6)
6717ec681f3Smrg                return true;
6727ec681f3Smrg
6737ec681f3Smrg        if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR)
6747ec681f3Smrg                return false;
6757ec681f3Smrg
6767ec681f3Smrg        unsigned bytes_per_block = util_format_get_blocksize(iview->format);
6777ec681f3Smrg        unsigned block_w = util_format_get_blockwidth(iview->format);
6787ec681f3Smrg
6797ec681f3Smrg        for (unsigned l = iview->first_level; l <= iview->last_level; ++l) {
6807ec681f3Smrg                unsigned actual = iview->image->layout.slices[l].line_stride;
6817ec681f3Smrg                unsigned expected =
6827ec681f3Smrg                        DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) *
6837ec681f3Smrg                        bytes_per_block;
6847ec681f3Smrg
6857ec681f3Smrg                if (actual != expected)
6867ec681f3Smrg                        return true;
6877ec681f3Smrg        }
6887ec681f3Smrg
6897ec681f3Smrg        return false;
6907ec681f3Smrg}
6917ec681f3Smrg
6927ec681f3Smrg/* Map modifiers to mali_texture_layout for packing in a texture descriptor */
6937ec681f3Smrg
6947ec681f3Smrgstatic enum mali_texture_layout
6957ec681f3Smrgpanfrost_modifier_to_layout(uint64_t modifier)
6967ec681f3Smrg{
6977ec681f3Smrg        if (drm_is_afbc(modifier))
6987ec681f3Smrg                return MALI_TEXTURE_LAYOUT_AFBC;
6997ec681f3Smrg        else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
7007ec681f3Smrg                return MALI_TEXTURE_LAYOUT_TILED;
7017ec681f3Smrg        else if (modifier == DRM_FORMAT_MOD_LINEAR)
7027ec681f3Smrg                return MALI_TEXTURE_LAYOUT_LINEAR;
7037ec681f3Smrg        else
7047ec681f3Smrg                unreachable("Invalid modifer");
7057ec681f3Smrg}
7067ec681f3Smrg
7077ec681f3Smrgvoid
7087ec681f3SmrgGENX(panfrost_new_texture)(const struct panfrost_device *dev,
7097ec681f3Smrg                           const struct pan_image_view *iview,
7107ec681f3Smrg                           void *out, const struct panfrost_ptr *payload)
7117ec681f3Smrg{
7127ec681f3Smrg        const struct pan_image_layout *layout = &iview->image->layout;
7137ec681f3Smrg        enum pipe_format format = iview->format;
7147ec681f3Smrg        unsigned swizzle;
7157ec681f3Smrg
7167ec681f3Smrg        if (PAN_ARCH == 7 && util_format_is_depth_or_stencil(format)) {
7177ec681f3Smrg                /* v7 doesn't have an _RRRR component order, combine the
7187ec681f3Smrg                 * user swizzle with a .XXXX swizzle to emulate that.
7197ec681f3Smrg                 */
7207ec681f3Smrg                static const unsigned char replicate_x[4] = {
7217ec681f3Smrg                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
7227ec681f3Smrg                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
7237ec681f3Smrg                };
7247ec681f3Smrg                unsigned char patched_swizzle[4];
7257ec681f3Smrg
7267ec681f3Smrg                util_format_compose_swizzles(replicate_x,
7277ec681f3Smrg                                             iview->swizzle,
7287ec681f3Smrg                                             patched_swizzle);
7297ec681f3Smrg                swizzle = panfrost_translate_swizzle_4(patched_swizzle);
7307ec681f3Smrg        } else {
7317ec681f3Smrg                swizzle = panfrost_translate_swizzle_4(iview->swizzle);
7327ec681f3Smrg        }
7337ec681f3Smrg
7347ec681f3Smrg        bool manual_stride =
7357ec681f3Smrg                panfrost_needs_explicit_stride(iview);
7367ec681f3Smrg
7377ec681f3Smrg        panfrost_emit_texture_payload(iview, format,
7387ec681f3Smrg                                      manual_stride,
7397ec681f3Smrg                                      payload->cpu);
7407ec681f3Smrg
7417ec681f3Smrg        unsigned array_size = iview->last_layer - iview->first_layer + 1;
7427ec681f3Smrg
7437ec681f3Smrg        if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
7447ec681f3Smrg                assert(iview->first_layer % 6 == 0);
7457ec681f3Smrg                assert(iview->last_layer % 6 == 5);
7467ec681f3Smrg                array_size /=  6;
7477ec681f3Smrg        }
7487ec681f3Smrg
7497ec681f3Smrg        unsigned width;
7507ec681f3Smrg
7517ec681f3Smrg        if (iview->buf.size) {
7527ec681f3Smrg                assert(iview->dim == MALI_TEXTURE_DIMENSION_1D);
7537ec681f3Smrg                assert(!iview->first_level && !iview->last_level);
7547ec681f3Smrg                assert(!iview->first_layer && !iview->last_layer);
7557ec681f3Smrg                assert(layout->nr_samples == 1);
7567ec681f3Smrg                assert(layout->height == 1 && layout->depth == 1);
7577ec681f3Smrg                assert(iview->buf.offset + iview->buf.size <= layout->width);
7587ec681f3Smrg                width = iview->buf.size;
7597ec681f3Smrg        } else {
7607ec681f3Smrg                width = u_minify(layout->width, iview->first_level);
7617ec681f3Smrg        }
7627ec681f3Smrg
7637ec681f3Smrg        pan_pack(out, TEXTURE, cfg) {
7647ec681f3Smrg                cfg.dimension = iview->dim;
7657ec681f3Smrg                cfg.format = dev->formats[format].hw;
7667ec681f3Smrg                cfg.width = width;
7677ec681f3Smrg                cfg.height = u_minify(layout->height, iview->first_level);
7687ec681f3Smrg                if (iview->dim == MALI_TEXTURE_DIMENSION_3D)
7697ec681f3Smrg                        cfg.depth = u_minify(layout->depth, iview->first_level);
7707ec681f3Smrg                else
7717ec681f3Smrg                        cfg.sample_count = layout->nr_samples;
7727ec681f3Smrg                cfg.swizzle = swizzle;
7737ec681f3Smrg                cfg.texel_ordering =
7747ec681f3Smrg                        panfrost_modifier_to_layout(layout->modifier);
7757ec681f3Smrg                cfg.levels = iview->last_level - iview->first_level + 1;
7767ec681f3Smrg                cfg.array_size = array_size;
7777ec681f3Smrg
7787ec681f3Smrg#if PAN_ARCH >= 6
7797ec681f3Smrg                cfg.surfaces = payload->gpu;
7807ec681f3Smrg
7817ec681f3Smrg                /* We specify API-level LOD clamps in the sampler descriptor
7827ec681f3Smrg                 * and use these clamps simply for bounds checking */
7837ec681f3Smrg                cfg.minimum_lod = FIXED_16(0, false);
7847ec681f3Smrg                cfg.maximum_lod = FIXED_16(cfg.levels - 1, false);
7857ec681f3Smrg#else
7867ec681f3Smrg                cfg.manual_stride = manual_stride;
7877ec681f3Smrg#endif
7887ec681f3Smrg        }
7897ec681f3Smrg}
7907ec681f3Smrg#endif /* ifdef PAN_ARCH */
791