/*
 * Copyright (C) 2008 VMware, Inc.
 * Copyright (C) 2014 Broadcom
 * Copyright (C) 2018-2019 Alyssa Rosenzweig
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "util/macros.h"
#include "util/u_math.h"
#include "pan_texture.h"
#include "panfrost-quirks.h"

#ifndef PAN_ARCH

/* Generates a texture descriptor.
Ideally, descriptors are immutable after the 367ec681f3Smrg * texture is created, so we can keep these hanging around in GPU memory in a 377ec681f3Smrg * dedicated BO and not have to worry. In practice there are some minor gotchas 387ec681f3Smrg * with this (the driver sometimes will change the format of a texture on the 397ec681f3Smrg * fly for compression) but it's fast enough to just regenerate the descriptor 407ec681f3Smrg * in those cases, rather than monkeypatching at drawtime. A texture descriptor 417ec681f3Smrg * consists of a 32-byte header followed by pointers. 427ec681f3Smrg */ 437ec681f3Smrg 447ec681f3Smrg/* List of supported modifiers, in descending order of preference. AFBC is 457ec681f3Smrg * faster than u-interleaved tiling which is faster than linear. Within AFBC, 467ec681f3Smrg * enabling the YUV-like transform is typically a win where possible. */ 477ec681f3Smrg 487ec681f3Smrguint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { 497ec681f3Smrg DRM_FORMAT_MOD_ARM_AFBC( 507ec681f3Smrg AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 517ec681f3Smrg AFBC_FORMAT_MOD_SPARSE | 527ec681f3Smrg AFBC_FORMAT_MOD_YTR), 537ec681f3Smrg 547ec681f3Smrg DRM_FORMAT_MOD_ARM_AFBC( 557ec681f3Smrg AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 567ec681f3Smrg AFBC_FORMAT_MOD_SPARSE), 577ec681f3Smrg 587ec681f3Smrg DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, 597ec681f3Smrg DRM_FORMAT_MOD_LINEAR 607ec681f3Smrg}; 617ec681f3Smrg 627ec681f3Smrg/* If not explicitly, line stride is calculated for block-based formats as 637ec681f3Smrg * (ceil(width / block_width) * block_size). As a special case, this is left 647ec681f3Smrg * zero if there is only a single block vertically. So, we have a helper to 657ec681f3Smrg * extract the dimensions of a block-based format and use that to calculate the 667ec681f3Smrg * line stride as such. 
677ec681f3Smrg */ 687ec681f3Smrg 697ec681f3Smrgunsigned 707ec681f3Smrgpanfrost_block_dim(uint64_t modifier, bool width, unsigned plane) 717ec681f3Smrg{ 727ec681f3Smrg if (!drm_is_afbc(modifier)) { 737ec681f3Smrg assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED); 747ec681f3Smrg return 16; 757ec681f3Smrg } 767ec681f3Smrg 777ec681f3Smrg switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) { 787ec681f3Smrg case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: 797ec681f3Smrg return 16; 807ec681f3Smrg case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8: 817ec681f3Smrg return width ? 32 : 8; 827ec681f3Smrg case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4: 837ec681f3Smrg return width ? 64 : 4; 847ec681f3Smrg case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4: 857ec681f3Smrg return plane ? (width ? 64 : 4) : (width ? 32 : 8); 867ec681f3Smrg default: 877ec681f3Smrg unreachable("Invalid AFBC block size"); 887ec681f3Smrg } 897ec681f3Smrg} 907ec681f3Smrg 917ec681f3Smrg/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. 927ec681f3Smrg * Checksumming is believed to be a CRC variant (CRC64 based on the size?). 937ec681f3Smrg * This feature is also known as "transaction elimination". 
*/ 947ec681f3Smrg 957ec681f3Smrg#define CHECKSUM_TILE_WIDTH 16 967ec681f3Smrg#define CHECKSUM_TILE_HEIGHT 16 977ec681f3Smrg#define CHECKSUM_BYTES_PER_TILE 8 987ec681f3Smrg 997ec681f3Smrgunsigned 1007ec681f3Smrgpanfrost_compute_checksum_size( 1017ec681f3Smrg struct pan_image_slice_layout *slice, 1027ec681f3Smrg unsigned width, 1037ec681f3Smrg unsigned height) 1047ec681f3Smrg{ 1057ec681f3Smrg unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH); 1067ec681f3Smrg unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT); 1077ec681f3Smrg 1087ec681f3Smrg slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; 1097ec681f3Smrg 1107ec681f3Smrg return slice->crc.stride * tile_count_y; 1117ec681f3Smrg} 1127ec681f3Smrg 1137ec681f3Smrgunsigned 1147ec681f3Smrgpanfrost_get_layer_stride(const struct pan_image_layout *layout, 1157ec681f3Smrg unsigned level) 1167ec681f3Smrg{ 1177ec681f3Smrg if (layout->dim != MALI_TEXTURE_DIMENSION_3D) 1187ec681f3Smrg return layout->array_stride; 1197ec681f3Smrg else if (drm_is_afbc(layout->modifier)) 1207ec681f3Smrg return layout->slices[level].afbc.surface_stride; 1217ec681f3Smrg else 1227ec681f3Smrg return layout->slices[level].surface_stride; 1237ec681f3Smrg} 1247ec681f3Smrg 1257ec681f3Smrg/* Computes the offset into a texture at a particular level/face. 
Add to 1267ec681f3Smrg * the base address of a texture to get the address to that level/face */ 1277ec681f3Smrg 1287ec681f3Smrgunsigned 1297ec681f3Smrgpanfrost_texture_offset(const struct pan_image_layout *layout, 1307ec681f3Smrg unsigned level, unsigned array_idx, 1317ec681f3Smrg unsigned surface_idx) 1327ec681f3Smrg{ 1337ec681f3Smrg return layout->slices[level].offset + 1347ec681f3Smrg (array_idx * layout->array_stride) + 1357ec681f3Smrg (surface_idx * layout->slices[level].surface_stride); 1367ec681f3Smrg} 1377ec681f3Smrg 1387ec681f3Smrgbool 1397ec681f3Smrgpan_image_layout_init(const struct panfrost_device *dev, 1407ec681f3Smrg struct pan_image_layout *layout, 1417ec681f3Smrg uint64_t modifier, 1427ec681f3Smrg enum pipe_format format, 1437ec681f3Smrg enum mali_texture_dimension dim, 1447ec681f3Smrg unsigned width, unsigned height, unsigned depth, 1457ec681f3Smrg unsigned array_size, unsigned nr_samples, 1467ec681f3Smrg unsigned nr_slices, enum pan_image_crc_mode crc_mode, 1477ec681f3Smrg const struct pan_image_explicit_layout *explicit_layout) 1487ec681f3Smrg{ 1497ec681f3Smrg /* Explicit stride only work with non-mipmap, non-array; single-sample 1507ec681f3Smrg * 2D image, and in-band CRC can't be used. 
1517ec681f3Smrg */ 1527ec681f3Smrg if (explicit_layout && 1537ec681f3Smrg (depth > 1 || nr_samples > 1 || array_size > 1 || 1547ec681f3Smrg dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 || 1557ec681f3Smrg crc_mode == PAN_IMAGE_CRC_INBAND)) 1567ec681f3Smrg return false; 1577ec681f3Smrg 1587ec681f3Smrg /* Mandate 64 byte alignement */ 1597ec681f3Smrg if (explicit_layout && (explicit_layout->offset & 63)) 1607ec681f3Smrg return false; 1617ec681f3Smrg 1627ec681f3Smrg layout->crc_mode = crc_mode; 1637ec681f3Smrg layout->modifier = modifier; 1647ec681f3Smrg layout->format = format; 1657ec681f3Smrg layout->dim = dim; 1667ec681f3Smrg layout->width = width; 1677ec681f3Smrg layout->height = height; 1687ec681f3Smrg layout->depth = depth; 1697ec681f3Smrg layout->array_size = array_size; 1707ec681f3Smrg layout->nr_samples = nr_samples; 1717ec681f3Smrg layout->nr_slices = nr_slices; 1727ec681f3Smrg 1737ec681f3Smrg unsigned bytes_per_pixel = util_format_get_blocksize(format); 1747ec681f3Smrg 1757ec681f3Smrg /* MSAA is implemented as a 3D texture with z corresponding to the 1767ec681f3Smrg * sample #, horrifyingly enough */ 1777ec681f3Smrg 1787ec681f3Smrg assert(depth == 1 || nr_samples == 1); 1797ec681f3Smrg 1807ec681f3Smrg bool afbc = drm_is_afbc(layout->modifier); 1817ec681f3Smrg bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; 1827ec681f3Smrg bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR; 1837ec681f3Smrg bool should_align = tiled || afbc; 1847ec681f3Smrg bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D; 1857ec681f3Smrg 1867ec681f3Smrg unsigned oob_crc_offset = 0; 1877ec681f3Smrg unsigned offset = explicit_layout ? 
explicit_layout->offset : 0; 1887ec681f3Smrg unsigned tile_h = 1, tile_w = 1, tile_shift = 0; 1897ec681f3Smrg 1907ec681f3Smrg if (tiled || afbc) { 1917ec681f3Smrg tile_w = panfrost_block_dim(layout->modifier, true, 0); 1927ec681f3Smrg tile_h = panfrost_block_dim(layout->modifier, false, 0); 1937ec681f3Smrg if (util_format_is_compressed(format)) 1947ec681f3Smrg tile_shift = 2; 1957ec681f3Smrg } 1967ec681f3Smrg 1977ec681f3Smrg for (unsigned l = 0; l < nr_slices; ++l) { 1987ec681f3Smrg struct pan_image_slice_layout *slice = &layout->slices[l]; 1997ec681f3Smrg 2007ec681f3Smrg unsigned effective_width = width; 2017ec681f3Smrg unsigned effective_height = height; 2027ec681f3Smrg unsigned effective_depth = depth; 2037ec681f3Smrg 2047ec681f3Smrg if (should_align) { 2057ec681f3Smrg effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift; 2067ec681f3Smrg effective_height = ALIGN_POT(effective_height, tile_h); 2077ec681f3Smrg 2087ec681f3Smrg /* We don't need to align depth */ 2097ec681f3Smrg } 2107ec681f3Smrg 2117ec681f3Smrg /* Align levels to cache-line as a performance improvement for 2127ec681f3Smrg * linear/tiled and as a requirement for AFBC */ 2137ec681f3Smrg 2147ec681f3Smrg offset = ALIGN_POT(offset, 64); 2157ec681f3Smrg 2167ec681f3Smrg slice->offset = offset; 2177ec681f3Smrg 2187ec681f3Smrg /* Compute the would-be stride */ 2197ec681f3Smrg unsigned stride = bytes_per_pixel * effective_width; 2207ec681f3Smrg 2217ec681f3Smrg if (explicit_layout) { 2227ec681f3Smrg /* Make sure the explicit stride is valid */ 2237ec681f3Smrg if (explicit_layout->line_stride < stride) 2247ec681f3Smrg return false; 2257ec681f3Smrg 2267ec681f3Smrg stride = explicit_layout->line_stride; 2277ec681f3Smrg } else if (linear) { 2287ec681f3Smrg /* Keep lines alignment on 64 byte for performance */ 2297ec681f3Smrg stride = ALIGN_POT(stride, 64); 2307ec681f3Smrg } 2317ec681f3Smrg 2327ec681f3Smrg slice->line_stride = stride; 2337ec681f3Smrg slice->row_stride = stride * (tile_h >> 
tile_shift); 2347ec681f3Smrg 2357ec681f3Smrg unsigned slice_one_size = slice->line_stride * effective_height; 2367ec681f3Smrg 2377ec681f3Smrg /* Compute AFBC sizes if necessary */ 2387ec681f3Smrg if (afbc) { 2397ec681f3Smrg slice->afbc.header_size = 2407ec681f3Smrg panfrost_afbc_header_size(width, height); 2417ec681f3Smrg 2427ec681f3Smrg /* Stride between two rows of AFBC headers */ 2437ec681f3Smrg slice->afbc.row_stride = 2447ec681f3Smrg (effective_width / tile_w) * 2457ec681f3Smrg AFBC_HEADER_BYTES_PER_TILE; 2467ec681f3Smrg 2477ec681f3Smrg /* AFBC body size */ 2487ec681f3Smrg slice->afbc.body_size = slice_one_size; 2497ec681f3Smrg 2507ec681f3Smrg /* 3D AFBC resources have all headers placed at the 2517ec681f3Smrg * beginning instead of having them split per depth 2527ec681f3Smrg * level 2537ec681f3Smrg */ 2547ec681f3Smrg if (is_3d) { 2557ec681f3Smrg slice->afbc.surface_stride = 2567ec681f3Smrg slice->afbc.header_size; 2577ec681f3Smrg slice->afbc.header_size *= effective_depth; 2587ec681f3Smrg slice->afbc.body_size *= effective_depth; 2597ec681f3Smrg offset += slice->afbc.header_size; 2607ec681f3Smrg } else { 2617ec681f3Smrg slice_one_size += slice->afbc.header_size; 2627ec681f3Smrg slice->afbc.surface_stride = slice_one_size; 2637ec681f3Smrg } 2647ec681f3Smrg } 2657ec681f3Smrg 2667ec681f3Smrg unsigned slice_full_size = 2677ec681f3Smrg slice_one_size * effective_depth * nr_samples; 2687ec681f3Smrg 2697ec681f3Smrg slice->surface_stride = slice_one_size; 2707ec681f3Smrg 2717ec681f3Smrg /* Compute AFBC sizes if necessary */ 2727ec681f3Smrg 2737ec681f3Smrg offset += slice_full_size; 2747ec681f3Smrg slice->size = slice_full_size; 2757ec681f3Smrg 2767ec681f3Smrg /* Add a checksum region if necessary */ 2777ec681f3Smrg if (crc_mode != PAN_IMAGE_CRC_NONE) { 2787ec681f3Smrg slice->crc.size = 2797ec681f3Smrg panfrost_compute_checksum_size(slice, width, height); 2807ec681f3Smrg 2817ec681f3Smrg if (crc_mode == PAN_IMAGE_CRC_INBAND) { 2827ec681f3Smrg slice->crc.offset = 
offset; 2837ec681f3Smrg offset += slice->crc.size; 2847ec681f3Smrg slice->size += slice->crc.size; 2857ec681f3Smrg } else { 2867ec681f3Smrg slice->crc.offset = oob_crc_offset; 2877ec681f3Smrg oob_crc_offset += slice->crc.size; 2887ec681f3Smrg } 2897ec681f3Smrg } 2907ec681f3Smrg 2917ec681f3Smrg width = u_minify(width, 1); 2927ec681f3Smrg height = u_minify(height, 1); 2937ec681f3Smrg depth = u_minify(depth, 1); 2947ec681f3Smrg } 2957ec681f3Smrg 2967ec681f3Smrg /* Arrays and cubemaps have the entire miptree duplicated */ 2977ec681f3Smrg layout->array_stride = ALIGN_POT(offset, 64); 2987ec681f3Smrg if (explicit_layout) 2997ec681f3Smrg layout->data_size = offset; 3007ec681f3Smrg else 3017ec681f3Smrg layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096); 3027ec681f3Smrg layout->crc_size = oob_crc_offset; 3037ec681f3Smrg 3047ec681f3Smrg return true; 3057ec681f3Smrg} 3067ec681f3Smrg 3077ec681f3Smrgvoid 3087ec681f3Smrgpan_iview_get_surface(const struct pan_image_view *iview, 3097ec681f3Smrg unsigned level, unsigned layer, unsigned sample, 3107ec681f3Smrg struct pan_surface *surf) 3117ec681f3Smrg{ 3127ec681f3Smrg level += iview->first_level; 3137ec681f3Smrg assert(level < iview->image->layout.nr_slices); 3147ec681f3Smrg 3157ec681f3Smrg layer += iview->first_layer; 3167ec681f3Smrg 3177ec681f3Smrg bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D; 3187ec681f3Smrg const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level]; 3197ec681f3Smrg mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; 3207ec681f3Smrg 3217ec681f3Smrg if (drm_is_afbc(iview->image->layout.modifier)) { 3227ec681f3Smrg assert(!sample); 3237ec681f3Smrg 3247ec681f3Smrg if (is_3d) { 3257ec681f3Smrg ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level); 3267ec681f3Smrg assert(layer < depth); 3277ec681f3Smrg surf->afbc.header = base + slice->offset + 3287ec681f3Smrg (layer * slice->afbc.surface_stride); 3297ec681f3Smrg 
surf->afbc.body = base + slice->offset + 3307ec681f3Smrg slice->afbc.header_size + 3317ec681f3Smrg (slice->surface_stride * layer); 3327ec681f3Smrg } else { 3337ec681f3Smrg assert(layer < iview->image->layout.array_size); 3347ec681f3Smrg surf->afbc.header = base + 3357ec681f3Smrg panfrost_texture_offset(&iview->image->layout, 3367ec681f3Smrg level, layer, 0); 3377ec681f3Smrg surf->afbc.body = surf->afbc.header + slice->afbc.header_size; 3387ec681f3Smrg } 3397ec681f3Smrg } else { 3407ec681f3Smrg unsigned array_idx = is_3d ? 0 : layer; 3417ec681f3Smrg unsigned surface_idx = is_3d ? layer : sample; 3427ec681f3Smrg 3437ec681f3Smrg surf->data = base + 3447ec681f3Smrg panfrost_texture_offset(&iview->image->layout, level, 3457ec681f3Smrg array_idx, surface_idx); 3467ec681f3Smrg } 3477ec681f3Smrg} 3487ec681f3Smrg 3497ec681f3Smrg#else /* ifndef PAN_ARCH */ 3507ec681f3Smrg 3517ec681f3Smrg#if PAN_ARCH >= 5 3527ec681f3Smrg/* Arm Scalable Texture Compression (ASTC) corresponds to just a few formats. 3537ec681f3Smrg * The block dimension is not part of the format. Instead, it is encoded as a 3547ec681f3Smrg * 6-bit tag on the payload pointer. Map the block size for a single dimension. 
3557ec681f3Smrg */ 3567ec681f3Smrg 3577ec681f3Smrgstatic inline enum mali_astc_2d_dimension 3587ec681f3Smrgpanfrost_astc_dim_2d(unsigned dim) 3597ec681f3Smrg{ 3607ec681f3Smrg switch (dim) { 3617ec681f3Smrg case 4: return MALI_ASTC_2D_DIMENSION_4; 3627ec681f3Smrg case 5: return MALI_ASTC_2D_DIMENSION_5; 3637ec681f3Smrg case 6: return MALI_ASTC_2D_DIMENSION_6; 3647ec681f3Smrg case 8: return MALI_ASTC_2D_DIMENSION_8; 3657ec681f3Smrg case 10: return MALI_ASTC_2D_DIMENSION_10; 3667ec681f3Smrg case 12: return MALI_ASTC_2D_DIMENSION_12; 3677ec681f3Smrg default: unreachable("Invalid ASTC dimension"); 3687ec681f3Smrg } 3697ec681f3Smrg} 3707ec681f3Smrg 3717ec681f3Smrgstatic inline enum mali_astc_3d_dimension 3727ec681f3Smrgpanfrost_astc_dim_3d(unsigned dim) 3737ec681f3Smrg{ 3747ec681f3Smrg switch (dim) { 3757ec681f3Smrg case 3: return MALI_ASTC_3D_DIMENSION_3; 3767ec681f3Smrg case 4: return MALI_ASTC_3D_DIMENSION_4; 3777ec681f3Smrg case 5: return MALI_ASTC_3D_DIMENSION_5; 3787ec681f3Smrg case 6: return MALI_ASTC_3D_DIMENSION_6; 3797ec681f3Smrg default: unreachable("Invalid ASTC dimension"); 3807ec681f3Smrg } 3817ec681f3Smrg} 3827ec681f3Smrg 3837ec681f3Smrg/* Texture addresses are tagged with information about compressed formats. 3847ec681f3Smrg * AFBC uses a bit for whether the colorspace transform is enabled (RGB and 3857ec681f3Smrg * RGBA only). 3867ec681f3Smrg * For ASTC, this is a "stretch factor" encoding the block size. */ 3877ec681f3Smrg 3887ec681f3Smrgstatic unsigned 3897ec681f3Smrgpanfrost_compression_tag(const struct util_format_description *desc, 3907ec681f3Smrg enum mali_texture_dimension dim, 3917ec681f3Smrg uint64_t modifier) 3927ec681f3Smrg{ 3937ec681f3Smrg if (drm_is_afbc(modifier)) { 3947ec681f3Smrg unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ? 
3957ec681f3Smrg MALI_AFBC_SURFACE_FLAG_YTR : 0; 3967ec681f3Smrg 3977ec681f3Smrg#if PAN_ARCH >= 6 3987ec681f3Smrg /* Prefetch enable */ 3997ec681f3Smrg flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH; 4007ec681f3Smrg 4017ec681f3Smrg /* Wide blocks (> 16x16) */ 4027ec681f3Smrg if (panfrost_block_dim(modifier, true, 0) > 16) 4037ec681f3Smrg flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK; 4047ec681f3Smrg 4057ec681f3Smrg /* Used to make sure AFBC headers don't point outside the AFBC 4067ec681f3Smrg * body. HW is using the AFBC surface stride to do this check, 4077ec681f3Smrg * which doesn't work for 3D textures because the surface 4087ec681f3Smrg * stride does not cover the body. Only supported on v7+. 4097ec681f3Smrg */ 4107ec681f3Smrg#endif 4117ec681f3Smrg 4127ec681f3Smrg#if PAN_ARCH >= 7 4137ec681f3Smrg if (dim != MALI_TEXTURE_DIMENSION_3D) 4147ec681f3Smrg flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE; 4157ec681f3Smrg#endif 4167ec681f3Smrg 4177ec681f3Smrg return flags; 4187ec681f3Smrg } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { 4197ec681f3Smrg if (desc->block.depth > 1) { 4207ec681f3Smrg return (panfrost_astc_dim_3d(desc->block.depth) << 4) | 4217ec681f3Smrg (panfrost_astc_dim_3d(desc->block.height) << 2) | 4227ec681f3Smrg panfrost_astc_dim_3d(desc->block.width); 4237ec681f3Smrg } else { 4247ec681f3Smrg return (panfrost_astc_dim_2d(desc->block.height) << 3) | 4257ec681f3Smrg panfrost_astc_dim_2d(desc->block.width); 4267ec681f3Smrg } 4277ec681f3Smrg } else { 4287ec681f3Smrg return 0; 4297ec681f3Smrg } 4307ec681f3Smrg} 4317ec681f3Smrg#endif 4327ec681f3Smrg 4337ec681f3Smrg/* Cubemaps have 6 faces as "layers" in between each actual layer. We 4347ec681f3Smrg * need to fix this up. TODO: logic wrong in the asserted out cases ... 4357ec681f3Smrg * can they happen, perhaps from cubemap arrays? 
*/ 4367ec681f3Smrg 4377ec681f3Smrgstatic void 4387ec681f3Smrgpanfrost_adjust_cube_dimensions( 4397ec681f3Smrg unsigned *first_face, unsigned *last_face, 4407ec681f3Smrg unsigned *first_layer, unsigned *last_layer) 4417ec681f3Smrg{ 4427ec681f3Smrg *first_face = *first_layer % 6; 4437ec681f3Smrg *last_face = *last_layer % 6; 4447ec681f3Smrg *first_layer /= 6; 4457ec681f3Smrg *last_layer /= 6; 4467ec681f3Smrg 4477ec681f3Smrg assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5)); 4487ec681f3Smrg} 4497ec681f3Smrg 4507ec681f3Smrg/* Following the texture descriptor is a number of pointers. How many? */ 4517ec681f3Smrg 4527ec681f3Smrgstatic unsigned 4537ec681f3Smrgpanfrost_texture_num_elements( 4547ec681f3Smrg unsigned first_level, unsigned last_level, 4557ec681f3Smrg unsigned first_layer, unsigned last_layer, 4567ec681f3Smrg unsigned nr_samples, 4577ec681f3Smrg bool is_cube, bool manual_stride) 4587ec681f3Smrg{ 4597ec681f3Smrg unsigned first_face = 0, last_face = 0; 4607ec681f3Smrg 4617ec681f3Smrg if (is_cube) { 4627ec681f3Smrg panfrost_adjust_cube_dimensions(&first_face, &last_face, 4637ec681f3Smrg &first_layer, &last_layer); 4647ec681f3Smrg } 4657ec681f3Smrg 4667ec681f3Smrg unsigned levels = 1 + last_level - first_level; 4677ec681f3Smrg unsigned layers = 1 + last_layer - first_layer; 4687ec681f3Smrg unsigned faces = 1 + last_face - first_face; 4697ec681f3Smrg unsigned num_elements = levels * layers * faces * MAX2(nr_samples, 1); 4707ec681f3Smrg 4717ec681f3Smrg if (manual_stride) 4727ec681f3Smrg num_elements *= 2; 4737ec681f3Smrg 4747ec681f3Smrg return num_elements; 4757ec681f3Smrg} 4767ec681f3Smrg 4777ec681f3Smrg/* Conservative estimate of the size of the texture payload a priori. 4787ec681f3Smrg * Average case, size equal to the actual size. Worst case, off by 2x (if 4797ec681f3Smrg * a manual stride is not needed on a linear texture). 
Returned value 4807ec681f3Smrg * must be greater than or equal to the actual size, so it's safe to use 4817ec681f3Smrg * as an allocation amount */ 4827ec681f3Smrg 4837ec681f3Smrgunsigned 4847ec681f3SmrgGENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview) 4857ec681f3Smrg{ 4867ec681f3Smrg /* Assume worst case */ 4877ec681f3Smrg unsigned manual_stride = PAN_ARCH >= 6 || 4887ec681f3Smrg (iview->image->layout.modifier == DRM_FORMAT_MOD_LINEAR); 4897ec681f3Smrg 4907ec681f3Smrg unsigned elements = 4917ec681f3Smrg panfrost_texture_num_elements(iview->first_level, iview->last_level, 4927ec681f3Smrg iview->first_layer, iview->last_layer, 4937ec681f3Smrg iview->image->layout.nr_samples, 4947ec681f3Smrg iview->dim == MALI_TEXTURE_DIMENSION_CUBE, 4957ec681f3Smrg manual_stride); 4967ec681f3Smrg 4977ec681f3Smrg return sizeof(mali_ptr) * elements; 4987ec681f3Smrg} 4997ec681f3Smrg 5007ec681f3Smrgstruct panfrost_surface_iter { 5017ec681f3Smrg unsigned layer, last_layer; 5027ec681f3Smrg unsigned level, first_level, last_level; 5037ec681f3Smrg unsigned face, first_face, last_face; 5047ec681f3Smrg unsigned sample, first_sample, last_sample; 5057ec681f3Smrg}; 5067ec681f3Smrg 5077ec681f3Smrgstatic void 5087ec681f3Smrgpanfrost_surface_iter_begin(struct panfrost_surface_iter *iter, 5097ec681f3Smrg unsigned first_layer, unsigned last_layer, 5107ec681f3Smrg unsigned first_level, unsigned last_level, 5117ec681f3Smrg unsigned first_face, unsigned last_face, 5127ec681f3Smrg unsigned nr_samples) 5137ec681f3Smrg{ 5147ec681f3Smrg iter->layer = first_layer; 5157ec681f3Smrg iter->last_layer = last_layer; 5167ec681f3Smrg iter->level = iter->first_level = first_level; 5177ec681f3Smrg iter->last_level = last_level; 5187ec681f3Smrg iter->face = iter->first_face = first_face; 5197ec681f3Smrg iter->last_face = last_face; 5207ec681f3Smrg iter->sample = iter->first_sample = 0; 5217ec681f3Smrg iter->last_sample = nr_samples - 1; 5227ec681f3Smrg} 5237ec681f3Smrg 
5247ec681f3Smrgstatic bool 5257ec681f3Smrgpanfrost_surface_iter_end(const struct panfrost_surface_iter *iter) 5267ec681f3Smrg{ 5277ec681f3Smrg return iter->layer > iter->last_layer; 5287ec681f3Smrg} 5297ec681f3Smrg 5307ec681f3Smrgstatic void 5317ec681f3Smrgpanfrost_surface_iter_next(struct panfrost_surface_iter *iter) 5327ec681f3Smrg{ 5337ec681f3Smrg#define INC_TEST(field) \ 5347ec681f3Smrg do { \ 5357ec681f3Smrg if (iter->field++ < iter->last_ ## field) \ 5367ec681f3Smrg return; \ 5377ec681f3Smrg iter->field = iter->first_ ## field; \ 5387ec681f3Smrg } while (0) 5397ec681f3Smrg 5407ec681f3Smrg /* Ordering is different on v7: inner loop is iterating on levels */ 5417ec681f3Smrg if (PAN_ARCH >= 7) 5427ec681f3Smrg INC_TEST(level); 5437ec681f3Smrg 5447ec681f3Smrg INC_TEST(sample); 5457ec681f3Smrg INC_TEST(face); 5467ec681f3Smrg 5477ec681f3Smrg if (PAN_ARCH < 7) 5487ec681f3Smrg INC_TEST(level); 5497ec681f3Smrg 5507ec681f3Smrg iter->layer++; 5517ec681f3Smrg 5527ec681f3Smrg#undef INC_TEST 5537ec681f3Smrg} 5547ec681f3Smrg 5557ec681f3Smrgstatic void 5567ec681f3Smrgpanfrost_get_surface_strides(const struct pan_image_layout *layout, 5577ec681f3Smrg unsigned l, 5587ec681f3Smrg int32_t *row_stride, int32_t *surf_stride) 5597ec681f3Smrg{ 5607ec681f3Smrg const struct pan_image_slice_layout *slice = &layout->slices[l]; 5617ec681f3Smrg 5627ec681f3Smrg if (drm_is_afbc(layout->modifier)) { 5637ec681f3Smrg /* Pre v7 don't have a row stride field. This field is 5647ec681f3Smrg * repurposed as a Y offset which we don't use */ 5657ec681f3Smrg *row_stride = PAN_ARCH < 7 ? 
0 : slice->afbc.row_stride; 5667ec681f3Smrg *surf_stride = slice->afbc.surface_stride; 5677ec681f3Smrg } else { 5687ec681f3Smrg *row_stride = slice->row_stride; 5697ec681f3Smrg *surf_stride = slice->surface_stride; 5707ec681f3Smrg } 5717ec681f3Smrg} 5727ec681f3Smrg 5737ec681f3Smrgstatic mali_ptr 5747ec681f3Smrgpanfrost_get_surface_pointer(const struct pan_image_layout *layout, 5757ec681f3Smrg enum mali_texture_dimension dim, 5767ec681f3Smrg mali_ptr base, 5777ec681f3Smrg unsigned l, unsigned w, unsigned f, unsigned s) 5787ec681f3Smrg{ 5797ec681f3Smrg unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1; 5807ec681f3Smrg unsigned offset; 5817ec681f3Smrg 5827ec681f3Smrg if (layout->dim == MALI_TEXTURE_DIMENSION_3D) { 5837ec681f3Smrg assert(!f && !s); 5847ec681f3Smrg offset = layout->slices[l].offset + 5857ec681f3Smrg (w * panfrost_get_layer_stride(layout, l)); 5867ec681f3Smrg } else { 5877ec681f3Smrg offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s); 5887ec681f3Smrg } 5897ec681f3Smrg 5907ec681f3Smrg return base + offset; 5917ec681f3Smrg} 5927ec681f3Smrg 5937ec681f3Smrgstatic void 5947ec681f3Smrgpanfrost_emit_texture_payload(const struct pan_image_view *iview, 5957ec681f3Smrg enum pipe_format format, 5967ec681f3Smrg bool manual_stride, 5977ec681f3Smrg void *payload) 5987ec681f3Smrg{ 5997ec681f3Smrg const struct pan_image_layout *layout = &iview->image->layout; 6007ec681f3Smrg ASSERTED const struct util_format_description *desc = 6017ec681f3Smrg util_format_description(format); 6027ec681f3Smrg 6037ec681f3Smrg mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset; 6047ec681f3Smrg 6057ec681f3Smrg if (iview->buf.size) { 6067ec681f3Smrg assert (iview->dim == MALI_TEXTURE_DIMENSION_1D); 6077ec681f3Smrg base += iview->buf.offset; 6087ec681f3Smrg } 6097ec681f3Smrg 6107ec681f3Smrg#if PAN_ARCH >= 5 6117ec681f3Smrg /* panfrost_compression_tag() wants the dimension of the resource, not the 6127ec681f3Smrg * one of the image view 
(those might differ). 6137ec681f3Smrg */ 6147ec681f3Smrg base |= panfrost_compression_tag(desc, layout->dim, layout->modifier); 6157ec681f3Smrg#else 6167ec681f3Smrg assert(!drm_is_afbc(layout->modifier) && "no AFBC on v4"); 6177ec681f3Smrg assert(desc->layout != UTIL_FORMAT_LAYOUT_ASTC && "no ASTC on v4"); 6187ec681f3Smrg#endif 6197ec681f3Smrg 6207ec681f3Smrg /* Inject the addresses in, interleaving array indices, mip levels, 6217ec681f3Smrg * cube faces, and strides in that order */ 6227ec681f3Smrg 6237ec681f3Smrg unsigned first_layer = iview->first_layer, last_layer = iview->last_layer; 6247ec681f3Smrg unsigned nr_samples = layout->nr_samples; 6257ec681f3Smrg unsigned first_face = 0, last_face = 0; 6267ec681f3Smrg 6277ec681f3Smrg if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { 6287ec681f3Smrg panfrost_adjust_cube_dimensions(&first_face, &last_face, 6297ec681f3Smrg &first_layer, &last_layer); 6307ec681f3Smrg } 6317ec681f3Smrg 6327ec681f3Smrg struct panfrost_surface_iter iter; 6337ec681f3Smrg 6347ec681f3Smrg for (panfrost_surface_iter_begin(&iter, first_layer, last_layer, 6357ec681f3Smrg iview->first_level, iview->last_level, 6367ec681f3Smrg first_face, last_face, nr_samples); 6377ec681f3Smrg !panfrost_surface_iter_end(&iter); 6387ec681f3Smrg panfrost_surface_iter_next(&iter)) { 6397ec681f3Smrg mali_ptr pointer = 6407ec681f3Smrg panfrost_get_surface_pointer(layout, iview->dim, base, 6417ec681f3Smrg iter.level, iter.layer, 6427ec681f3Smrg iter.face, iter.sample); 6437ec681f3Smrg 6447ec681f3Smrg if (!manual_stride) { 6457ec681f3Smrg pan_pack(payload, SURFACE, cfg) { 6467ec681f3Smrg cfg.pointer = pointer; 6477ec681f3Smrg } 6487ec681f3Smrg payload += pan_size(SURFACE); 6497ec681f3Smrg } else { 6507ec681f3Smrg pan_pack(payload, SURFACE_WITH_STRIDE, cfg) { 6517ec681f3Smrg cfg.pointer = pointer; 6527ec681f3Smrg panfrost_get_surface_strides(layout, iter.level, 6537ec681f3Smrg &cfg.row_stride, 6547ec681f3Smrg &cfg.surface_stride); 6557ec681f3Smrg } 6567ec681f3Smrg payload 
+= pan_size(SURFACE_WITH_STRIDE); 6577ec681f3Smrg } 6587ec681f3Smrg } 6597ec681f3Smrg} 6607ec681f3Smrg 6617ec681f3Smrg/* Check if we need to set a custom stride by computing the "expected" 6627ec681f3Smrg * stride and comparing it to what the user actually wants. Only applies 6637ec681f3Smrg * to linear textures, since tiled/compressed textures have strict 6647ec681f3Smrg * alignment requirements for their strides as it is */ 6657ec681f3Smrg 6667ec681f3Smrgstatic bool 6677ec681f3Smrgpanfrost_needs_explicit_stride(const struct pan_image_view *iview) 6687ec681f3Smrg{ 6697ec681f3Smrg /* Stride is explicit on Bifrost */ 6707ec681f3Smrg if (PAN_ARCH >= 6) 6717ec681f3Smrg return true; 6727ec681f3Smrg 6737ec681f3Smrg if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR) 6747ec681f3Smrg return false; 6757ec681f3Smrg 6767ec681f3Smrg unsigned bytes_per_block = util_format_get_blocksize(iview->format); 6777ec681f3Smrg unsigned block_w = util_format_get_blockwidth(iview->format); 6787ec681f3Smrg 6797ec681f3Smrg for (unsigned l = iview->first_level; l <= iview->last_level; ++l) { 6807ec681f3Smrg unsigned actual = iview->image->layout.slices[l].line_stride; 6817ec681f3Smrg unsigned expected = 6827ec681f3Smrg DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) * 6837ec681f3Smrg bytes_per_block; 6847ec681f3Smrg 6857ec681f3Smrg if (actual != expected) 6867ec681f3Smrg return true; 6877ec681f3Smrg } 6887ec681f3Smrg 6897ec681f3Smrg return false; 6907ec681f3Smrg} 6917ec681f3Smrg 6927ec681f3Smrg/* Map modifiers to mali_texture_layout for packing in a texture descriptor */ 6937ec681f3Smrg 6947ec681f3Smrgstatic enum mali_texture_layout 6957ec681f3Smrgpanfrost_modifier_to_layout(uint64_t modifier) 6967ec681f3Smrg{ 6977ec681f3Smrg if (drm_is_afbc(modifier)) 6987ec681f3Smrg return MALI_TEXTURE_LAYOUT_AFBC; 6997ec681f3Smrg else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) 7007ec681f3Smrg return MALI_TEXTURE_LAYOUT_TILED; 7017ec681f3Smrg else if (modifier == 
DRM_FORMAT_MOD_LINEAR) 7027ec681f3Smrg return MALI_TEXTURE_LAYOUT_LINEAR; 7037ec681f3Smrg else 7047ec681f3Smrg unreachable("Invalid modifer"); 7057ec681f3Smrg} 7067ec681f3Smrg 7077ec681f3Smrgvoid 7087ec681f3SmrgGENX(panfrost_new_texture)(const struct panfrost_device *dev, 7097ec681f3Smrg const struct pan_image_view *iview, 7107ec681f3Smrg void *out, const struct panfrost_ptr *payload) 7117ec681f3Smrg{ 7127ec681f3Smrg const struct pan_image_layout *layout = &iview->image->layout; 7137ec681f3Smrg enum pipe_format format = iview->format; 7147ec681f3Smrg unsigned swizzle; 7157ec681f3Smrg 7167ec681f3Smrg if (PAN_ARCH == 7 && util_format_is_depth_or_stencil(format)) { 7177ec681f3Smrg /* v7 doesn't have an _RRRR component order, combine the 7187ec681f3Smrg * user swizzle with a .XXXX swizzle to emulate that. 7197ec681f3Smrg */ 7207ec681f3Smrg static const unsigned char replicate_x[4] = { 7217ec681f3Smrg PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, 7227ec681f3Smrg PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, 7237ec681f3Smrg }; 7247ec681f3Smrg unsigned char patched_swizzle[4]; 7257ec681f3Smrg 7267ec681f3Smrg util_format_compose_swizzles(replicate_x, 7277ec681f3Smrg iview->swizzle, 7287ec681f3Smrg patched_swizzle); 7297ec681f3Smrg swizzle = panfrost_translate_swizzle_4(patched_swizzle); 7307ec681f3Smrg } else { 7317ec681f3Smrg swizzle = panfrost_translate_swizzle_4(iview->swizzle); 7327ec681f3Smrg } 7337ec681f3Smrg 7347ec681f3Smrg bool manual_stride = 7357ec681f3Smrg panfrost_needs_explicit_stride(iview); 7367ec681f3Smrg 7377ec681f3Smrg panfrost_emit_texture_payload(iview, format, 7387ec681f3Smrg manual_stride, 7397ec681f3Smrg payload->cpu); 7407ec681f3Smrg 7417ec681f3Smrg unsigned array_size = iview->last_layer - iview->first_layer + 1; 7427ec681f3Smrg 7437ec681f3Smrg if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) { 7447ec681f3Smrg assert(iview->first_layer % 6 == 0); 7457ec681f3Smrg assert(iview->last_layer % 6 == 5); 7467ec681f3Smrg array_size /= 6; 7477ec681f3Smrg } 7487ec681f3Smrg 
7497ec681f3Smrg unsigned width; 7507ec681f3Smrg 7517ec681f3Smrg if (iview->buf.size) { 7527ec681f3Smrg assert(iview->dim == MALI_TEXTURE_DIMENSION_1D); 7537ec681f3Smrg assert(!iview->first_level && !iview->last_level); 7547ec681f3Smrg assert(!iview->first_layer && !iview->last_layer); 7557ec681f3Smrg assert(layout->nr_samples == 1); 7567ec681f3Smrg assert(layout->height == 1 && layout->depth == 1); 7577ec681f3Smrg assert(iview->buf.offset + iview->buf.size <= layout->width); 7587ec681f3Smrg width = iview->buf.size; 7597ec681f3Smrg } else { 7607ec681f3Smrg width = u_minify(layout->width, iview->first_level); 7617ec681f3Smrg } 7627ec681f3Smrg 7637ec681f3Smrg pan_pack(out, TEXTURE, cfg) { 7647ec681f3Smrg cfg.dimension = iview->dim; 7657ec681f3Smrg cfg.format = dev->formats[format].hw; 7667ec681f3Smrg cfg.width = width; 7677ec681f3Smrg cfg.height = u_minify(layout->height, iview->first_level); 7687ec681f3Smrg if (iview->dim == MALI_TEXTURE_DIMENSION_3D) 7697ec681f3Smrg cfg.depth = u_minify(layout->depth, iview->first_level); 7707ec681f3Smrg else 7717ec681f3Smrg cfg.sample_count = layout->nr_samples; 7727ec681f3Smrg cfg.swizzle = swizzle; 7737ec681f3Smrg cfg.texel_ordering = 7747ec681f3Smrg panfrost_modifier_to_layout(layout->modifier); 7757ec681f3Smrg cfg.levels = iview->last_level - iview->first_level + 1; 7767ec681f3Smrg cfg.array_size = array_size; 7777ec681f3Smrg 7787ec681f3Smrg#if PAN_ARCH >= 6 7797ec681f3Smrg cfg.surfaces = payload->gpu; 7807ec681f3Smrg 7817ec681f3Smrg /* We specify API-level LOD clamps in the sampler descriptor 7827ec681f3Smrg * and use these clamps simply for bounds checking */ 7837ec681f3Smrg cfg.minimum_lod = FIXED_16(0, false); 7847ec681f3Smrg cfg.maximum_lod = FIXED_16(cfg.levels - 1, false); 7857ec681f3Smrg#else 7867ec681f3Smrg cfg.manual_stride = manual_stride; 7877ec681f3Smrg#endif 7887ec681f3Smrg } 7897ec681f3Smrg} 7907ec681f3Smrg#endif /* ifdef PAN_ARCH */ 791