17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors: 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg#include "pan_texture.h" 287ec681f3Smrg 297ec681f3Smrg/* Arm FrameBuffer Compression (AFBC) is a lossless compression scheme natively 307ec681f3Smrg * implemented in Mali GPUs (as well as many display controllers paired with 317ec681f3Smrg * Mali GPUs, etc). Where possible, Panfrost prefers to use AFBC for both 327ec681f3Smrg * rendering and texturing. In most cases, this is a performance-win due to a 337ec681f3Smrg * dramatic reduction in memory bandwidth and cache locality compared to a 347ec681f3Smrg * linear resources. 357ec681f3Smrg * 367ec681f3Smrg * AFBC divides the framebuffer into 16x16 tiles (other sizes possible, TODO: 377ec681f3Smrg * do we need to support this?). So, the width and height each must be aligned 387ec681f3Smrg * up to 16 pixels. This is inherently good for performance; note that for a 4 397ec681f3Smrg * byte-per-pixel format like RGBA8888, that means that rows are 16*4=64 byte 407ec681f3Smrg * aligned, which is the cache-line size. 417ec681f3Smrg * 427ec681f3Smrg * For each AFBC-compressed resource, there is a single contiguous 437ec681f3Smrg * (CPU/GPU-shared) buffer. This buffer itself is divided into two parts: 447ec681f3Smrg * header and body, placed immediately after each other. 457ec681f3Smrg * 467ec681f3Smrg * The AFBC header contains 16 bytes of metadata per tile. 477ec681f3Smrg * 487ec681f3Smrg * The AFBC body is the same size as the original linear resource (padded to 497ec681f3Smrg * the nearest tile). Although the body comes immediately after the header, it 507ec681f3Smrg * must also be cache-line aligned, so there can sometimes be a bit of padding 517ec681f3Smrg * between the header and body. 527ec681f3Smrg * 537ec681f3Smrg * As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and 547ec681f3Smrg * 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16 557ec681f3Smrg * bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already 567ec681f3Smrg * tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of 577ec681f3Smrg * body. 587ec681f3Smrg * 597ec681f3Smrg * From userspace, Panfrost needs to be able to calculate these sizes. It 607ec681f3Smrg * explicitly does not and can not know the format of the data contained within 617ec681f3Smrg * this header and body. The GPU has native support for AFBC encode/decode. For 627ec681f3Smrg * an internal FBO or a framebuffer used for scanout with an AFBC-compatible 637ec681f3Smrg * winsys/display-controller, the buffer is maintained AFBC throughout flight, 647ec681f3Smrg * and the driver never needs to know the internal data. For edge cases where 657ec681f3Smrg * the driver really does need to read/write from the AFBC resource, we 667ec681f3Smrg * generate a linear staging buffer and use the GPU to blit AFBC<--->linear. 677ec681f3Smrg * TODO: Implement me. */ 687ec681f3Smrg 697ec681f3Smrg#define AFBC_TILE_WIDTH 16 707ec681f3Smrg#define AFBC_TILE_HEIGHT 16 717ec681f3Smrg#define AFBC_CACHE_ALIGN 64 727ec681f3Smrg 737ec681f3Smrg/* AFBC supports compressing a few canonical formats. Additional formats are 747ec681f3Smrg * available by using a canonical internal format. Given a PIPE format, find 757ec681f3Smrg * the canonical AFBC internal format if it exists, or NONE if the format 767ec681f3Smrg * cannot be compressed. */ 777ec681f3Smrg 787ec681f3Smrgenum pipe_format 797ec681f3Smrgpanfrost_afbc_format(const struct panfrost_device *dev, enum pipe_format format) 807ec681f3Smrg{ 817ec681f3Smrg /* Don't allow swizzled formats on v7 */ 827ec681f3Smrg switch (format) { 837ec681f3Smrg case PIPE_FORMAT_B8G8R8A8_UNORM: 847ec681f3Smrg case PIPE_FORMAT_B8G8R8X8_UNORM: 857ec681f3Smrg case PIPE_FORMAT_A8R8G8B8_UNORM: 867ec681f3Smrg case PIPE_FORMAT_X8R8G8B8_UNORM: 877ec681f3Smrg case PIPE_FORMAT_X8B8G8R8_UNORM: 887ec681f3Smrg case PIPE_FORMAT_A8B8G8R8_UNORM: 897ec681f3Smrg case PIPE_FORMAT_B8G8R8_UNORM: 907ec681f3Smrg case PIPE_FORMAT_B5G6R5_UNORM: 917ec681f3Smrg if (dev->arch >= 7) 927ec681f3Smrg return PIPE_FORMAT_NONE; 937ec681f3Smrg 947ec681f3Smrg break; 957ec681f3Smrg default: 967ec681f3Smrg break; 977ec681f3Smrg } 987ec681f3Smrg 997ec681f3Smrg switch (format) { 1007ec681f3Smrg case PIPE_FORMAT_Z16_UNORM: 1017ec681f3Smrg return PIPE_FORMAT_R8G8_UNORM; 1027ec681f3Smrg 1037ec681f3Smrg case PIPE_FORMAT_R8G8B8_UNORM: 1047ec681f3Smrg case PIPE_FORMAT_B8G8R8_UNORM: 1057ec681f3Smrg return PIPE_FORMAT_R8G8B8_UNORM; 1067ec681f3Smrg 1077ec681f3Smrg case PIPE_FORMAT_R8G8B8A8_UNORM: 1087ec681f3Smrg case PIPE_FORMAT_R8G8B8X8_UNORM: 1097ec681f3Smrg case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1107ec681f3Smrg case PIPE_FORMAT_Z24X8_UNORM: 1117ec681f3Smrg case PIPE_FORMAT_X24S8_UINT: 1127ec681f3Smrg case PIPE_FORMAT_B8G8R8A8_UNORM: 1137ec681f3Smrg case PIPE_FORMAT_B8G8R8X8_UNORM: 1147ec681f3Smrg case PIPE_FORMAT_A8R8G8B8_UNORM: 1157ec681f3Smrg case PIPE_FORMAT_X8R8G8B8_UNORM: 1167ec681f3Smrg case PIPE_FORMAT_X8B8G8R8_UNORM: 1177ec681f3Smrg case PIPE_FORMAT_A8B8G8R8_UNORM: 1187ec681f3Smrg return PIPE_FORMAT_R8G8B8A8_UNORM; 1197ec681f3Smrg 1207ec681f3Smrg case PIPE_FORMAT_R5G6B5_UNORM: 1217ec681f3Smrg case PIPE_FORMAT_B5G6R5_UNORM: 1227ec681f3Smrg return PIPE_FORMAT_R5G6B5_UNORM; 1237ec681f3Smrg 1247ec681f3Smrg /* TODO: More AFBC formats */ 1257ec681f3Smrg default: 1267ec681f3Smrg return PIPE_FORMAT_NONE; 1277ec681f3Smrg } 1287ec681f3Smrg} 1297ec681f3Smrg 1307ec681f3Smrg/* A format may be compressed as AFBC if it has an AFBC internal format */ 1317ec681f3Smrg 1327ec681f3Smrgbool 1337ec681f3Smrgpanfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_format format) 1347ec681f3Smrg{ 1357ec681f3Smrg return panfrost_afbc_format(dev, format) != PIPE_FORMAT_NONE; 1367ec681f3Smrg} 1377ec681f3Smrg 1387ec681f3Smrgunsigned 1397ec681f3Smrgpanfrost_afbc_header_size(unsigned width, unsigned height) 1407ec681f3Smrg{ 1417ec681f3Smrg /* Align to tile */ 1427ec681f3Smrg unsigned aligned_width = ALIGN_POT(width, AFBC_TILE_WIDTH); 1437ec681f3Smrg unsigned aligned_height = ALIGN_POT(height, AFBC_TILE_HEIGHT); 1447ec681f3Smrg 1457ec681f3Smrg /* Compute size in tiles, rather than pixels */ 1467ec681f3Smrg unsigned tile_count_x = aligned_width / AFBC_TILE_WIDTH; 1477ec681f3Smrg unsigned tile_count_y = aligned_height / AFBC_TILE_HEIGHT; 1487ec681f3Smrg unsigned tile_count = tile_count_x * tile_count_y; 1497ec681f3Smrg 1507ec681f3Smrg /* Multiply to find the header size */ 1517ec681f3Smrg unsigned header_bytes = tile_count * AFBC_HEADER_BYTES_PER_TILE; 1527ec681f3Smrg 1537ec681f3Smrg /* Align and go */ 1547ec681f3Smrg return ALIGN_POT(header_bytes, AFBC_CACHE_ALIGN); 1557ec681f3Smrg 1567ec681f3Smrg} 1577ec681f3Smrg 1587ec681f3Smrg/* The lossless colour transform (AFBC_FORMAT_MOD_YTR) requires RGB. */ 1597ec681f3Smrg 1607ec681f3Smrgbool 1617ec681f3Smrgpanfrost_afbc_can_ytr(enum pipe_format format) 1627ec681f3Smrg{ 1637ec681f3Smrg const struct util_format_description *desc = 1647ec681f3Smrg util_format_description(format); 1657ec681f3Smrg 1667ec681f3Smrg /* YTR is only defined for RGB(A) */ 1677ec681f3Smrg if (desc->nr_channels != 3 && desc->nr_channels != 4) 1687ec681f3Smrg return false; 1697ec681f3Smrg 1707ec681f3Smrg /* The fourth channel if it exists doesn't matter */ 1717ec681f3Smrg return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB; 1727ec681f3Smrg} 173