1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Broadcom 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** @file vc4_tiling.c 25b8e80941Smrg * 26b8e80941Smrg * Handles information about the VC4 tiling formats, and loading and storing 27b8e80941Smrg * from them. 28b8e80941Smrg * 29b8e80941Smrg * Texture mipmap levels on VC4 are (with the exception of 32-bit RGBA raster 30b8e80941Smrg * textures for scanout) stored as groups of microtiles. If the texture is at 31b8e80941Smrg * least 4x4 microtiles (utiles), then those microtiles are arranged in a sort 32b8e80941Smrg * of Hilbert-fractal-ish layout (T), otherwise the microtiles are in raster 33b8e80941Smrg * order (LT). 34b8e80941Smrg * 35b8e80941Smrg * Specifically, the T format has: 36b8e80941Smrg * 37b8e80941Smrg * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4 38b8e80941Smrg * pixels at 32 bit depth. 39b8e80941Smrg * 40b8e80941Smrg * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually 41b8e80941Smrg * 16x16 pixels). 42b8e80941Smrg * 43b8e80941Smrg * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On 44b8e80941Smrg * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows 45b8e80941Smrg * they're (TR, BR, BL, TL), where bottom left is start of memory. 46b8e80941Smrg * 47b8e80941Smrg * - an image made of 4k tiles in rows either left-to-right (even rows of 4k 48b8e80941Smrg * tiles) or right-to-left (odd rows of 4k tiles). 49b8e80941Smrg */ 50b8e80941Smrg 51b8e80941Smrg#include "vc4_screen.h" 52b8e80941Smrg#include "vc4_context.h" 53b8e80941Smrg#include "vc4_tiling.h" 54b8e80941Smrg 55b8e80941Smrg/** 56b8e80941Smrg * The texture unit decides what tiling format a particular miplevel is using 57b8e80941Smrg * this function, so we lay out our miptrees accordingly. 58b8e80941Smrg */ 59b8e80941Smrgbool 60b8e80941Smrgvc4_size_is_lt(uint32_t width, uint32_t height, int cpp) 61b8e80941Smrg{ 62b8e80941Smrg return (width <= 4 * vc4_utile_width(cpp) || 63b8e80941Smrg height <= 4 * vc4_utile_height(cpp)); 64b8e80941Smrg} 65b8e80941Smrg 66b8e80941Smrg/** 67b8e80941Smrg * Takes a utile x and y (and the number of utiles of width of the image) and 68b8e80941Smrg * returns the offset to the utile within a VC4_TILING_FORMAT_TF image. 69b8e80941Smrg */ 70b8e80941Smrgstatic uint32_t 71b8e80941Smrgt_utile_address(uint32_t utile_x, uint32_t utile_y, 72b8e80941Smrg uint32_t utile_stride) 73b8e80941Smrg{ 74b8e80941Smrg /* T images have to be aligned to 8 utiles (4x4 subtiles, which are 75b8e80941Smrg * 2x2 in a 4k tile). 76b8e80941Smrg */ 77b8e80941Smrg assert(!(utile_stride & 7)); 78b8e80941Smrg uint32_t tile_stride = utile_stride >> 3; 79b8e80941Smrg /* 4k tile offsets. */ 80b8e80941Smrg uint32_t tile_x = utile_x >> 3; 81b8e80941Smrg uint32_t tile_y = utile_y >> 3; 82b8e80941Smrg bool odd_tile_y = tile_y & 1; 83b8e80941Smrg 84b8e80941Smrg /* Odd lines of 4k tiles go right-to-left. */ 85b8e80941Smrg if (odd_tile_y) 86b8e80941Smrg tile_x = tile_stride - tile_x - 1; 87b8e80941Smrg 88b8e80941Smrg uint32_t tile_offset = 4096 * (tile_y * tile_stride + tile_x); 89b8e80941Smrg 90b8e80941Smrg uint32_t stile_x = (utile_x >> 2) & 1; 91b8e80941Smrg uint32_t stile_y = (utile_y >> 2) & 1; 92b8e80941Smrg uint32_t stile_index = (stile_y << 1) + stile_x; 93b8e80941Smrg static const uint32_t odd_stile_map[4] = {2, 1, 3, 0}; 94b8e80941Smrg static const uint32_t even_stile_map[4] = {0, 3, 1, 2}; 95b8e80941Smrg 96b8e80941Smrg uint32_t stile_offset = 1024 * (odd_tile_y ? 97b8e80941Smrg odd_stile_map[stile_index] : 98b8e80941Smrg even_stile_map[stile_index]); 99b8e80941Smrg 100b8e80941Smrg /* This function no longer handles the utile offset within a subtile. 101b8e80941Smrg * Walking subtiles is the job of the LT image handler. 102b8e80941Smrg */ 103b8e80941Smrg assert(!(utile_x & 3) && !(utile_y & 3)); 104b8e80941Smrg 105b8e80941Smrg#if 0 106b8e80941Smrg fprintf(stderr, "utile %d,%d -> %d + %d + %d (stride %d,%d) = %d\n", 107b8e80941Smrg utile_x, utile_y, 108b8e80941Smrg tile_offset, stile_offset, utile_offset, 109b8e80941Smrg utile_stride, tile_stride, 110b8e80941Smrg tile_offset + stile_offset + utile_offset); 111b8e80941Smrg#endif 112b8e80941Smrg 113b8e80941Smrg return tile_offset + stile_offset; 114b8e80941Smrg} 115b8e80941Smrg 116b8e80941Smrg/** 117b8e80941Smrg * Loads or stores a T texture image by breaking it down into subtiles 118b8e80941Smrg * (1024-byte, 4x4-utile) sub-images that we can use the LT tiling functions 119b8e80941Smrg * on. 120b8e80941Smrg */ 121b8e80941Smrgstatic inline void 122b8e80941Smrgvc4_t_image_helper(void *gpu, uint32_t gpu_stride, 123b8e80941Smrg void *cpu, uint32_t cpu_stride, 124b8e80941Smrg int cpp, const struct pipe_box *box, 125b8e80941Smrg bool to_cpu) 126b8e80941Smrg{ 127b8e80941Smrg uint32_t utile_w = vc4_utile_width(cpp); 128b8e80941Smrg uint32_t utile_h = vc4_utile_height(cpp); 129b8e80941Smrg uint32_t utile_w_shift = ffs(utile_w) - 1; 130b8e80941Smrg uint32_t utile_h_shift = ffs(utile_h) - 1; 131b8e80941Smrg uint32_t stile_w = 4 * utile_w; 132b8e80941Smrg uint32_t stile_h = 4 * utile_h; 133b8e80941Smrg assert(stile_w * stile_h * cpp == 1024); 134b8e80941Smrg uint32_t utile_stride = gpu_stride / cpp / utile_w; 135b8e80941Smrg uint32_t x1 = box->x; 136b8e80941Smrg uint32_t y1 = box->y; 137b8e80941Smrg uint32_t x2 = box->x + box->width; 138b8e80941Smrg uint32_t y2 = box->y + box->height; 139b8e80941Smrg struct pipe_box partial_box; 140b8e80941Smrg uint32_t gpu_lt_stride = stile_w * cpp; 141b8e80941Smrg 142b8e80941Smrg for (uint32_t y = y1; y < y2; y = align(y + 1, stile_h)) { 143b8e80941Smrg partial_box.y = y & (stile_h - 1); 144b8e80941Smrg partial_box.height = MIN2(y2 - y, stile_h - partial_box.y); 145b8e80941Smrg 146b8e80941Smrg uint32_t cpu_offset = 0; 147b8e80941Smrg for (uint32_t x = x1; x < x2; x = align(x + 1, stile_w)) { 148b8e80941Smrg partial_box.x = x & (stile_w - 1); 149b8e80941Smrg partial_box.width = MIN2(x2 - x, 150b8e80941Smrg stile_w - partial_box.x); 151b8e80941Smrg 152b8e80941Smrg /* The dst offset we want is the start of this 153b8e80941Smrg * subtile 154b8e80941Smrg */ 155b8e80941Smrg uint32_t gpu_offset = 156b8e80941Smrg t_utile_address((x >> utile_w_shift) & ~0x3, 157b8e80941Smrg (y >> utile_h_shift) & ~0x3, 158b8e80941Smrg utile_stride); 159b8e80941Smrg 160b8e80941Smrg if (to_cpu) { 161b8e80941Smrg vc4_load_lt_image(cpu + cpu_offset, 162b8e80941Smrg cpu_stride, 163b8e80941Smrg gpu + gpu_offset, 164b8e80941Smrg gpu_lt_stride, 165b8e80941Smrg cpp, &partial_box); 166b8e80941Smrg } else { 167b8e80941Smrg vc4_store_lt_image(gpu + gpu_offset, 168b8e80941Smrg gpu_lt_stride, 169b8e80941Smrg cpu + cpu_offset, 170b8e80941Smrg cpu_stride, 171b8e80941Smrg cpp, &partial_box); 172b8e80941Smrg } 173b8e80941Smrg 174b8e80941Smrg cpu_offset += partial_box.width * cpp; 175b8e80941Smrg } 176b8e80941Smrg cpu += cpu_stride * partial_box.height; 177b8e80941Smrg } 178b8e80941Smrg} 179b8e80941Smrg 180b8e80941Smrgstatic void 181b8e80941Smrgvc4_store_t_image(void *dst, uint32_t dst_stride, 182b8e80941Smrg void *src, uint32_t src_stride, 183b8e80941Smrg int cpp, const struct pipe_box *box) 184b8e80941Smrg{ 185b8e80941Smrg vc4_t_image_helper(dst, dst_stride, 186b8e80941Smrg src, src_stride, 187b8e80941Smrg cpp, box, false); 188b8e80941Smrg} 189b8e80941Smrg 190b8e80941Smrgstatic void 191b8e80941Smrgvc4_load_t_image(void *dst, uint32_t dst_stride, 192b8e80941Smrg void *src, uint32_t src_stride, 193b8e80941Smrg int cpp, const struct pipe_box *box) 194b8e80941Smrg{ 195b8e80941Smrg vc4_t_image_helper(src, src_stride, 196b8e80941Smrg dst, dst_stride, 197b8e80941Smrg cpp, box, true); 198b8e80941Smrg} 199b8e80941Smrg 200b8e80941Smrg/** 201b8e80941Smrg * Loads pixel data from the start (microtile-aligned) box in \p src to the 202b8e80941Smrg * start of \p dst according to the given tiling format. 203b8e80941Smrg */ 204b8e80941Smrgvoid 205b8e80941Smrgvc4_load_tiled_image(void *dst, uint32_t dst_stride, 206b8e80941Smrg void *src, uint32_t src_stride, 207b8e80941Smrg uint8_t tiling_format, int cpp, 208b8e80941Smrg const struct pipe_box *box) 209b8e80941Smrg{ 210b8e80941Smrg if (tiling_format == VC4_TILING_FORMAT_LT) { 211b8e80941Smrg vc4_load_lt_image(dst, dst_stride, 212b8e80941Smrg src, src_stride, 213b8e80941Smrg cpp, box); 214b8e80941Smrg } else { 215b8e80941Smrg assert(tiling_format == VC4_TILING_FORMAT_T); 216b8e80941Smrg vc4_load_t_image(dst, dst_stride, 217b8e80941Smrg src, src_stride, 218b8e80941Smrg cpp, box); 219b8e80941Smrg } 220b8e80941Smrg} 221b8e80941Smrg 222b8e80941Smrg/** 223b8e80941Smrg * Stores pixel data from the start of \p src into a (microtile-aligned) box in 224b8e80941Smrg * \p dst according to the given tiling format. 225b8e80941Smrg */ 226b8e80941Smrgvoid 227b8e80941Smrgvc4_store_tiled_image(void *dst, uint32_t dst_stride, 228b8e80941Smrg void *src, uint32_t src_stride, 229b8e80941Smrg uint8_t tiling_format, int cpp, 230b8e80941Smrg const struct pipe_box *box) 231b8e80941Smrg{ 232b8e80941Smrg if (tiling_format == VC4_TILING_FORMAT_LT) { 233b8e80941Smrg vc4_store_lt_image(dst, dst_stride, 234b8e80941Smrg src, src_stride, 235b8e80941Smrg cpp, box); 236b8e80941Smrg } else { 237b8e80941Smrg assert(tiling_format == VC4_TILING_FORMAT_T); 238b8e80941Smrg vc4_store_t_image(dst, dst_stride, 239b8e80941Smrg src, src_stride, 240b8e80941Smrg cpp, box); 241b8e80941Smrg } 242b8e80941Smrg} 243b8e80941Smrg 244