/*
 * Copyright (c) 2012-2013 Luc Verhaegen <libv@skynet.be>
 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "pan_swizzle.h"
#include "pan_allocate.h"

/* Space a group of 4-bits out.
// For instance, 0x7 -- that is, 0b111 -- would become 0b10101 */

/* Spread the low four bits of `i` apart so that bit n lands on bit 2n
 * (bit 0 stays put, bit 1 -> 2, bit 2 -> 4, bit 3 -> 6). The odd bit
 * positions of the result are zero, which lets a second spaced value be
 * shifted left by one and OR-ed in. Bits of `i` above bit 3 are dropped. */

static inline int
space_bits_4(int i)
{
        return ((i & 0x8) << 3) |
               ((i & 0x4) << 2) |
               ((i & 0x2) << 1) |
               ((i & 0x1) << 0);
}

/* Lookup tables for the space filler curve, filled in by
 * panfrost_generate_space_filler_indices(). Note this is a 1:1 mapping,
 * just with bits twiddled around.
 *
 * space_filler[y][x] is the index (0..255) inside a 16x16 block of the pixel
 * at column x, row y of that block.
 *
 * space_filler_packed4[y][q] packs the four indices for columns 4q..4q+3 of
 * row y into one word, column 4q in the low byte. */

uint32_t space_filler[16][16];
uint32_t space_filler_packed4[16][4];

/* Populate space_filler and space_filler_packed4. Nothing in this function
 * depends on prior state, so calling it again rewrites the same values. */

void
panfrost_generate_space_filler_indices(void)
{
        for (int y = 0; y < 16; ++y) {
                for (int x = 0; x < 16; ++x) {
                        /* Even bits come from (y ^ x), odd bits from y */
                        space_filler[y][x] =
                                space_bits_4(y ^ x) | (space_bits_4(y) << 1);
                }

                for (int q = 0; q < 4; ++q) {
                        space_filler_packed4[y][q] =
                                (space_filler[y][(q * 4) + 0] << 0) |
                                (space_filler[y][(q * 4) + 1] << 8) |
                                (space_filler[y][(q * 4) + 2] << 16) |
                                (space_filler[y][(q * 4) + 3] << 24);
                }
        }
}

/* Swizzle a linear image of 1-byte pixels into 16x16 blocks.
 *
 * width          pixels per source row; the inner loop always consumes 16
 *                pixels at a time, so this must be a multiple of 16
 * height         number of source rows (any value; <= 0 does nothing)
 * source_stride  bytes between the starts of consecutive source rows
 * block_pitch    multiplied by (y rounded down to a multiple of 16) to find
 *                the first block of the block-row containing y, i.e. this is
 *                16 times the number of blocks per destination row
 * pixels         linear source
 * ldest          blocked destination; consecutive blocks are 256 bytes apart
 *
 * space_filler must have been generated before calling. Neither buffer needs
 * any particular alignment: all accesses are byte stores or memcpy. */

static void
swizzle_bpp1_align16(int width, int height, int source_stride, int block_pitch,
                     const uint8_t *pixels,
                     uint8_t *ldest)
{
        for (int y = 0; y < height; ++y) {
                int block_y = y & ~(0x0f);
                int rem_y = y & 0x0f;
                const uint32_t *indices = space_filler[rem_y];
                uint8_t *block_start_s = ldest + (block_y * block_pitch);
                const uint8_t *source_start = pixels + (y * source_stride);
                const uint8_t *source_end = source_start + width;

                /* Operate on blocks of 16 pixels to minimise bookkeeping */

                for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) {
                        if (rem_y & 1) {
                                /* On odd rows bit 0 of (y ^ x) is set for
                                 * even x, so each pixel pair is stored in
                                 * reverse order; copy byte by byte. */
                                for (int x = 0; x < 16; ++x)
                                        block_start_s[indices[x]] = source_start[x];
                        } else {
                                /* On even rows the index for an even x is
                                 * even and the index for x + 1 is one more,
                                 * so each pair moves as a unit. memcpy keeps
                                 * the source byte order and has no alignment
                                 * requirement. */
                                for (int x = 0; x < 16; x += 2)
                                        memcpy(block_start_s + indices[x], source_start + x, 2);
                        }
                }
        }
}
130b8e80941Smrg 131b8e80941Smrgstatic void 132b8e80941Smrgswizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch, 133b8e80941Smrg const uint32_t *pixels, 134b8e80941Smrg uint32_t *ldest) 135b8e80941Smrg{ 136b8e80941Smrg for (int y = 0; y < height; ++y) { 137b8e80941Smrg int block_y = y & ~(0x0f); 138b8e80941Smrg int rem_y = y & 0x0f; 139b8e80941Smrg uint32_t *block_start_s = ldest + (block_y * block_pitch); 140b8e80941Smrg const uint32_t *source_start = pixels + (y * source_stride); 141b8e80941Smrg const uint32_t *source_end = source_start + width; 142b8e80941Smrg 143b8e80941Smrg /* Operate on blocks of 16 pixels to minimise bookkeeping */ 144b8e80941Smrg 145b8e80941Smrg for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { 146b8e80941Smrg for (int j = 0; j < 16; ++j) 147b8e80941Smrg block_start_s[space_filler[rem_y][j]] = source_start[j]; 148b8e80941Smrg } 149b8e80941Smrg } 150b8e80941Smrg} 151b8e80941Smrg 152b8e80941Smrgvoid 153b8e80941Smrgpanfrost_texture_swizzle(unsigned off_x, 154b8e80941Smrg unsigned off_y, 155b8e80941Smrg int width, int height, int bytes_per_pixel, int dest_width, 156b8e80941Smrg const uint8_t *pixels, 157b8e80941Smrg uint8_t *ldest) 158b8e80941Smrg{ 159b8e80941Smrg /* Calculate maximum size, overestimating a bit */ 160b8e80941Smrg int block_pitch = ALIGN(dest_width, 16) >> 4; 161b8e80941Smrg 162b8e80941Smrg /* Strides must be tight, since we're only ever called indirectly */ 163b8e80941Smrg int source_stride = width * bytes_per_pixel; 164b8e80941Smrg 165b8e80941Smrg /* Use fast path if available */ 166b8e80941Smrg if (!(off_x || off_y) && (width == dest_width)) { 167b8e80941Smrg if (bytes_per_pixel == 4 && (ALIGN(width, 16) == width)) { 168b8e80941Smrg swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); 169b8e80941Smrg return; 170b8e80941Smrg } else if (bytes_per_pixel == 1 && (ALIGN(width, 16) == width)) { 
171b8e80941Smrg swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); 172b8e80941Smrg return; 173b8e80941Smrg } 174b8e80941Smrg } 175b8e80941Smrg 176b8e80941Smrg /* Otherwise, default back on generic path */ 177b8e80941Smrg 178b8e80941Smrg for (int y = 0; y < height; ++y) { 179b8e80941Smrg int block_y = (y + off_y) >> 4; 180b8e80941Smrg int rem_y = (y + off_y) & 0x0F; 181b8e80941Smrg int block_start_s = block_y * block_pitch * 256; 182b8e80941Smrg int source_start = y * source_stride; 183b8e80941Smrg 184b8e80941Smrg for (int x = 0; x < width; ++x) { 185b8e80941Smrg int block_x_s = ((x + off_x) >> 4) * 256; 186b8e80941Smrg int rem_x = (x + off_x) & 0x0F; 187b8e80941Smrg 188b8e80941Smrg int index = space_filler[rem_y][rem_x]; 189b8e80941Smrg const uint8_t *source = &pixels[source_start + bytes_per_pixel * x]; 190b8e80941Smrg uint8_t *dest = ldest + bytes_per_pixel * (block_start_s + block_x_s + index); 191b8e80941Smrg 192b8e80941Smrg for (int b = 0; b < bytes_per_pixel; ++b) 193b8e80941Smrg dest[b] = source[b]; 194b8e80941Smrg } 195b8e80941Smrg } 196b8e80941Smrg} 197