1/* 2 * Copyright (c) 2012-2013 Luc Verhaegen <libv@skynet.be> 3 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sub license, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the 13 * next paragraph) shall be included in all copies or substantial portions 14 * of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 */ 24 25#include <stdio.h> 26#include "pan_swizzle.h" 27#include "pan_allocate.h" 28 29/* Space a group of 4-bits out. For instance, 0x7 -- that is, 0b111 -- would 30 * become 0b10101 */ 31 32static inline int 33space_bits_4(int i) 34{ 35 return ((i & 0x8) << 3) | 36 ((i & 0x4) << 2) | 37 ((i & 0x2) << 1) | 38 ((i & 0x1) << 0); 39} 40 41/* Generate lookup table for the space filler curve. Note this is a 1:1 42 * mapping, just with bits twiddled around. */ 43 44uint32_t space_filler[16][16]; 45uint32_t space_filler_packed4[16][4]; 46 47void 48panfrost_generate_space_filler_indices() 49{ 50 for (int y = 0; y < 16; ++y) { 51 for (int x = 0; x < 16; ++x) { 52 space_filler[y][x] = 53 space_bits_4(y ^ x) | (space_bits_4(y) << 1); 54 } 55 56 for (int q = 0; q < 4; ++q) { 57 space_filler_packed4[y][q] = 58 (space_filler[y][(q * 4) + 0] << 0) | 59 (space_filler[y][(q * 4) + 1] << 8) | 60 (space_filler[y][(q * 4) + 2] << 16) | 61 (space_filler[y][(q * 4) + 3] << 24); 62 } 63 } 64} 65 66static void 67swizzle_bpp1_align16(int width, int height, int source_stride, int block_pitch, 68 const uint8_t *pixels, 69 uint8_t *ldest) 70{ 71 for (int y = 0; y < height; ++y) { 72 { 73 int block_y = y & ~(0x0f); 74 int rem_y = y & 0x0f; 75 uint8_t *block_start_s = ldest + (block_y * block_pitch); 76 const uint8_t *source_start = pixels + (y * source_stride); 77 const uint8_t *source_end = source_start + width; 78 79 /* Operate on blocks of 16 pixels to minimise bookkeeping */ 80 81 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { 82 const uint32_t *src_32 = (const uint32_t *) source_start; 83 84 for (int q = 0; q < 4; ++q) { 85 uint32_t src = src_32[q]; 86 uint32_t spaced = space_filler_packed4[rem_y][q]; 87 uint16_t *bs = (uint16_t *) block_start_s; 88 89 int spacedA = (spaced >> 0) & 0xFF; 90 int spacedB = (spaced >> 16) & 0xFF; 91 92 bs[spacedA >> 1] = (src >> 0) & 0xFFFF; 93 bs[spacedB >> 1] = (src >> 16) & 0xFFFF; 94 } 95 } 96 } 97 98 ++y; 99 100 if (y >= height) 101 break; 102 103 { 104 int block_y = y & ~(0x0f); 105 int rem_y = y & 0x0f; 106 uint8_t *block_start_s = ldest + (block_y * block_pitch); 107 const uint8_t *source_start = pixels + (y * source_stride); 108 const uint8_t *source_end = source_start + width; 109 110 /* Operate on blocks of 16 pixels to minimise bookkeeping */ 111 112 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { 113 const uint32_t *src_32 = (const uint32_t *) source_start; 114 115 for (int q = 0; q < 4; ++q) { 116 uint32_t src = src_32[q]; 117 uint32_t spaced = space_filler_packed4[rem_y][q]; 118 119 block_start_s[(spaced >> 0) & 0xFF] = (src >> 0) & 0xFF; 120 block_start_s[(spaced >> 8) & 0xFF] = (src >> 8) & 0xFF; 121 122 block_start_s[(spaced >> 16) & 0xFF] = (src >> 16) & 0xFF; 123 block_start_s[(spaced >> 24) & 0xFF] = (src >> 24) & 0xFF; 124 } 125 } 126 } 127 128 } 129} 130 131static void 132swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch, 133 const uint32_t *pixels, 134 uint32_t *ldest) 135{ 136 for (int y = 0; y < height; ++y) { 137 int block_y = y & ~(0x0f); 138 int rem_y = y & 0x0f; 139 uint32_t *block_start_s = ldest + (block_y * block_pitch); 140 const uint32_t *source_start = pixels + (y * source_stride); 141 const uint32_t *source_end = source_start + width; 142 143 /* Operate on blocks of 16 pixels to minimise bookkeeping */ 144 145 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { 146 for (int j = 0; j < 16; ++j) 147 block_start_s[space_filler[rem_y][j]] = source_start[j]; 148 } 149 } 150} 151 152void 153panfrost_texture_swizzle(unsigned off_x, 154 unsigned off_y, 155 int width, int height, int bytes_per_pixel, int dest_width, 156 const uint8_t *pixels, 157 uint8_t *ldest) 158{ 159 /* Calculate maximum size, overestimating a bit */ 160 int block_pitch = ALIGN(dest_width, 16) >> 4; 161 162 /* Strides must be tight, since we're only ever called indirectly */ 163 int source_stride = width * bytes_per_pixel; 164 165 /* Use fast path if available */ 166 if (!(off_x || off_y) && (width == dest_width)) { 167 if (bytes_per_pixel == 4 && (ALIGN(width, 16) == width)) { 168 swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); 169 return; 170 } else if (bytes_per_pixel == 1 && (ALIGN(width, 16) == width)) { 171 swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); 172 return; 173 } 174 } 175 176 /* Otherwise, default back on generic path */ 177 178 for (int y = 0; y < height; ++y) { 179 int block_y = (y + off_y) >> 4; 180 int rem_y = (y + off_y) & 0x0F; 181 int block_start_s = block_y * block_pitch * 256; 182 int source_start = y * source_stride; 183 184 for (int x = 0; x < width; ++x) { 185 int block_x_s = ((x + off_x) >> 4) * 256; 186 int rem_x = (x + off_x) & 0x0F; 187 188 int index = space_filler[rem_y][rem_x]; 189 const uint8_t *source = &pixels[source_start + bytes_per_pixel * x]; 190 uint8_t *dest = ldest + bytes_per_pixel * (block_start_s + block_x_s + index); 191 192 for (int b = 0; b < bytes_per_pixel; ++b) 193 dest[b] = source[b]; 194 } 195 } 196} 197