1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 3b8e80941Smrg * Copyright 2018 Advanced Micro Devices, Inc. 4b8e80941Smrg * 5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 7b8e80941Smrg * to deal in the Software without restriction, including without limitation 8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 10b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * The above copyright notice and this permission notice (including the next 13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 14b8e80941Smrg * Software. 15b8e80941Smrg * 16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22b8e80941Smrg * DEALINGS IN THE SOFTWARE. 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg/** 26b8e80941Smrg * \file texcompress_astc.c 27b8e80941Smrg * 28b8e80941Smrg * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just 29b8e80941Smrg * ASTC 2D LDR. 30b8e80941Smrg * 31b8e80941Smrg * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC 32b8e80941Smrg * library written by Philip Taylor. I added sRGB support and adjusted it for 33b8e80941Smrg * Mesa. - Marek 34b8e80941Smrg */ 35b8e80941Smrg 36b8e80941Smrg#include "texcompress_astc.h" 37b8e80941Smrg#include "macros.h" 38b8e80941Smrg#include "util/half_float.h" 39b8e80941Smrg#include <stdio.h> 40b8e80941Smrg 41b8e80941Smrgstatic bool VERBOSE_DECODE = false; 42b8e80941Smrgstatic bool VERBOSE_WRITE = false; 43b8e80941Smrg 44b8e80941Smrgstatic inline uint8_t 45b8e80941Smrguint16_div_64k_to_half_to_unorm8(uint16_t v) 46b8e80941Smrg{ 47b8e80941Smrg return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v)); 48b8e80941Smrg} 49b8e80941Smrg 50b8e80941Smrgclass decode_error 51b8e80941Smrg{ 52b8e80941Smrgpublic: 53b8e80941Smrg enum type { 54b8e80941Smrg ok, 55b8e80941Smrg unsupported_hdr_void_extent, 56b8e80941Smrg reserved_block_mode_1, 57b8e80941Smrg reserved_block_mode_2, 58b8e80941Smrg dual_plane_and_too_many_partitions, 59b8e80941Smrg invalid_range_in_void_extent, 60b8e80941Smrg weight_grid_exceeds_block_size, 61b8e80941Smrg invalid_colour_endpoints_size, 62b8e80941Smrg invalid_colour_endpoints_count, 63b8e80941Smrg invalid_weight_bits, 64b8e80941Smrg invalid_num_weights, 65b8e80941Smrg }; 66b8e80941Smrg}; 67b8e80941Smrg 68b8e80941Smrg 69b8e80941Smrgstruct cem_range { 70b8e80941Smrg uint8_t max; 71b8e80941Smrg uint8_t t, q, b; 72b8e80941Smrg}; 73b8e80941Smrg 74b8e80941Smrg/* Based on the Color Unquantization Parameters table, 75b8e80941Smrg * plus the bit-only representations, sorted by increasing size 76b8e80941Smrg */ 77b8e80941Smrgstatic cem_range cem_ranges[] = { 78b8e80941Smrg { 5, 1, 0, 1 }, 79b8e80941Smrg { 7, 0, 0, 3 }, 80b8e80941Smrg { 9, 0, 1, 1 }, 81b8e80941Smrg { 11, 1, 0, 2 }, 82b8e80941Smrg { 15, 0, 0, 4 }, 83b8e80941Smrg { 19, 0, 1, 2 }, 84b8e80941Smrg { 23, 1, 0, 3 }, 85b8e80941Smrg { 31, 0, 0, 5 }, 86b8e80941Smrg { 39, 0, 1, 3 }, 87b8e80941Smrg { 47, 1, 0, 4 }, 88b8e80941Smrg { 63, 0, 0, 6 }, 89b8e80941Smrg { 79, 0, 1, 4 }, 90b8e80941Smrg { 95, 1, 0, 5 }, 91b8e80941Smrg { 127, 0, 0, 7 }, 92b8e80941Smrg { 159, 0, 1, 5 }, 93b8e80941Smrg { 191, 1, 0, 6 }, 94b8e80941Smrg { 255, 0, 0, 8 }, 95b8e80941Smrg}; 96b8e80941Smrg 97b8e80941Smrg#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) ) 98b8e80941Smrg#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) ) 99b8e80941Smrg#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) ) 100b8e80941Smrg#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) ) 101b8e80941Smrg 102b8e80941Smrg/** 103b8e80941Smrg * Unpack 5n+8 bits from 'in' into 5 output values. 104b8e80941Smrg * If n <= 4 then T should be uint32_t, else it must be uint64_t. 105b8e80941Smrg */ 106b8e80941Smrgtemplate <typename T> 107b8e80941Smrgstatic void unpack_trit_block(int n, T in, uint8_t *out) 108b8e80941Smrg{ 109b8e80941Smrg assert(n <= 6); /* else output will overflow uint8_t */ 110b8e80941Smrg 111b8e80941Smrg uint8_t T0 = (in >> (n)) & 0x1; 112b8e80941Smrg uint8_t T1 = (in >> (n+1)) & 0x1; 113b8e80941Smrg uint8_t T2 = (in >> (2*n+2)) & 0x1; 114b8e80941Smrg uint8_t T3 = (in >> (2*n+3)) & 0x1; 115b8e80941Smrg uint8_t T4 = (in >> (3*n+4)) & 0x1; 116b8e80941Smrg uint8_t T5 = (in >> (4*n+5)) & 0x1; 117b8e80941Smrg uint8_t T6 = (in >> (4*n+6)) & 0x1; 118b8e80941Smrg uint8_t T7 = (in >> (5*n+7)) & 0x1; 119b8e80941Smrg uint8_t mmask = (1 << n) - 1; 120b8e80941Smrg uint8_t m0 = (in >> (0)) & mmask; 121b8e80941Smrg uint8_t m1 = (in >> (n+2)) & mmask; 122b8e80941Smrg uint8_t m2 = (in >> (2*n+4)) & mmask; 123b8e80941Smrg uint8_t m3 = (in >> (3*n+5)) & mmask; 124b8e80941Smrg uint8_t m4 = (in >> (4*n+7)) & mmask; 125b8e80941Smrg 126b8e80941Smrg uint8_t C; 127b8e80941Smrg uint8_t t4, t3, t2, t1, t0; 128b8e80941Smrg if (CAT_BITS_3(T4, T3, T2) == 0x7) { 129b8e80941Smrg C = CAT_BITS_5(T7, T6, T5, T1, T0); 130b8e80941Smrg t4 = t3 = 2; 131b8e80941Smrg } else { 132b8e80941Smrg C = CAT_BITS_5(T4, T3, T2, T1, T0); 133b8e80941Smrg if (CAT_BITS_2(T6, T5) == 0x3) { 134b8e80941Smrg t4 = 2; 135b8e80941Smrg t3 = T7; 136b8e80941Smrg } else { 137b8e80941Smrg t4 = T7; 138b8e80941Smrg t3 = CAT_BITS_2(T6, T5); 139b8e80941Smrg } 140b8e80941Smrg } 141b8e80941Smrg 142b8e80941Smrg if ((C & 0x3) == 0x3) { 143b8e80941Smrg t2 = 2; 144b8e80941Smrg t1 = (C >> 4) & 0x1; 145b8e80941Smrg uint8_t C3 = (C >> 3) & 0x1; 146b8e80941Smrg uint8_t C2 = (C >> 2) & 0x1; 147b8e80941Smrg t0 = (C3 << 1) | (C2 & ~C3); 148b8e80941Smrg } else if (((C >> 2) & 0x3) == 0x3) { 149b8e80941Smrg t2 = 2; 150b8e80941Smrg t1 = 2; 151b8e80941Smrg t0 = C & 0x3; 152b8e80941Smrg } else { 153b8e80941Smrg t2 = (C >> 4) & 0x1; 154b8e80941Smrg t1 = (C >> 2) & 0x3; 155b8e80941Smrg uint8_t C1 = (C >> 1) & 0x1; 156b8e80941Smrg uint8_t C0 = (C >> 0) & 0x1; 157b8e80941Smrg t0 = (C1 << 1) | (C0 & ~C1); 158b8e80941Smrg } 159b8e80941Smrg 160b8e80941Smrg out[0] = (t0 << n) | m0; 161b8e80941Smrg out[1] = (t1 << n) | m1; 162b8e80941Smrg out[2] = (t2 << n) | m2; 163b8e80941Smrg out[3] = (t3 << n) | m3; 164b8e80941Smrg out[4] = (t4 << n) | m4; 165b8e80941Smrg} 166b8e80941Smrg 167b8e80941Smrg/** 168b8e80941Smrg * Unpack 3n+7 bits from 'in' into 3 output values 169b8e80941Smrg */ 170b8e80941Smrgstatic void unpack_quint_block(int n, uint32_t in, uint8_t *out) 171b8e80941Smrg{ 172b8e80941Smrg assert(n <= 5); /* else output will overflow uint8_t */ 173b8e80941Smrg 174b8e80941Smrg uint8_t Q0 = (in >> (n)) & 0x1; 175b8e80941Smrg uint8_t Q1 = (in >> (n+1)) & 0x1; 176b8e80941Smrg uint8_t Q2 = (in >> (n+2)) & 0x1; 177b8e80941Smrg uint8_t Q3 = (in >> (2*n+3)) & 0x1; 178b8e80941Smrg uint8_t Q4 = (in >> (2*n+4)) & 0x1; 179b8e80941Smrg uint8_t Q5 = (in >> (3*n+5)) & 0x1; 180b8e80941Smrg uint8_t Q6 = (in >> (3*n+6)) & 0x1; 181b8e80941Smrg uint8_t mmask = (1 << n) - 1; 182b8e80941Smrg uint8_t m0 = (in >> (0)) & mmask; 183b8e80941Smrg uint8_t m1 = (in >> (n+3)) & mmask; 184b8e80941Smrg uint8_t m2 = (in >> (2*n+5)) & mmask; 185b8e80941Smrg 186b8e80941Smrg uint8_t C; 187b8e80941Smrg uint8_t q2, q1, q0; 188b8e80941Smrg if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0x3) { 189b8e80941Smrg q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0); 190b8e80941Smrg q1 = 4; 191b8e80941Smrg q0 = 4; 192b8e80941Smrg } else { 193b8e80941Smrg if (CAT_BITS_2(Q2, Q1) == 0x3) { 194b8e80941Smrg q2 = 4; 195b8e80941Smrg C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0); 196b8e80941Smrg } else { 197b8e80941Smrg q2 = CAT_BITS_2(Q6, Q5); 198b8e80941Smrg C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0); 199b8e80941Smrg } 200b8e80941Smrg if ((C & 0x7) == 0x5) { 201b8e80941Smrg q1 = 4; 202b8e80941Smrg q0 = (C >> 3) & 0x3; 203b8e80941Smrg } else { 204b8e80941Smrg q1 = (C >> 3) & 0x3; 205b8e80941Smrg q0 = C & 0x7; 206b8e80941Smrg } 207b8e80941Smrg } 208b8e80941Smrg out[0] = (q0 << n) | m0; 209b8e80941Smrg out[1] = (q1 << n) | m1; 210b8e80941Smrg out[2] = (q2 << n) | m2; 211b8e80941Smrg} 212b8e80941Smrg 213b8e80941Smrg 214b8e80941Smrgstruct uint8x4_t 215b8e80941Smrg{ 216b8e80941Smrg uint8_t v[4]; 217b8e80941Smrg 218b8e80941Smrg uint8x4_t() { } 219b8e80941Smrg 220b8e80941Smrg uint8x4_t(int a, int b, int c, int d) 221b8e80941Smrg { 222b8e80941Smrg assert(0 <= a && a <= 255); 223b8e80941Smrg assert(0 <= b && b <= 255); 224b8e80941Smrg assert(0 <= c && c <= 255); 225b8e80941Smrg assert(0 <= d && d <= 255); 226b8e80941Smrg v[0] = a; 227b8e80941Smrg v[1] = b; 228b8e80941Smrg v[2] = c; 229b8e80941Smrg v[3] = d; 230b8e80941Smrg } 231b8e80941Smrg 232b8e80941Smrg static uint8x4_t clamped(int a, int b, int c, int d) 233b8e80941Smrg { 234b8e80941Smrg uint8x4_t r; 235b8e80941Smrg r.v[0] = MAX2(0, MIN2(255, a)); 236b8e80941Smrg r.v[1] = MAX2(0, MIN2(255, b)); 237b8e80941Smrg r.v[2] = MAX2(0, MIN2(255, c)); 238b8e80941Smrg r.v[3] = MAX2(0, MIN2(255, d)); 239b8e80941Smrg return r; 240b8e80941Smrg } 241b8e80941Smrg}; 242b8e80941Smrg 243b8e80941Smrgstatic uint8x4_t blue_contract(int r, int g, int b, int a) 244b8e80941Smrg{ 245b8e80941Smrg return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a); 246b8e80941Smrg} 247b8e80941Smrg 248b8e80941Smrgstatic uint8x4_t blue_contract_clamped(int r, int g, int b, int a) 249b8e80941Smrg{ 250b8e80941Smrg return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a); 251b8e80941Smrg} 252b8e80941Smrg 253b8e80941Smrgstatic void bit_transfer_signed(int &a, int &b) 254b8e80941Smrg{ 255b8e80941Smrg b >>= 1; 256b8e80941Smrg b |= a & 0x80; 257b8e80941Smrg a >>= 1; 258b8e80941Smrg a &= 0x3f; 259b8e80941Smrg if (a & 0x20) 260b8e80941Smrg a -= 0x40; 261b8e80941Smrg} 262b8e80941Smrg 263b8e80941Smrgstatic uint32_t hash52(uint32_t p) 264b8e80941Smrg{ 265b8e80941Smrg p ^= p >> 15; 266b8e80941Smrg p -= p << 17; 267b8e80941Smrg p += p << 7; 268b8e80941Smrg p += p << 4; 269b8e80941Smrg p ^= p >> 5; 270b8e80941Smrg p += p << 16; 271b8e80941Smrg p ^= p >> 7; 272b8e80941Smrg p ^= p >> 3; 273b8e80941Smrg p ^= p << 6; 274b8e80941Smrg p ^= p >> 17; 275b8e80941Smrg return p; 276b8e80941Smrg} 277b8e80941Smrg 278b8e80941Smrgstatic int select_partition(int seed, int x, int y, int z, int partitioncount, 279b8e80941Smrg int small_block) 280b8e80941Smrg{ 281b8e80941Smrg if (small_block) { 282b8e80941Smrg x <<= 1; 283b8e80941Smrg y <<= 1; 284b8e80941Smrg z <<= 1; 285b8e80941Smrg } 286b8e80941Smrg seed += (partitioncount - 1) * 1024; 287b8e80941Smrg uint32_t rnum = hash52(seed); 288b8e80941Smrg uint8_t seed1 = rnum & 0xF; 289b8e80941Smrg uint8_t seed2 = (rnum >> 4) & 0xF; 290b8e80941Smrg uint8_t seed3 = (rnum >> 8) & 0xF; 291b8e80941Smrg uint8_t seed4 = (rnum >> 12) & 0xF; 292b8e80941Smrg uint8_t seed5 = (rnum >> 16) & 0xF; 293b8e80941Smrg uint8_t seed6 = (rnum >> 20) & 0xF; 294b8e80941Smrg uint8_t seed7 = (rnum >> 24) & 0xF; 295b8e80941Smrg uint8_t seed8 = (rnum >> 28) & 0xF; 296b8e80941Smrg uint8_t seed9 = (rnum >> 18) & 0xF; 297b8e80941Smrg uint8_t seed10 = (rnum >> 22) & 0xF; 298b8e80941Smrg uint8_t seed11 = (rnum >> 26) & 0xF; 299b8e80941Smrg uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; 300b8e80941Smrg 301b8e80941Smrg seed1 *= seed1; 302b8e80941Smrg seed2 *= seed2; 303b8e80941Smrg seed3 *= seed3; 304b8e80941Smrg seed4 *= seed4; 305b8e80941Smrg seed5 *= seed5; 306b8e80941Smrg seed6 *= seed6; 307b8e80941Smrg seed7 *= seed7; 308b8e80941Smrg seed8 *= seed8; 309b8e80941Smrg seed9 *= seed9; 310b8e80941Smrg seed10 *= seed10; 311b8e80941Smrg seed11 *= seed11; 312b8e80941Smrg seed12 *= seed12; 313b8e80941Smrg 314b8e80941Smrg int sh1, sh2, sh3; 315b8e80941Smrg if (seed & 1) { 316b8e80941Smrg sh1 = (seed & 2 ? 4 : 5); 317b8e80941Smrg sh2 = (partitioncount == 3 ? 6 : 5); 318b8e80941Smrg } else { 319b8e80941Smrg sh1 = (partitioncount == 3 ? 6 : 5); 320b8e80941Smrg sh2 = (seed & 2 ? 4 : 5); 321b8e80941Smrg } 322b8e80941Smrg sh3 = (seed & 0x10) ? sh1 : sh2; 323b8e80941Smrg 324b8e80941Smrg seed1 >>= sh1; 325b8e80941Smrg seed2 >>= sh2; 326b8e80941Smrg seed3 >>= sh1; 327b8e80941Smrg seed4 >>= sh2; 328b8e80941Smrg seed5 >>= sh1; 329b8e80941Smrg seed6 >>= sh2; 330b8e80941Smrg seed7 >>= sh1; 331b8e80941Smrg seed8 >>= sh2; 332b8e80941Smrg seed9 >>= sh3; 333b8e80941Smrg seed10 >>= sh3; 334b8e80941Smrg seed11 >>= sh3; 335b8e80941Smrg seed12 >>= sh3; 336b8e80941Smrg 337b8e80941Smrg int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 338b8e80941Smrg int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 339b8e80941Smrg int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 340b8e80941Smrg int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 341b8e80941Smrg 342b8e80941Smrg a &= 0x3F; 343b8e80941Smrg b &= 0x3F; 344b8e80941Smrg c &= 0x3F; 345b8e80941Smrg d &= 0x3F; 346b8e80941Smrg 347b8e80941Smrg if (partitioncount < 4) 348b8e80941Smrg d = 0; 349b8e80941Smrg if (partitioncount < 3) 350b8e80941Smrg c = 0; 351b8e80941Smrg 352b8e80941Smrg if (a >= b && a >= c && a >= d) 353b8e80941Smrg return 0; 354b8e80941Smrg else if (b >= c && b >= d) 355b8e80941Smrg return 1; 356b8e80941Smrg else if (c >= d) 357b8e80941Smrg return 2; 358b8e80941Smrg else 359b8e80941Smrg return 3; 360b8e80941Smrg} 361b8e80941Smrg 362b8e80941Smrg 363b8e80941Smrgstruct InputBitVector 364b8e80941Smrg{ 365b8e80941Smrg uint32_t data[4]; 366b8e80941Smrg 367b8e80941Smrg void printf_bits(int offset, int count, const char *fmt = "", ...) 368b8e80941Smrg { 369b8e80941Smrg char out[129]; 370b8e80941Smrg memset(out, '.', 128); 371b8e80941Smrg out[128] = '\0'; 372b8e80941Smrg int idx = offset; 373b8e80941Smrg for (int i = 0; i < count; ++i) { 374b8e80941Smrg out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0'; 375b8e80941Smrg ++idx; 376b8e80941Smrg } 377b8e80941Smrg printf("%s ", out); 378b8e80941Smrg va_list ap; 379b8e80941Smrg va_start(ap, fmt); 380b8e80941Smrg vprintf(fmt, ap); 381b8e80941Smrg va_end(ap); 382b8e80941Smrg printf("\n"); 383b8e80941Smrg } 384b8e80941Smrg 385b8e80941Smrg uint32_t get_bits(int offset, int count) 386b8e80941Smrg { 387b8e80941Smrg assert(count >= 0 && count < 32); 388b8e80941Smrg 389b8e80941Smrg uint32_t out = 0; 390b8e80941Smrg if (offset < 32) 391b8e80941Smrg out |= data[0] >> offset; 392b8e80941Smrg 393b8e80941Smrg if (0 < offset && offset <= 32) 394b8e80941Smrg out |= data[1] << (32 - offset); 395b8e80941Smrg if (32 < offset && offset < 64) 396b8e80941Smrg out |= data[1] >> (offset - 32); 397b8e80941Smrg 398b8e80941Smrg if (32 < offset && offset <= 64) 399b8e80941Smrg out |= data[2] << (64 - offset); 400b8e80941Smrg if (64 < offset && offset < 96) 401b8e80941Smrg out |= data[2] >> (offset - 64); 402b8e80941Smrg 403b8e80941Smrg if (64 < offset && offset <= 96) 404b8e80941Smrg out |= data[3] << (96 - offset); 405b8e80941Smrg if (96 < offset && offset < 128) 406b8e80941Smrg out |= data[3] >> (offset - 96); 407b8e80941Smrg 408b8e80941Smrg out &= (1 << count) - 1; 409b8e80941Smrg return out; 410b8e80941Smrg } 411b8e80941Smrg 412b8e80941Smrg uint64_t get_bits64(int offset, int count) 413b8e80941Smrg { 414b8e80941Smrg assert(count >= 0 && count < 64); 415b8e80941Smrg 416b8e80941Smrg uint64_t out = 0; 417b8e80941Smrg if (offset < 32) 418b8e80941Smrg out |= data[0] >> offset; 419b8e80941Smrg 420b8e80941Smrg if (offset <= 32) 421b8e80941Smrg out |= (uint64_t)data[1] << (32 - offset); 422b8e80941Smrg if (32 < offset && offset < 64) 423b8e80941Smrg out |= data[1] >> (offset - 32); 424b8e80941Smrg 425b8e80941Smrg if (0 < offset && offset <= 64) 426b8e80941Smrg out |= (uint64_t)data[2] << (64 - offset); 427b8e80941Smrg if (64 < offset && offset < 96) 428b8e80941Smrg out |= data[2] >> (offset - 64); 429b8e80941Smrg 430b8e80941Smrg if (32 < offset && offset <= 96) 431b8e80941Smrg out |= (uint64_t)data[3] << (96 - offset); 432b8e80941Smrg if (96 < offset && offset < 128) 433b8e80941Smrg out |= data[3] >> (offset - 96); 434b8e80941Smrg 435b8e80941Smrg out &= ((uint64_t)1 << count) - 1; 436b8e80941Smrg return out; 437b8e80941Smrg } 438b8e80941Smrg 439b8e80941Smrg uint32_t get_bits_rev(int offset, int count) 440b8e80941Smrg { 441b8e80941Smrg assert(offset >= count); 442b8e80941Smrg uint32_t tmp = get_bits(offset - count, count); 443b8e80941Smrg uint32_t out = 0; 444b8e80941Smrg for (int i = 0; i < count; ++i) 445b8e80941Smrg out |= ((tmp >> i) & 1) << (count - 1 - i); 446b8e80941Smrg return out; 447b8e80941Smrg } 448b8e80941Smrg}; 449b8e80941Smrg 450b8e80941Smrgstruct OutputBitVector 451b8e80941Smrg{ 452b8e80941Smrg uint32_t data[4]; 453b8e80941Smrg int offset; 454b8e80941Smrg 455b8e80941Smrg OutputBitVector() 456b8e80941Smrg : offset(0) 457b8e80941Smrg { 458b8e80941Smrg memset(data, 0, sizeof(data)); 459b8e80941Smrg } 460b8e80941Smrg 461b8e80941Smrg void append(uint32_t value, int size) 462b8e80941Smrg { 463b8e80941Smrg if (VERBOSE_WRITE) 464b8e80941Smrg printf("append offset=%d size=%d values=0x%x\n", offset, size, value); 465b8e80941Smrg 466b8e80941Smrg assert(offset + size <= 128); 467b8e80941Smrg 468b8e80941Smrg assert(size <= 32); 469b8e80941Smrg if (size < 32) 470b8e80941Smrg assert((value >> size) == 0); 471b8e80941Smrg 472b8e80941Smrg while (size) { 473b8e80941Smrg int c = MIN2(size, 32 - (offset & 31)); 474b8e80941Smrg data[offset >> 5] |= (value << (offset & 31)); 475b8e80941Smrg offset += c; 476b8e80941Smrg size -= c; 477b8e80941Smrg value >>= c; 478b8e80941Smrg } 479b8e80941Smrg } 480b8e80941Smrg 481b8e80941Smrg void append64(uint64_t value, int size) 482b8e80941Smrg { 483b8e80941Smrg if (VERBOSE_WRITE) 484b8e80941Smrg printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value); 485b8e80941Smrg 486b8e80941Smrg assert(offset + size <= 128); 487b8e80941Smrg 488b8e80941Smrg assert(size <= 64); 489b8e80941Smrg if (size < 64) 490b8e80941Smrg assert((value >> size) == 0); 491b8e80941Smrg 492b8e80941Smrg while (size) { 493b8e80941Smrg int c = MIN2(size, 32 - (offset & 31)); 494b8e80941Smrg data[offset >> 5] |= (value << (offset & 31)); 495b8e80941Smrg offset += c; 496b8e80941Smrg size -= c; 497b8e80941Smrg value >>= c; 498b8e80941Smrg } 499b8e80941Smrg } 500b8e80941Smrg 501b8e80941Smrg void append(OutputBitVector &v, int size) 502b8e80941Smrg { 503b8e80941Smrg if (VERBOSE_WRITE) 504b8e80941Smrg printf("append vector offset=%d size=%d\n", offset, size); 505b8e80941Smrg 506b8e80941Smrg assert(offset + size <= 128); 507b8e80941Smrg int i = 0; 508b8e80941Smrg while (size >= 32) { 509b8e80941Smrg append(v.data[i++], 32); 510b8e80941Smrg size -= 32; 511b8e80941Smrg } 512b8e80941Smrg if (size > 0) 513b8e80941Smrg append(v.data[i] & ((1 << size) - 1), size); 514b8e80941Smrg } 515b8e80941Smrg 516b8e80941Smrg void append_end(OutputBitVector &v, int size) 517b8e80941Smrg { 518b8e80941Smrg for (int i = 0; i < size; ++i) 519b8e80941Smrg data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31); 520b8e80941Smrg } 521b8e80941Smrg 522b8e80941Smrg /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are 523b8e80941Smrg * more likely to flush out bugs where we accidentally read undefined bits.) 524b8e80941Smrg */ 525b8e80941Smrg void skip(int size) 526b8e80941Smrg { 527b8e80941Smrg if (VERBOSE_WRITE) 528b8e80941Smrg printf("skip offset=%d size=%d\n", offset, size); 529b8e80941Smrg 530b8e80941Smrg assert(offset + size <= 128); 531b8e80941Smrg while (size >= 32) { 532b8e80941Smrg append(0xffffffff, 32); 533b8e80941Smrg size -= 32; 534b8e80941Smrg } 535b8e80941Smrg if (size > 0) 536b8e80941Smrg append(0xffffffff >> (32 - size), size); 537b8e80941Smrg } 538b8e80941Smrg}; 539b8e80941Smrg 540b8e80941Smrg 541b8e80941Smrgclass Decoder 542b8e80941Smrg{ 543b8e80941Smrgpublic: 544b8e80941Smrg Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8) 545b8e80941Smrg : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb), 546b8e80941Smrg output_unorm8(output_unorm8) {} 547b8e80941Smrg 548b8e80941Smrg decode_error::type decode(const uint8_t *in, uint16_t *output) const; 549b8e80941Smrg 550b8e80941Smrg int block_w, block_h, block_d; 551b8e80941Smrg bool srgb, output_unorm8; 552b8e80941Smrg}; 553b8e80941Smrg 554b8e80941Smrgstruct Block 555b8e80941Smrg{ 556b8e80941Smrg bool is_error; 557b8e80941Smrg bool bogus_colour_endpoints; 558b8e80941Smrg bool bogus_weights; 559b8e80941Smrg 560b8e80941Smrg int high_prec; 561b8e80941Smrg int dual_plane; 562b8e80941Smrg int colour_component_selector; 563b8e80941Smrg int wt_range; 564b8e80941Smrg int wt_w, wt_h, wt_d; 565b8e80941Smrg int num_parts; 566b8e80941Smrg int partition_index; 567b8e80941Smrg 568b8e80941Smrg bool is_void_extent; 569b8e80941Smrg int void_extent_d; 570b8e80941Smrg int void_extent_min_s; 571b8e80941Smrg int void_extent_max_s; 572b8e80941Smrg int void_extent_min_t; 573b8e80941Smrg int void_extent_max_t; 574b8e80941Smrg uint16_t void_extent_colour_r; 575b8e80941Smrg uint16_t void_extent_colour_g; 576b8e80941Smrg uint16_t void_extent_colour_b; 577b8e80941Smrg uint16_t void_extent_colour_a; 578b8e80941Smrg 579b8e80941Smrg bool is_multi_cem; 580b8e80941Smrg int num_extra_cem_bits; 581b8e80941Smrg int colour_endpoint_data_offset; 582b8e80941Smrg int extra_cem_bits; 583b8e80941Smrg int cem_base_class; 584b8e80941Smrg int cems[4]; 585b8e80941Smrg 586b8e80941Smrg int num_cem_values; 587b8e80941Smrg 588b8e80941Smrg /* Calculated by unpack_weights(): */ 589b8e80941Smrg uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */ 590b8e80941Smrg 591b8e80941Smrg /* Calculated by unquantise_weights(): */ 592b8e80941Smrg uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */ 593b8e80941Smrg 594b8e80941Smrg /* Calculated by unpack_colour_endpoints(): */ 595b8e80941Smrg uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */ 596b8e80941Smrg 597b8e80941Smrg /* Calculated by unquantise_colour_endpoints(): */ 598b8e80941Smrg uint8_t colour_endpoints[18]; 599b8e80941Smrg 600b8e80941Smrg /* Calculated by calculate_from_weights(): */ 601b8e80941Smrg int wt_trits; 602b8e80941Smrg int wt_quints; 603b8e80941Smrg int wt_bits; 604b8e80941Smrg int wt_max; 605b8e80941Smrg int num_weights; 606b8e80941Smrg int weight_bits; 607b8e80941Smrg 608b8e80941Smrg /* Calculated by calculate_remaining_bits(): */ 609b8e80941Smrg int remaining_bits; 610b8e80941Smrg 611b8e80941Smrg /* Calculated by calculate_colour_endpoints_size(): */ 612b8e80941Smrg int colour_endpoint_bits; 613b8e80941Smrg int ce_max; 614b8e80941Smrg int ce_trits; 615b8e80941Smrg int ce_quints; 616b8e80941Smrg int ce_bits; 617b8e80941Smrg 618b8e80941Smrg /* Calculated by compute_infill_weights(); */ 619b8e80941Smrg uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */ 620b8e80941Smrg 621b8e80941Smrg /* Calculated by decode_colour_endpoints(); */ 622b8e80941Smrg uint8x4_t endpoints_decoded[2][4]; 623b8e80941Smrg 624b8e80941Smrg void calculate_from_weights(); 625b8e80941Smrg void calculate_remaining_bits(); 626b8e80941Smrg decode_error::type calculate_colour_endpoints_size(); 627b8e80941Smrg 628b8e80941Smrg void unquantise_weights(); 629b8e80941Smrg void unquantise_colour_endpoints(); 630b8e80941Smrg 631b8e80941Smrg decode_error::type decode(const Decoder &decoder, InputBitVector in); 632b8e80941Smrg 633b8e80941Smrg decode_error::type decode_block_mode(InputBitVector in); 634b8e80941Smrg decode_error::type decode_void_extent(InputBitVector in); 635b8e80941Smrg void decode_cem(InputBitVector in); 636b8e80941Smrg void unpack_colour_endpoints(InputBitVector in); 637b8e80941Smrg void decode_colour_endpoints(); 638b8e80941Smrg void unpack_weights(InputBitVector in); 639b8e80941Smrg void compute_infill_weights(int block_w, int block_h, int block_d); 640b8e80941Smrg 641b8e80941Smrg void write_decoded(const Decoder &decoder, uint16_t *output); 642b8e80941Smrg}; 643b8e80941Smrg 644b8e80941Smrg 645b8e80941Smrgdecode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const 646b8e80941Smrg{ 647b8e80941Smrg Block blk; 648b8e80941Smrg InputBitVector in_vec; 649b8e80941Smrg memcpy(&in_vec.data, in, 16); 650b8e80941Smrg decode_error::type err = blk.decode(*this, in_vec); 651b8e80941Smrg if (err == decode_error::ok) { 652b8e80941Smrg blk.write_decoded(*this, output); 653b8e80941Smrg } else { 654b8e80941Smrg /* Fill output with the error colour */ 655b8e80941Smrg for (int i = 0; i < block_w * block_h * block_d; ++i) { 656b8e80941Smrg if (output_unorm8) { 657b8e80941Smrg output[i*4+0] = 0xff; 658b8e80941Smrg output[i*4+1] = 0; 659b8e80941Smrg output[i*4+2] = 0xff; 660b8e80941Smrg output[i*4+3] = 0xff; 661b8e80941Smrg } else { 662b8e80941Smrg assert(!srgb); /* srgb must use unorm8 */ 663b8e80941Smrg 664b8e80941Smrg output[i*4+0] = FP16_ONE; 665b8e80941Smrg output[i*4+1] = FP16_ZERO; 666b8e80941Smrg output[i*4+2] = FP16_ONE; 667b8e80941Smrg output[i*4+3] = FP16_ONE; 668b8e80941Smrg } 669b8e80941Smrg } 670b8e80941Smrg } 671b8e80941Smrg return err; 672b8e80941Smrg} 673b8e80941Smrg 674b8e80941Smrg 675b8e80941Smrgdecode_error::type Block::decode_void_extent(InputBitVector block) 676b8e80941Smrg{ 677b8e80941Smrg /* TODO: 3D */ 678b8e80941Smrg 679b8e80941Smrg is_void_extent = true; 680b8e80941Smrg void_extent_d = block.get_bits(9, 1); 681b8e80941Smrg void_extent_min_s = block.get_bits(12, 13); 682b8e80941Smrg void_extent_max_s = block.get_bits(25, 13); 683b8e80941Smrg void_extent_min_t = block.get_bits(38, 13); 684b8e80941Smrg void_extent_max_t = block.get_bits(51, 13); 685b8e80941Smrg void_extent_colour_r = block.get_bits(64, 16); 686b8e80941Smrg void_extent_colour_g = block.get_bits(80, 16); 687b8e80941Smrg void_extent_colour_b = block.get_bits(96, 16); 688b8e80941Smrg void_extent_colour_a = block.get_bits(112, 16); 689b8e80941Smrg 690b8e80941Smrg /* TODO: maybe we should do something useful with the extent coordinates? */ 691b8e80941Smrg 692b8e80941Smrg if (void_extent_d) { 693b8e80941Smrg return decode_error::unsupported_hdr_void_extent; 694b8e80941Smrg } 695b8e80941Smrg 696b8e80941Smrg if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff 697b8e80941Smrg && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) { 698b8e80941Smrg 699b8e80941Smrg /* No extents */ 700b8e80941Smrg 701b8e80941Smrg } else { 702b8e80941Smrg 703b8e80941Smrg /* Check for illegal encoding */ 704b8e80941Smrg if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) { 705b8e80941Smrg return decode_error::invalid_range_in_void_extent; 706b8e80941Smrg } 707b8e80941Smrg } 708b8e80941Smrg 709b8e80941Smrg return decode_error::ok; 710b8e80941Smrg} 711b8e80941Smrg 712b8e80941Smrgdecode_error::type Block::decode_block_mode(InputBitVector in) 713b8e80941Smrg{ 714b8e80941Smrg dual_plane = in.get_bits(10, 1); 715b8e80941Smrg high_prec = in.get_bits(9, 1); 716b8e80941Smrg 717b8e80941Smrg if (in.get_bits(0, 2) != 0x0) { 718b8e80941Smrg wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1); 719b8e80941Smrg int a = in.get_bits(5, 2); 720b8e80941Smrg int b = in.get_bits(7, 2); 721b8e80941Smrg switch (in.get_bits(2, 2)) { 722b8e80941Smrg case 0x0: 723b8e80941Smrg if (VERBOSE_DECODE) 724b8e80941Smrg in.printf_bits(0, 11, "DHBBAAR00RR"); 725b8e80941Smrg wt_w = b + 4; 726b8e80941Smrg wt_h = a + 2; 727b8e80941Smrg break; 728b8e80941Smrg case 0x1: 729b8e80941Smrg if (VERBOSE_DECODE) 730b8e80941Smrg in.printf_bits(0, 11, "DHBBAAR01RR"); 731b8e80941Smrg wt_w = b + 8; 732b8e80941Smrg wt_h = a + 2; 733b8e80941Smrg break; 734b8e80941Smrg case 0x2: 735b8e80941Smrg if (VERBOSE_DECODE) 736b8e80941Smrg in.printf_bits(0, 11, "DHBBAAR10RR"); 737b8e80941Smrg wt_w = a + 2; 738b8e80941Smrg wt_h = b + 8; 739b8e80941Smrg break; 740b8e80941Smrg case 0x3: 741b8e80941Smrg if ((b & 0x2) == 0) { 742b8e80941Smrg if (VERBOSE_DECODE) 743b8e80941Smrg in.printf_bits(0, 11, "DH0BAAR11RR"); 744b8e80941Smrg wt_w = a + 2; 745b8e80941Smrg wt_h = b + 6; 746b8e80941Smrg } else { 747b8e80941Smrg if (VERBOSE_DECODE) 748b8e80941Smrg in.printf_bits(0, 11, "DH1BAAR11RR"); 749b8e80941Smrg wt_w = (b & 0x1) + 2; 750b8e80941Smrg wt_h = a + 2; 751b8e80941Smrg } 752b8e80941Smrg break; 753b8e80941Smrg } 754b8e80941Smrg } else { 755b8e80941Smrg if (in.get_bits(6, 3) == 0x7) { 756b8e80941Smrg if (in.get_bits(0, 9) == 0x1fc) { 757b8e80941Smrg if (VERBOSE_DECODE) 758b8e80941Smrg in.printf_bits(0, 11, "xx111111100 (void extent)"); 759b8e80941Smrg return decode_void_extent(in); 760b8e80941Smrg } else { 761b8e80941Smrg if (VERBOSE_DECODE) 762b8e80941Smrg in.printf_bits(0, 11, "xx111xxxx00"); 763b8e80941Smrg return decode_error::reserved_block_mode_1; 764b8e80941Smrg } 765b8e80941Smrg } 766b8e80941Smrg if (in.get_bits(0, 4) == 0x0) { 767b8e80941Smrg if (VERBOSE_DECODE) 768b8e80941Smrg in.printf_bits(0, 11, "xxxxxxx0000"); 769b8e80941Smrg return decode_error::reserved_block_mode_2; 770b8e80941Smrg } 771b8e80941Smrg 772b8e80941Smrg wt_range = in.get_bits(1, 3) | in.get_bits(4, 1); 773b8e80941Smrg int a = in.get_bits(5, 2); 774b8e80941Smrg int b; 775b8e80941Smrg 776b8e80941Smrg switch (in.get_bits(7, 2)) { 777b8e80941Smrg case 0x0: 778b8e80941Smrg if (VERBOSE_DECODE) 779b8e80941Smrg in.printf_bits(0, 11, "DH00AARRR00"); 780b8e80941Smrg wt_w = 12; 781b8e80941Smrg wt_h = a + 2; 782b8e80941Smrg break; 783b8e80941Smrg case 0x1: 784b8e80941Smrg if (VERBOSE_DECODE) 785b8e80941Smrg in.printf_bits(0, 11, "DH01AARRR00"); 786b8e80941Smrg wt_w = a + 2; 787b8e80941Smrg wt_h = 12; 788b8e80941Smrg break; 789b8e80941Smrg case 0x3: 790b8e80941Smrg if (in.get_bits(5, 1) == 0) { 791b8e80941Smrg if (VERBOSE_DECODE) 792b8e80941Smrg in.printf_bits(0, 11, "DH1100RRR00"); 793b8e80941Smrg wt_w = 6; 794b8e80941Smrg wt_h = 10; 795b8e80941Smrg } else { 796b8e80941Smrg if (VERBOSE_DECODE) 797b8e80941Smrg in.printf_bits(0, 11, "DH1101RRR00"); 798b8e80941Smrg wt_w = 10; 799b8e80941Smrg wt_h = 6; 800b8e80941Smrg } 801b8e80941Smrg break; 802b8e80941Smrg case 0x2: 803b8e80941Smrg if (VERBOSE_DECODE) 804b8e80941Smrg in.printf_bits(0, 11, "BB10AARRR00"); 805b8e80941Smrg b = in.get_bits(9, 2); 806b8e80941Smrg wt_w = a + 6; 807b8e80941Smrg wt_h = b + 6; 808b8e80941Smrg dual_plane = 0; 809b8e80941Smrg high_prec = 0; 810b8e80941Smrg break; 811b8e80941Smrg } 812b8e80941Smrg } 813b8e80941Smrg return decode_error::ok; 814b8e80941Smrg} 815b8e80941Smrg 816b8e80941Smrgvoid Block::decode_cem(InputBitVector in) 817b8e80941Smrg{ 818b8e80941Smrg cems[0] = cems[1] = cems[2] = cems[3] = -1; 819b8e80941Smrg 820b8e80941Smrg num_extra_cem_bits = 0; 821b8e80941Smrg extra_cem_bits = 0; 822b8e80941Smrg 823b8e80941Smrg if (num_parts > 1) { 824b8e80941Smrg 825b8e80941Smrg partition_index = in.get_bits(13, 10); 826b8e80941Smrg if (VERBOSE_DECODE) 827b8e80941Smrg in.printf_bits(13, 10, "partition ID (%d)", partition_index); 828b8e80941Smrg 829b8e80941Smrg uint32_t cem = in.get_bits(23, 6); 830b8e80941Smrg 831b8e80941Smrg if ((cem & 0x3) == 0x0) { 832b8e80941Smrg cem >>= 2; 833b8e80941Smrg cem_base_class = cem >> 2; 834b8e80941Smrg is_multi_cem = false; 835b8e80941Smrg 836b8e80941Smrg for (int i = 0; i < num_parts; ++i) 837b8e80941Smrg cems[i] = cem; 838b8e80941Smrg 839b8e80941Smrg if (VERBOSE_DECODE) 840b8e80941Smrg in.printf_bits(23, 6, "CEM (single, %d)", cem); 841b8e80941Smrg } else { 842b8e80941Smrg 843b8e80941Smrg cem_base_class = (cem & 0x3) - 1; 844b8e80941Smrg is_multi_cem = true; 845b8e80941Smrg 846b8e80941Smrg if (VERBOSE_DECODE) 847b8e80941Smrg in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class); 848b8e80941Smrg 849b8e80941Smrg int offset = 128 - weight_bits; 850b8e80941Smrg 851b8e80941Smrg if (num_parts == 2) { 852b8e80941Smrg if (VERBOSE_DECODE) { 853b8e80941Smrg in.printf_bits(25, 4, "M0M0 C1 C0"); 854b8e80941Smrg in.printf_bits(offset - 2, 2, "M1M1"); 855b8e80941Smrg } 856b8e80941Smrg 857b8e80941Smrg uint32_t c0 = in.get_bits(25, 1); 858b8e80941Smrg uint32_t c1 = in.get_bits(26, 1); 859b8e80941Smrg 860b8e80941Smrg extra_cem_bits = c0 + c1; 861b8e80941Smrg 862b8e80941Smrg num_extra_cem_bits = 2; 863b8e80941Smrg 864b8e80941Smrg uint32_t m0 = in.get_bits(27, 2); 865b8e80941Smrg uint32_t m1 = in.get_bits(offset - 2, 2); 866b8e80941Smrg 867b8e80941Smrg cems[0] = ((cem_base_class + c0) << 2) | m0; 868b8e80941Smrg cems[1] = ((cem_base_class + c1) << 2) | m1; 869b8e80941Smrg 870b8e80941Smrg } else if (num_parts == 3) { 871b8e80941Smrg if (VERBOSE_DECODE) { 872b8e80941Smrg in.printf_bits(25, 4, "M0 C2 C1 C0"); 873b8e80941Smrg in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0"); 874b8e80941Smrg } 875b8e80941Smrg 876b8e80941Smrg uint32_t c0 = in.get_bits(25, 1); 877b8e80941Smrg uint32_t c1 = in.get_bits(26, 1); 878b8e80941Smrg uint32_t c2 = in.get_bits(27, 1); 879b8e80941Smrg 880b8e80941Smrg extra_cem_bits = c0 + c1 + c2; 881b8e80941Smrg 882b8e80941Smrg num_extra_cem_bits = 5; 883b8e80941Smrg 884b8e80941Smrg uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1); 885b8e80941Smrg uint32_t m1 = in.get_bits(offset - 4, 2); 886b8e80941Smrg uint32_t m2 = in.get_bits(offset - 2, 2); 887b8e80941Smrg 888b8e80941Smrg cems[0] = ((cem_base_class + c0) << 2) | m0; 889b8e80941Smrg cems[1] = ((cem_base_class + c1) << 2) | m1; 890b8e80941Smrg cems[2] = ((cem_base_class + c2) << 2) | m2; 891b8e80941Smrg 892b8e80941Smrg } else if (num_parts == 4) { 893b8e80941Smrg if (VERBOSE_DECODE) { 894b8e80941Smrg in.printf_bits(25, 4, "C3 C2 C1 C0"); 895b8e80941Smrg in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0"); 896b8e80941Smrg } 897b8e80941Smrg 898b8e80941Smrg uint32_t c0 = in.get_bits(25, 1); 899b8e80941Smrg uint32_t c1 = in.get_bits(26, 1); 900b8e80941Smrg uint32_t c2 = in.get_bits(27, 1); 901b8e80941Smrg uint32_t c3 = in.get_bits(28, 1); 902b8e80941Smrg 903b8e80941Smrg extra_cem_bits = c0 + c1 + c2 + c3; 904b8e80941Smrg 905b8e80941Smrg num_extra_cem_bits = 8; 906b8e80941Smrg 907b8e80941Smrg uint32_t m0 = in.get_bits(offset - 8, 2); 908b8e80941Smrg uint32_t m1 = in.get_bits(offset - 6, 2); 909b8e80941Smrg uint32_t m2 = in.get_bits(offset - 4, 2); 910b8e80941Smrg uint32_t m3 = in.get_bits(offset - 2, 2); 911b8e80941Smrg 912b8e80941Smrg cems[0] = ((cem_base_class + c0) << 2) | m0; 913b8e80941Smrg cems[1] = ((cem_base_class + c1) << 2) | m1; 914b8e80941Smrg cems[2] = ((cem_base_class + c2) << 2) | m2; 915b8e80941Smrg cems[3] = ((cem_base_class + c3) << 2) | m3; 916b8e80941Smrg } else { 917b8e80941Smrg unreachable(""); 918b8e80941Smrg } 919b8e80941Smrg } 920b8e80941Smrg 921b8e80941Smrg colour_endpoint_data_offset = 29; 922b8e80941Smrg 923b8e80941Smrg } else { 924b8e80941Smrg uint32_t cem = in.get_bits(13, 4); 925b8e80941Smrg 926b8e80941Smrg cem_base_class = cem >> 2; 927b8e80941Smrg is_multi_cem = false; 928b8e80941Smrg 929b8e80941Smrg cems[0] = cem; 930b8e80941Smrg 931b8e80941Smrg partition_index = -1; 932b8e80941Smrg 933b8e80941Smrg if (VERBOSE_DECODE) 934b8e80941Smrg in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class); 935b8e80941Smrg 936b8e80941Smrg colour_endpoint_data_offset = 17; 937b8e80941Smrg } 938b8e80941Smrg} 939b8e80941Smrg 940b8e80941Smrgvoid Block::unpack_colour_endpoints(InputBitVector in) 941b8e80941Smrg{ 942b8e80941Smrg if (ce_trits) { 943b8e80941Smrg int offset = colour_endpoint_data_offset; 944b8e80941Smrg int bits_left = colour_endpoint_bits; 945b8e80941Smrg for (int i = 0; i < num_cem_values; i += 5) { 946b8e80941Smrg int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5); 947b8e80941Smrg /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */ 948b8e80941Smrg uint64_t raw = in.get_bits64(offset, bits_to_read); 949b8e80941Smrg unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]); 950b8e80941Smrg 951b8e80941Smrg if (VERBOSE_DECODE) 952b8e80941Smrg in.printf_bits(offset, bits_to_read, 953b8e80941Smrg "trits [%d,%d,%d,%d,%d]", 954b8e80941Smrg colour_endpoints_quant[i+0], colour_endpoints_quant[i+1], 955b8e80941Smrg colour_endpoints_quant[i+2], colour_endpoints_quant[i+3], 956b8e80941Smrg colour_endpoints_quant[i+4]); 957b8e80941Smrg 958b8e80941Smrg offset += 8 + ce_bits * 5; 959b8e80941Smrg bits_left -= 8 + ce_bits * 5; 960b8e80941Smrg } 961b8e80941Smrg } else if (ce_quints) { 962b8e80941Smrg int offset = colour_endpoint_data_offset; 963b8e80941Smrg int bits_left = colour_endpoint_bits; 964b8e80941Smrg for (int i = 0; i < num_cem_values; i += 3) { 965b8e80941Smrg int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3); 966b8e80941Smrg /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */ 967b8e80941Smrg uint32_t raw = in.get_bits(offset, bits_to_read); 968b8e80941Smrg unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]); 969b8e80941Smrg 970b8e80941Smrg if (VERBOSE_DECODE) 971b8e80941Smrg in.printf_bits(offset, bits_to_read, 972b8e80941Smrg "quints [%d,%d,%d]", 973b8e80941Smrg colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]); 974b8e80941Smrg 975b8e80941Smrg offset += 7 + ce_bits * 3; 976b8e80941Smrg bits_left -= 7 + ce_bits * 3; 977b8e80941Smrg } 978b8e80941Smrg } else { 979b8e80941Smrg assert((colour_endpoint_bits % ce_bits) == 0); 980b8e80941Smrg int offset = colour_endpoint_data_offset; 981b8e80941Smrg for (int i = 0; i < num_cem_values; i++) { 982b8e80941Smrg colour_endpoints_quant[i] = in.get_bits(offset, ce_bits); 983b8e80941Smrg 984b8e80941Smrg if (VERBOSE_DECODE) 985b8e80941Smrg in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]); 986b8e80941Smrg 987b8e80941Smrg offset += ce_bits; 988b8e80941Smrg } 989b8e80941Smrg } 990b8e80941Smrg} 991b8e80941Smrg 992b8e80941Smrgvoid Block::decode_colour_endpoints() 993b8e80941Smrg{ 994b8e80941Smrg int cem_values_idx = 0; 995b8e80941Smrg for (int part = 0; part < num_parts; ++part) { 996b8e80941Smrg uint8_t *v = &colour_endpoints[cem_values_idx]; 997b8e80941Smrg int v0 = v[0]; 998b8e80941Smrg int v1 = v[1]; 999b8e80941Smrg int v2 = v[2]; 1000b8e80941Smrg int v3 = v[3]; 1001b8e80941Smrg int v4 = v[4]; 1002b8e80941Smrg int v5 = v[5]; 1003b8e80941Smrg int v6 = v[6]; 1004b8e80941Smrg int v7 = v[7]; 1005b8e80941Smrg cem_values_idx += ((cems[part] >> 2) + 1) * 2; 1006b8e80941Smrg 1007b8e80941Smrg uint8x4_t e0, e1; 1008b8e80941Smrg int s0, s1, L0, L1; 1009b8e80941Smrg 1010b8e80941Smrg switch (cems[part]) 1011b8e80941Smrg { 1012b8e80941Smrg case 0: 1013b8e80941Smrg e0 = uint8x4_t(v0, v0, v0, 0xff); 1014b8e80941Smrg e1 = uint8x4_t(v1, v1, v1, 0xff); 1015b8e80941Smrg break; 1016b8e80941Smrg case 1: 1017b8e80941Smrg L0 = (v0 >> 2) | (v1 & 0xc0); 1018b8e80941Smrg L1 = L0 + (v1 & 0x3f); 1019b8e80941Smrg if (L1 > 0xff) 1020b8e80941Smrg L1 = 0xff; 1021b8e80941Smrg e0 = uint8x4_t(L0, L0, L0, 0xff); 1022b8e80941Smrg e1 = uint8x4_t(L1, L1, L1, 0xff); 1023b8e80941Smrg break; 1024b8e80941Smrg case 4: 1025b8e80941Smrg e0 = uint8x4_t(v0, v0, v0, v2); 1026b8e80941Smrg e1 = uint8x4_t(v1, v1, v1, v3); 1027b8e80941Smrg break; 1028b8e80941Smrg case 5: 1029b8e80941Smrg bit_transfer_signed(v1, v0); 1030b8e80941Smrg bit_transfer_signed(v3, v2); 1031b8e80941Smrg e0 = uint8x4_t(v0, v0, v0, v2); 1032b8e80941Smrg e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3); 1033b8e80941Smrg break; 1034b8e80941Smrg case 6: 1035b8e80941Smrg e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff); 1036b8e80941Smrg e1 = uint8x4_t(v0, v1, v2, 0xff); 1037b8e80941Smrg break; 1038b8e80941Smrg case 8: 1039b8e80941Smrg s0 = v0 + v2 + v4; 1040b8e80941Smrg s1 = v1 + v3 + v5; 1041b8e80941Smrg if (s1 >= s0) { 1042b8e80941Smrg e0 = uint8x4_t(v0, v2, v4, 0xff); 1043b8e80941Smrg e1 = uint8x4_t(v1, v3, v5, 0xff); 1044b8e80941Smrg } else { 1045b8e80941Smrg e0 = blue_contract(v1, v3, v5, 0xff); 1046b8e80941Smrg e1 = blue_contract(v0, v2, v4, 0xff); 1047b8e80941Smrg } 1048b8e80941Smrg break; 1049b8e80941Smrg case 9: 1050b8e80941Smrg bit_transfer_signed(v1, v0); 1051b8e80941Smrg bit_transfer_signed(v3, v2); 1052b8e80941Smrg bit_transfer_signed(v5, v4); 1053b8e80941Smrg if (v1 + v3 + v5 >= 0) { 1054b8e80941Smrg e0 = uint8x4_t(v0, v2, v4, 0xff); 1055b8e80941Smrg e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff); 1056b8e80941Smrg } else { 1057b8e80941Smrg e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff); 1058b8e80941Smrg e1 = blue_contract(v0, v2, v4, 0xff); 1059b8e80941Smrg } 1060b8e80941Smrg break; 1061b8e80941Smrg case 10: 1062b8e80941Smrg e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4); 1063b8e80941Smrg e1 = uint8x4_t(v0, v1, v2, v5); 1064b8e80941Smrg break; 1065b8e80941Smrg case 12: 1066b8e80941Smrg s0 = v0 + v2 + v4; 1067b8e80941Smrg s1 = v1 + v3 + v5; 1068b8e80941Smrg if (s1 >= s0) { 1069b8e80941Smrg e0 = uint8x4_t(v0, v2, v4, v6); 1070b8e80941Smrg e1 = uint8x4_t(v1, v3, v5, v7); 1071b8e80941Smrg } else { 1072b8e80941Smrg e0 = blue_contract(v1, v3, v5, v7); 1073b8e80941Smrg e1 = blue_contract(v0, v2, v4, v6); 1074b8e80941Smrg } 1075b8e80941Smrg break; 1076b8e80941Smrg case 13: 1077b8e80941Smrg bit_transfer_signed(v1, v0); 1078b8e80941Smrg bit_transfer_signed(v3, v2); 1079b8e80941Smrg bit_transfer_signed(v5, v4); 1080b8e80941Smrg bit_transfer_signed(v7, v6); 1081b8e80941Smrg if (v1 + v3 + v5 >= 0) { 1082b8e80941Smrg e0 = uint8x4_t(v0, v2, v4, v6); 1083b8e80941Smrg e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1084b8e80941Smrg } else { 1085b8e80941Smrg e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1086b8e80941Smrg e1 = blue_contract(v0, v2, v4, v6); 1087b8e80941Smrg } 1088b8e80941Smrg break; 1089b8e80941Smrg default: 1090b8e80941Smrg /* HDR endpoints not supported; return error colour */ 1091b8e80941Smrg e0 = uint8x4_t(255, 0, 255, 255); 1092b8e80941Smrg e1 = uint8x4_t(255, 0, 255, 255); 1093b8e80941Smrg break; 1094b8e80941Smrg } 1095b8e80941Smrg 1096b8e80941Smrg endpoints_decoded[0][part] = e0; 1097b8e80941Smrg endpoints_decoded[1][part] = e1; 1098b8e80941Smrg 1099b8e80941Smrg if (VERBOSE_DECODE) { 1100b8e80941Smrg printf("cems[%d]=%d v=[", part, cems[part]); 1101b8e80941Smrg for (int i = 0; i < (cems[part] >> 2) + 1; ++i) { 1102b8e80941Smrg if (i) 1103b8e80941Smrg printf(", "); 1104b8e80941Smrg printf("%3d", v[i]); 1105b8e80941Smrg } 1106b8e80941Smrg printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n", 1107b8e80941Smrg e0.v[0], e0.v[1], e0.v[2], e0.v[3], 1108b8e80941Smrg e1.v[0], e1.v[1], e1.v[2], e1.v[3]); 1109b8e80941Smrg } 1110b8e80941Smrg } 1111b8e80941Smrg} 1112b8e80941Smrg 1113b8e80941Smrgvoid Block::unpack_weights(InputBitVector in) 1114b8e80941Smrg{ 1115b8e80941Smrg if (wt_trits) { 1116b8e80941Smrg int offset = 128; 1117b8e80941Smrg int bits_left = weight_bits; 1118b8e80941Smrg for (int i = 0; i < num_weights; i += 5) { 1119b8e80941Smrg int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits); 1120b8e80941Smrg /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */ 1121b8e80941Smrg uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1122b8e80941Smrg unpack_trit_block(wt_bits, raw, &weights_quant[i]); 1123b8e80941Smrg 1124b8e80941Smrg if (VERBOSE_DECODE) 1125b8e80941Smrg in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]", 1126b8e80941Smrg weights_quant[i+0], weights_quant[i+1], 1127b8e80941Smrg weights_quant[i+2], weights_quant[i+3], 1128b8e80941Smrg weights_quant[i+4]); 1129b8e80941Smrg 1130b8e80941Smrg offset -= 8 + wt_bits * 5; 1131b8e80941Smrg bits_left -= 8 + wt_bits * 5; 1132b8e80941Smrg } 1133b8e80941Smrg 1134b8e80941Smrg } else if (wt_quints) { 1135b8e80941Smrg 1136b8e80941Smrg int offset = 128; 1137b8e80941Smrg int bits_left = weight_bits; 1138b8e80941Smrg for (int i = 0; i < num_weights; i += 3) { 1139b8e80941Smrg int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits); 1140b8e80941Smrg /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */ 1141b8e80941Smrg uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1142b8e80941Smrg unpack_quint_block(wt_bits, raw, &weights_quant[i]); 1143b8e80941Smrg 1144b8e80941Smrg if (VERBOSE_DECODE) 1145b8e80941Smrg in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]", 1146b8e80941Smrg weights_quant[i], weights_quant[i+1], weights_quant[i+2]); 1147b8e80941Smrg 1148b8e80941Smrg offset -= 7 + wt_bits * 3; 1149b8e80941Smrg bits_left -= 7 + wt_bits * 3; 1150b8e80941Smrg } 1151b8e80941Smrg 1152b8e80941Smrg } else { 1153b8e80941Smrg int offset = 128; 1154b8e80941Smrg assert((weight_bits % wt_bits) == 0); 1155b8e80941Smrg for (int i = 0; i < num_weights; ++i) { 1156b8e80941Smrg weights_quant[i] = in.get_bits_rev(offset, wt_bits); 1157b8e80941Smrg 1158b8e80941Smrg if (VERBOSE_DECODE) 1159b8e80941Smrg in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]); 1160b8e80941Smrg 1161b8e80941Smrg offset -= wt_bits; 1162b8e80941Smrg } 1163b8e80941Smrg } 1164b8e80941Smrg} 1165b8e80941Smrg 1166b8e80941Smrgvoid Block::unquantise_weights() 1167b8e80941Smrg{ 1168b8e80941Smrg assert(num_weights <= (int)ARRAY_SIZE(weights_quant)); 1169b8e80941Smrg assert(num_weights <= (int)ARRAY_SIZE(weights)); 1170b8e80941Smrg 1171b8e80941Smrg memset(weights, 0, sizeof(weights)); 1172b8e80941Smrg 1173b8e80941Smrg for (int i = 0; i < num_weights; ++i) { 1174b8e80941Smrg 1175b8e80941Smrg uint8_t v = weights_quant[i]; 1176b8e80941Smrg uint8_t w; 1177b8e80941Smrg 1178b8e80941Smrg if (wt_trits) { 1179b8e80941Smrg 1180b8e80941Smrg if (wt_bits == 0) { 1181b8e80941Smrg w = v * 32; 1182b8e80941Smrg } else { 1183b8e80941Smrg uint8_t A, B, C, D; 1184b8e80941Smrg A = (v & 0x1) ? 0x7F : 0x00; 1185b8e80941Smrg switch (wt_bits) { 1186b8e80941Smrg case 1: 1187b8e80941Smrg B = 0; 1188b8e80941Smrg C = 50; 1189b8e80941Smrg D = v >> 1; 1190b8e80941Smrg break; 1191b8e80941Smrg case 2: 1192b8e80941Smrg B = (v & 0x2) ? 0x45 : 0x00; 1193b8e80941Smrg C = 23; 1194b8e80941Smrg D = v >> 2; 1195b8e80941Smrg break; 1196b8e80941Smrg case 3: 1197b8e80941Smrg B = ((v & 0x6) >> 1) | ((v & 0x6) << 4); 1198b8e80941Smrg C = 11; 1199b8e80941Smrg D = v >> 3; 1200b8e80941Smrg break; 1201b8e80941Smrg default: 1202b8e80941Smrg unreachable(""); 1203b8e80941Smrg } 1204b8e80941Smrg uint16_t T = D * C + B; 1205b8e80941Smrg T = T ^ A; 1206b8e80941Smrg T = (A & 0x20) | (T >> 2); 1207b8e80941Smrg assert(T < 64); 1208b8e80941Smrg if (T > 32) 1209b8e80941Smrg T++; 1210b8e80941Smrg w = T; 1211b8e80941Smrg } 1212b8e80941Smrg 1213b8e80941Smrg } else if (wt_quints) { 1214b8e80941Smrg 1215b8e80941Smrg if (wt_bits == 0) { 1216b8e80941Smrg w = v * 16; 1217b8e80941Smrg } else { 1218b8e80941Smrg uint8_t A, B, C, D; 1219b8e80941Smrg A = (v & 0x1) ? 0x7F : 0x00; 1220b8e80941Smrg switch (wt_bits) { 1221b8e80941Smrg case 1: 1222b8e80941Smrg B = 0; 1223b8e80941Smrg C = 28; 1224b8e80941Smrg D = v >> 1; 1225b8e80941Smrg break; 1226b8e80941Smrg case 2: 1227b8e80941Smrg B = (v & 0x2) ? 0x42 : 0x00; 1228b8e80941Smrg C = 13; 1229b8e80941Smrg D = v >> 2; 1230b8e80941Smrg break; 1231b8e80941Smrg default: 1232b8e80941Smrg unreachable(""); 1233b8e80941Smrg } 1234b8e80941Smrg uint16_t T = D * C + B; 1235b8e80941Smrg T = T ^ A; 1236b8e80941Smrg T = (A & 0x20) | (T >> 2); 1237b8e80941Smrg assert(T < 64); 1238b8e80941Smrg if (T > 32) 1239b8e80941Smrg T++; 1240b8e80941Smrg w = T; 1241b8e80941Smrg } 1242b8e80941Smrg weights[i] = w; 1243b8e80941Smrg 1244b8e80941Smrg } else { 1245b8e80941Smrg 1246b8e80941Smrg switch (wt_bits) { 1247b8e80941Smrg case 1: w = v ? 0x3F : 0x00; break; 1248b8e80941Smrg case 2: w = v | (v << 2) | (v << 4); break; 1249b8e80941Smrg case 3: w = v | (v << 3); break; 1250b8e80941Smrg case 4: w = (v >> 2) | (v << 2); break; 1251b8e80941Smrg case 5: w = (v >> 4) | (v << 1); break; 1252b8e80941Smrg default: unreachable(""); 1253b8e80941Smrg } 1254b8e80941Smrg assert(w < 64); 1255b8e80941Smrg if (w > 32) 1256b8e80941Smrg w++; 1257b8e80941Smrg } 1258b8e80941Smrg weights[i] = w; 1259b8e80941Smrg } 1260b8e80941Smrg} 1261b8e80941Smrg 1262b8e80941Smrgvoid Block::compute_infill_weights(int block_w, int block_h, int block_d) 1263b8e80941Smrg{ 1264b8e80941Smrg int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1); 1265b8e80941Smrg int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1); 1266b8e80941Smrg int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1); 1267b8e80941Smrg for (int r = 0; r < block_d; ++r) { 1268b8e80941Smrg for (int t = 0; t < block_h; ++t) { 1269b8e80941Smrg for (int s = 0; s < block_w; ++s) { 1270b8e80941Smrg int cs = Ds * s; 1271b8e80941Smrg int ct = Dt * t; 1272b8e80941Smrg int cr = Dr * r; 1273b8e80941Smrg int gs = (cs * (wt_w - 1) + 32) >> 6; 1274b8e80941Smrg int gt = (ct * (wt_h - 1) + 32) >> 6; 1275b8e80941Smrg int gr = (cr * (wt_d - 1) + 32) >> 6; 1276b8e80941Smrg assert(gs >= 0 && gs <= 176); 1277b8e80941Smrg assert(gt >= 0 && gt <= 176); 1278b8e80941Smrg assert(gr >= 0 && gr <= 176); 1279b8e80941Smrg int js = gs >> 4; 1280b8e80941Smrg int fs = gs & 0xf; 1281b8e80941Smrg int jt = gt >> 4; 1282b8e80941Smrg int ft = gt & 0xf; 1283b8e80941Smrg int jr = gr >> 4; 1284b8e80941Smrg int fr = gr & 0xf; 1285b8e80941Smrg 1286b8e80941Smrg /* TODO: 3D */ 1287b8e80941Smrg (void)jr; 1288b8e80941Smrg (void)fr; 1289b8e80941Smrg 1290b8e80941Smrg int w11 = (fs * ft + 8) >> 4; 1291b8e80941Smrg int w10 = ft - w11; 1292b8e80941Smrg int w01 = fs - w11; 1293b8e80941Smrg int w00 = 16 - fs - ft + w11; 1294b8e80941Smrg 1295b8e80941Smrg if (dual_plane) { 1296b8e80941Smrg int p00, p01, p10, p11, i0, i1; 1297b8e80941Smrg int v0 = js + jt * wt_w; 1298b8e80941Smrg p00 = weights[(v0) * 2]; 1299b8e80941Smrg p01 = weights[(v0 + 1) * 2]; 1300b8e80941Smrg p10 = weights[(v0 + wt_w) * 2]; 1301b8e80941Smrg p11 = weights[(v0 + wt_w + 1) * 2]; 1302b8e80941Smrg i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1303b8e80941Smrg p00 = weights[(v0) * 2 + 1]; 1304b8e80941Smrg p01 = weights[(v0 + 1) * 2 + 1]; 1305b8e80941Smrg p10 = weights[(v0 + wt_w) * 2 + 1]; 1306b8e80941Smrg p11 = weights[(v0 + wt_w + 1) * 2 + 1]; 1307b8e80941Smrg assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights)); 1308b8e80941Smrg i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1309b8e80941Smrg assert(0 <= i0 && i0 <= 64); 1310b8e80941Smrg infill_weights[0][s + t*block_w + r*block_w*block_h] = i0; 1311b8e80941Smrg infill_weights[1][s + t*block_w + r*block_w*block_h] = i1; 1312b8e80941Smrg } else { 1313b8e80941Smrg int p00, p01, p10, p11, i; 1314b8e80941Smrg int v0 = js + jt * wt_w; 1315b8e80941Smrg p00 = weights[v0]; 1316b8e80941Smrg p01 = weights[v0 + 1]; 1317b8e80941Smrg p10 = weights[v0 + wt_w]; 1318b8e80941Smrg p11 = weights[v0 + wt_w + 1]; 1319b8e80941Smrg assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights)); 1320b8e80941Smrg i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1321b8e80941Smrg assert(0 <= i && i <= 64); 1322b8e80941Smrg infill_weights[0][s + t*block_w + r*block_w*block_h] = i; 1323b8e80941Smrg } 1324b8e80941Smrg } 1325b8e80941Smrg } 1326b8e80941Smrg } 1327b8e80941Smrg} 1328b8e80941Smrg 1329b8e80941Smrgvoid Block::unquantise_colour_endpoints() 1330b8e80941Smrg{ 1331b8e80941Smrg assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant)); 1332b8e80941Smrg assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints)); 1333b8e80941Smrg 1334b8e80941Smrg for (int i = 0; i < num_cem_values; ++i) { 1335b8e80941Smrg uint8_t v = colour_endpoints_quant[i]; 1336b8e80941Smrg 1337b8e80941Smrg if (ce_trits) { 1338b8e80941Smrg uint16_t A, B, C, D; 1339b8e80941Smrg uint16_t t; 1340b8e80941Smrg A = (v & 0x1) ? 0x1FF : 0x000; 1341b8e80941Smrg switch (ce_bits) { 1342b8e80941Smrg case 1: 1343b8e80941Smrg B = 0; 1344b8e80941Smrg C = 204; 1345b8e80941Smrg D = v >> 1; 1346b8e80941Smrg break; 1347b8e80941Smrg case 2: 1348b8e80941Smrg B = (v & 0x2) ? 0x116 : 0x000; 1349b8e80941Smrg C = 93; 1350b8e80941Smrg D = v >> 2; 1351b8e80941Smrg break; 1352b8e80941Smrg case 3: 1353b8e80941Smrg t = ((v >> 1) & 0x3); 1354b8e80941Smrg B = t | (t << 2) | (t << 7); 1355b8e80941Smrg C = 44; 1356b8e80941Smrg D = v >> 3; 1357b8e80941Smrg break; 1358b8e80941Smrg case 4: 1359b8e80941Smrg t = ((v >> 1) & 0x7); 1360b8e80941Smrg B = t | (t << 6); 1361b8e80941Smrg C = 22; 1362b8e80941Smrg D = v >> 4; 1363b8e80941Smrg break; 1364b8e80941Smrg case 5: 1365b8e80941Smrg t = ((v >> 1) & 0xF); 1366b8e80941Smrg B = (t >> 2) | (t << 5); 1367b8e80941Smrg C = 11; 1368b8e80941Smrg D = v >> 5; 1369b8e80941Smrg break; 1370b8e80941Smrg case 6: 1371b8e80941Smrg B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1); 1372b8e80941Smrg C = 5; 1373b8e80941Smrg D = v >> 6; 1374b8e80941Smrg break; 1375b8e80941Smrg default: 1376b8e80941Smrg unreachable(""); 1377b8e80941Smrg } 1378b8e80941Smrg uint16_t T = D * C + B; 1379b8e80941Smrg T = T ^ A; 1380b8e80941Smrg T = (A & 0x80) | (T >> 2); 1381b8e80941Smrg assert(T < 256); 1382b8e80941Smrg colour_endpoints[i] = T; 1383b8e80941Smrg } else if (ce_quints) { 1384b8e80941Smrg uint16_t A, B, C, D; 1385b8e80941Smrg uint16_t t; 1386b8e80941Smrg A = (v & 0x1) ? 0x1FF : 0x000; 1387b8e80941Smrg switch (ce_bits) { 1388b8e80941Smrg case 1: 1389b8e80941Smrg B = 0; 1390b8e80941Smrg C = 113; 1391b8e80941Smrg D = v >> 1; 1392b8e80941Smrg break; 1393b8e80941Smrg case 2: 1394b8e80941Smrg B = (v & 0x2) ? 0x10C : 0x000; 1395b8e80941Smrg C = 54; 1396b8e80941Smrg D = v >> 2; 1397b8e80941Smrg break; 1398b8e80941Smrg case 3: 1399b8e80941Smrg t = ((v >> 1) & 0x3); 1400b8e80941Smrg B = (t >> 1) | (t << 1) | (t << 7); 1401b8e80941Smrg C = 26; 1402b8e80941Smrg D = v >> 3; 1403b8e80941Smrg break; 1404b8e80941Smrg case 4: 1405b8e80941Smrg t = ((v >> 1) & 0x7); 1406b8e80941Smrg B = (t >> 1) | (t << 6); 1407b8e80941Smrg C = 13; 1408b8e80941Smrg D = v >> 4; 1409b8e80941Smrg break; 1410b8e80941Smrg case 5: 1411b8e80941Smrg t = ((v >> 1) & 0xF); 1412b8e80941Smrg B = (t >> 4) | (t << 5); 1413b8e80941Smrg C = 6; 1414b8e80941Smrg D = v >> 5; 1415b8e80941Smrg break; 1416b8e80941Smrg default: 1417b8e80941Smrg unreachable(""); 1418b8e80941Smrg } 1419b8e80941Smrg uint16_t T = D * C + B; 1420b8e80941Smrg T = T ^ A; 1421b8e80941Smrg T = (A & 0x80) | (T >> 2); 1422b8e80941Smrg assert(T < 256); 1423b8e80941Smrg colour_endpoints[i] = T; 1424b8e80941Smrg } else { 1425b8e80941Smrg switch (ce_bits) { 1426b8e80941Smrg case 1: v = v ? 0xFF : 0x00; break; 1427b8e80941Smrg case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break; 1428b8e80941Smrg case 3: v = (v << 5) | (v << 2) | (v >> 1); break; 1429b8e80941Smrg case 4: v = (v << 4) | v; break; 1430b8e80941Smrg case 5: v = (v << 3) | (v >> 2); break; 1431b8e80941Smrg case 6: v = (v << 2) | (v >> 4); break; 1432b8e80941Smrg case 7: v = (v << 1) | (v >> 6); break; 1433b8e80941Smrg case 8: break; 1434b8e80941Smrg default: unreachable(""); 1435b8e80941Smrg } 1436b8e80941Smrg colour_endpoints[i] = v; 1437b8e80941Smrg } 1438b8e80941Smrg } 1439b8e80941Smrg} 1440b8e80941Smrg 1441b8e80941Smrgdecode_error::type Block::decode(const Decoder &decoder, InputBitVector in) 1442b8e80941Smrg{ 1443b8e80941Smrg decode_error::type err; 1444b8e80941Smrg 1445b8e80941Smrg is_error = false; 1446b8e80941Smrg bogus_colour_endpoints = false; 1447b8e80941Smrg bogus_weights = false; 1448b8e80941Smrg is_void_extent = false; 1449b8e80941Smrg 1450b8e80941Smrg wt_d = 1; 1451b8e80941Smrg /* TODO: 3D */ 1452b8e80941Smrg 1453b8e80941Smrg /* TODO: test for all the illegal encodings */ 1454b8e80941Smrg 1455b8e80941Smrg if (VERBOSE_DECODE) 1456b8e80941Smrg in.printf_bits(0, 128); 1457b8e80941Smrg 1458b8e80941Smrg err = decode_block_mode(in); 1459b8e80941Smrg if (err != decode_error::ok) 1460b8e80941Smrg return err; 1461b8e80941Smrg 1462b8e80941Smrg if (is_void_extent) 1463b8e80941Smrg return decode_error::ok; 1464b8e80941Smrg 1465b8e80941Smrg /* TODO: 3D */ 1466b8e80941Smrg 1467b8e80941Smrg calculate_from_weights(); 1468b8e80941Smrg 1469b8e80941Smrg if (VERBOSE_DECODE) 1470b8e80941Smrg printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n", 1471b8e80941Smrg wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits); 1472b8e80941Smrg 1473b8e80941Smrg if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d) 1474b8e80941Smrg return decode_error::weight_grid_exceeds_block_size; 1475b8e80941Smrg 1476b8e80941Smrg num_parts = in.get_bits(11, 2) + 1; 1477b8e80941Smrg 1478b8e80941Smrg if (VERBOSE_DECODE) 1479b8e80941Smrg in.printf_bits(11, 2, "partitions = %d", num_parts); 1480b8e80941Smrg 1481b8e80941Smrg if (dual_plane && num_parts > 3) 1482b8e80941Smrg return decode_error::dual_plane_and_too_many_partitions; 1483b8e80941Smrg 1484b8e80941Smrg decode_cem(in); 1485b8e80941Smrg 1486b8e80941Smrg if (VERBOSE_DECODE) 1487b8e80941Smrg printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class); 1488b8e80941Smrg 1489b8e80941Smrg int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits; 1490b8e80941Smrg num_cem_values = num_cem_pairs * 2; 1491b8e80941Smrg 1492b8e80941Smrg calculate_remaining_bits(); 1493b8e80941Smrg err = calculate_colour_endpoints_size(); 1494b8e80941Smrg if (err != decode_error::ok) 1495b8e80941Smrg return err; 1496b8e80941Smrg 1497b8e80941Smrg if (VERBOSE_DECODE) 1498b8e80941Smrg in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits, 1499b8e80941Smrg "endpoint data (%d bits, %d vals, %dt %dq %db)", 1500b8e80941Smrg colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits); 1501b8e80941Smrg 1502b8e80941Smrg unpack_colour_endpoints(in); 1503b8e80941Smrg 1504b8e80941Smrg if (VERBOSE_DECODE) { 1505b8e80941Smrg printf("cem values raw =["); 1506b8e80941Smrg for (int i = 0; i < num_cem_values; i++) { 1507b8e80941Smrg if (i) 1508b8e80941Smrg printf(", "); 1509b8e80941Smrg printf("%3d", colour_endpoints_quant[i]); 1510b8e80941Smrg } 1511b8e80941Smrg printf("]\n"); 1512b8e80941Smrg } 1513b8e80941Smrg 1514b8e80941Smrg if (num_cem_values > 18) 1515b8e80941Smrg return decode_error::invalid_colour_endpoints_count; 1516b8e80941Smrg 1517b8e80941Smrg unquantise_colour_endpoints(); 1518b8e80941Smrg 1519b8e80941Smrg if (VERBOSE_DECODE) { 1520b8e80941Smrg printf("cem values norm=["); 1521b8e80941Smrg for (int i = 0; i < num_cem_values; i++) { 1522b8e80941Smrg if (i) 1523b8e80941Smrg printf(", "); 1524b8e80941Smrg printf("%3d", colour_endpoints[i]); 1525b8e80941Smrg } 1526b8e80941Smrg printf("]\n"); 1527b8e80941Smrg } 1528b8e80941Smrg 1529b8e80941Smrg decode_colour_endpoints(); 1530b8e80941Smrg 1531b8e80941Smrg if (dual_plane) { 1532b8e80941Smrg int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2; 1533b8e80941Smrg colour_component_selector = in.get_bits(ccs_offset, 2); 1534b8e80941Smrg 1535b8e80941Smrg if (VERBOSE_DECODE) 1536b8e80941Smrg in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector); 1537b8e80941Smrg } else { 1538b8e80941Smrg colour_component_selector = 0; 1539b8e80941Smrg } 1540b8e80941Smrg 1541b8e80941Smrg 1542b8e80941Smrg if (VERBOSE_DECODE) 1543b8e80941Smrg in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits); 1544b8e80941Smrg 1545b8e80941Smrg if (num_weights > 64) 1546b8e80941Smrg return decode_error::invalid_num_weights; 1547b8e80941Smrg 1548b8e80941Smrg if (weight_bits < 24 || weight_bits > 96) 1549b8e80941Smrg return decode_error::invalid_weight_bits; 1550b8e80941Smrg 1551b8e80941Smrg unpack_weights(in); 1552b8e80941Smrg 1553b8e80941Smrg unquantise_weights(); 1554b8e80941Smrg 1555b8e80941Smrg if (VERBOSE_DECODE) { 1556b8e80941Smrg printf("weights=["); 1557b8e80941Smrg for (int i = 0; i < num_weights; ++i) { 1558b8e80941Smrg if (i) 1559b8e80941Smrg printf(", "); 1560b8e80941Smrg printf("%d", weights[i]); 1561b8e80941Smrg } 1562b8e80941Smrg printf("]\n"); 1563b8e80941Smrg 1564b8e80941Smrg for (int plane = 0; plane <= dual_plane; ++plane) { 1565b8e80941Smrg printf("weights (plane %d):\n", plane); 1566b8e80941Smrg int i = 0; 1567b8e80941Smrg (void)i; 1568b8e80941Smrg 1569b8e80941Smrg for (int r = 0; r < wt_d; ++r) { 1570b8e80941Smrg for (int t = 0; t < wt_h; ++t) { 1571b8e80941Smrg for (int s = 0; s < wt_w; ++s) { 1572b8e80941Smrg printf("%3d", weights[i++ * (1 + dual_plane) + plane]); 1573b8e80941Smrg } 1574b8e80941Smrg printf("\n"); 1575b8e80941Smrg } 1576b8e80941Smrg if (r < wt_d - 1) 1577b8e80941Smrg printf("\n"); 1578b8e80941Smrg } 1579b8e80941Smrg } 1580b8e80941Smrg } 1581b8e80941Smrg 1582b8e80941Smrg compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d); 1583b8e80941Smrg 1584b8e80941Smrg if (VERBOSE_DECODE) { 1585b8e80941Smrg for (int plane = 0; plane <= dual_plane; ++plane) { 1586b8e80941Smrg printf("infilled weights (plane %d):\n", plane); 1587b8e80941Smrg int i = 0; 1588b8e80941Smrg (void)i; 1589b8e80941Smrg 1590b8e80941Smrg for (int r = 0; r < decoder.block_d; ++r) { 1591b8e80941Smrg for (int t = 0; t < decoder.block_h; ++t) { 1592b8e80941Smrg for (int s = 0; s < decoder.block_w; ++s) { 1593b8e80941Smrg printf("%3d", infill_weights[plane][i++]); 1594b8e80941Smrg } 1595b8e80941Smrg printf("\n"); 1596b8e80941Smrg } 1597b8e80941Smrg if (r < decoder.block_d - 1) 1598b8e80941Smrg printf("\n"); 1599b8e80941Smrg } 1600b8e80941Smrg } 1601b8e80941Smrg } 1602b8e80941Smrg if (VERBOSE_DECODE) 1603b8e80941Smrg printf("\n"); 1604b8e80941Smrg 1605b8e80941Smrg return decode_error::ok; 1606b8e80941Smrg} 1607b8e80941Smrg 1608b8e80941Smrgvoid Block::write_decoded(const Decoder &decoder, uint16_t *output) 1609b8e80941Smrg{ 1610b8e80941Smrg /* sRGB can only be stored as unorm8. */ 1611b8e80941Smrg assert(!decoder.srgb || decoder.output_unorm8); 1612b8e80941Smrg 1613b8e80941Smrg if (is_void_extent) { 1614b8e80941Smrg for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) { 1615b8e80941Smrg if (decoder.output_unorm8) { 1616b8e80941Smrg if (decoder.srgb) { 1617b8e80941Smrg output[idx*4+0] = void_extent_colour_r >> 8; 1618b8e80941Smrg output[idx*4+1] = void_extent_colour_g >> 8; 1619b8e80941Smrg output[idx*4+2] = void_extent_colour_b >> 8; 1620b8e80941Smrg } else { 1621b8e80941Smrg output[idx*4+0] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_r); 1622b8e80941Smrg output[idx*4+1] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_g); 1623b8e80941Smrg output[idx*4+2] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_b); 1624b8e80941Smrg } 1625b8e80941Smrg output[idx*4+3] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_a); 1626b8e80941Smrg } else { 1627b8e80941Smrg /* Store the color as FP16. */ 1628b8e80941Smrg output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r); 1629b8e80941Smrg output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g); 1630b8e80941Smrg output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b); 1631b8e80941Smrg output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a); 1632b8e80941Smrg } 1633b8e80941Smrg } 1634b8e80941Smrg return; 1635b8e80941Smrg } 1636b8e80941Smrg 1637b8e80941Smrg int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31; 1638b8e80941Smrg 1639b8e80941Smrg int idx = 0; 1640b8e80941Smrg for (int z = 0; z < decoder.block_d; ++z) { 1641b8e80941Smrg for (int y = 0; y < decoder.block_h; ++y) { 1642b8e80941Smrg for (int x = 0; x < decoder.block_w; ++x) { 1643b8e80941Smrg 1644b8e80941Smrg int partition; 1645b8e80941Smrg if (num_parts > 1) { 1646b8e80941Smrg partition = select_partition(partition_index, x, y, z, num_parts, small_block); 1647b8e80941Smrg assert(partition < num_parts); 1648b8e80941Smrg } else { 1649b8e80941Smrg partition = 0; 1650b8e80941Smrg } 1651b8e80941Smrg 1652b8e80941Smrg /* TODO: HDR */ 1653b8e80941Smrg 1654b8e80941Smrg uint8x4_t e0 = endpoints_decoded[0][partition]; 1655b8e80941Smrg uint8x4_t e1 = endpoints_decoded[1][partition]; 1656b8e80941Smrg uint16_t c0[4], c1[4]; 1657b8e80941Smrg 1658b8e80941Smrg /* Expand to 16 bits. */ 1659b8e80941Smrg if (decoder.srgb) { 1660b8e80941Smrg c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80); 1661b8e80941Smrg c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80); 1662b8e80941Smrg c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80); 1663b8e80941Smrg c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80); 1664b8e80941Smrg 1665b8e80941Smrg c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80); 1666b8e80941Smrg c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80); 1667b8e80941Smrg c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80); 1668b8e80941Smrg c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80); 1669b8e80941Smrg } else { 1670b8e80941Smrg c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]); 1671b8e80941Smrg c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]); 1672b8e80941Smrg c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]); 1673b8e80941Smrg c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]); 1674b8e80941Smrg 1675b8e80941Smrg c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]); 1676b8e80941Smrg c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]); 1677b8e80941Smrg c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]); 1678b8e80941Smrg c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]); 1679b8e80941Smrg } 1680b8e80941Smrg 1681b8e80941Smrg int w[4]; 1682b8e80941Smrg if (dual_plane) { 1683b8e80941Smrg int w0 = infill_weights[0][idx]; 1684b8e80941Smrg int w1 = infill_weights[1][idx]; 1685b8e80941Smrg w[0] = w[1] = w[2] = w[3] = w0; 1686b8e80941Smrg w[colour_component_selector] = w1; 1687b8e80941Smrg } else { 1688b8e80941Smrg int w0 = infill_weights[0][idx]; 1689b8e80941Smrg w[0] = w[1] = w[2] = w[3] = w0; 1690b8e80941Smrg } 1691b8e80941Smrg 1692b8e80941Smrg /* Interpolate to produce UNORM16, applying weights. */ 1693b8e80941Smrg uint16_t c[4] = { 1694b8e80941Smrg (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6), 1695b8e80941Smrg (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6), 1696b8e80941Smrg (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6), 1697b8e80941Smrg (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6), 1698b8e80941Smrg }; 1699b8e80941Smrg 1700b8e80941Smrg if (decoder.output_unorm8) { 1701b8e80941Smrg if (decoder.srgb) { 1702b8e80941Smrg output[idx*4+0] = c[0] >> 8; 1703b8e80941Smrg output[idx*4+1] = c[1] >> 8; 1704b8e80941Smrg output[idx*4+2] = c[2] >> 8; 1705b8e80941Smrg } else { 1706b8e80941Smrg output[idx*4+0] = c[0] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[0]); 1707b8e80941Smrg output[idx*4+1] = c[1] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[1]); 1708b8e80941Smrg output[idx*4+2] = c[2] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[2]); 1709b8e80941Smrg } 1710b8e80941Smrg output[idx*4+3] = c[3] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[3]); 1711b8e80941Smrg } else { 1712b8e80941Smrg /* Store the color as FP16. */ 1713b8e80941Smrg output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]); 1714b8e80941Smrg output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]); 1715b8e80941Smrg output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]); 1716b8e80941Smrg output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]); 1717b8e80941Smrg } 1718b8e80941Smrg 1719b8e80941Smrg idx++; 1720b8e80941Smrg } 1721b8e80941Smrg } 1722b8e80941Smrg } 1723b8e80941Smrg} 1724b8e80941Smrg 1725b8e80941Smrgvoid Block::calculate_from_weights() 1726b8e80941Smrg{ 1727b8e80941Smrg wt_trits = 0; 1728b8e80941Smrg wt_quints = 0; 1729b8e80941Smrg wt_bits = 0; 1730b8e80941Smrg switch (high_prec) { 1731b8e80941Smrg case 0: 1732b8e80941Smrg switch (wt_range) { 1733b8e80941Smrg case 0x2: wt_max = 1; wt_bits = 1; break; 1734b8e80941Smrg case 0x3: wt_max = 2; wt_trits = 1; break; 1735b8e80941Smrg case 0x4: wt_max = 3; wt_bits = 2; break; 1736b8e80941Smrg case 0x5: wt_max = 4; wt_quints = 1; break; 1737b8e80941Smrg case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break; 1738b8e80941Smrg case 0x7: wt_max = 7; wt_bits = 3; break; 1739b8e80941Smrg default: abort(); 1740b8e80941Smrg } 1741b8e80941Smrg break; 1742b8e80941Smrg case 1: 1743b8e80941Smrg switch (wt_range) { 1744b8e80941Smrg case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break; 1745b8e80941Smrg case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break; 1746b8e80941Smrg case 0x4: wt_max = 15; wt_bits = 4; break; 1747b8e80941Smrg case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break; 1748b8e80941Smrg case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break; 1749b8e80941Smrg case 0x7: wt_max = 31; wt_bits = 5; break; 1750b8e80941Smrg default: abort(); 1751b8e80941Smrg } 1752b8e80941Smrg break; 1753b8e80941Smrg } 1754b8e80941Smrg 1755b8e80941Smrg assert(wt_trits || wt_quints || wt_bits); 1756b8e80941Smrg 1757b8e80941Smrg num_weights = wt_w * wt_h * wt_d; 1758b8e80941Smrg 1759b8e80941Smrg if (dual_plane) 1760b8e80941Smrg num_weights *= 2; 1761b8e80941Smrg 1762b8e80941Smrg weight_bits = 1763b8e80941Smrg (num_weights * 8 * wt_trits + 4) / 5 1764b8e80941Smrg + (num_weights * 7 * wt_quints + 2) / 3 1765b8e80941Smrg + num_weights * wt_bits; 1766b8e80941Smrg} 1767b8e80941Smrg 1768b8e80941Smrgvoid Block::calculate_remaining_bits() 1769b8e80941Smrg{ 1770b8e80941Smrg int config_bits; 1771b8e80941Smrg if (num_parts > 1) { 1772b8e80941Smrg if (!is_multi_cem) 1773b8e80941Smrg config_bits = 29; 1774b8e80941Smrg else 1775b8e80941Smrg config_bits = 25 + 3 * num_parts; 1776b8e80941Smrg } else { 1777b8e80941Smrg config_bits = 17; 1778b8e80941Smrg } 1779b8e80941Smrg 1780b8e80941Smrg if (dual_plane) 1781b8e80941Smrg config_bits += 2; 1782b8e80941Smrg 1783b8e80941Smrg remaining_bits = 128 - config_bits - weight_bits; 1784b8e80941Smrg} 1785b8e80941Smrg 1786b8e80941Smrgdecode_error::type Block::calculate_colour_endpoints_size() 1787b8e80941Smrg{ 1788b8e80941Smrg /* Specified as illegal */ 1789b8e80941Smrg if (remaining_bits < (13 * num_cem_values + 4) / 5) { 1790b8e80941Smrg colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0; 1791b8e80941Smrg return decode_error::invalid_colour_endpoints_size; 1792b8e80941Smrg } 1793b8e80941Smrg 1794b8e80941Smrg /* Find the largest cem_ranges that fits within remaining_bits */ 1795b8e80941Smrg for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) { 1796b8e80941Smrg int cem_bits; 1797b8e80941Smrg cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5 1798b8e80941Smrg + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3 1799b8e80941Smrg + num_cem_values * cem_ranges[i].b; 1800b8e80941Smrg 1801b8e80941Smrg if (cem_bits <= remaining_bits) 1802b8e80941Smrg { 1803b8e80941Smrg colour_endpoint_bits = cem_bits; 1804b8e80941Smrg ce_max = cem_ranges[i].max; 1805b8e80941Smrg ce_trits = cem_ranges[i].t; 1806b8e80941Smrg ce_quints = cem_ranges[i].q; 1807b8e80941Smrg ce_bits = cem_ranges[i].b; 1808b8e80941Smrg return decode_error::ok; 1809b8e80941Smrg } 1810b8e80941Smrg } 1811b8e80941Smrg 1812b8e80941Smrg assert(0); 1813b8e80941Smrg return decode_error::invalid_colour_endpoints_size; 1814b8e80941Smrg} 1815b8e80941Smrg 1816b8e80941Smrg/** 1817b8e80941Smrg * Decode ASTC 2D LDR texture data. 1818b8e80941Smrg * 1819b8e80941Smrg * \param src_width in pixels 1820b8e80941Smrg * \param src_height in pixels 1821b8e80941Smrg * \param dst_stride in bytes 1822b8e80941Smrg */ 1823b8e80941Smrgextern "C" void 1824b8e80941Smrg_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, 1825b8e80941Smrg unsigned dst_stride, 1826b8e80941Smrg const uint8_t *src_row, 1827b8e80941Smrg unsigned src_stride, 1828b8e80941Smrg unsigned src_width, 1829b8e80941Smrg unsigned src_height, 1830b8e80941Smrg mesa_format format) 1831b8e80941Smrg{ 1832b8e80941Smrg assert(_mesa_is_format_astc_2d(format)); 1833b8e80941Smrg bool srgb = _mesa_get_format_color_encoding(format) == GL_SRGB; 1834b8e80941Smrg 1835b8e80941Smrg unsigned blk_w, blk_h; 1836b8e80941Smrg _mesa_get_format_block_size(format, &blk_w, &blk_h); 1837b8e80941Smrg 1838b8e80941Smrg const unsigned block_size = 16; 1839b8e80941Smrg unsigned x_blocks = (src_width + blk_w - 1) / blk_w; 1840b8e80941Smrg unsigned y_blocks = (src_height + blk_h - 1) / blk_h; 1841b8e80941Smrg 1842b8e80941Smrg Decoder dec(blk_w, blk_h, 1, srgb, true); 1843b8e80941Smrg 1844b8e80941Smrg for (unsigned y = 0; y < y_blocks; ++y) { 1845b8e80941Smrg for (unsigned x = 0; x < x_blocks; ++x) { 1846b8e80941Smrg /* Same size as the largest block. */ 1847b8e80941Smrg uint16_t block_out[12 * 12 * 4]; 1848b8e80941Smrg 1849b8e80941Smrg dec.decode(src_row + x * block_size, block_out); 1850b8e80941Smrg 1851b8e80941Smrg /* This can be smaller with NPOT dimensions. */ 1852b8e80941Smrg unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w); 1853b8e80941Smrg unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h); 1854b8e80941Smrg 1855b8e80941Smrg for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) { 1856b8e80941Smrg for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) { 1857b8e80941Smrg uint8_t *dst = dst_row + sub_y * dst_stride + 1858b8e80941Smrg (x * blk_w + sub_x) * 4; 1859b8e80941Smrg const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4]; 1860b8e80941Smrg 1861b8e80941Smrg dst[0] = src[0]; 1862b8e80941Smrg dst[1] = src[1]; 1863b8e80941Smrg dst[2] = src[2]; 1864b8e80941Smrg dst[3] = src[3]; 1865b8e80941Smrg } 1866b8e80941Smrg } 1867b8e80941Smrg } 1868b8e80941Smrg src_row += src_stride; 1869b8e80941Smrg dst_row += dst_stride * blk_h; 1870b8e80941Smrg } 1871b8e80941Smrg} 1872