101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 301e04c3fSmrg * Copyright 2018 Advanced Micro Devices, Inc. 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2201e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg/** 2601e04c3fSmrg * \file texcompress_astc.c 2701e04c3fSmrg * 2801e04c3fSmrg * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just 2901e04c3fSmrg * ASTC 2D LDR. 3001e04c3fSmrg * 3101e04c3fSmrg * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC 3201e04c3fSmrg * library written by Philip Taylor. I added sRGB support and adjusted it for 3301e04c3fSmrg * Mesa. 
- Marek 3401e04c3fSmrg */ 3501e04c3fSmrg 3601e04c3fSmrg#include "texcompress_astc.h" 3701e04c3fSmrg#include "macros.h" 3801e04c3fSmrg#include "util/half_float.h" 3901e04c3fSmrg#include <stdio.h> 407ec681f3Smrg#include <cstdlib> // for abort() on windows 4101e04c3fSmrg 4201e04c3fSmrgstatic bool VERBOSE_DECODE = false; 4301e04c3fSmrgstatic bool VERBOSE_WRITE = false; 4401e04c3fSmrg 4501e04c3fSmrgstatic inline uint8_t 4601e04c3fSmrguint16_div_64k_to_half_to_unorm8(uint16_t v) 4701e04c3fSmrg{ 4801e04c3fSmrg return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v)); 4901e04c3fSmrg} 5001e04c3fSmrg 5101e04c3fSmrgclass decode_error 5201e04c3fSmrg{ 5301e04c3fSmrgpublic: 5401e04c3fSmrg enum type { 5501e04c3fSmrg ok, 5601e04c3fSmrg unsupported_hdr_void_extent, 5701e04c3fSmrg reserved_block_mode_1, 5801e04c3fSmrg reserved_block_mode_2, 5901e04c3fSmrg dual_plane_and_too_many_partitions, 6001e04c3fSmrg invalid_range_in_void_extent, 6101e04c3fSmrg weight_grid_exceeds_block_size, 6201e04c3fSmrg invalid_colour_endpoints_size, 6301e04c3fSmrg invalid_colour_endpoints_count, 6401e04c3fSmrg invalid_weight_bits, 6501e04c3fSmrg invalid_num_weights, 6601e04c3fSmrg }; 6701e04c3fSmrg}; 6801e04c3fSmrg 6901e04c3fSmrg 7001e04c3fSmrgstruct cem_range { 7101e04c3fSmrg uint8_t max; 7201e04c3fSmrg uint8_t t, q, b; 7301e04c3fSmrg}; 7401e04c3fSmrg 7501e04c3fSmrg/* Based on the Color Unquantization Parameters table, 7601e04c3fSmrg * plus the bit-only representations, sorted by increasing size 7701e04c3fSmrg */ 7801e04c3fSmrgstatic cem_range cem_ranges[] = { 7901e04c3fSmrg { 5, 1, 0, 1 }, 8001e04c3fSmrg { 7, 0, 0, 3 }, 8101e04c3fSmrg { 9, 0, 1, 1 }, 8201e04c3fSmrg { 11, 1, 0, 2 }, 8301e04c3fSmrg { 15, 0, 0, 4 }, 8401e04c3fSmrg { 19, 0, 1, 2 }, 8501e04c3fSmrg { 23, 1, 0, 3 }, 8601e04c3fSmrg { 31, 0, 0, 5 }, 8701e04c3fSmrg { 39, 0, 1, 3 }, 8801e04c3fSmrg { 47, 1, 0, 4 }, 8901e04c3fSmrg { 63, 0, 0, 6 }, 9001e04c3fSmrg { 79, 0, 1, 4 }, 9101e04c3fSmrg { 95, 1, 0, 5 }, 9201e04c3fSmrg { 127, 0, 0, 7 }, 
9301e04c3fSmrg { 159, 0, 1, 5 }, 9401e04c3fSmrg { 191, 1, 0, 6 }, 9501e04c3fSmrg { 255, 0, 0, 8 }, 9601e04c3fSmrg}; 9701e04c3fSmrg 9801e04c3fSmrg#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) ) 9901e04c3fSmrg#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) ) 10001e04c3fSmrg#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) ) 10101e04c3fSmrg#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) ) 10201e04c3fSmrg 10301e04c3fSmrg/** 10401e04c3fSmrg * Unpack 5n+8 bits from 'in' into 5 output values. 10501e04c3fSmrg * If n <= 4 then T should be uint32_t, else it must be uint64_t. 10601e04c3fSmrg */ 10701e04c3fSmrgtemplate <typename T> 10801e04c3fSmrgstatic void unpack_trit_block(int n, T in, uint8_t *out) 10901e04c3fSmrg{ 11001e04c3fSmrg assert(n <= 6); /* else output will overflow uint8_t */ 11101e04c3fSmrg 11201e04c3fSmrg uint8_t T0 = (in >> (n)) & 0x1; 11301e04c3fSmrg uint8_t T1 = (in >> (n+1)) & 0x1; 11401e04c3fSmrg uint8_t T2 = (in >> (2*n+2)) & 0x1; 11501e04c3fSmrg uint8_t T3 = (in >> (2*n+3)) & 0x1; 11601e04c3fSmrg uint8_t T4 = (in >> (3*n+4)) & 0x1; 11701e04c3fSmrg uint8_t T5 = (in >> (4*n+5)) & 0x1; 11801e04c3fSmrg uint8_t T6 = (in >> (4*n+6)) & 0x1; 11901e04c3fSmrg uint8_t T7 = (in >> (5*n+7)) & 0x1; 12001e04c3fSmrg uint8_t mmask = (1 << n) - 1; 12101e04c3fSmrg uint8_t m0 = (in >> (0)) & mmask; 12201e04c3fSmrg uint8_t m1 = (in >> (n+2)) & mmask; 12301e04c3fSmrg uint8_t m2 = (in >> (2*n+4)) & mmask; 12401e04c3fSmrg uint8_t m3 = (in >> (3*n+5)) & mmask; 12501e04c3fSmrg uint8_t m4 = (in >> (4*n+7)) & mmask; 12601e04c3fSmrg 12701e04c3fSmrg uint8_t C; 12801e04c3fSmrg uint8_t t4, t3, t2, t1, t0; 12901e04c3fSmrg if (CAT_BITS_3(T4, T3, T2) == 0x7) { 13001e04c3fSmrg C = CAT_BITS_5(T7, T6, T5, T1, T0); 13101e04c3fSmrg t4 = t3 = 2; 13201e04c3fSmrg } else { 13301e04c3fSmrg C = CAT_BITS_5(T4, T3, T2, T1, T0); 13401e04c3fSmrg if (CAT_BITS_2(T6, T5) == 0x3) { 13501e04c3fSmrg t4 = 2; 13601e04c3fSmrg t3 = 
T7; 13701e04c3fSmrg } else { 13801e04c3fSmrg t4 = T7; 13901e04c3fSmrg t3 = CAT_BITS_2(T6, T5); 14001e04c3fSmrg } 14101e04c3fSmrg } 14201e04c3fSmrg 14301e04c3fSmrg if ((C & 0x3) == 0x3) { 14401e04c3fSmrg t2 = 2; 14501e04c3fSmrg t1 = (C >> 4) & 0x1; 14601e04c3fSmrg uint8_t C3 = (C >> 3) & 0x1; 14701e04c3fSmrg uint8_t C2 = (C >> 2) & 0x1; 14801e04c3fSmrg t0 = (C3 << 1) | (C2 & ~C3); 14901e04c3fSmrg } else if (((C >> 2) & 0x3) == 0x3) { 15001e04c3fSmrg t2 = 2; 15101e04c3fSmrg t1 = 2; 15201e04c3fSmrg t0 = C & 0x3; 15301e04c3fSmrg } else { 15401e04c3fSmrg t2 = (C >> 4) & 0x1; 15501e04c3fSmrg t1 = (C >> 2) & 0x3; 15601e04c3fSmrg uint8_t C1 = (C >> 1) & 0x1; 15701e04c3fSmrg uint8_t C0 = (C >> 0) & 0x1; 15801e04c3fSmrg t0 = (C1 << 1) | (C0 & ~C1); 15901e04c3fSmrg } 16001e04c3fSmrg 16101e04c3fSmrg out[0] = (t0 << n) | m0; 16201e04c3fSmrg out[1] = (t1 << n) | m1; 16301e04c3fSmrg out[2] = (t2 << n) | m2; 16401e04c3fSmrg out[3] = (t3 << n) | m3; 16501e04c3fSmrg out[4] = (t4 << n) | m4; 16601e04c3fSmrg} 16701e04c3fSmrg 16801e04c3fSmrg/** 16901e04c3fSmrg * Unpack 3n+7 bits from 'in' into 3 output values 17001e04c3fSmrg */ 17101e04c3fSmrgstatic void unpack_quint_block(int n, uint32_t in, uint8_t *out) 17201e04c3fSmrg{ 17301e04c3fSmrg assert(n <= 5); /* else output will overflow uint8_t */ 17401e04c3fSmrg 17501e04c3fSmrg uint8_t Q0 = (in >> (n)) & 0x1; 17601e04c3fSmrg uint8_t Q1 = (in >> (n+1)) & 0x1; 17701e04c3fSmrg uint8_t Q2 = (in >> (n+2)) & 0x1; 17801e04c3fSmrg uint8_t Q3 = (in >> (2*n+3)) & 0x1; 17901e04c3fSmrg uint8_t Q4 = (in >> (2*n+4)) & 0x1; 18001e04c3fSmrg uint8_t Q5 = (in >> (3*n+5)) & 0x1; 18101e04c3fSmrg uint8_t Q6 = (in >> (3*n+6)) & 0x1; 18201e04c3fSmrg uint8_t mmask = (1 << n) - 1; 18301e04c3fSmrg uint8_t m0 = (in >> (0)) & mmask; 18401e04c3fSmrg uint8_t m1 = (in >> (n+3)) & mmask; 18501e04c3fSmrg uint8_t m2 = (in >> (2*n+5)) & mmask; 18601e04c3fSmrg 18701e04c3fSmrg uint8_t C; 18801e04c3fSmrg uint8_t q2, q1, q0; 18901e04c3fSmrg if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 
0x3) { 19001e04c3fSmrg q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0); 19101e04c3fSmrg q1 = 4; 19201e04c3fSmrg q0 = 4; 19301e04c3fSmrg } else { 19401e04c3fSmrg if (CAT_BITS_2(Q2, Q1) == 0x3) { 19501e04c3fSmrg q2 = 4; 19601e04c3fSmrg C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0); 19701e04c3fSmrg } else { 19801e04c3fSmrg q2 = CAT_BITS_2(Q6, Q5); 19901e04c3fSmrg C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0); 20001e04c3fSmrg } 20101e04c3fSmrg if ((C & 0x7) == 0x5) { 20201e04c3fSmrg q1 = 4; 20301e04c3fSmrg q0 = (C >> 3) & 0x3; 20401e04c3fSmrg } else { 20501e04c3fSmrg q1 = (C >> 3) & 0x3; 20601e04c3fSmrg q0 = C & 0x7; 20701e04c3fSmrg } 20801e04c3fSmrg } 20901e04c3fSmrg out[0] = (q0 << n) | m0; 21001e04c3fSmrg out[1] = (q1 << n) | m1; 21101e04c3fSmrg out[2] = (q2 << n) | m2; 21201e04c3fSmrg} 21301e04c3fSmrg 21401e04c3fSmrg 21501e04c3fSmrgstruct uint8x4_t 21601e04c3fSmrg{ 21701e04c3fSmrg uint8_t v[4]; 21801e04c3fSmrg 21901e04c3fSmrg uint8x4_t() { } 22001e04c3fSmrg 22101e04c3fSmrg uint8x4_t(int a, int b, int c, int d) 22201e04c3fSmrg { 22301e04c3fSmrg assert(0 <= a && a <= 255); 22401e04c3fSmrg assert(0 <= b && b <= 255); 22501e04c3fSmrg assert(0 <= c && c <= 255); 22601e04c3fSmrg assert(0 <= d && d <= 255); 22701e04c3fSmrg v[0] = a; 22801e04c3fSmrg v[1] = b; 22901e04c3fSmrg v[2] = c; 23001e04c3fSmrg v[3] = d; 23101e04c3fSmrg } 23201e04c3fSmrg 23301e04c3fSmrg static uint8x4_t clamped(int a, int b, int c, int d) 23401e04c3fSmrg { 23501e04c3fSmrg uint8x4_t r; 23601e04c3fSmrg r.v[0] = MAX2(0, MIN2(255, a)); 23701e04c3fSmrg r.v[1] = MAX2(0, MIN2(255, b)); 23801e04c3fSmrg r.v[2] = MAX2(0, MIN2(255, c)); 23901e04c3fSmrg r.v[3] = MAX2(0, MIN2(255, d)); 24001e04c3fSmrg return r; 24101e04c3fSmrg } 24201e04c3fSmrg}; 24301e04c3fSmrg 24401e04c3fSmrgstatic uint8x4_t blue_contract(int r, int g, int b, int a) 24501e04c3fSmrg{ 24601e04c3fSmrg return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a); 24701e04c3fSmrg} 24801e04c3fSmrg 24901e04c3fSmrgstatic uint8x4_t blue_contract_clamped(int r, int g, int b, int a) 
25001e04c3fSmrg{ 25101e04c3fSmrg return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a); 25201e04c3fSmrg} 25301e04c3fSmrg 25401e04c3fSmrgstatic void bit_transfer_signed(int &a, int &b) 25501e04c3fSmrg{ 25601e04c3fSmrg b >>= 1; 25701e04c3fSmrg b |= a & 0x80; 25801e04c3fSmrg a >>= 1; 25901e04c3fSmrg a &= 0x3f; 26001e04c3fSmrg if (a & 0x20) 26101e04c3fSmrg a -= 0x40; 26201e04c3fSmrg} 26301e04c3fSmrg 26401e04c3fSmrgstatic uint32_t hash52(uint32_t p) 26501e04c3fSmrg{ 26601e04c3fSmrg p ^= p >> 15; 26701e04c3fSmrg p -= p << 17; 26801e04c3fSmrg p += p << 7; 26901e04c3fSmrg p += p << 4; 27001e04c3fSmrg p ^= p >> 5; 27101e04c3fSmrg p += p << 16; 27201e04c3fSmrg p ^= p >> 7; 27301e04c3fSmrg p ^= p >> 3; 27401e04c3fSmrg p ^= p << 6; 27501e04c3fSmrg p ^= p >> 17; 27601e04c3fSmrg return p; 27701e04c3fSmrg} 27801e04c3fSmrg 27901e04c3fSmrgstatic int select_partition(int seed, int x, int y, int z, int partitioncount, 28001e04c3fSmrg int small_block) 28101e04c3fSmrg{ 28201e04c3fSmrg if (small_block) { 28301e04c3fSmrg x <<= 1; 28401e04c3fSmrg y <<= 1; 28501e04c3fSmrg z <<= 1; 28601e04c3fSmrg } 28701e04c3fSmrg seed += (partitioncount - 1) * 1024; 28801e04c3fSmrg uint32_t rnum = hash52(seed); 28901e04c3fSmrg uint8_t seed1 = rnum & 0xF; 29001e04c3fSmrg uint8_t seed2 = (rnum >> 4) & 0xF; 29101e04c3fSmrg uint8_t seed3 = (rnum >> 8) & 0xF; 29201e04c3fSmrg uint8_t seed4 = (rnum >> 12) & 0xF; 29301e04c3fSmrg uint8_t seed5 = (rnum >> 16) & 0xF; 29401e04c3fSmrg uint8_t seed6 = (rnum >> 20) & 0xF; 29501e04c3fSmrg uint8_t seed7 = (rnum >> 24) & 0xF; 29601e04c3fSmrg uint8_t seed8 = (rnum >> 28) & 0xF; 29701e04c3fSmrg uint8_t seed9 = (rnum >> 18) & 0xF; 29801e04c3fSmrg uint8_t seed10 = (rnum >> 22) & 0xF; 29901e04c3fSmrg uint8_t seed11 = (rnum >> 26) & 0xF; 30001e04c3fSmrg uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; 30101e04c3fSmrg 30201e04c3fSmrg seed1 *= seed1; 30301e04c3fSmrg seed2 *= seed2; 30401e04c3fSmrg seed3 *= seed3; 30501e04c3fSmrg seed4 *= seed4; 30601e04c3fSmrg seed5 *= 
seed5; 30701e04c3fSmrg seed6 *= seed6; 30801e04c3fSmrg seed7 *= seed7; 30901e04c3fSmrg seed8 *= seed8; 31001e04c3fSmrg seed9 *= seed9; 31101e04c3fSmrg seed10 *= seed10; 31201e04c3fSmrg seed11 *= seed11; 31301e04c3fSmrg seed12 *= seed12; 31401e04c3fSmrg 31501e04c3fSmrg int sh1, sh2, sh3; 31601e04c3fSmrg if (seed & 1) { 31701e04c3fSmrg sh1 = (seed & 2 ? 4 : 5); 31801e04c3fSmrg sh2 = (partitioncount == 3 ? 6 : 5); 31901e04c3fSmrg } else { 32001e04c3fSmrg sh1 = (partitioncount == 3 ? 6 : 5); 32101e04c3fSmrg sh2 = (seed & 2 ? 4 : 5); 32201e04c3fSmrg } 32301e04c3fSmrg sh3 = (seed & 0x10) ? sh1 : sh2; 32401e04c3fSmrg 32501e04c3fSmrg seed1 >>= sh1; 32601e04c3fSmrg seed2 >>= sh2; 32701e04c3fSmrg seed3 >>= sh1; 32801e04c3fSmrg seed4 >>= sh2; 32901e04c3fSmrg seed5 >>= sh1; 33001e04c3fSmrg seed6 >>= sh2; 33101e04c3fSmrg seed7 >>= sh1; 33201e04c3fSmrg seed8 >>= sh2; 33301e04c3fSmrg seed9 >>= sh3; 33401e04c3fSmrg seed10 >>= sh3; 33501e04c3fSmrg seed11 >>= sh3; 33601e04c3fSmrg seed12 >>= sh3; 33701e04c3fSmrg 33801e04c3fSmrg int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 33901e04c3fSmrg int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 34001e04c3fSmrg int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 34101e04c3fSmrg int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 34201e04c3fSmrg 34301e04c3fSmrg a &= 0x3F; 34401e04c3fSmrg b &= 0x3F; 34501e04c3fSmrg c &= 0x3F; 34601e04c3fSmrg d &= 0x3F; 34701e04c3fSmrg 34801e04c3fSmrg if (partitioncount < 4) 34901e04c3fSmrg d = 0; 35001e04c3fSmrg if (partitioncount < 3) 35101e04c3fSmrg c = 0; 35201e04c3fSmrg 35301e04c3fSmrg if (a >= b && a >= c && a >= d) 35401e04c3fSmrg return 0; 35501e04c3fSmrg else if (b >= c && b >= d) 35601e04c3fSmrg return 1; 35701e04c3fSmrg else if (c >= d) 35801e04c3fSmrg return 2; 35901e04c3fSmrg else 36001e04c3fSmrg return 3; 36101e04c3fSmrg} 36201e04c3fSmrg 36301e04c3fSmrg 36401e04c3fSmrgstruct InputBitVector 36501e04c3fSmrg{ 36601e04c3fSmrg uint32_t data[4]; 36701e04c3fSmrg 
36801e04c3fSmrg void printf_bits(int offset, int count, const char *fmt = "", ...) 36901e04c3fSmrg { 37001e04c3fSmrg char out[129]; 37101e04c3fSmrg memset(out, '.', 128); 37201e04c3fSmrg out[128] = '\0'; 37301e04c3fSmrg int idx = offset; 37401e04c3fSmrg for (int i = 0; i < count; ++i) { 37501e04c3fSmrg out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0'; 37601e04c3fSmrg ++idx; 37701e04c3fSmrg } 37801e04c3fSmrg printf("%s ", out); 37901e04c3fSmrg va_list ap; 38001e04c3fSmrg va_start(ap, fmt); 38101e04c3fSmrg vprintf(fmt, ap); 38201e04c3fSmrg va_end(ap); 38301e04c3fSmrg printf("\n"); 38401e04c3fSmrg } 38501e04c3fSmrg 38601e04c3fSmrg uint32_t get_bits(int offset, int count) 38701e04c3fSmrg { 38801e04c3fSmrg assert(count >= 0 && count < 32); 38901e04c3fSmrg 39001e04c3fSmrg uint32_t out = 0; 39101e04c3fSmrg if (offset < 32) 39201e04c3fSmrg out |= data[0] >> offset; 39301e04c3fSmrg 39401e04c3fSmrg if (0 < offset && offset <= 32) 39501e04c3fSmrg out |= data[1] << (32 - offset); 39601e04c3fSmrg if (32 < offset && offset < 64) 39701e04c3fSmrg out |= data[1] >> (offset - 32); 39801e04c3fSmrg 39901e04c3fSmrg if (32 < offset && offset <= 64) 40001e04c3fSmrg out |= data[2] << (64 - offset); 40101e04c3fSmrg if (64 < offset && offset < 96) 40201e04c3fSmrg out |= data[2] >> (offset - 64); 40301e04c3fSmrg 40401e04c3fSmrg if (64 < offset && offset <= 96) 40501e04c3fSmrg out |= data[3] << (96 - offset); 40601e04c3fSmrg if (96 < offset && offset < 128) 40701e04c3fSmrg out |= data[3] >> (offset - 96); 40801e04c3fSmrg 40901e04c3fSmrg out &= (1 << count) - 1; 41001e04c3fSmrg return out; 41101e04c3fSmrg } 41201e04c3fSmrg 41301e04c3fSmrg uint64_t get_bits64(int offset, int count) 41401e04c3fSmrg { 41501e04c3fSmrg assert(count >= 0 && count < 64); 41601e04c3fSmrg 41701e04c3fSmrg uint64_t out = 0; 41801e04c3fSmrg if (offset < 32) 41901e04c3fSmrg out |= data[0] >> offset; 42001e04c3fSmrg 42101e04c3fSmrg if (offset <= 32) 42201e04c3fSmrg out |= (uint64_t)data[1] << (32 - offset); 
42301e04c3fSmrg if (32 < offset && offset < 64) 42401e04c3fSmrg out |= data[1] >> (offset - 32); 42501e04c3fSmrg 42601e04c3fSmrg if (0 < offset && offset <= 64) 42701e04c3fSmrg out |= (uint64_t)data[2] << (64 - offset); 42801e04c3fSmrg if (64 < offset && offset < 96) 42901e04c3fSmrg out |= data[2] >> (offset - 64); 43001e04c3fSmrg 43101e04c3fSmrg if (32 < offset && offset <= 96) 43201e04c3fSmrg out |= (uint64_t)data[3] << (96 - offset); 43301e04c3fSmrg if (96 < offset && offset < 128) 43401e04c3fSmrg out |= data[3] >> (offset - 96); 43501e04c3fSmrg 43601e04c3fSmrg out &= ((uint64_t)1 << count) - 1; 43701e04c3fSmrg return out; 43801e04c3fSmrg } 43901e04c3fSmrg 44001e04c3fSmrg uint32_t get_bits_rev(int offset, int count) 44101e04c3fSmrg { 44201e04c3fSmrg assert(offset >= count); 44301e04c3fSmrg uint32_t tmp = get_bits(offset - count, count); 44401e04c3fSmrg uint32_t out = 0; 44501e04c3fSmrg for (int i = 0; i < count; ++i) 44601e04c3fSmrg out |= ((tmp >> i) & 1) << (count - 1 - i); 44701e04c3fSmrg return out; 44801e04c3fSmrg } 44901e04c3fSmrg}; 45001e04c3fSmrg 45101e04c3fSmrgstruct OutputBitVector 45201e04c3fSmrg{ 45301e04c3fSmrg uint32_t data[4]; 45401e04c3fSmrg int offset; 45501e04c3fSmrg 45601e04c3fSmrg OutputBitVector() 45701e04c3fSmrg : offset(0) 45801e04c3fSmrg { 45901e04c3fSmrg memset(data, 0, sizeof(data)); 46001e04c3fSmrg } 46101e04c3fSmrg 46201e04c3fSmrg void append(uint32_t value, int size) 46301e04c3fSmrg { 46401e04c3fSmrg if (VERBOSE_WRITE) 46501e04c3fSmrg printf("append offset=%d size=%d values=0x%x\n", offset, size, value); 46601e04c3fSmrg 46701e04c3fSmrg assert(offset + size <= 128); 46801e04c3fSmrg 46901e04c3fSmrg assert(size <= 32); 47001e04c3fSmrg if (size < 32) 47101e04c3fSmrg assert((value >> size) == 0); 47201e04c3fSmrg 47301e04c3fSmrg while (size) { 47401e04c3fSmrg int c = MIN2(size, 32 - (offset & 31)); 47501e04c3fSmrg data[offset >> 5] |= (value << (offset & 31)); 47601e04c3fSmrg offset += c; 47701e04c3fSmrg size -= c; 47801e04c3fSmrg value 
>>= c; 47901e04c3fSmrg } 48001e04c3fSmrg } 48101e04c3fSmrg 48201e04c3fSmrg void append64(uint64_t value, int size) 48301e04c3fSmrg { 48401e04c3fSmrg if (VERBOSE_WRITE) 48501e04c3fSmrg printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value); 48601e04c3fSmrg 48701e04c3fSmrg assert(offset + size <= 128); 48801e04c3fSmrg 48901e04c3fSmrg assert(size <= 64); 49001e04c3fSmrg if (size < 64) 49101e04c3fSmrg assert((value >> size) == 0); 49201e04c3fSmrg 49301e04c3fSmrg while (size) { 49401e04c3fSmrg int c = MIN2(size, 32 - (offset & 31)); 49501e04c3fSmrg data[offset >> 5] |= (value << (offset & 31)); 49601e04c3fSmrg offset += c; 49701e04c3fSmrg size -= c; 49801e04c3fSmrg value >>= c; 49901e04c3fSmrg } 50001e04c3fSmrg } 50101e04c3fSmrg 50201e04c3fSmrg void append(OutputBitVector &v, int size) 50301e04c3fSmrg { 50401e04c3fSmrg if (VERBOSE_WRITE) 50501e04c3fSmrg printf("append vector offset=%d size=%d\n", offset, size); 50601e04c3fSmrg 50701e04c3fSmrg assert(offset + size <= 128); 50801e04c3fSmrg int i = 0; 50901e04c3fSmrg while (size >= 32) { 51001e04c3fSmrg append(v.data[i++], 32); 51101e04c3fSmrg size -= 32; 51201e04c3fSmrg } 51301e04c3fSmrg if (size > 0) 51401e04c3fSmrg append(v.data[i] & ((1 << size) - 1), size); 51501e04c3fSmrg } 51601e04c3fSmrg 51701e04c3fSmrg void append_end(OutputBitVector &v, int size) 51801e04c3fSmrg { 51901e04c3fSmrg for (int i = 0; i < size; ++i) 52001e04c3fSmrg data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31); 52101e04c3fSmrg } 52201e04c3fSmrg 52301e04c3fSmrg /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are 52401e04c3fSmrg * more likely to flush out bugs where we accidentally read undefined bits.) 
52501e04c3fSmrg */ 52601e04c3fSmrg void skip(int size) 52701e04c3fSmrg { 52801e04c3fSmrg if (VERBOSE_WRITE) 52901e04c3fSmrg printf("skip offset=%d size=%d\n", offset, size); 53001e04c3fSmrg 53101e04c3fSmrg assert(offset + size <= 128); 53201e04c3fSmrg while (size >= 32) { 53301e04c3fSmrg append(0xffffffff, 32); 53401e04c3fSmrg size -= 32; 53501e04c3fSmrg } 53601e04c3fSmrg if (size > 0) 53701e04c3fSmrg append(0xffffffff >> (32 - size), size); 53801e04c3fSmrg } 53901e04c3fSmrg}; 54001e04c3fSmrg 54101e04c3fSmrg 54201e04c3fSmrgclass Decoder 54301e04c3fSmrg{ 54401e04c3fSmrgpublic: 54501e04c3fSmrg Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8) 54601e04c3fSmrg : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb), 54701e04c3fSmrg output_unorm8(output_unorm8) {} 54801e04c3fSmrg 54901e04c3fSmrg decode_error::type decode(const uint8_t *in, uint16_t *output) const; 55001e04c3fSmrg 55101e04c3fSmrg int block_w, block_h, block_d; 55201e04c3fSmrg bool srgb, output_unorm8; 55301e04c3fSmrg}; 55401e04c3fSmrg 55501e04c3fSmrgstruct Block 55601e04c3fSmrg{ 55701e04c3fSmrg bool is_error; 55801e04c3fSmrg bool bogus_colour_endpoints; 55901e04c3fSmrg bool bogus_weights; 56001e04c3fSmrg 56101e04c3fSmrg int high_prec; 56201e04c3fSmrg int dual_plane; 56301e04c3fSmrg int colour_component_selector; 56401e04c3fSmrg int wt_range; 56501e04c3fSmrg int wt_w, wt_h, wt_d; 56601e04c3fSmrg int num_parts; 56701e04c3fSmrg int partition_index; 56801e04c3fSmrg 56901e04c3fSmrg bool is_void_extent; 57001e04c3fSmrg int void_extent_d; 57101e04c3fSmrg int void_extent_min_s; 57201e04c3fSmrg int void_extent_max_s; 57301e04c3fSmrg int void_extent_min_t; 57401e04c3fSmrg int void_extent_max_t; 57501e04c3fSmrg uint16_t void_extent_colour_r; 57601e04c3fSmrg uint16_t void_extent_colour_g; 57701e04c3fSmrg uint16_t void_extent_colour_b; 57801e04c3fSmrg uint16_t void_extent_colour_a; 57901e04c3fSmrg 58001e04c3fSmrg bool is_multi_cem; 58101e04c3fSmrg int num_extra_cem_bits; 
58201e04c3fSmrg int colour_endpoint_data_offset; 58301e04c3fSmrg int extra_cem_bits; 58401e04c3fSmrg int cem_base_class; 58501e04c3fSmrg int cems[4]; 58601e04c3fSmrg 58701e04c3fSmrg int num_cem_values; 58801e04c3fSmrg 58901e04c3fSmrg /* Calculated by unpack_weights(): */ 59001e04c3fSmrg uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */ 59101e04c3fSmrg 59201e04c3fSmrg /* Calculated by unquantise_weights(): */ 59301e04c3fSmrg uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */ 59401e04c3fSmrg 59501e04c3fSmrg /* Calculated by unpack_colour_endpoints(): */ 59601e04c3fSmrg uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */ 59701e04c3fSmrg 59801e04c3fSmrg /* Calculated by unquantise_colour_endpoints(): */ 59901e04c3fSmrg uint8_t colour_endpoints[18]; 60001e04c3fSmrg 60101e04c3fSmrg /* Calculated by calculate_from_weights(): */ 60201e04c3fSmrg int wt_trits; 60301e04c3fSmrg int wt_quints; 60401e04c3fSmrg int wt_bits; 60501e04c3fSmrg int wt_max; 60601e04c3fSmrg int num_weights; 60701e04c3fSmrg int weight_bits; 60801e04c3fSmrg 60901e04c3fSmrg /* Calculated by calculate_remaining_bits(): */ 61001e04c3fSmrg int remaining_bits; 61101e04c3fSmrg 61201e04c3fSmrg /* Calculated by calculate_colour_endpoints_size(): */ 61301e04c3fSmrg int colour_endpoint_bits; 61401e04c3fSmrg int ce_max; 61501e04c3fSmrg int ce_trits; 61601e04c3fSmrg int ce_quints; 61701e04c3fSmrg int ce_bits; 61801e04c3fSmrg 61901e04c3fSmrg /* Calculated by compute_infill_weights(); */ 62001e04c3fSmrg uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */ 62101e04c3fSmrg 62201e04c3fSmrg /* Calculated by decode_colour_endpoints(); */ 62301e04c3fSmrg uint8x4_t endpoints_decoded[2][4]; 62401e04c3fSmrg 62501e04c3fSmrg void calculate_from_weights(); 62601e04c3fSmrg void calculate_remaining_bits(); 62701e04c3fSmrg decode_error::type calculate_colour_endpoints_size(); 62801e04c3fSmrg 
62901e04c3fSmrg void unquantise_weights(); 63001e04c3fSmrg void unquantise_colour_endpoints(); 63101e04c3fSmrg 63201e04c3fSmrg decode_error::type decode(const Decoder &decoder, InputBitVector in); 63301e04c3fSmrg 63401e04c3fSmrg decode_error::type decode_block_mode(InputBitVector in); 63501e04c3fSmrg decode_error::type decode_void_extent(InputBitVector in); 63601e04c3fSmrg void decode_cem(InputBitVector in); 63701e04c3fSmrg void unpack_colour_endpoints(InputBitVector in); 63801e04c3fSmrg void decode_colour_endpoints(); 63901e04c3fSmrg void unpack_weights(InputBitVector in); 64001e04c3fSmrg void compute_infill_weights(int block_w, int block_h, int block_d); 64101e04c3fSmrg 64201e04c3fSmrg void write_decoded(const Decoder &decoder, uint16_t *output); 64301e04c3fSmrg}; 64401e04c3fSmrg 64501e04c3fSmrg 64601e04c3fSmrgdecode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const 64701e04c3fSmrg{ 64801e04c3fSmrg Block blk; 64901e04c3fSmrg InputBitVector in_vec; 65001e04c3fSmrg memcpy(&in_vec.data, in, 16); 65101e04c3fSmrg decode_error::type err = blk.decode(*this, in_vec); 65201e04c3fSmrg if (err == decode_error::ok) { 65301e04c3fSmrg blk.write_decoded(*this, output); 65401e04c3fSmrg } else { 65501e04c3fSmrg /* Fill output with the error colour */ 65601e04c3fSmrg for (int i = 0; i < block_w * block_h * block_d; ++i) { 65701e04c3fSmrg if (output_unorm8) { 65801e04c3fSmrg output[i*4+0] = 0xff; 65901e04c3fSmrg output[i*4+1] = 0; 66001e04c3fSmrg output[i*4+2] = 0xff; 66101e04c3fSmrg output[i*4+3] = 0xff; 66201e04c3fSmrg } else { 66301e04c3fSmrg assert(!srgb); /* srgb must use unorm8 */ 66401e04c3fSmrg 66501e04c3fSmrg output[i*4+0] = FP16_ONE; 66601e04c3fSmrg output[i*4+1] = FP16_ZERO; 66701e04c3fSmrg output[i*4+2] = FP16_ONE; 66801e04c3fSmrg output[i*4+3] = FP16_ONE; 66901e04c3fSmrg } 67001e04c3fSmrg } 67101e04c3fSmrg } 67201e04c3fSmrg return err; 67301e04c3fSmrg} 67401e04c3fSmrg 67501e04c3fSmrg 67601e04c3fSmrgdecode_error::type 
Block::decode_void_extent(InputBitVector block) 67701e04c3fSmrg{ 67801e04c3fSmrg /* TODO: 3D */ 67901e04c3fSmrg 68001e04c3fSmrg is_void_extent = true; 68101e04c3fSmrg void_extent_d = block.get_bits(9, 1); 68201e04c3fSmrg void_extent_min_s = block.get_bits(12, 13); 68301e04c3fSmrg void_extent_max_s = block.get_bits(25, 13); 68401e04c3fSmrg void_extent_min_t = block.get_bits(38, 13); 68501e04c3fSmrg void_extent_max_t = block.get_bits(51, 13); 68601e04c3fSmrg void_extent_colour_r = block.get_bits(64, 16); 68701e04c3fSmrg void_extent_colour_g = block.get_bits(80, 16); 68801e04c3fSmrg void_extent_colour_b = block.get_bits(96, 16); 68901e04c3fSmrg void_extent_colour_a = block.get_bits(112, 16); 69001e04c3fSmrg 69101e04c3fSmrg /* TODO: maybe we should do something useful with the extent coordinates? */ 69201e04c3fSmrg 69301e04c3fSmrg if (void_extent_d) { 69401e04c3fSmrg return decode_error::unsupported_hdr_void_extent; 69501e04c3fSmrg } 69601e04c3fSmrg 69701e04c3fSmrg if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff 69801e04c3fSmrg && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) { 69901e04c3fSmrg 70001e04c3fSmrg /* No extents */ 70101e04c3fSmrg 70201e04c3fSmrg } else { 70301e04c3fSmrg 70401e04c3fSmrg /* Check for illegal encoding */ 70501e04c3fSmrg if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) { 70601e04c3fSmrg return decode_error::invalid_range_in_void_extent; 70701e04c3fSmrg } 70801e04c3fSmrg } 70901e04c3fSmrg 71001e04c3fSmrg return decode_error::ok; 71101e04c3fSmrg} 71201e04c3fSmrg 71301e04c3fSmrgdecode_error::type Block::decode_block_mode(InputBitVector in) 71401e04c3fSmrg{ 71501e04c3fSmrg dual_plane = in.get_bits(10, 1); 71601e04c3fSmrg high_prec = in.get_bits(9, 1); 71701e04c3fSmrg 71801e04c3fSmrg if (in.get_bits(0, 2) != 0x0) { 71901e04c3fSmrg wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1); 72001e04c3fSmrg int a = in.get_bits(5, 2); 72101e04c3fSmrg int b = in.get_bits(7, 2); 
72201e04c3fSmrg switch (in.get_bits(2, 2)) { 72301e04c3fSmrg case 0x0: 72401e04c3fSmrg if (VERBOSE_DECODE) 72501e04c3fSmrg in.printf_bits(0, 11, "DHBBAAR00RR"); 72601e04c3fSmrg wt_w = b + 4; 72701e04c3fSmrg wt_h = a + 2; 72801e04c3fSmrg break; 72901e04c3fSmrg case 0x1: 73001e04c3fSmrg if (VERBOSE_DECODE) 73101e04c3fSmrg in.printf_bits(0, 11, "DHBBAAR01RR"); 73201e04c3fSmrg wt_w = b + 8; 73301e04c3fSmrg wt_h = a + 2; 73401e04c3fSmrg break; 73501e04c3fSmrg case 0x2: 73601e04c3fSmrg if (VERBOSE_DECODE) 73701e04c3fSmrg in.printf_bits(0, 11, "DHBBAAR10RR"); 73801e04c3fSmrg wt_w = a + 2; 73901e04c3fSmrg wt_h = b + 8; 74001e04c3fSmrg break; 74101e04c3fSmrg case 0x3: 74201e04c3fSmrg if ((b & 0x2) == 0) { 74301e04c3fSmrg if (VERBOSE_DECODE) 74401e04c3fSmrg in.printf_bits(0, 11, "DH0BAAR11RR"); 74501e04c3fSmrg wt_w = a + 2; 74601e04c3fSmrg wt_h = b + 6; 74701e04c3fSmrg } else { 74801e04c3fSmrg if (VERBOSE_DECODE) 74901e04c3fSmrg in.printf_bits(0, 11, "DH1BAAR11RR"); 75001e04c3fSmrg wt_w = (b & 0x1) + 2; 75101e04c3fSmrg wt_h = a + 2; 75201e04c3fSmrg } 75301e04c3fSmrg break; 75401e04c3fSmrg } 75501e04c3fSmrg } else { 75601e04c3fSmrg if (in.get_bits(6, 3) == 0x7) { 75701e04c3fSmrg if (in.get_bits(0, 9) == 0x1fc) { 75801e04c3fSmrg if (VERBOSE_DECODE) 75901e04c3fSmrg in.printf_bits(0, 11, "xx111111100 (void extent)"); 76001e04c3fSmrg return decode_void_extent(in); 76101e04c3fSmrg } else { 76201e04c3fSmrg if (VERBOSE_DECODE) 76301e04c3fSmrg in.printf_bits(0, 11, "xx111xxxx00"); 76401e04c3fSmrg return decode_error::reserved_block_mode_1; 76501e04c3fSmrg } 76601e04c3fSmrg } 76701e04c3fSmrg if (in.get_bits(0, 4) == 0x0) { 76801e04c3fSmrg if (VERBOSE_DECODE) 76901e04c3fSmrg in.printf_bits(0, 11, "xxxxxxx0000"); 77001e04c3fSmrg return decode_error::reserved_block_mode_2; 77101e04c3fSmrg } 77201e04c3fSmrg 77301e04c3fSmrg wt_range = in.get_bits(1, 3) | in.get_bits(4, 1); 77401e04c3fSmrg int a = in.get_bits(5, 2); 77501e04c3fSmrg int b; 77601e04c3fSmrg 77701e04c3fSmrg switch 
(in.get_bits(7, 2)) { 77801e04c3fSmrg case 0x0: 77901e04c3fSmrg if (VERBOSE_DECODE) 78001e04c3fSmrg in.printf_bits(0, 11, "DH00AARRR00"); 78101e04c3fSmrg wt_w = 12; 78201e04c3fSmrg wt_h = a + 2; 78301e04c3fSmrg break; 78401e04c3fSmrg case 0x1: 78501e04c3fSmrg if (VERBOSE_DECODE) 78601e04c3fSmrg in.printf_bits(0, 11, "DH01AARRR00"); 78701e04c3fSmrg wt_w = a + 2; 78801e04c3fSmrg wt_h = 12; 78901e04c3fSmrg break; 79001e04c3fSmrg case 0x3: 79101e04c3fSmrg if (in.get_bits(5, 1) == 0) { 79201e04c3fSmrg if (VERBOSE_DECODE) 79301e04c3fSmrg in.printf_bits(0, 11, "DH1100RRR00"); 79401e04c3fSmrg wt_w = 6; 79501e04c3fSmrg wt_h = 10; 79601e04c3fSmrg } else { 79701e04c3fSmrg if (VERBOSE_DECODE) 79801e04c3fSmrg in.printf_bits(0, 11, "DH1101RRR00"); 79901e04c3fSmrg wt_w = 10; 80001e04c3fSmrg wt_h = 6; 80101e04c3fSmrg } 80201e04c3fSmrg break; 80301e04c3fSmrg case 0x2: 80401e04c3fSmrg if (VERBOSE_DECODE) 80501e04c3fSmrg in.printf_bits(0, 11, "BB10AARRR00"); 80601e04c3fSmrg b = in.get_bits(9, 2); 80701e04c3fSmrg wt_w = a + 6; 80801e04c3fSmrg wt_h = b + 6; 80901e04c3fSmrg dual_plane = 0; 81001e04c3fSmrg high_prec = 0; 81101e04c3fSmrg break; 81201e04c3fSmrg } 81301e04c3fSmrg } 81401e04c3fSmrg return decode_error::ok; 81501e04c3fSmrg} 81601e04c3fSmrg 81701e04c3fSmrgvoid Block::decode_cem(InputBitVector in) 81801e04c3fSmrg{ 81901e04c3fSmrg cems[0] = cems[1] = cems[2] = cems[3] = -1; 82001e04c3fSmrg 82101e04c3fSmrg num_extra_cem_bits = 0; 82201e04c3fSmrg extra_cem_bits = 0; 82301e04c3fSmrg 82401e04c3fSmrg if (num_parts > 1) { 82501e04c3fSmrg 82601e04c3fSmrg partition_index = in.get_bits(13, 10); 82701e04c3fSmrg if (VERBOSE_DECODE) 82801e04c3fSmrg in.printf_bits(13, 10, "partition ID (%d)", partition_index); 82901e04c3fSmrg 83001e04c3fSmrg uint32_t cem = in.get_bits(23, 6); 83101e04c3fSmrg 83201e04c3fSmrg if ((cem & 0x3) == 0x0) { 83301e04c3fSmrg cem >>= 2; 83401e04c3fSmrg cem_base_class = cem >> 2; 83501e04c3fSmrg is_multi_cem = false; 83601e04c3fSmrg 83701e04c3fSmrg for (int i = 0; i < 
num_parts; ++i) 83801e04c3fSmrg cems[i] = cem; 83901e04c3fSmrg 84001e04c3fSmrg if (VERBOSE_DECODE) 84101e04c3fSmrg in.printf_bits(23, 6, "CEM (single, %d)", cem); 84201e04c3fSmrg } else { 84301e04c3fSmrg 84401e04c3fSmrg cem_base_class = (cem & 0x3) - 1; 84501e04c3fSmrg is_multi_cem = true; 84601e04c3fSmrg 84701e04c3fSmrg if (VERBOSE_DECODE) 84801e04c3fSmrg in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class); 84901e04c3fSmrg 85001e04c3fSmrg int offset = 128 - weight_bits; 85101e04c3fSmrg 85201e04c3fSmrg if (num_parts == 2) { 85301e04c3fSmrg if (VERBOSE_DECODE) { 85401e04c3fSmrg in.printf_bits(25, 4, "M0M0 C1 C0"); 85501e04c3fSmrg in.printf_bits(offset - 2, 2, "M1M1"); 85601e04c3fSmrg } 85701e04c3fSmrg 85801e04c3fSmrg uint32_t c0 = in.get_bits(25, 1); 85901e04c3fSmrg uint32_t c1 = in.get_bits(26, 1); 86001e04c3fSmrg 86101e04c3fSmrg extra_cem_bits = c0 + c1; 86201e04c3fSmrg 86301e04c3fSmrg num_extra_cem_bits = 2; 86401e04c3fSmrg 86501e04c3fSmrg uint32_t m0 = in.get_bits(27, 2); 86601e04c3fSmrg uint32_t m1 = in.get_bits(offset - 2, 2); 86701e04c3fSmrg 86801e04c3fSmrg cems[0] = ((cem_base_class + c0) << 2) | m0; 86901e04c3fSmrg cems[1] = ((cem_base_class + c1) << 2) | m1; 87001e04c3fSmrg 87101e04c3fSmrg } else if (num_parts == 3) { 87201e04c3fSmrg if (VERBOSE_DECODE) { 87301e04c3fSmrg in.printf_bits(25, 4, "M0 C2 C1 C0"); 87401e04c3fSmrg in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0"); 87501e04c3fSmrg } 87601e04c3fSmrg 87701e04c3fSmrg uint32_t c0 = in.get_bits(25, 1); 87801e04c3fSmrg uint32_t c1 = in.get_bits(26, 1); 87901e04c3fSmrg uint32_t c2 = in.get_bits(27, 1); 88001e04c3fSmrg 88101e04c3fSmrg extra_cem_bits = c0 + c1 + c2; 88201e04c3fSmrg 88301e04c3fSmrg num_extra_cem_bits = 5; 88401e04c3fSmrg 88501e04c3fSmrg uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1); 88601e04c3fSmrg uint32_t m1 = in.get_bits(offset - 4, 2); 88701e04c3fSmrg uint32_t m2 = in.get_bits(offset - 2, 2); 88801e04c3fSmrg 88901e04c3fSmrg cems[0] = 
((cem_base_class + c0) << 2) | m0; 89001e04c3fSmrg cems[1] = ((cem_base_class + c1) << 2) | m1; 89101e04c3fSmrg cems[2] = ((cem_base_class + c2) << 2) | m2; 89201e04c3fSmrg 89301e04c3fSmrg } else if (num_parts == 4) { 89401e04c3fSmrg if (VERBOSE_DECODE) { 89501e04c3fSmrg in.printf_bits(25, 4, "C3 C2 C1 C0"); 89601e04c3fSmrg in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0"); 89701e04c3fSmrg } 89801e04c3fSmrg 89901e04c3fSmrg uint32_t c0 = in.get_bits(25, 1); 90001e04c3fSmrg uint32_t c1 = in.get_bits(26, 1); 90101e04c3fSmrg uint32_t c2 = in.get_bits(27, 1); 90201e04c3fSmrg uint32_t c3 = in.get_bits(28, 1); 90301e04c3fSmrg 90401e04c3fSmrg extra_cem_bits = c0 + c1 + c2 + c3; 90501e04c3fSmrg 90601e04c3fSmrg num_extra_cem_bits = 8; 90701e04c3fSmrg 90801e04c3fSmrg uint32_t m0 = in.get_bits(offset - 8, 2); 90901e04c3fSmrg uint32_t m1 = in.get_bits(offset - 6, 2); 91001e04c3fSmrg uint32_t m2 = in.get_bits(offset - 4, 2); 91101e04c3fSmrg uint32_t m3 = in.get_bits(offset - 2, 2); 91201e04c3fSmrg 91301e04c3fSmrg cems[0] = ((cem_base_class + c0) << 2) | m0; 91401e04c3fSmrg cems[1] = ((cem_base_class + c1) << 2) | m1; 91501e04c3fSmrg cems[2] = ((cem_base_class + c2) << 2) | m2; 91601e04c3fSmrg cems[3] = ((cem_base_class + c3) << 2) | m3; 91701e04c3fSmrg } else { 91801e04c3fSmrg unreachable(""); 91901e04c3fSmrg } 92001e04c3fSmrg } 92101e04c3fSmrg 92201e04c3fSmrg colour_endpoint_data_offset = 29; 92301e04c3fSmrg 92401e04c3fSmrg } else { 92501e04c3fSmrg uint32_t cem = in.get_bits(13, 4); 92601e04c3fSmrg 92701e04c3fSmrg cem_base_class = cem >> 2; 92801e04c3fSmrg is_multi_cem = false; 92901e04c3fSmrg 93001e04c3fSmrg cems[0] = cem; 93101e04c3fSmrg 93201e04c3fSmrg partition_index = -1; 93301e04c3fSmrg 93401e04c3fSmrg if (VERBOSE_DECODE) 93501e04c3fSmrg in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class); 93601e04c3fSmrg 93701e04c3fSmrg colour_endpoint_data_offset = 17; 93801e04c3fSmrg } 93901e04c3fSmrg} 94001e04c3fSmrg 94101e04c3fSmrgvoid 
Block::unpack_colour_endpoints(InputBitVector in) 94201e04c3fSmrg{ 94301e04c3fSmrg if (ce_trits) { 94401e04c3fSmrg int offset = colour_endpoint_data_offset; 94501e04c3fSmrg int bits_left = colour_endpoint_bits; 94601e04c3fSmrg for (int i = 0; i < num_cem_values; i += 5) { 94701e04c3fSmrg int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5); 94801e04c3fSmrg /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */ 94901e04c3fSmrg uint64_t raw = in.get_bits64(offset, bits_to_read); 95001e04c3fSmrg unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]); 95101e04c3fSmrg 95201e04c3fSmrg if (VERBOSE_DECODE) 95301e04c3fSmrg in.printf_bits(offset, bits_to_read, 95401e04c3fSmrg "trits [%d,%d,%d,%d,%d]", 95501e04c3fSmrg colour_endpoints_quant[i+0], colour_endpoints_quant[i+1], 95601e04c3fSmrg colour_endpoints_quant[i+2], colour_endpoints_quant[i+3], 95701e04c3fSmrg colour_endpoints_quant[i+4]); 95801e04c3fSmrg 95901e04c3fSmrg offset += 8 + ce_bits * 5; 96001e04c3fSmrg bits_left -= 8 + ce_bits * 5; 96101e04c3fSmrg } 96201e04c3fSmrg } else if (ce_quints) { 96301e04c3fSmrg int offset = colour_endpoint_data_offset; 96401e04c3fSmrg int bits_left = colour_endpoint_bits; 96501e04c3fSmrg for (int i = 0; i < num_cem_values; i += 3) { 96601e04c3fSmrg int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3); 96701e04c3fSmrg /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */ 96801e04c3fSmrg uint32_t raw = in.get_bits(offset, bits_to_read); 96901e04c3fSmrg unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]); 97001e04c3fSmrg 97101e04c3fSmrg if (VERBOSE_DECODE) 97201e04c3fSmrg in.printf_bits(offset, bits_to_read, 97301e04c3fSmrg "quints [%d,%d,%d]", 97401e04c3fSmrg colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]); 97501e04c3fSmrg 97601e04c3fSmrg offset += 7 + ce_bits * 3; 97701e04c3fSmrg bits_left -= 7 + ce_bits * 3; 97801e04c3fSmrg } 97901e04c3fSmrg } else { 98001e04c3fSmrg 
assert((colour_endpoint_bits % ce_bits) == 0); 98101e04c3fSmrg int offset = colour_endpoint_data_offset; 98201e04c3fSmrg for (int i = 0; i < num_cem_values; i++) { 98301e04c3fSmrg colour_endpoints_quant[i] = in.get_bits(offset, ce_bits); 98401e04c3fSmrg 98501e04c3fSmrg if (VERBOSE_DECODE) 98601e04c3fSmrg in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]); 98701e04c3fSmrg 98801e04c3fSmrg offset += ce_bits; 98901e04c3fSmrg } 99001e04c3fSmrg } 99101e04c3fSmrg} 99201e04c3fSmrg 99301e04c3fSmrgvoid Block::decode_colour_endpoints() 99401e04c3fSmrg{ 99501e04c3fSmrg int cem_values_idx = 0; 99601e04c3fSmrg for (int part = 0; part < num_parts; ++part) { 99701e04c3fSmrg uint8_t *v = &colour_endpoints[cem_values_idx]; 99801e04c3fSmrg int v0 = v[0]; 99901e04c3fSmrg int v1 = v[1]; 100001e04c3fSmrg int v2 = v[2]; 100101e04c3fSmrg int v3 = v[3]; 100201e04c3fSmrg int v4 = v[4]; 100301e04c3fSmrg int v5 = v[5]; 100401e04c3fSmrg int v6 = v[6]; 100501e04c3fSmrg int v7 = v[7]; 100601e04c3fSmrg cem_values_idx += ((cems[part] >> 2) + 1) * 2; 100701e04c3fSmrg 100801e04c3fSmrg uint8x4_t e0, e1; 100901e04c3fSmrg int s0, s1, L0, L1; 101001e04c3fSmrg 101101e04c3fSmrg switch (cems[part]) 101201e04c3fSmrg { 101301e04c3fSmrg case 0: 101401e04c3fSmrg e0 = uint8x4_t(v0, v0, v0, 0xff); 101501e04c3fSmrg e1 = uint8x4_t(v1, v1, v1, 0xff); 101601e04c3fSmrg break; 101701e04c3fSmrg case 1: 101801e04c3fSmrg L0 = (v0 >> 2) | (v1 & 0xc0); 101901e04c3fSmrg L1 = L0 + (v1 & 0x3f); 102001e04c3fSmrg if (L1 > 0xff) 102101e04c3fSmrg L1 = 0xff; 102201e04c3fSmrg e0 = uint8x4_t(L0, L0, L0, 0xff); 102301e04c3fSmrg e1 = uint8x4_t(L1, L1, L1, 0xff); 102401e04c3fSmrg break; 102501e04c3fSmrg case 4: 102601e04c3fSmrg e0 = uint8x4_t(v0, v0, v0, v2); 102701e04c3fSmrg e1 = uint8x4_t(v1, v1, v1, v3); 102801e04c3fSmrg break; 102901e04c3fSmrg case 5: 103001e04c3fSmrg bit_transfer_signed(v1, v0); 103101e04c3fSmrg bit_transfer_signed(v3, v2); 103201e04c3fSmrg e0 = uint8x4_t(v0, v0, v0, v2); 103301e04c3fSmrg e1 = 
uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3); 103401e04c3fSmrg break; 103501e04c3fSmrg case 6: 103601e04c3fSmrg e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff); 103701e04c3fSmrg e1 = uint8x4_t(v0, v1, v2, 0xff); 103801e04c3fSmrg break; 103901e04c3fSmrg case 8: 104001e04c3fSmrg s0 = v0 + v2 + v4; 104101e04c3fSmrg s1 = v1 + v3 + v5; 104201e04c3fSmrg if (s1 >= s0) { 104301e04c3fSmrg e0 = uint8x4_t(v0, v2, v4, 0xff); 104401e04c3fSmrg e1 = uint8x4_t(v1, v3, v5, 0xff); 104501e04c3fSmrg } else { 104601e04c3fSmrg e0 = blue_contract(v1, v3, v5, 0xff); 104701e04c3fSmrg e1 = blue_contract(v0, v2, v4, 0xff); 104801e04c3fSmrg } 104901e04c3fSmrg break; 105001e04c3fSmrg case 9: 105101e04c3fSmrg bit_transfer_signed(v1, v0); 105201e04c3fSmrg bit_transfer_signed(v3, v2); 105301e04c3fSmrg bit_transfer_signed(v5, v4); 105401e04c3fSmrg if (v1 + v3 + v5 >= 0) { 105501e04c3fSmrg e0 = uint8x4_t(v0, v2, v4, 0xff); 105601e04c3fSmrg e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff); 105701e04c3fSmrg } else { 105801e04c3fSmrg e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff); 105901e04c3fSmrg e1 = blue_contract(v0, v2, v4, 0xff); 106001e04c3fSmrg } 106101e04c3fSmrg break; 106201e04c3fSmrg case 10: 106301e04c3fSmrg e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4); 106401e04c3fSmrg e1 = uint8x4_t(v0, v1, v2, v5); 106501e04c3fSmrg break; 106601e04c3fSmrg case 12: 106701e04c3fSmrg s0 = v0 + v2 + v4; 106801e04c3fSmrg s1 = v1 + v3 + v5; 106901e04c3fSmrg if (s1 >= s0) { 107001e04c3fSmrg e0 = uint8x4_t(v0, v2, v4, v6); 107101e04c3fSmrg e1 = uint8x4_t(v1, v3, v5, v7); 107201e04c3fSmrg } else { 107301e04c3fSmrg e0 = blue_contract(v1, v3, v5, v7); 107401e04c3fSmrg e1 = blue_contract(v0, v2, v4, v6); 107501e04c3fSmrg } 107601e04c3fSmrg break; 107701e04c3fSmrg case 13: 107801e04c3fSmrg bit_transfer_signed(v1, v0); 107901e04c3fSmrg bit_transfer_signed(v3, v2); 108001e04c3fSmrg bit_transfer_signed(v5, v4); 108101e04c3fSmrg bit_transfer_signed(v7, v6); 108201e04c3fSmrg if (v1 + v3 + v5 
>= 0) { 108301e04c3fSmrg e0 = uint8x4_t(v0, v2, v4, v6); 108401e04c3fSmrg e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7); 108501e04c3fSmrg } else { 108601e04c3fSmrg e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7); 108701e04c3fSmrg e1 = blue_contract(v0, v2, v4, v6); 108801e04c3fSmrg } 108901e04c3fSmrg break; 109001e04c3fSmrg default: 109101e04c3fSmrg /* HDR endpoints not supported; return error colour */ 109201e04c3fSmrg e0 = uint8x4_t(255, 0, 255, 255); 109301e04c3fSmrg e1 = uint8x4_t(255, 0, 255, 255); 109401e04c3fSmrg break; 109501e04c3fSmrg } 109601e04c3fSmrg 109701e04c3fSmrg endpoints_decoded[0][part] = e0; 109801e04c3fSmrg endpoints_decoded[1][part] = e1; 109901e04c3fSmrg 110001e04c3fSmrg if (VERBOSE_DECODE) { 110101e04c3fSmrg printf("cems[%d]=%d v=[", part, cems[part]); 110201e04c3fSmrg for (int i = 0; i < (cems[part] >> 2) + 1; ++i) { 110301e04c3fSmrg if (i) 110401e04c3fSmrg printf(", "); 110501e04c3fSmrg printf("%3d", v[i]); 110601e04c3fSmrg } 110701e04c3fSmrg printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n", 110801e04c3fSmrg e0.v[0], e0.v[1], e0.v[2], e0.v[3], 110901e04c3fSmrg e1.v[0], e1.v[1], e1.v[2], e1.v[3]); 111001e04c3fSmrg } 111101e04c3fSmrg } 111201e04c3fSmrg} 111301e04c3fSmrg 111401e04c3fSmrgvoid Block::unpack_weights(InputBitVector in) 111501e04c3fSmrg{ 111601e04c3fSmrg if (wt_trits) { 111701e04c3fSmrg int offset = 128; 111801e04c3fSmrg int bits_left = weight_bits; 111901e04c3fSmrg for (int i = 0; i < num_weights; i += 5) { 112001e04c3fSmrg int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits); 112101e04c3fSmrg /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */ 112201e04c3fSmrg uint32_t raw = in.get_bits_rev(offset, bits_to_read); 112301e04c3fSmrg unpack_trit_block(wt_bits, raw, &weights_quant[i]); 112401e04c3fSmrg 112501e04c3fSmrg if (VERBOSE_DECODE) 112601e04c3fSmrg in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]", 112701e04c3fSmrg weights_quant[i+0], 
weights_quant[i+1], 112801e04c3fSmrg weights_quant[i+2], weights_quant[i+3], 112901e04c3fSmrg weights_quant[i+4]); 113001e04c3fSmrg 113101e04c3fSmrg offset -= 8 + wt_bits * 5; 113201e04c3fSmrg bits_left -= 8 + wt_bits * 5; 113301e04c3fSmrg } 113401e04c3fSmrg 113501e04c3fSmrg } else if (wt_quints) { 113601e04c3fSmrg 113701e04c3fSmrg int offset = 128; 113801e04c3fSmrg int bits_left = weight_bits; 113901e04c3fSmrg for (int i = 0; i < num_weights; i += 3) { 114001e04c3fSmrg int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits); 114101e04c3fSmrg /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */ 114201e04c3fSmrg uint32_t raw = in.get_bits_rev(offset, bits_to_read); 114301e04c3fSmrg unpack_quint_block(wt_bits, raw, &weights_quant[i]); 114401e04c3fSmrg 114501e04c3fSmrg if (VERBOSE_DECODE) 114601e04c3fSmrg in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]", 114701e04c3fSmrg weights_quant[i], weights_quant[i+1], weights_quant[i+2]); 114801e04c3fSmrg 114901e04c3fSmrg offset -= 7 + wt_bits * 3; 115001e04c3fSmrg bits_left -= 7 + wt_bits * 3; 115101e04c3fSmrg } 115201e04c3fSmrg 115301e04c3fSmrg } else { 115401e04c3fSmrg int offset = 128; 115501e04c3fSmrg assert((weight_bits % wt_bits) == 0); 115601e04c3fSmrg for (int i = 0; i < num_weights; ++i) { 115701e04c3fSmrg weights_quant[i] = in.get_bits_rev(offset, wt_bits); 115801e04c3fSmrg 115901e04c3fSmrg if (VERBOSE_DECODE) 116001e04c3fSmrg in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]); 116101e04c3fSmrg 116201e04c3fSmrg offset -= wt_bits; 116301e04c3fSmrg } 116401e04c3fSmrg } 116501e04c3fSmrg} 116601e04c3fSmrg 116701e04c3fSmrgvoid Block::unquantise_weights() 116801e04c3fSmrg{ 116901e04c3fSmrg assert(num_weights <= (int)ARRAY_SIZE(weights_quant)); 117001e04c3fSmrg assert(num_weights <= (int)ARRAY_SIZE(weights)); 117101e04c3fSmrg 117201e04c3fSmrg memset(weights, 0, sizeof(weights)); 117301e04c3fSmrg 117401e04c3fSmrg for (int i = 0; i < 
num_weights; ++i) { 117501e04c3fSmrg 117601e04c3fSmrg uint8_t v = weights_quant[i]; 117701e04c3fSmrg uint8_t w; 117801e04c3fSmrg 117901e04c3fSmrg if (wt_trits) { 118001e04c3fSmrg 118101e04c3fSmrg if (wt_bits == 0) { 118201e04c3fSmrg w = v * 32; 118301e04c3fSmrg } else { 118401e04c3fSmrg uint8_t A, B, C, D; 118501e04c3fSmrg A = (v & 0x1) ? 0x7F : 0x00; 118601e04c3fSmrg switch (wt_bits) { 118701e04c3fSmrg case 1: 118801e04c3fSmrg B = 0; 118901e04c3fSmrg C = 50; 119001e04c3fSmrg D = v >> 1; 119101e04c3fSmrg break; 119201e04c3fSmrg case 2: 119301e04c3fSmrg B = (v & 0x2) ? 0x45 : 0x00; 119401e04c3fSmrg C = 23; 119501e04c3fSmrg D = v >> 2; 119601e04c3fSmrg break; 119701e04c3fSmrg case 3: 119801e04c3fSmrg B = ((v & 0x6) >> 1) | ((v & 0x6) << 4); 119901e04c3fSmrg C = 11; 120001e04c3fSmrg D = v >> 3; 120101e04c3fSmrg break; 120201e04c3fSmrg default: 120301e04c3fSmrg unreachable(""); 120401e04c3fSmrg } 120501e04c3fSmrg uint16_t T = D * C + B; 120601e04c3fSmrg T = T ^ A; 120701e04c3fSmrg T = (A & 0x20) | (T >> 2); 120801e04c3fSmrg assert(T < 64); 120901e04c3fSmrg if (T > 32) 121001e04c3fSmrg T++; 121101e04c3fSmrg w = T; 121201e04c3fSmrg } 121301e04c3fSmrg 121401e04c3fSmrg } else if (wt_quints) { 121501e04c3fSmrg 121601e04c3fSmrg if (wt_bits == 0) { 121701e04c3fSmrg w = v * 16; 121801e04c3fSmrg } else { 121901e04c3fSmrg uint8_t A, B, C, D; 122001e04c3fSmrg A = (v & 0x1) ? 0x7F : 0x00; 122101e04c3fSmrg switch (wt_bits) { 122201e04c3fSmrg case 1: 122301e04c3fSmrg B = 0; 122401e04c3fSmrg C = 28; 122501e04c3fSmrg D = v >> 1; 122601e04c3fSmrg break; 122701e04c3fSmrg case 2: 122801e04c3fSmrg B = (v & 0x2) ? 
0x42 : 0x00; 122901e04c3fSmrg C = 13; 123001e04c3fSmrg D = v >> 2; 123101e04c3fSmrg break; 123201e04c3fSmrg default: 123301e04c3fSmrg unreachable(""); 123401e04c3fSmrg } 123501e04c3fSmrg uint16_t T = D * C + B; 123601e04c3fSmrg T = T ^ A; 123701e04c3fSmrg T = (A & 0x20) | (T >> 2); 123801e04c3fSmrg assert(T < 64); 123901e04c3fSmrg if (T > 32) 124001e04c3fSmrg T++; 124101e04c3fSmrg w = T; 124201e04c3fSmrg } 124301e04c3fSmrg weights[i] = w; 124401e04c3fSmrg 124501e04c3fSmrg } else { 124601e04c3fSmrg 124701e04c3fSmrg switch (wt_bits) { 124801e04c3fSmrg case 1: w = v ? 0x3F : 0x00; break; 124901e04c3fSmrg case 2: w = v | (v << 2) | (v << 4); break; 125001e04c3fSmrg case 3: w = v | (v << 3); break; 125101e04c3fSmrg case 4: w = (v >> 2) | (v << 2); break; 125201e04c3fSmrg case 5: w = (v >> 4) | (v << 1); break; 125301e04c3fSmrg default: unreachable(""); 125401e04c3fSmrg } 125501e04c3fSmrg assert(w < 64); 125601e04c3fSmrg if (w > 32) 125701e04c3fSmrg w++; 125801e04c3fSmrg } 125901e04c3fSmrg weights[i] = w; 126001e04c3fSmrg } 126101e04c3fSmrg} 126201e04c3fSmrg 126301e04c3fSmrgvoid Block::compute_infill_weights(int block_w, int block_h, int block_d) 126401e04c3fSmrg{ 126501e04c3fSmrg int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1); 126601e04c3fSmrg int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1); 126701e04c3fSmrg int Dr = block_d <= 1 ? 
0 : (1024 + block_d / 2) / (block_d - 1); 126801e04c3fSmrg for (int r = 0; r < block_d; ++r) { 126901e04c3fSmrg for (int t = 0; t < block_h; ++t) { 127001e04c3fSmrg for (int s = 0; s < block_w; ++s) { 127101e04c3fSmrg int cs = Ds * s; 127201e04c3fSmrg int ct = Dt * t; 127301e04c3fSmrg int cr = Dr * r; 127401e04c3fSmrg int gs = (cs * (wt_w - 1) + 32) >> 6; 127501e04c3fSmrg int gt = (ct * (wt_h - 1) + 32) >> 6; 127601e04c3fSmrg int gr = (cr * (wt_d - 1) + 32) >> 6; 127701e04c3fSmrg assert(gs >= 0 && gs <= 176); 127801e04c3fSmrg assert(gt >= 0 && gt <= 176); 127901e04c3fSmrg assert(gr >= 0 && gr <= 176); 128001e04c3fSmrg int js = gs >> 4; 128101e04c3fSmrg int fs = gs & 0xf; 128201e04c3fSmrg int jt = gt >> 4; 128301e04c3fSmrg int ft = gt & 0xf; 128401e04c3fSmrg int jr = gr >> 4; 128501e04c3fSmrg int fr = gr & 0xf; 128601e04c3fSmrg 128701e04c3fSmrg /* TODO: 3D */ 128801e04c3fSmrg (void)jr; 128901e04c3fSmrg (void)fr; 129001e04c3fSmrg 129101e04c3fSmrg int w11 = (fs * ft + 8) >> 4; 129201e04c3fSmrg int w10 = ft - w11; 129301e04c3fSmrg int w01 = fs - w11; 129401e04c3fSmrg int w00 = 16 - fs - ft + w11; 129501e04c3fSmrg 129601e04c3fSmrg if (dual_plane) { 129701e04c3fSmrg int p00, p01, p10, p11, i0, i1; 129801e04c3fSmrg int v0 = js + jt * wt_w; 129901e04c3fSmrg p00 = weights[(v0) * 2]; 130001e04c3fSmrg p01 = weights[(v0 + 1) * 2]; 130101e04c3fSmrg p10 = weights[(v0 + wt_w) * 2]; 130201e04c3fSmrg p11 = weights[(v0 + wt_w + 1) * 2]; 130301e04c3fSmrg i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 130401e04c3fSmrg p00 = weights[(v0) * 2 + 1]; 130501e04c3fSmrg p01 = weights[(v0 + 1) * 2 + 1]; 130601e04c3fSmrg p10 = weights[(v0 + wt_w) * 2 + 1]; 130701e04c3fSmrg p11 = weights[(v0 + wt_w + 1) * 2 + 1]; 130801e04c3fSmrg assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights)); 130901e04c3fSmrg i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 131001e04c3fSmrg assert(0 <= i0 && i0 <= 64); 131101e04c3fSmrg infill_weights[0][s + t*block_w + r*block_w*block_h] = i0; 
131201e04c3fSmrg infill_weights[1][s + t*block_w + r*block_w*block_h] = i1; 131301e04c3fSmrg } else { 131401e04c3fSmrg int p00, p01, p10, p11, i; 131501e04c3fSmrg int v0 = js + jt * wt_w; 131601e04c3fSmrg p00 = weights[v0]; 131701e04c3fSmrg p01 = weights[v0 + 1]; 131801e04c3fSmrg p10 = weights[v0 + wt_w]; 131901e04c3fSmrg p11 = weights[v0 + wt_w + 1]; 132001e04c3fSmrg assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights)); 132101e04c3fSmrg i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 132201e04c3fSmrg assert(0 <= i && i <= 64); 132301e04c3fSmrg infill_weights[0][s + t*block_w + r*block_w*block_h] = i; 132401e04c3fSmrg } 132501e04c3fSmrg } 132601e04c3fSmrg } 132701e04c3fSmrg } 132801e04c3fSmrg} 132901e04c3fSmrg 133001e04c3fSmrgvoid Block::unquantise_colour_endpoints() 133101e04c3fSmrg{ 133201e04c3fSmrg assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant)); 133301e04c3fSmrg assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints)); 133401e04c3fSmrg 133501e04c3fSmrg for (int i = 0; i < num_cem_values; ++i) { 133601e04c3fSmrg uint8_t v = colour_endpoints_quant[i]; 133701e04c3fSmrg 133801e04c3fSmrg if (ce_trits) { 133901e04c3fSmrg uint16_t A, B, C, D; 134001e04c3fSmrg uint16_t t; 134101e04c3fSmrg A = (v & 0x1) ? 0x1FF : 0x000; 134201e04c3fSmrg switch (ce_bits) { 134301e04c3fSmrg case 1: 134401e04c3fSmrg B = 0; 134501e04c3fSmrg C = 204; 134601e04c3fSmrg D = v >> 1; 134701e04c3fSmrg break; 134801e04c3fSmrg case 2: 134901e04c3fSmrg B = (v & 0x2) ? 
0x116 : 0x000; 135001e04c3fSmrg C = 93; 135101e04c3fSmrg D = v >> 2; 135201e04c3fSmrg break; 135301e04c3fSmrg case 3: 135401e04c3fSmrg t = ((v >> 1) & 0x3); 135501e04c3fSmrg B = t | (t << 2) | (t << 7); 135601e04c3fSmrg C = 44; 135701e04c3fSmrg D = v >> 3; 135801e04c3fSmrg break; 135901e04c3fSmrg case 4: 136001e04c3fSmrg t = ((v >> 1) & 0x7); 136101e04c3fSmrg B = t | (t << 6); 136201e04c3fSmrg C = 22; 136301e04c3fSmrg D = v >> 4; 136401e04c3fSmrg break; 136501e04c3fSmrg case 5: 136601e04c3fSmrg t = ((v >> 1) & 0xF); 136701e04c3fSmrg B = (t >> 2) | (t << 5); 136801e04c3fSmrg C = 11; 136901e04c3fSmrg D = v >> 5; 137001e04c3fSmrg break; 137101e04c3fSmrg case 6: 137201e04c3fSmrg B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1); 137301e04c3fSmrg C = 5; 137401e04c3fSmrg D = v >> 6; 137501e04c3fSmrg break; 137601e04c3fSmrg default: 137701e04c3fSmrg unreachable(""); 137801e04c3fSmrg } 137901e04c3fSmrg uint16_t T = D * C + B; 138001e04c3fSmrg T = T ^ A; 138101e04c3fSmrg T = (A & 0x80) | (T >> 2); 138201e04c3fSmrg assert(T < 256); 138301e04c3fSmrg colour_endpoints[i] = T; 138401e04c3fSmrg } else if (ce_quints) { 138501e04c3fSmrg uint16_t A, B, C, D; 138601e04c3fSmrg uint16_t t; 138701e04c3fSmrg A = (v & 0x1) ? 0x1FF : 0x000; 138801e04c3fSmrg switch (ce_bits) { 138901e04c3fSmrg case 1: 139001e04c3fSmrg B = 0; 139101e04c3fSmrg C = 113; 139201e04c3fSmrg D = v >> 1; 139301e04c3fSmrg break; 139401e04c3fSmrg case 2: 139501e04c3fSmrg B = (v & 0x2) ? 
0x10C : 0x000; 139601e04c3fSmrg C = 54; 139701e04c3fSmrg D = v >> 2; 139801e04c3fSmrg break; 139901e04c3fSmrg case 3: 140001e04c3fSmrg t = ((v >> 1) & 0x3); 140101e04c3fSmrg B = (t >> 1) | (t << 1) | (t << 7); 140201e04c3fSmrg C = 26; 140301e04c3fSmrg D = v >> 3; 140401e04c3fSmrg break; 140501e04c3fSmrg case 4: 140601e04c3fSmrg t = ((v >> 1) & 0x7); 140701e04c3fSmrg B = (t >> 1) | (t << 6); 140801e04c3fSmrg C = 13; 140901e04c3fSmrg D = v >> 4; 141001e04c3fSmrg break; 141101e04c3fSmrg case 5: 141201e04c3fSmrg t = ((v >> 1) & 0xF); 141301e04c3fSmrg B = (t >> 4) | (t << 5); 141401e04c3fSmrg C = 6; 141501e04c3fSmrg D = v >> 5; 141601e04c3fSmrg break; 141701e04c3fSmrg default: 141801e04c3fSmrg unreachable(""); 141901e04c3fSmrg } 142001e04c3fSmrg uint16_t T = D * C + B; 142101e04c3fSmrg T = T ^ A; 142201e04c3fSmrg T = (A & 0x80) | (T >> 2); 142301e04c3fSmrg assert(T < 256); 142401e04c3fSmrg colour_endpoints[i] = T; 142501e04c3fSmrg } else { 142601e04c3fSmrg switch (ce_bits) { 142701e04c3fSmrg case 1: v = v ? 
0xFF : 0x00; break; 142801e04c3fSmrg case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break; 142901e04c3fSmrg case 3: v = (v << 5) | (v << 2) | (v >> 1); break; 143001e04c3fSmrg case 4: v = (v << 4) | v; break; 143101e04c3fSmrg case 5: v = (v << 3) | (v >> 2); break; 143201e04c3fSmrg case 6: v = (v << 2) | (v >> 4); break; 143301e04c3fSmrg case 7: v = (v << 1) | (v >> 6); break; 143401e04c3fSmrg case 8: break; 143501e04c3fSmrg default: unreachable(""); 143601e04c3fSmrg } 143701e04c3fSmrg colour_endpoints[i] = v; 143801e04c3fSmrg } 143901e04c3fSmrg } 144001e04c3fSmrg} 144101e04c3fSmrg 144201e04c3fSmrgdecode_error::type Block::decode(const Decoder &decoder, InputBitVector in) 144301e04c3fSmrg{ 144401e04c3fSmrg decode_error::type err; 144501e04c3fSmrg 144601e04c3fSmrg is_error = false; 144701e04c3fSmrg bogus_colour_endpoints = false; 144801e04c3fSmrg bogus_weights = false; 144901e04c3fSmrg is_void_extent = false; 145001e04c3fSmrg 145101e04c3fSmrg wt_d = 1; 145201e04c3fSmrg /* TODO: 3D */ 145301e04c3fSmrg 145401e04c3fSmrg /* TODO: test for all the illegal encodings */ 145501e04c3fSmrg 145601e04c3fSmrg if (VERBOSE_DECODE) 145701e04c3fSmrg in.printf_bits(0, 128); 145801e04c3fSmrg 145901e04c3fSmrg err = decode_block_mode(in); 146001e04c3fSmrg if (err != decode_error::ok) 146101e04c3fSmrg return err; 146201e04c3fSmrg 146301e04c3fSmrg if (is_void_extent) 146401e04c3fSmrg return decode_error::ok; 146501e04c3fSmrg 146601e04c3fSmrg /* TODO: 3D */ 146701e04c3fSmrg 146801e04c3fSmrg calculate_from_weights(); 146901e04c3fSmrg 147001e04c3fSmrg if (VERBOSE_DECODE) 147101e04c3fSmrg printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n", 147201e04c3fSmrg wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits); 147301e04c3fSmrg 147401e04c3fSmrg if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d) 147501e04c3fSmrg return 
/**
 * Decode one 128-bit block: block mode, partitions, colour endpoints and
 * weights, ending with the per-texel infill weights.
 *
 * \param decoder  supplies the block dimensions (block_w, block_h, block_d)
 * \param in       the block's bits
 * \return decode_error::ok on success (including void-extent blocks, for
 *         which nothing after the block mode is decoded), otherwise the
 *         first error encountered.
 */
decode_error::type Block::decode(const Decoder &decoder, InputBitVector in)
{
   decode_error::type err;

   is_error = false;
   bogus_colour_endpoints = false;
   bogus_weights = false;
   is_void_extent = false;

   wt_d = 1;
   /* TODO: 3D */

   /* TODO: test for all the illegal encodings */

   if (VERBOSE_DECODE)
      in.printf_bits(0, 128);

   err = decode_block_mode(in);
   if (err != decode_error::ok)
      return err;

   if (is_void_extent)
      return decode_error::ok;

   /* TODO: 3D */

   calculate_from_weights();

   if (VERBOSE_DECODE)
      printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
             wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits);

   if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d)
      return decode_error::weight_grid_exceeds_block_size;

   num_parts = in.get_bits(11, 2) + 1;

   if (VERBOSE_DECODE)
      in.printf_bits(11, 2, "partitions = %d", num_parts);

   if (dual_plane && num_parts > 3)
      return decode_error::dual_plane_and_too_many_partitions;

   /* NOTE(review): weight_bits is only range-checked further down, yet
    * decode_cem() already derives bit offsets from 128 - weight_bits.
    * Confirm that InputBitVector tolerates the offsets an oversized
    * weight_bits produces, or validate earlier (which would change which
    * error is reported for multiply-invalid blocks).
    */
   decode_cem(in);

   if (VERBOSE_DECODE)
      printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class);

   int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits;
   num_cem_values = num_cem_pairs * 2;

   calculate_remaining_bits();
   err = calculate_colour_endpoints_size();
   if (err != decode_error::ok)
      return err;

   if (VERBOSE_DECODE)
      in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits,
                     "endpoint data (%d bits, %d vals, %dt %dq %db)",
                     colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits);

   /* Reject oversized endpoint counts before unpacking:
    * unpack_colour_endpoints() stores num_cem_values entries into
    * colour_endpoints_quant[], so the limit has to be enforced first.
    */
   if (num_cem_values > 18)
      return decode_error::invalid_colour_endpoints_count;

   unpack_colour_endpoints(in);

   if (VERBOSE_DECODE) {
      printf("cem values raw =[");
      for (int i = 0; i < num_cem_values; i++) {
         if (i)
            printf(", ");
         printf("%3d", colour_endpoints_quant[i]);
      }
      printf("]\n");
   }

   unquantise_colour_endpoints();

   if (VERBOSE_DECODE) {
      printf("cem values norm=[");
      for (int i = 0; i < num_cem_values; i++) {
         if (i)
            printf(", ");
         printf("%3d", colour_endpoints[i]);
      }
      printf("]\n");
   }

   decode_colour_endpoints();

   if (dual_plane) {
      /* The selector sits below the weights and any extra CEM bits. */
      int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2;
      colour_component_selector = in.get_bits(ccs_offset, 2);

      if (VERBOSE_DECODE)
         in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector);
   } else {
      colour_component_selector = 0;
   }


   if (VERBOSE_DECODE)
      in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits);

   if (num_weights > 64)
      return decode_error::invalid_num_weights;

   if (weight_bits < 24 || weight_bits > 96)
      return decode_error::invalid_weight_bits;

   unpack_weights(in);

   unquantise_weights();

   if (VERBOSE_DECODE) {
      printf("weights=[");
      for (int i = 0; i < num_weights; ++i) {
         if (i)
            printf(", ");
         printf("%d", weights[i]);
      }
      printf("]\n");

      for (int plane = 0; plane <= dual_plane; ++plane) {
         printf("weights (plane %d):\n", plane);
         int i = 0;
         (void)i;

         for (int r = 0; r < wt_d; ++r) {
            for (int t = 0; t < wt_h; ++t) {
               for (int s = 0; s < wt_w; ++s) {
                  printf("%3d", weights[i++ * (1 + dual_plane) + plane]);
               }
               printf("\n");
            }
            if (r < wt_d - 1)
               printf("\n");
         }
      }
   }

   compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d);

   if (VERBOSE_DECODE) {
      for (int plane = 0; plane <= dual_plane; ++plane) {
         printf("infilled weights (plane %d):\n", plane);
         int i = 0;
         (void)i;

         for (int r = 0; r < decoder.block_d; ++r) {
            for (int t = 0; t < decoder.block_h; ++t) {
               for (int s = 0; s < decoder.block_w; ++s) {
                  printf("%3d", infill_weights[plane][i++]);
               }
               printf("\n");
            }
            if (r < decoder.block_d - 1)
               printf("\n");
         }
      }
   }
   if (VERBOSE_DECODE)
      printf("\n");

   return decode_error::ok;
}
*/ 161201e04c3fSmrg assert(!decoder.srgb || decoder.output_unorm8); 161301e04c3fSmrg 161401e04c3fSmrg if (is_void_extent) { 161501e04c3fSmrg for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) { 161601e04c3fSmrg if (decoder.output_unorm8) { 161701e04c3fSmrg if (decoder.srgb) { 161801e04c3fSmrg output[idx*4+0] = void_extent_colour_r >> 8; 161901e04c3fSmrg output[idx*4+1] = void_extent_colour_g >> 8; 162001e04c3fSmrg output[idx*4+2] = void_extent_colour_b >> 8; 162101e04c3fSmrg } else { 162201e04c3fSmrg output[idx*4+0] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_r); 162301e04c3fSmrg output[idx*4+1] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_g); 162401e04c3fSmrg output[idx*4+2] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_b); 162501e04c3fSmrg } 162601e04c3fSmrg output[idx*4+3] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_a); 162701e04c3fSmrg } else { 162801e04c3fSmrg /* Store the color as FP16. */ 162901e04c3fSmrg output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r); 163001e04c3fSmrg output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g); 163101e04c3fSmrg output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b); 163201e04c3fSmrg output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a); 163301e04c3fSmrg } 163401e04c3fSmrg } 163501e04c3fSmrg return; 163601e04c3fSmrg } 163701e04c3fSmrg 163801e04c3fSmrg int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31; 163901e04c3fSmrg 164001e04c3fSmrg int idx = 0; 164101e04c3fSmrg for (int z = 0; z < decoder.block_d; ++z) { 164201e04c3fSmrg for (int y = 0; y < decoder.block_h; ++y) { 164301e04c3fSmrg for (int x = 0; x < decoder.block_w; ++x) { 164401e04c3fSmrg 164501e04c3fSmrg int partition; 164601e04c3fSmrg if (num_parts > 1) { 164701e04c3fSmrg partition = select_partition(partition_index, x, y, z, num_parts, small_block); 164801e04c3fSmrg assert(partition < num_parts); 164901e04c3fSmrg } 
else { 165001e04c3fSmrg partition = 0; 165101e04c3fSmrg } 165201e04c3fSmrg 165301e04c3fSmrg /* TODO: HDR */ 165401e04c3fSmrg 165501e04c3fSmrg uint8x4_t e0 = endpoints_decoded[0][partition]; 165601e04c3fSmrg uint8x4_t e1 = endpoints_decoded[1][partition]; 165701e04c3fSmrg uint16_t c0[4], c1[4]; 165801e04c3fSmrg 165901e04c3fSmrg /* Expand to 16 bits. */ 166001e04c3fSmrg if (decoder.srgb) { 166101e04c3fSmrg c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80); 166201e04c3fSmrg c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80); 166301e04c3fSmrg c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80); 166401e04c3fSmrg c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80); 166501e04c3fSmrg 166601e04c3fSmrg c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80); 166701e04c3fSmrg c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80); 166801e04c3fSmrg c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80); 166901e04c3fSmrg c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80); 167001e04c3fSmrg } else { 167101e04c3fSmrg c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]); 167201e04c3fSmrg c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]); 167301e04c3fSmrg c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]); 167401e04c3fSmrg c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]); 167501e04c3fSmrg 167601e04c3fSmrg c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]); 167701e04c3fSmrg c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]); 167801e04c3fSmrg c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]); 167901e04c3fSmrg c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]); 168001e04c3fSmrg } 168101e04c3fSmrg 168201e04c3fSmrg int w[4]; 168301e04c3fSmrg if (dual_plane) { 168401e04c3fSmrg int w0 = infill_weights[0][idx]; 168501e04c3fSmrg int w1 = infill_weights[1][idx]; 168601e04c3fSmrg w[0] = w[1] = w[2] = w[3] = w0; 168701e04c3fSmrg w[colour_component_selector] = w1; 168801e04c3fSmrg } else { 168901e04c3fSmrg int w0 = infill_weights[0][idx]; 169001e04c3fSmrg w[0] = w[1] = w[2] = w[3] = w0; 169101e04c3fSmrg } 169201e04c3fSmrg 169301e04c3fSmrg /* Interpolate to produce UNORM16, applying weights. 
*/ 169401e04c3fSmrg uint16_t c[4] = { 169501e04c3fSmrg (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6), 169601e04c3fSmrg (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6), 169701e04c3fSmrg (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6), 169801e04c3fSmrg (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6), 169901e04c3fSmrg }; 170001e04c3fSmrg 170101e04c3fSmrg if (decoder.output_unorm8) { 170201e04c3fSmrg if (decoder.srgb) { 170301e04c3fSmrg output[idx*4+0] = c[0] >> 8; 170401e04c3fSmrg output[idx*4+1] = c[1] >> 8; 170501e04c3fSmrg output[idx*4+2] = c[2] >> 8; 170601e04c3fSmrg } else { 170701e04c3fSmrg output[idx*4+0] = c[0] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[0]); 170801e04c3fSmrg output[idx*4+1] = c[1] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[1]); 170901e04c3fSmrg output[idx*4+2] = c[2] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[2]); 171001e04c3fSmrg } 171101e04c3fSmrg output[idx*4+3] = c[3] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[3]); 171201e04c3fSmrg } else { 171301e04c3fSmrg /* Store the color as FP16. */ 171401e04c3fSmrg output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]); 171501e04c3fSmrg output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]); 171601e04c3fSmrg output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]); 171701e04c3fSmrg output[idx*4+3] = c[3] == 65535 ? 
FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]); 171801e04c3fSmrg } 171901e04c3fSmrg 172001e04c3fSmrg idx++; 172101e04c3fSmrg } 172201e04c3fSmrg } 172301e04c3fSmrg } 172401e04c3fSmrg} 172501e04c3fSmrg
/**
 * Derive the weight encoding parameters from the already-parsed block mode.
 *
 * Reads high_prec, wt_range, wt_w, wt_h, wt_d and dual_plane; writes wt_max,
 * wt_trits, wt_quints, wt_bits, num_weights and weight_bits.
 *
 * The nested switch maps (high_prec, wt_range) to a combination of at most
 * one trit or one quint plus a number of plain bits.  In every row wt_max is
 * the number of encodable values minus one (e.g. one trit plus one bit gives
 * 3 * 2 = 6 values and wt_max = 5).  A wt_range outside 0x2..0x7 calls
 * abort().  The outer switch has no default: for a high_prec other than 0 or
 * 1 wt_max is left unassigned by this function and the three counters stay
 * zero.
 */
172601e04c3fSmrgvoid Block::calculate_from_weights() 172701e04c3fSmrg{ 172801e04c3fSmrg wt_trits = 0; 172901e04c3fSmrg wt_quints = 0; 173001e04c3fSmrg wt_bits = 0; 173101e04c3fSmrg switch (high_prec) { 173201e04c3fSmrg case 0: 173301e04c3fSmrg switch (wt_range) { 173401e04c3fSmrg case 0x2: wt_max = 1; wt_bits = 1; break; 173501e04c3fSmrg case 0x3: wt_max = 2; wt_trits = 1; break; 173601e04c3fSmrg case 0x4: wt_max = 3; wt_bits = 2; break; 173701e04c3fSmrg case 0x5: wt_max = 4; wt_quints = 1; break; 173801e04c3fSmrg case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break; 173901e04c3fSmrg case 0x7: wt_max = 7; wt_bits = 3; break; 174001e04c3fSmrg default: abort(); 174101e04c3fSmrg } 174201e04c3fSmrg break; 174301e04c3fSmrg case 1: 174401e04c3fSmrg switch (wt_range) { 174501e04c3fSmrg case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break; 174601e04c3fSmrg case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break; 174701e04c3fSmrg case 0x4: wt_max = 15; wt_bits = 4; break; 174801e04c3fSmrg case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break; 174901e04c3fSmrg case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break; 175001e04c3fSmrg case 0x7: wt_max = 31; wt_bits = 5; break; 175101e04c3fSmrg default: abort(); 175201e04c3fSmrg } 175301e04c3fSmrg break; 175401e04c3fSmrg } 175501e04c3fSmrg
/* Every row of the table above sets at least one of the three counters, so
 * with assertions enabled this only fires when no row was selected. */
175601e04c3fSmrg assert(wt_trits || wt_quints || wt_bits); 175701e04c3fSmrg 175801e04c3fSmrg num_weights = wt_w * wt_h * wt_d; 175901e04c3fSmrg 176001e04c3fSmrg if (dual_plane) 176101e04c3fSmrg num_weights *= 2; 176201e04c3fSmrg
/* Total size of the weight data in bits:
 *   ceil(8 * N * trits / 5) + ceil(7 * N * quints / 3) + N * bits
 * with N = num_weights, i.e. trits cost 8 bits per group of five and quints
 * 7 bits per group of three, rounded up.  The "+ 4" and "+ 2" terms turn the
 * integer divisions into ceilings. */
176301e04c3fSmrg weight_bits = 176401e04c3fSmrg (num_weights * 8 * wt_trits + 4) / 5 176501e04c3fSmrg + (num_weights * 7 * wt_quints + 2) / 3 176601e04c3fSmrg + num_weights * wt_bits; 176701e04c3fSmrg} 176801e04c3fSmrg 176901e04c3fSmrgvoid
Block::calculate_remaining_bits() 177001e04c3fSmrg{
/* Computes remaining_bits: what is left of the 128-bit block once the
 * configuration bits and the weight data (weight_bits) are subtracted.
 *
 * config_bits is 17 for a single partition; with more than one partition it
 * is 29 when is_multi_cem is false and 25 + 3 * num_parts otherwise.  Dual
 * plane adds 2 more bits.  The result is not clamped here, so it can be
 * negative if weight_bits is large. */
177101e04c3fSmrg int config_bits; 177201e04c3fSmrg if (num_parts > 1) { 177301e04c3fSmrg if (!is_multi_cem) 177401e04c3fSmrg config_bits = 29; 177501e04c3fSmrg else 177601e04c3fSmrg config_bits = 25 + 3 * num_parts; 177701e04c3fSmrg } else { 177801e04c3fSmrg config_bits = 17; 177901e04c3fSmrg } 178001e04c3fSmrg 178101e04c3fSmrg if (dual_plane) 178201e04c3fSmrg config_bits += 2; 178301e04c3fSmrg 178401e04c3fSmrg remaining_bits = 128 - config_bits - weight_bits; 178501e04c3fSmrg} 178601e04c3fSmrg
/**
 * Choose the colour endpoint quantisation range that fits in remaining_bits.
 *
 * Reads remaining_bits and num_cem_values; writes colour_endpoint_bits,
 * ce_max, ce_trits, ce_quints and ce_bits.
 *
 * cem_ranges is sorted by increasing size, so the table is scanned from its
 * last entry to its first and the first entry whose encoded size for
 * num_cem_values values fits is taken.  The size formula is
 *   ceil(8 * N * t / 5) + ceil(7 * N * q / 3) + N * b,  N = num_cem_values.
 *
 * \return decode_error::ok when a range was selected, or
 *         decode_error::invalid_colour_endpoints_size (with the five output
 *         fields set to 0) when remaining_bits is below
 *         ceil(13 * num_cem_values / 5).
 */
178701e04c3fSmrgdecode_error::type Block::calculate_colour_endpoints_size() 178801e04c3fSmrg{ 178901e04c3fSmrg /* Specified as illegal */ 179001e04c3fSmrg if (remaining_bits < (13 * num_cem_values + 4) / 5) { 179101e04c3fSmrg colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0; 179201e04c3fSmrg return decode_error::invalid_colour_endpoints_size; 179301e04c3fSmrg } 179401e04c3fSmrg 179501e04c3fSmrg /* Find the largest cem_ranges that fits within remaining_bits */ 179601e04c3fSmrg for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) { 179701e04c3fSmrg int cem_bits; 179801e04c3fSmrg cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5 179901e04c3fSmrg + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3 180001e04c3fSmrg + num_cem_values * cem_ranges[i].b; 180101e04c3fSmrg 180201e04c3fSmrg if (cem_bits <= remaining_bits) 180301e04c3fSmrg { 180401e04c3fSmrg colour_endpoint_bits = cem_bits; 180501e04c3fSmrg ce_max = cem_ranges[i].max; 180601e04c3fSmrg ce_trits = cem_ranges[i].t; 180701e04c3fSmrg ce_quints = cem_ranges[i].q; 180801e04c3fSmrg ce_bits = cem_ranges[i].b; 180901e04c3fSmrg return decode_error::ok; 181001e04c3fSmrg } 181101e04c3fSmrg } 181201e04c3fSmrg
/* Not expected to be reached: the first table entry (one trit plus one bit)
 * costs ceil(8 * N / 5) + N = ceil(13 * N / 5) bits, which is exactly the
 * minimum the guard at the top of the function already requires. */
181301e04c3fSmrg assert(0); 181401e04c3fSmrg return decode_error::invalid_colour_endpoints_size; 181501e04c3fSmrg} 181601e04c3fSmrg 181701e04c3fSmrg/** 181801e04c3fSmrg * Decode ASTC 2D LDR texture data.
181901e04c3fSmrg * 182001e04c3fSmrg * \param src_width in pixels 182101e04c3fSmrg * \param src_height in pixels 182201e04c3fSmrg * \param dst_stride in bytes 182301e04c3fSmrg */
/* Additional notes on the remaining parameters, as used by the body below:
 *   dst_row    - destination; 4 bytes are written per texel, at byte offset
 *                (x * blk_w + sub_x) * 4 within a row, and the pointer is
 *                advanced by dst_stride * blk_h after each row of blocks.
 *   src_row    - compressed input; block x of the current row is read at
 *                byte offset x * 16, and the pointer is advanced by
 *                src_stride after each row of blocks.
 *   src_stride - in bytes (it is added to a uint8_t pointer).
 *   format     - must satisfy _mesa_is_format_astc_2d() (asserted); the block
 *                dimensions and the sRGB flag are derived from it.
 */
182401e04c3fSmrgextern "C" void 182501e04c3fSmrg_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, 182601e04c3fSmrg unsigned dst_stride, 182701e04c3fSmrg const uint8_t *src_row, 182801e04c3fSmrg unsigned src_stride, 182901e04c3fSmrg unsigned src_width, 183001e04c3fSmrg unsigned src_height, 183101e04c3fSmrg mesa_format format) 183201e04c3fSmrg{ 183301e04c3fSmrg assert(_mesa_is_format_astc_2d(format)); 18347ec681f3Smrg bool srgb = _mesa_is_format_srgb(format); 183501e04c3fSmrg 183601e04c3fSmrg unsigned blk_w, blk_h; 183701e04c3fSmrg _mesa_get_format_block_size(format, &blk_w, &blk_h); 183801e04c3fSmrg
/* Size in bytes of one compressed block in the source; the image is covered
 * by ceil(width / blk_w) x ceil(height / blk_h) such blocks. */
183901e04c3fSmrg const unsigned block_size = 16; 184001e04c3fSmrg unsigned x_blocks = (src_width + blk_w - 1) / blk_w; 184101e04c3fSmrg unsigned y_blocks = (src_height + blk_h - 1) / blk_h; 184201e04c3fSmrg
/* Arguments: block width, block height, depth 1, sRGB flag, and `true`.
 * NOTE(review): the final `true` presumably selects 8-bit UNORM output
 * (the decode path tests decoder.output_unorm8) - confirm against the
 * Decoder constructor. */
184301e04c3fSmrg Decoder dec(blk_w, blk_h, 1, srgb, true); 184401e04c3fSmrg 184501e04c3fSmrg for (unsigned y = 0; y < y_blocks; ++y) { 184601e04c3fSmrg for (unsigned x = 0; x < x_blocks; ++x) { 184701e04c3fSmrg /* Same size as the largest block. */ 184801e04c3fSmrg uint16_t block_out[12 * 12 * 4]; 184901e04c3fSmrg 185001e04c3fSmrg dec.decode(src_row + x * block_size, block_out); 185101e04c3fSmrg 185201e04c3fSmrg /* This can be smaller with NPOT dimensions. 
*/ 185301e04c3fSmrg unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w); 185401e04c3fSmrg unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h); 185501e04c3fSmrg
/* Copy only the texels that fall inside the image.  block_out is always
 * indexed with the full block width blk_w as its row pitch, even when
 * dst_blk_w is smaller. */
185601e04c3fSmrg for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) { 185701e04c3fSmrg for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) { 185801e04c3fSmrg uint8_t *dst = dst_row + sub_y * dst_stride + 185901e04c3fSmrg (x * blk_w + sub_x) * 4; 186001e04c3fSmrg const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4]; 186101e04c3fSmrg
/* Each uint16_t component is narrowed to uint8_t on assignment.
 * NOTE(review): assumes the decoder already produced values in 0..255 for
 * this output mode - confirm. */
186201e04c3fSmrg dst[0] = src[0]; 186301e04c3fSmrg dst[1] = src[1]; 186401e04c3fSmrg dst[2] = src[2]; 186501e04c3fSmrg dst[3] = src[3]; 186601e04c3fSmrg } 186701e04c3fSmrg } 186801e04c3fSmrg } 186901e04c3fSmrg src_row += src_stride; 187001e04c3fSmrg dst_row += dst_stride * blk_h; 187101e04c3fSmrg } 187201e04c3fSmrg} 1873