1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
3b8e80941Smrg * Copyright 2018 Advanced Micro Devices, Inc.
4b8e80941Smrg *
5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
7b8e80941Smrg * to deal in the Software without restriction, including without limitation
8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
10b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
11b8e80941Smrg *
12b8e80941Smrg * The above copyright notice and this permission notice (including the next
13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
14b8e80941Smrg * Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22b8e80941Smrg * DEALINGS IN THE SOFTWARE.
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg/**
26b8e80941Smrg * \file texcompress_astc.c
27b8e80941Smrg *
28b8e80941Smrg * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just
29b8e80941Smrg * ASTC 2D LDR.
30b8e80941Smrg *
31b8e80941Smrg * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC
32b8e80941Smrg * library written by Philip Taylor. I added sRGB support and adjusted it for
33b8e80941Smrg * Mesa. - Marek
34b8e80941Smrg */
35b8e80941Smrg
36b8e80941Smrg#include "texcompress_astc.h"
37b8e80941Smrg#include "macros.h"
38b8e80941Smrg#include "util/half_float.h"
39b8e80941Smrg#include <stdio.h>
40b8e80941Smrg
/* Debug switches, both off by default.  VERBOSE_DECODE gates the bit-layout
 * traces printed while parsing a block; VERBOSE_WRITE gates the traces printed
 * by OutputBitVector when bits are appended.
 */
static bool VERBOSE_DECODE = false;
static bool VERBOSE_WRITE = false;
43b8e80941Smrg
/**
 * Chain the two Mesa conversion helpers: \p v is first passed through
 * _mesa_uint16_div_64k_to_half() and the result of that is passed through
 * _mesa_half_to_unorm8(), whose return value is returned as a uint8_t.
 */
static inline uint8_t
uint16_div_64k_to_half_to_unorm8(uint16_t v)
{
   return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v));
}
49b8e80941Smrg
/**
 * Status codes returned by the block decoding functions.
 *
 * The class has no members besides the enum; it only provides a scope so the
 * values are spelled decode_error::ok, decode_error::reserved_block_mode_1, ...
 */
class decode_error
{
public:
   enum type {
      ok, /* decoding succeeded */
      unsupported_hdr_void_extent, /* void-extent block with its D bit (bit 9) set */
      reserved_block_mode_1, /* low bits 00, bits 6..8 all set, but not void-extent */
      reserved_block_mode_2, /* low four bits of the block mode are all zero */
      dual_plane_and_too_many_partitions,
      invalid_range_in_void_extent, /* void-extent min >= max in s or t */
      weight_grid_exceeds_block_size,
      invalid_colour_endpoints_size,
      invalid_colour_endpoints_count,
      invalid_weight_bits,
      invalid_num_weights,
   };
};
67b8e80941Smrg
68b8e80941Smrg
/**
 * One row of the colour-endpoint quantisation range table (see cem_ranges).
 */
struct cem_range {
   uint8_t max;      /* largest value representable in this range */
   uint8_t t, q, b;  /* trit count, quint count and plain bit count per value */
};
73b8e80941Smrg
/* Based on the Color Unquantization Parameters table,
 * plus the bit-only representations, sorted by increasing size.
 *
 * Columns are { max, t, q, b }.  In every row max + 1 == 3^t * 5^q * 2^b,
 * i.e. each value is stored as at most one trit or one quint plus b bits.
 */
static cem_range cem_ranges[] = {
   { 5, 1, 0, 1 },
   { 7, 0, 0, 3 },
   { 9, 0, 1, 1 },
   { 11, 1, 0, 2 },
   { 15, 0, 0, 4 },
   { 19, 0, 1, 2 },
   { 23, 1, 0, 3 },
   { 31, 0, 0, 5 },
   { 39, 0, 1, 3 },
   { 47, 1, 0, 4 },
   { 63, 0, 0, 6 },
   { 79, 0, 1, 4 },
   { 95, 1, 0, 5 },
   { 127, 0, 0, 7 },
   { 159, 0, 1, 5 },
   { 191, 1, 0, 6 },
   { 255, 0, 0, 8 },
};
96b8e80941Smrg
/* Concatenate 2..5 single-bit values into one integer; the first argument
 * becomes the most significant bit.  Arguments are not masked, so callers
 * must pass values that are already 0 or 1.
 */
#define CAT_BITS_2(a, b)          ( ((a) << 1) | (b) )
#define CAT_BITS_3(a, b, c)       ( ((a) << 2) | ((b) << 1) | (c) )
#define CAT_BITS_4(a, b, c, d)    ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) )
#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) )
101b8e80941Smrg
/**
 * Decode one packed group of five "trit plus n bits" values.
 *
 * 'in' carries 5n+8 bits: five n-bit fields m0..m4 interleaved with eight
 * bits T0..T7 that together encode five trits.  Output i is written as
 * (trit_i << n) | m_i.
 *
 * T must be wide enough to hold 5n+8 bits: uint32_t is sufficient for
 * n <= 4, otherwise uint64_t has to be used.
 */
template <typename T>
static void unpack_trit_block(int n, T in, uint8_t *out)
{
   assert(n <= 6); /* else output will overflow uint8_t */

   /* Bit positions of T0..T7 and start positions of m0..m4 inside 'in'. */
   const int t_pos[8] = {
      n, n + 1, 2 * n + 2, 2 * n + 3, 3 * n + 4, 4 * n + 5, 4 * n + 6, 5 * n + 7
   };
   const int m_pos[5] = { 0, n + 2, 2 * n + 4, 3 * n + 5, 4 * n + 7 };

   int tb[8];
   for (int i = 0; i < 8; ++i)
      tb[i] = (int)((in >> t_pos[i]) & 0x1);

   const unsigned low_mask = (1u << n) - 1u;
   int m[5];
   for (int i = 0; i < 5; ++i)
      m[i] = (int)((in >> m_pos[i]) & low_mask);

   /* First stage: split off the two upper trits and form the 5-bit code c
    * from which the three lower trits are derived.
    */
   int trit[5];
   int c;
   if (tb[4] && tb[3] && tb[2]) {
      c = (tb[7] << 4) | (tb[6] << 3) | (tb[5] << 2) | (tb[1] << 1) | tb[0];
      trit[4] = 2;
      trit[3] = 2;
   } else {
      c = (tb[4] << 4) | (tb[3] << 3) | (tb[2] << 2) | (tb[1] << 1) | tb[0];
      if (tb[6] && tb[5]) {
         trit[4] = 2;
         trit[3] = tb[7];
      } else {
         trit[4] = tb[7];
         trit[3] = (tb[6] << 1) | tb[5];
      }
   }

   /* Second stage: decode c.  A two-bit pair {hi, lo} with hi set always
    * maps to trit value 2, which is what "(hi << 1) | (lo & ~hi)" yields.
    */
   const int c_lo = c & 0x3;
   const int c_mid = (c >> 2) & 0x3;
   const int c_top = (c >> 4) & 0x1;
   if (c_lo == 0x3) {
      trit[2] = 2;
      trit[1] = c_top;
      trit[0] = (c_mid & 0x2) ? 2 : c_mid;
   } else if (c_mid == 0x3) {
      trit[2] = 2;
      trit[1] = 2;
      trit[0] = c_lo;
   } else {
      trit[2] = c_top;
      trit[1] = c_mid;
      trit[0] = (c_lo & 0x2) ? 2 : c_lo;
   }

   for (int i = 0; i < 5; ++i)
      out[i] = (uint8_t)((trit[i] << n) | m[i]);
}
166b8e80941Smrg
/**
 * Decode one packed group of three "quint plus n bits" values.
 *
 * 'in' carries 3n+7 bits: three n-bit fields m0..m2 interleaved with seven
 * bits Q0..Q6 that together encode three quints.  Output i is written as
 * (quint_i << n) | m_i.
 */
static void unpack_quint_block(int n, uint32_t in, uint8_t *out)
{
   assert(n <= 5); /* else output will overflow uint8_t */

   /* Bit positions of Q0..Q6 and start positions of m0..m2 inside 'in'. */
   const int q_pos[7] = {
      n, n + 1, n + 2, 2 * n + 3, 2 * n + 4, 3 * n + 5, 3 * n + 6
   };
   const int m_pos[3] = { 0, n + 3, 2 * n + 5 };

   int qb[7];
   for (int i = 0; i < 7; ++i)
      qb[i] = (int)((in >> q_pos[i]) & 0x1);

   const uint32_t low_mask = (1u << n) - 1u;
   int m[3];
   for (int i = 0; i < 3; ++i)
      m[i] = (int)((in >> m_pos[i]) & low_mask);

   int quint[3];
   const int q21_set = qb[2] & qb[1];
   if (q21_set && !qb[6] && !qb[5]) {
      /* {Q0, Q4 & ~Q0, Q3 & ~Q0}: Q0 forces the value to 4. */
      quint[2] = qb[0] ? 4 : ((qb[4] << 1) | qb[3]);
      quint[1] = 4;
      quint[0] = 4;
   } else {
      int c;
      if (q21_set) {
         quint[2] = 4;
         c = (qb[4] << 4) | (qb[3] << 3) | ((qb[6] ^ 1) << 2) |
             ((qb[5] ^ 1) << 1) | qb[0];
      } else {
         quint[2] = (qb[6] << 1) | qb[5];
         c = (qb[4] << 4) | (qb[3] << 3) | (qb[2] << 2) | (qb[1] << 1) | qb[0];
      }

      const int c_high = (c >> 3) & 0x3;
      if ((c & 0x7) == 0x5) {
         quint[1] = 4;
         quint[0] = c_high;
      } else {
         quint[1] = c_high;
         quint[0] = c & 0x7;
      }
   }

   for (int i = 0; i < 3; ++i)
      out[i] = (uint8_t)((quint[i] << n) | m[i]);
}
212b8e80941Smrg
213b8e80941Smrg
/**
 * Four 8-bit values kept together in v[0..3].
 */
struct uint8x4_t
{
   uint8_t v[4];

   /* Default construction leaves v uninitialised. */
   uint8x4_t() { }

   /* Store four values that are already known to be in [0, 255]. */
   uint8x4_t(int a, int b, int c, int d)
   {
      assert(0 <= a && a <= 255);
      assert(0 <= b && b <= 255);
      assert(0 <= c && c <= 255);
      assert(0 <= d && d <= 255);

      const int src[4] = { a, b, c, d };
      for (int i = 0; i < 4; ++i)
         v[i] = src[i];
   }

   /* Build a vector from arbitrary ints, saturating each one to [0, 255]. */
   static uint8x4_t clamped(int a, int b, int c, int d)
   {
      const int src[4] = { a, b, c, d };
      uint8x4_t result;
      for (int i = 0; i < 4; ++i) {
         const int capped = src[i] > 255 ? 255 : src[i];
         result.v[i] = capped < 0 ? 0 : capped;
      }
      return result;
   }
};
242b8e80941Smrg
243b8e80941Smrgstatic uint8x4_t blue_contract(int r, int g, int b, int a)
244b8e80941Smrg{
245b8e80941Smrg   return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a);
246b8e80941Smrg}
247b8e80941Smrg
248b8e80941Smrgstatic uint8x4_t blue_contract_clamped(int r, int g, int b, int a)
249b8e80941Smrg{
250b8e80941Smrg   return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a);
251b8e80941Smrg}
252b8e80941Smrg
/**
 * Move bit 7 of 'a' into bit 7 of 'b' (after shifting 'b' right by one), then
 * turn the remaining bits of 'a' into a signed 6-bit value in [-32, 31].
 * Both arguments are updated in place.
 */
static void bit_transfer_signed(int &a, int &b)
{
   b = b >> 1;
   b = b | (a & 0x80);

   a = (a >> 1) & 0x3f;
   /* Sign-extend from bit 5: subtract 0x40 exactly when that bit is set. */
   a = a - ((a & 0x20) << 1);
}
262b8e80941Smrg
/**
 * 32-bit integer mixing function.  select_partition() feeds it the partition
 * seed and slices the result into pseudo-random nibbles.
 *
 * All arithmetic is modulo 2^32, so each "p += p << k" step is written as a
 * multiplication by (2^k + 1) and "p -= p << 17" as a multiplication by
 * (1 - 2^17) mod 2^32.
 */
static uint32_t hash52(uint32_t p)
{
   p = p ^ (p >> 15);
   p = p * 0xFFFE0001u;   /* p -= p << 17 */
   p = p * 129u;          /* p += p << 7  */
   p = p * 17u;           /* p += p << 4  */
   p = p ^ (p >> 5);
   p = p * 65537u;        /* p += p << 16 */
   p = p ^ (p >> 7);
   p = p ^ (p >> 3);
   p = p ^ (p << 6);
   p = p ^ (p >> 17);
   return p;
}
277b8e80941Smrg
278b8e80941Smrgstatic int select_partition(int seed, int x, int y, int z, int partitioncount,
279b8e80941Smrg                            int small_block)
280b8e80941Smrg{
281b8e80941Smrg   if (small_block) {
282b8e80941Smrg      x <<= 1;
283b8e80941Smrg      y <<= 1;
284b8e80941Smrg      z <<= 1;
285b8e80941Smrg   }
286b8e80941Smrg   seed += (partitioncount - 1) * 1024;
287b8e80941Smrg   uint32_t rnum = hash52(seed);
288b8e80941Smrg   uint8_t seed1 = rnum & 0xF;
289b8e80941Smrg   uint8_t seed2 = (rnum >> 4) & 0xF;
290b8e80941Smrg   uint8_t seed3 = (rnum >> 8) & 0xF;
291b8e80941Smrg   uint8_t seed4 = (rnum >> 12) & 0xF;
292b8e80941Smrg   uint8_t seed5 = (rnum >> 16) & 0xF;
293b8e80941Smrg   uint8_t seed6 = (rnum >> 20) & 0xF;
294b8e80941Smrg   uint8_t seed7 = (rnum >> 24) & 0xF;
295b8e80941Smrg   uint8_t seed8 = (rnum >> 28) & 0xF;
296b8e80941Smrg   uint8_t seed9 = (rnum >> 18) & 0xF;
297b8e80941Smrg   uint8_t seed10 = (rnum >> 22) & 0xF;
298b8e80941Smrg   uint8_t seed11 = (rnum >> 26) & 0xF;
299b8e80941Smrg   uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
300b8e80941Smrg
301b8e80941Smrg   seed1 *= seed1;
302b8e80941Smrg   seed2 *= seed2;
303b8e80941Smrg   seed3 *= seed3;
304b8e80941Smrg   seed4 *= seed4;
305b8e80941Smrg   seed5 *= seed5;
306b8e80941Smrg   seed6 *= seed6;
307b8e80941Smrg   seed7 *= seed7;
308b8e80941Smrg   seed8 *= seed8;
309b8e80941Smrg   seed9 *= seed9;
310b8e80941Smrg   seed10 *= seed10;
311b8e80941Smrg   seed11 *= seed11;
312b8e80941Smrg   seed12 *= seed12;
313b8e80941Smrg
314b8e80941Smrg   int sh1, sh2, sh3;
315b8e80941Smrg   if (seed & 1) {
316b8e80941Smrg      sh1 = (seed & 2 ? 4 : 5);
317b8e80941Smrg      sh2 = (partitioncount == 3 ? 6 : 5);
318b8e80941Smrg   } else {
319b8e80941Smrg      sh1 = (partitioncount == 3 ? 6 : 5);
320b8e80941Smrg      sh2 = (seed & 2 ? 4 : 5);
321b8e80941Smrg   }
322b8e80941Smrg   sh3 = (seed & 0x10) ? sh1 : sh2;
323b8e80941Smrg
324b8e80941Smrg   seed1 >>= sh1;
325b8e80941Smrg   seed2 >>= sh2;
326b8e80941Smrg   seed3 >>= sh1;
327b8e80941Smrg   seed4 >>= sh2;
328b8e80941Smrg   seed5 >>= sh1;
329b8e80941Smrg   seed6 >>= sh2;
330b8e80941Smrg   seed7 >>= sh1;
331b8e80941Smrg   seed8 >>= sh2;
332b8e80941Smrg   seed9 >>= sh3;
333b8e80941Smrg   seed10 >>= sh3;
334b8e80941Smrg   seed11 >>= sh3;
335b8e80941Smrg   seed12 >>= sh3;
336b8e80941Smrg
337b8e80941Smrg   int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
338b8e80941Smrg   int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
339b8e80941Smrg   int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
340b8e80941Smrg   int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
341b8e80941Smrg
342b8e80941Smrg   a &= 0x3F;
343b8e80941Smrg   b &= 0x3F;
344b8e80941Smrg   c &= 0x3F;
345b8e80941Smrg   d &= 0x3F;
346b8e80941Smrg
347b8e80941Smrg   if (partitioncount < 4)
348b8e80941Smrg      d = 0;
349b8e80941Smrg   if (partitioncount < 3)
350b8e80941Smrg      c = 0;
351b8e80941Smrg
352b8e80941Smrg   if (a >= b && a >= c && a >= d)
353b8e80941Smrg      return 0;
354b8e80941Smrg   else if (b >= c && b >= d)
355b8e80941Smrg      return 1;
356b8e80941Smrg   else if (c >= d)
357b8e80941Smrg      return 2;
358b8e80941Smrg   else
359b8e80941Smrg      return 3;
360b8e80941Smrg}
361b8e80941Smrg
362b8e80941Smrg
/**
 * One 128-bit block viewed as a little-endian bit string: bit i of the block
 * is bit (i & 31) of data[i >> 5].
 */
struct InputBitVector
{
   uint32_t data[4];

   /**
    * Debug helper: print a 128-character line (most significant bit first)
    * showing bits [offset, offset + count) as '0'/'1' and every other bit as
    * '.', followed by a space, the printf-style message and a newline.
    */
   void printf_bits(int offset, int count, const char *fmt = "", ...)
   {
      char out[129];
      memset(out, '.', 128);
      out[128] = '\0';
      int idx = offset;
      for (int i = 0; i < count; ++i) {
         out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0';
         ++idx;
      }
      printf("%s ", out);
      va_list ap;
      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
      printf("\n");
   }

   /**
    * Return 'count' bits (0..31) starting at bit 'offset', with the bit at
    * 'offset' ending up in bit 0 of the result.
    *
    * Each word contributes either through a right shift (when 'offset' lies
    * inside it) or a left shift (when 'offset' lies in the preceding word);
    * the conditions keep every shift amount in the range 0..31.
    */
   uint32_t get_bits(int offset, int count)
   {
      assert(count >= 0 && count < 32);

      uint32_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (0 < offset && offset <= 32)
         out |= data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (32 < offset && offset <= 64)
         out |= data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (64 < offset && offset <= 96)
         out |= data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      /* Unsigned literal on purpose: with a signed 1, count == 31 would
       * compute INT_MIN - 1, which is signed overflow.
       */
      out &= (1u << count) - 1u;
      return out;
   }

   /**
    * 64-bit variant of get_bits(): return 'count' bits (0..63) starting at
    * bit 'offset'.
    */
   uint64_t get_bits64(int offset, int count)
   {
      assert(count >= 0 && count < 64);

      uint64_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (offset <= 32)
         out |= (uint64_t)data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (0 < offset && offset <= 64)
         out |= (uint64_t)data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (32 < offset && offset <= 96)
         out |= (uint64_t)data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      out &= ((uint64_t)1 << count) - 1;
      return out;
   }

   /**
    * Read the 'count' bits that end just below 'offset' (i.e. bits
    * offset-1 down to offset-count) in reversed order: bit offset-1 becomes
    * bit 0 of the result.
    */
   uint32_t get_bits_rev(int offset, int count)
   {
      assert(offset >= count);
      uint32_t tmp = get_bits(offset - count, count);
      uint32_t out = 0;
      for (int i = 0; i < count; ++i)
         out |= ((tmp >> i) & 1) << (count - 1 - i);
      return out;
   }
};
449b8e80941Smrg
450b8e80941Smrgstruct OutputBitVector
451b8e80941Smrg{
452b8e80941Smrg   uint32_t data[4];
453b8e80941Smrg   int offset;
454b8e80941Smrg
455b8e80941Smrg   OutputBitVector()
456b8e80941Smrg      : offset(0)
457b8e80941Smrg   {
458b8e80941Smrg      memset(data, 0, sizeof(data));
459b8e80941Smrg   }
460b8e80941Smrg
461b8e80941Smrg   void append(uint32_t value, int size)
462b8e80941Smrg   {
463b8e80941Smrg      if (VERBOSE_WRITE)
464b8e80941Smrg         printf("append offset=%d size=%d values=0x%x\n", offset, size, value);
465b8e80941Smrg
466b8e80941Smrg      assert(offset + size <= 128);
467b8e80941Smrg
468b8e80941Smrg      assert(size <= 32);
469b8e80941Smrg      if (size < 32)
470b8e80941Smrg         assert((value >> size) == 0);
471b8e80941Smrg
472b8e80941Smrg      while (size) {
473b8e80941Smrg         int c = MIN2(size, 32 - (offset & 31));
474b8e80941Smrg         data[offset >> 5] |= (value << (offset & 31));
475b8e80941Smrg         offset += c;
476b8e80941Smrg         size -= c;
477b8e80941Smrg         value >>= c;
478b8e80941Smrg      }
479b8e80941Smrg   }
480b8e80941Smrg
481b8e80941Smrg   void append64(uint64_t value, int size)
482b8e80941Smrg   {
483b8e80941Smrg      if (VERBOSE_WRITE)
484b8e80941Smrg         printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value);
485b8e80941Smrg
486b8e80941Smrg      assert(offset + size <= 128);
487b8e80941Smrg
488b8e80941Smrg      assert(size <= 64);
489b8e80941Smrg      if (size < 64)
490b8e80941Smrg         assert((value >> size) == 0);
491b8e80941Smrg
492b8e80941Smrg      while (size) {
493b8e80941Smrg         int c = MIN2(size, 32 - (offset & 31));
494b8e80941Smrg         data[offset >> 5] |= (value << (offset & 31));
495b8e80941Smrg         offset += c;
496b8e80941Smrg         size -= c;
497b8e80941Smrg         value >>= c;
498b8e80941Smrg      }
499b8e80941Smrg   }
500b8e80941Smrg
501b8e80941Smrg   void append(OutputBitVector &v, int size)
502b8e80941Smrg   {
503b8e80941Smrg      if (VERBOSE_WRITE)
504b8e80941Smrg         printf("append vector offset=%d size=%d\n", offset, size);
505b8e80941Smrg
506b8e80941Smrg      assert(offset + size <= 128);
507b8e80941Smrg      int i = 0;
508b8e80941Smrg      while (size >= 32) {
509b8e80941Smrg         append(v.data[i++], 32);
510b8e80941Smrg         size -= 32;
511b8e80941Smrg      }
512b8e80941Smrg      if (size > 0)
513b8e80941Smrg         append(v.data[i] & ((1 << size) - 1), size);
514b8e80941Smrg   }
515b8e80941Smrg
516b8e80941Smrg   void append_end(OutputBitVector &v, int size)
517b8e80941Smrg   {
518b8e80941Smrg      for (int i = 0; i < size; ++i)
519b8e80941Smrg         data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31);
520b8e80941Smrg   }
521b8e80941Smrg
522b8e80941Smrg   /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are
523b8e80941Smrg    * more likely to flush out bugs where we accidentally read undefined bits.)
524b8e80941Smrg    */
525b8e80941Smrg   void skip(int size)
526b8e80941Smrg   {
527b8e80941Smrg      if (VERBOSE_WRITE)
528b8e80941Smrg         printf("skip offset=%d size=%d\n", offset, size);
529b8e80941Smrg
530b8e80941Smrg      assert(offset + size <= 128);
531b8e80941Smrg      while (size >= 32) {
532b8e80941Smrg         append(0xffffffff, 32);
533b8e80941Smrg         size -= 32;
534b8e80941Smrg      }
535b8e80941Smrg      if (size > 0)
536b8e80941Smrg         append(0xffffffff >> (32 - size), size);
537b8e80941Smrg   }
538b8e80941Smrg};
539b8e80941Smrg
540b8e80941Smrg
/**
 * Holds the parameters shared by all blocks of one texture: the block
 * footprint, whether sRGB decoding is requested and which output format is
 * written.
 */
class Decoder
{
public:
   Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8)
      : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb),
        output_unorm8(output_unorm8) {}

   /* Decode the 16-byte block at 'in' into 'output', four uint16_t per texel
    * for block_w * block_h * block_d texels.  On failure the whole block is
    * filled with an error colour and the error code is returned.
    */
   decode_error::type decode(const uint8_t *in, uint16_t *output) const;

   int block_w, block_h, block_d;
   /* output_unorm8 selects 8-bit values (stored in uint16_t slots) instead of
    * FP16 values; sRGB decoding requires output_unorm8.
    */
   bool srgb, output_unorm8;
};
553b8e80941Smrg
/**
 * Working state for decoding a single compressed block.
 *
 * Decoder::decode() creates one Block per input block, calls decode() on it
 * and, if that succeeds, write_decoded().  The fields are filled in step by
 * step by the member functions named in the comments below.
 */
struct Block
{
   bool is_error;
   bool bogus_colour_endpoints;
   bool bogus_weights;

   /* Block-mode fields, set by decode_block_mode(). */
   int high_prec;
   int dual_plane;
   int colour_component_selector;
   int wt_range;
   int wt_w, wt_h, wt_d;
   int num_parts;
   int partition_index;

   /* Void-extent fields, set by decode_void_extent(). */
   bool is_void_extent;
   int void_extent_d;
   int void_extent_min_s;
   int void_extent_max_s;
   int void_extent_min_t;
   int void_extent_max_t;
   uint16_t void_extent_colour_r;
   uint16_t void_extent_colour_g;
   uint16_t void_extent_colour_b;
   uint16_t void_extent_colour_a;

   bool is_multi_cem;
   int num_extra_cem_bits;
   int colour_endpoint_data_offset;
   int extra_cem_bits;
   int cem_base_class;
   int cems[4];

   int num_cem_values;

   /* Calculated by unpack_weights(): */
   uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */

   /* Calculated by unquantise_weights(): */
   uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */

   /* Calculated by unpack_colour_endpoints(): */
   uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */

   /* Calculated by unquantise_colour_endpoints(): */
   uint8_t colour_endpoints[18];

   /* Calculated by calculate_from_weights(): */
   int wt_trits;
   int wt_quints;
   int wt_bits;
   int wt_max;
   int num_weights;
   int weight_bits;

   /* Calculated by calculate_remaining_bits(): */
   int remaining_bits;

   /* Calculated by calculate_colour_endpoints_size(): */
   int colour_endpoint_bits;
   int ce_max;
   int ce_trits;
   int ce_quints;
   int ce_bits;

   /* Calculated by compute_infill_weights(); */
   uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */

   /* Calculated by decode_colour_endpoints(); */
   uint8x4_t endpoints_decoded[2][4];

   void calculate_from_weights();
   void calculate_remaining_bits();
   decode_error::type calculate_colour_endpoints_size();

   void unquantise_weights();
   void unquantise_colour_endpoints();

   /* Top-level entry point; returns decode_error::ok on success. */
   decode_error::type decode(const Decoder &decoder, InputBitVector in);

   decode_error::type decode_block_mode(InputBitVector in);
   decode_error::type decode_void_extent(InputBitVector in);
   void decode_cem(InputBitVector in);
   void unpack_colour_endpoints(InputBitVector in);
   void decode_colour_endpoints();
   void unpack_weights(InputBitVector in);
   void compute_infill_weights(int block_w, int block_h, int block_d);

   /* Writes the decoded texels; called only after decode() succeeded. */
   void write_decoded(const Decoder &decoder, uint16_t *output);
};
643b8e80941Smrg
644b8e80941Smrg
645b8e80941Smrgdecode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const
646b8e80941Smrg{
647b8e80941Smrg   Block blk;
648b8e80941Smrg   InputBitVector in_vec;
649b8e80941Smrg   memcpy(&in_vec.data, in, 16);
650b8e80941Smrg   decode_error::type err = blk.decode(*this, in_vec);
651b8e80941Smrg   if (err == decode_error::ok) {
652b8e80941Smrg      blk.write_decoded(*this, output);
653b8e80941Smrg   } else {
654b8e80941Smrg      /* Fill output with the error colour */
655b8e80941Smrg      for (int i = 0; i < block_w * block_h * block_d; ++i) {
656b8e80941Smrg         if (output_unorm8) {
657b8e80941Smrg            output[i*4+0] = 0xff;
658b8e80941Smrg            output[i*4+1] = 0;
659b8e80941Smrg            output[i*4+2] = 0xff;
660b8e80941Smrg            output[i*4+3] = 0xff;
661b8e80941Smrg         } else {
662b8e80941Smrg            assert(!srgb); /* srgb must use unorm8 */
663b8e80941Smrg
664b8e80941Smrg            output[i*4+0] = FP16_ONE;
665b8e80941Smrg            output[i*4+1] = FP16_ZERO;
666b8e80941Smrg            output[i*4+2] = FP16_ONE;
667b8e80941Smrg            output[i*4+3] = FP16_ONE;
668b8e80941Smrg         }
669b8e80941Smrg      }
670b8e80941Smrg   }
671b8e80941Smrg   return err;
672b8e80941Smrg}
673b8e80941Smrg
674b8e80941Smrg
675b8e80941Smrgdecode_error::type Block::decode_void_extent(InputBitVector block)
676b8e80941Smrg{
677b8e80941Smrg   /* TODO: 3D */
678b8e80941Smrg
679b8e80941Smrg   is_void_extent = true;
680b8e80941Smrg   void_extent_d = block.get_bits(9, 1);
681b8e80941Smrg   void_extent_min_s = block.get_bits(12, 13);
682b8e80941Smrg   void_extent_max_s = block.get_bits(25, 13);
683b8e80941Smrg   void_extent_min_t = block.get_bits(38, 13);
684b8e80941Smrg   void_extent_max_t = block.get_bits(51, 13);
685b8e80941Smrg   void_extent_colour_r = block.get_bits(64, 16);
686b8e80941Smrg   void_extent_colour_g = block.get_bits(80, 16);
687b8e80941Smrg   void_extent_colour_b = block.get_bits(96, 16);
688b8e80941Smrg   void_extent_colour_a = block.get_bits(112, 16);
689b8e80941Smrg
690b8e80941Smrg   /* TODO: maybe we should do something useful with the extent coordinates? */
691b8e80941Smrg
692b8e80941Smrg   if (void_extent_d) {
693b8e80941Smrg      return decode_error::unsupported_hdr_void_extent;
694b8e80941Smrg   }
695b8e80941Smrg
696b8e80941Smrg   if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff
697b8e80941Smrg       && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) {
698b8e80941Smrg
699b8e80941Smrg      /* No extents */
700b8e80941Smrg
701b8e80941Smrg   } else {
702b8e80941Smrg
703b8e80941Smrg      /* Check for illegal encoding */
704b8e80941Smrg      if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) {
705b8e80941Smrg         return decode_error::invalid_range_in_void_extent;
706b8e80941Smrg      }
707b8e80941Smrg   }
708b8e80941Smrg
709b8e80941Smrg   return decode_error::ok;
710b8e80941Smrg}
711b8e80941Smrg
712b8e80941Smrgdecode_error::type Block::decode_block_mode(InputBitVector in)
713b8e80941Smrg{
714b8e80941Smrg   dual_plane = in.get_bits(10, 1);
715b8e80941Smrg   high_prec = in.get_bits(9, 1);
716b8e80941Smrg
717b8e80941Smrg   if (in.get_bits(0, 2) != 0x0) {
718b8e80941Smrg      wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1);
719b8e80941Smrg      int a = in.get_bits(5, 2);
720b8e80941Smrg      int b = in.get_bits(7, 2);
721b8e80941Smrg      switch (in.get_bits(2, 2)) {
722b8e80941Smrg      case 0x0:
723b8e80941Smrg         if (VERBOSE_DECODE)
724b8e80941Smrg            in.printf_bits(0, 11, "DHBBAAR00RR");
725b8e80941Smrg         wt_w = b + 4;
726b8e80941Smrg         wt_h = a + 2;
727b8e80941Smrg         break;
728b8e80941Smrg      case 0x1:
729b8e80941Smrg         if (VERBOSE_DECODE)
730b8e80941Smrg            in.printf_bits(0, 11, "DHBBAAR01RR");
731b8e80941Smrg         wt_w = b + 8;
732b8e80941Smrg         wt_h = a + 2;
733b8e80941Smrg         break;
734b8e80941Smrg      case 0x2:
735b8e80941Smrg         if (VERBOSE_DECODE)
736b8e80941Smrg            in.printf_bits(0, 11, "DHBBAAR10RR");
737b8e80941Smrg         wt_w = a + 2;
738b8e80941Smrg         wt_h = b + 8;
739b8e80941Smrg         break;
740b8e80941Smrg      case 0x3:
741b8e80941Smrg         if ((b & 0x2) == 0) {
742b8e80941Smrg            if (VERBOSE_DECODE)
743b8e80941Smrg               in.printf_bits(0, 11, "DH0BAAR11RR");
744b8e80941Smrg            wt_w = a + 2;
745b8e80941Smrg            wt_h = b + 6;
746b8e80941Smrg         } else {
747b8e80941Smrg            if (VERBOSE_DECODE)
748b8e80941Smrg               in.printf_bits(0, 11, "DH1BAAR11RR");
749b8e80941Smrg            wt_w = (b & 0x1) + 2;
750b8e80941Smrg            wt_h = a + 2;
751b8e80941Smrg         }
752b8e80941Smrg         break;
753b8e80941Smrg      }
754b8e80941Smrg   } else {
755b8e80941Smrg      if (in.get_bits(6, 3) == 0x7) {
756b8e80941Smrg         if (in.get_bits(0, 9) == 0x1fc) {
757b8e80941Smrg            if (VERBOSE_DECODE)
758b8e80941Smrg               in.printf_bits(0, 11, "xx111111100 (void extent)");
759b8e80941Smrg            return decode_void_extent(in);
760b8e80941Smrg         } else {
761b8e80941Smrg            if (VERBOSE_DECODE)
762b8e80941Smrg               in.printf_bits(0, 11, "xx111xxxx00");
763b8e80941Smrg            return decode_error::reserved_block_mode_1;
764b8e80941Smrg         }
765b8e80941Smrg      }
766b8e80941Smrg      if (in.get_bits(0, 4) == 0x0) {
767b8e80941Smrg         if (VERBOSE_DECODE)
768b8e80941Smrg            in.printf_bits(0, 11, "xxxxxxx0000");
769b8e80941Smrg         return decode_error::reserved_block_mode_2;
770b8e80941Smrg      }
771b8e80941Smrg
772b8e80941Smrg      wt_range = in.get_bits(1, 3) | in.get_bits(4, 1);
773b8e80941Smrg      int a = in.get_bits(5, 2);
774b8e80941Smrg      int b;
775b8e80941Smrg
776b8e80941Smrg      switch (in.get_bits(7, 2)) {
777b8e80941Smrg      case 0x0:
778b8e80941Smrg         if (VERBOSE_DECODE)
779b8e80941Smrg            in.printf_bits(0, 11, "DH00AARRR00");
780b8e80941Smrg         wt_w = 12;
781b8e80941Smrg         wt_h = a + 2;
782b8e80941Smrg         break;
783b8e80941Smrg      case 0x1:
784b8e80941Smrg         if (VERBOSE_DECODE)
785b8e80941Smrg            in.printf_bits(0, 11, "DH01AARRR00");
786b8e80941Smrg         wt_w = a + 2;
787b8e80941Smrg         wt_h = 12;
788b8e80941Smrg         break;
789b8e80941Smrg      case 0x3:
790b8e80941Smrg         if (in.get_bits(5, 1) == 0) {
791b8e80941Smrg            if (VERBOSE_DECODE)
792b8e80941Smrg               in.printf_bits(0, 11, "DH1100RRR00");
793b8e80941Smrg            wt_w = 6;
794b8e80941Smrg            wt_h = 10;
795b8e80941Smrg         } else {
796b8e80941Smrg            if (VERBOSE_DECODE)
797b8e80941Smrg               in.printf_bits(0, 11, "DH1101RRR00");
798b8e80941Smrg            wt_w = 10;
799b8e80941Smrg            wt_h = 6;
800b8e80941Smrg         }
801b8e80941Smrg         break;
802b8e80941Smrg      case 0x2:
803b8e80941Smrg         if (VERBOSE_DECODE)
804b8e80941Smrg            in.printf_bits(0, 11, "BB10AARRR00");
805b8e80941Smrg         b = in.get_bits(9, 2);
806b8e80941Smrg         wt_w = a + 6;
807b8e80941Smrg         wt_h = b + 6;
808b8e80941Smrg         dual_plane = 0;
809b8e80941Smrg         high_prec = 0;
810b8e80941Smrg         break;
811b8e80941Smrg      }
812b8e80941Smrg   }
813b8e80941Smrg   return decode_error::ok;
814b8e80941Smrg}
815b8e80941Smrg
816b8e80941Smrgvoid Block::decode_cem(InputBitVector in)
817b8e80941Smrg{
818b8e80941Smrg   cems[0] = cems[1] = cems[2] = cems[3] = -1;
819b8e80941Smrg
820b8e80941Smrg   num_extra_cem_bits = 0;
821b8e80941Smrg   extra_cem_bits = 0;
822b8e80941Smrg
823b8e80941Smrg   if (num_parts > 1) {
824b8e80941Smrg
825b8e80941Smrg      partition_index = in.get_bits(13, 10);
826b8e80941Smrg      if (VERBOSE_DECODE)
827b8e80941Smrg         in.printf_bits(13, 10, "partition ID (%d)", partition_index);
828b8e80941Smrg
829b8e80941Smrg      uint32_t cem = in.get_bits(23, 6);
830b8e80941Smrg
831b8e80941Smrg      if ((cem & 0x3) == 0x0) {
832b8e80941Smrg         cem >>= 2;
833b8e80941Smrg         cem_base_class = cem >> 2;
834b8e80941Smrg         is_multi_cem = false;
835b8e80941Smrg
836b8e80941Smrg         for (int i = 0; i < num_parts; ++i)
837b8e80941Smrg            cems[i] = cem;
838b8e80941Smrg
839b8e80941Smrg         if (VERBOSE_DECODE)
840b8e80941Smrg            in.printf_bits(23, 6, "CEM (single, %d)", cem);
841b8e80941Smrg      } else {
842b8e80941Smrg
843b8e80941Smrg         cem_base_class = (cem & 0x3) - 1;
844b8e80941Smrg         is_multi_cem = true;
845b8e80941Smrg
846b8e80941Smrg         if (VERBOSE_DECODE)
847b8e80941Smrg            in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class);
848b8e80941Smrg
849b8e80941Smrg         int offset = 128 - weight_bits;
850b8e80941Smrg
851b8e80941Smrg         if (num_parts == 2) {
852b8e80941Smrg            if (VERBOSE_DECODE) {
853b8e80941Smrg               in.printf_bits(25, 4, "M0M0 C1 C0");
854b8e80941Smrg               in.printf_bits(offset - 2, 2, "M1M1");
855b8e80941Smrg            }
856b8e80941Smrg
857b8e80941Smrg            uint32_t c0 = in.get_bits(25, 1);
858b8e80941Smrg            uint32_t c1 = in.get_bits(26, 1);
859b8e80941Smrg
860b8e80941Smrg            extra_cem_bits = c0 + c1;
861b8e80941Smrg
862b8e80941Smrg            num_extra_cem_bits = 2;
863b8e80941Smrg
864b8e80941Smrg            uint32_t m0 = in.get_bits(27, 2);
865b8e80941Smrg            uint32_t m1 = in.get_bits(offset - 2, 2);
866b8e80941Smrg
867b8e80941Smrg            cems[0] = ((cem_base_class + c0) << 2) | m0;
868b8e80941Smrg            cems[1] = ((cem_base_class + c1) << 2) | m1;
869b8e80941Smrg
870b8e80941Smrg         } else if (num_parts == 3) {
871b8e80941Smrg            if (VERBOSE_DECODE) {
872b8e80941Smrg               in.printf_bits(25, 4, "M0 C2 C1 C0");
873b8e80941Smrg               in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0");
874b8e80941Smrg            }
875b8e80941Smrg
876b8e80941Smrg            uint32_t c0 = in.get_bits(25, 1);
877b8e80941Smrg            uint32_t c1 = in.get_bits(26, 1);
878b8e80941Smrg            uint32_t c2 = in.get_bits(27, 1);
879b8e80941Smrg
880b8e80941Smrg            extra_cem_bits = c0 + c1 + c2;
881b8e80941Smrg
882b8e80941Smrg            num_extra_cem_bits = 5;
883b8e80941Smrg
884b8e80941Smrg            uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1);
885b8e80941Smrg            uint32_t m1 = in.get_bits(offset - 4, 2);
886b8e80941Smrg            uint32_t m2 = in.get_bits(offset - 2, 2);
887b8e80941Smrg
888b8e80941Smrg            cems[0] = ((cem_base_class + c0) << 2) | m0;
889b8e80941Smrg            cems[1] = ((cem_base_class + c1) << 2) | m1;
890b8e80941Smrg            cems[2] = ((cem_base_class + c2) << 2) | m2;
891b8e80941Smrg
892b8e80941Smrg         } else if (num_parts == 4) {
893b8e80941Smrg            if (VERBOSE_DECODE) {
894b8e80941Smrg               in.printf_bits(25, 4, "C3 C2 C1 C0");
895b8e80941Smrg               in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0");
896b8e80941Smrg            }
897b8e80941Smrg
898b8e80941Smrg            uint32_t c0 = in.get_bits(25, 1);
899b8e80941Smrg            uint32_t c1 = in.get_bits(26, 1);
900b8e80941Smrg            uint32_t c2 = in.get_bits(27, 1);
901b8e80941Smrg            uint32_t c3 = in.get_bits(28, 1);
902b8e80941Smrg
903b8e80941Smrg            extra_cem_bits = c0 + c1 + c2 + c3;
904b8e80941Smrg
905b8e80941Smrg            num_extra_cem_bits = 8;
906b8e80941Smrg
907b8e80941Smrg            uint32_t m0 = in.get_bits(offset - 8, 2);
908b8e80941Smrg            uint32_t m1 = in.get_bits(offset - 6, 2);
909b8e80941Smrg            uint32_t m2 = in.get_bits(offset - 4, 2);
910b8e80941Smrg            uint32_t m3 = in.get_bits(offset - 2, 2);
911b8e80941Smrg
912b8e80941Smrg            cems[0] = ((cem_base_class + c0) << 2) | m0;
913b8e80941Smrg            cems[1] = ((cem_base_class + c1) << 2) | m1;
914b8e80941Smrg            cems[2] = ((cem_base_class + c2) << 2) | m2;
915b8e80941Smrg            cems[3] = ((cem_base_class + c3) << 2) | m3;
916b8e80941Smrg         } else {
917b8e80941Smrg            unreachable("");
918b8e80941Smrg         }
919b8e80941Smrg      }
920b8e80941Smrg
921b8e80941Smrg      colour_endpoint_data_offset = 29;
922b8e80941Smrg
923b8e80941Smrg   } else {
924b8e80941Smrg      uint32_t cem = in.get_bits(13, 4);
925b8e80941Smrg
926b8e80941Smrg      cem_base_class = cem >> 2;
927b8e80941Smrg      is_multi_cem = false;
928b8e80941Smrg
929b8e80941Smrg      cems[0] = cem;
930b8e80941Smrg
931b8e80941Smrg      partition_index = -1;
932b8e80941Smrg
933b8e80941Smrg      if (VERBOSE_DECODE)
934b8e80941Smrg         in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class);
935b8e80941Smrg
936b8e80941Smrg      colour_endpoint_data_offset = 17;
937b8e80941Smrg   }
938b8e80941Smrg}
939b8e80941Smrg
940b8e80941Smrgvoid Block::unpack_colour_endpoints(InputBitVector in)
941b8e80941Smrg{
942b8e80941Smrg   if (ce_trits) {
943b8e80941Smrg      int offset = colour_endpoint_data_offset;
944b8e80941Smrg      int bits_left = colour_endpoint_bits;
945b8e80941Smrg      for (int i = 0; i < num_cem_values; i += 5) {
946b8e80941Smrg         int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5);
947b8e80941Smrg         /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
948b8e80941Smrg         uint64_t raw = in.get_bits64(offset, bits_to_read);
949b8e80941Smrg         unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]);
950b8e80941Smrg
951b8e80941Smrg         if (VERBOSE_DECODE)
952b8e80941Smrg            in.printf_bits(offset, bits_to_read,
953b8e80941Smrg                           "trits [%d,%d,%d,%d,%d]",
954b8e80941Smrg                           colour_endpoints_quant[i+0], colour_endpoints_quant[i+1],
955b8e80941Smrg                  colour_endpoints_quant[i+2], colour_endpoints_quant[i+3],
956b8e80941Smrg                  colour_endpoints_quant[i+4]);
957b8e80941Smrg
958b8e80941Smrg         offset += 8 + ce_bits * 5;
959b8e80941Smrg         bits_left -= 8 + ce_bits * 5;
960b8e80941Smrg      }
961b8e80941Smrg   } else if (ce_quints) {
962b8e80941Smrg      int offset = colour_endpoint_data_offset;
963b8e80941Smrg      int bits_left = colour_endpoint_bits;
964b8e80941Smrg      for (int i = 0; i < num_cem_values; i += 3) {
965b8e80941Smrg         int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3);
966b8e80941Smrg         /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */
967b8e80941Smrg         uint32_t raw = in.get_bits(offset, bits_to_read);
968b8e80941Smrg         unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]);
969b8e80941Smrg
970b8e80941Smrg         if (VERBOSE_DECODE)
971b8e80941Smrg            in.printf_bits(offset, bits_to_read,
972b8e80941Smrg                           "quints [%d,%d,%d]",
973b8e80941Smrg                           colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]);
974b8e80941Smrg
975b8e80941Smrg         offset += 7 + ce_bits * 3;
976b8e80941Smrg         bits_left -= 7 + ce_bits * 3;
977b8e80941Smrg      }
978b8e80941Smrg   } else {
979b8e80941Smrg      assert((colour_endpoint_bits % ce_bits) == 0);
980b8e80941Smrg      int offset = colour_endpoint_data_offset;
981b8e80941Smrg      for (int i = 0; i < num_cem_values; i++) {
982b8e80941Smrg         colour_endpoints_quant[i] = in.get_bits(offset, ce_bits);
983b8e80941Smrg
984b8e80941Smrg         if (VERBOSE_DECODE)
985b8e80941Smrg            in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]);
986b8e80941Smrg
987b8e80941Smrg         offset += ce_bits;
988b8e80941Smrg      }
989b8e80941Smrg   }
990b8e80941Smrg}
991b8e80941Smrg
992b8e80941Smrgvoid Block::decode_colour_endpoints()
993b8e80941Smrg{
994b8e80941Smrg   int cem_values_idx = 0;
995b8e80941Smrg   for (int part = 0; part < num_parts; ++part) {
996b8e80941Smrg      uint8_t *v = &colour_endpoints[cem_values_idx];
997b8e80941Smrg      int v0 = v[0];
998b8e80941Smrg      int v1 = v[1];
999b8e80941Smrg      int v2 = v[2];
1000b8e80941Smrg      int v3 = v[3];
1001b8e80941Smrg      int v4 = v[4];
1002b8e80941Smrg      int v5 = v[5];
1003b8e80941Smrg      int v6 = v[6];
1004b8e80941Smrg      int v7 = v[7];
1005b8e80941Smrg      cem_values_idx += ((cems[part] >> 2) + 1) * 2;
1006b8e80941Smrg
1007b8e80941Smrg      uint8x4_t e0, e1;
1008b8e80941Smrg      int s0, s1, L0, L1;
1009b8e80941Smrg
1010b8e80941Smrg      switch (cems[part])
1011b8e80941Smrg      {
1012b8e80941Smrg      case 0:
1013b8e80941Smrg         e0 = uint8x4_t(v0, v0, v0, 0xff);
1014b8e80941Smrg         e1 = uint8x4_t(v1, v1, v1, 0xff);
1015b8e80941Smrg         break;
1016b8e80941Smrg      case 1:
1017b8e80941Smrg         L0 = (v0 >> 2) | (v1 & 0xc0);
1018b8e80941Smrg         L1 = L0 + (v1 & 0x3f);
1019b8e80941Smrg         if (L1 > 0xff)
1020b8e80941Smrg            L1 = 0xff;
1021b8e80941Smrg         e0 = uint8x4_t(L0, L0, L0, 0xff);
1022b8e80941Smrg         e1 = uint8x4_t(L1, L1, L1, 0xff);
1023b8e80941Smrg         break;
1024b8e80941Smrg      case 4:
1025b8e80941Smrg         e0 = uint8x4_t(v0, v0, v0, v2);
1026b8e80941Smrg         e1 = uint8x4_t(v1, v1, v1, v3);
1027b8e80941Smrg         break;
1028b8e80941Smrg      case 5:
1029b8e80941Smrg         bit_transfer_signed(v1, v0);
1030b8e80941Smrg         bit_transfer_signed(v3, v2);
1031b8e80941Smrg         e0 = uint8x4_t(v0, v0, v0, v2);
1032b8e80941Smrg         e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3);
1033b8e80941Smrg         break;
1034b8e80941Smrg      case 6:
1035b8e80941Smrg         e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff);
1036b8e80941Smrg         e1 = uint8x4_t(v0, v1, v2, 0xff);
1037b8e80941Smrg         break;
1038b8e80941Smrg      case 8:
1039b8e80941Smrg         s0 = v0 + v2 + v4;
1040b8e80941Smrg         s1 = v1 + v3 + v5;
1041b8e80941Smrg         if (s1 >= s0) {
1042b8e80941Smrg            e0 = uint8x4_t(v0, v2, v4, 0xff);
1043b8e80941Smrg            e1 = uint8x4_t(v1, v3, v5, 0xff);
1044b8e80941Smrg         } else {
1045b8e80941Smrg            e0 = blue_contract(v1, v3, v5, 0xff);
1046b8e80941Smrg            e1 = blue_contract(v0, v2, v4, 0xff);
1047b8e80941Smrg         }
1048b8e80941Smrg         break;
1049b8e80941Smrg      case 9:
1050b8e80941Smrg         bit_transfer_signed(v1, v0);
1051b8e80941Smrg         bit_transfer_signed(v3, v2);
1052b8e80941Smrg         bit_transfer_signed(v5, v4);
1053b8e80941Smrg         if (v1 + v3 + v5 >= 0) {
1054b8e80941Smrg            e0 = uint8x4_t(v0, v2, v4, 0xff);
1055b8e80941Smrg            e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff);
1056b8e80941Smrg         } else {
1057b8e80941Smrg            e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff);
1058b8e80941Smrg            e1 = blue_contract(v0, v2, v4, 0xff);
1059b8e80941Smrg         }
1060b8e80941Smrg         break;
1061b8e80941Smrg      case 10:
1062b8e80941Smrg         e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4);
1063b8e80941Smrg         e1 = uint8x4_t(v0, v1, v2, v5);
1064b8e80941Smrg         break;
1065b8e80941Smrg      case 12:
1066b8e80941Smrg         s0 = v0 + v2 + v4;
1067b8e80941Smrg         s1 = v1 + v3 + v5;
1068b8e80941Smrg         if (s1 >= s0) {
1069b8e80941Smrg            e0 = uint8x4_t(v0, v2, v4, v6);
1070b8e80941Smrg            e1 = uint8x4_t(v1, v3, v5, v7);
1071b8e80941Smrg         } else {
1072b8e80941Smrg            e0 = blue_contract(v1, v3, v5, v7);
1073b8e80941Smrg            e1 = blue_contract(v0, v2, v4, v6);
1074b8e80941Smrg         }
1075b8e80941Smrg         break;
1076b8e80941Smrg      case 13:
1077b8e80941Smrg         bit_transfer_signed(v1, v0);
1078b8e80941Smrg         bit_transfer_signed(v3, v2);
1079b8e80941Smrg         bit_transfer_signed(v5, v4);
1080b8e80941Smrg         bit_transfer_signed(v7, v6);
1081b8e80941Smrg         if (v1 + v3 + v5 >= 0) {
1082b8e80941Smrg            e0 = uint8x4_t(v0, v2, v4, v6);
1083b8e80941Smrg            e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1084b8e80941Smrg         } else {
1085b8e80941Smrg            e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1086b8e80941Smrg            e1 = blue_contract(v0, v2, v4, v6);
1087b8e80941Smrg         }
1088b8e80941Smrg         break;
1089b8e80941Smrg      default:
1090b8e80941Smrg         /* HDR endpoints not supported; return error colour */
1091b8e80941Smrg         e0 = uint8x4_t(255, 0, 255, 255);
1092b8e80941Smrg         e1 = uint8x4_t(255, 0, 255, 255);
1093b8e80941Smrg         break;
1094b8e80941Smrg      }
1095b8e80941Smrg
1096b8e80941Smrg      endpoints_decoded[0][part] = e0;
1097b8e80941Smrg      endpoints_decoded[1][part] = e1;
1098b8e80941Smrg
1099b8e80941Smrg      if (VERBOSE_DECODE) {
1100b8e80941Smrg         printf("cems[%d]=%d v=[", part, cems[part]);
1101b8e80941Smrg         for (int i = 0; i < (cems[part] >> 2) + 1; ++i) {
1102b8e80941Smrg            if (i)
1103b8e80941Smrg               printf(", ");
1104b8e80941Smrg            printf("%3d", v[i]);
1105b8e80941Smrg         }
1106b8e80941Smrg         printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n",
1107b8e80941Smrg                e0.v[0], e0.v[1], e0.v[2], e0.v[3],
1108b8e80941Smrg               e1.v[0], e1.v[1], e1.v[2], e1.v[3]);
1109b8e80941Smrg      }
1110b8e80941Smrg   }
1111b8e80941Smrg}
1112b8e80941Smrg
1113b8e80941Smrgvoid Block::unpack_weights(InputBitVector in)
1114b8e80941Smrg{
1115b8e80941Smrg   if (wt_trits) {
1116b8e80941Smrg      int offset = 128;
1117b8e80941Smrg      int bits_left = weight_bits;
1118b8e80941Smrg      for (int i = 0; i < num_weights; i += 5) {
1119b8e80941Smrg         int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits);
1120b8e80941Smrg         /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */
1121b8e80941Smrg         uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1122b8e80941Smrg         unpack_trit_block(wt_bits, raw, &weights_quant[i]);
1123b8e80941Smrg
1124b8e80941Smrg         if (VERBOSE_DECODE)
1125b8e80941Smrg            in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]",
1126b8e80941Smrg                           weights_quant[i+0], weights_quant[i+1],
1127b8e80941Smrg                  weights_quant[i+2], weights_quant[i+3],
1128b8e80941Smrg                  weights_quant[i+4]);
1129b8e80941Smrg
1130b8e80941Smrg         offset -= 8 + wt_bits * 5;
1131b8e80941Smrg         bits_left -= 8 + wt_bits * 5;
1132b8e80941Smrg      }
1133b8e80941Smrg
1134b8e80941Smrg   } else if (wt_quints) {
1135b8e80941Smrg
1136b8e80941Smrg      int offset = 128;
1137b8e80941Smrg      int bits_left = weight_bits;
1138b8e80941Smrg      for (int i = 0; i < num_weights; i += 3) {
1139b8e80941Smrg         int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits);
1140b8e80941Smrg         /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */
1141b8e80941Smrg         uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1142b8e80941Smrg         unpack_quint_block(wt_bits, raw, &weights_quant[i]);
1143b8e80941Smrg
1144b8e80941Smrg         if (VERBOSE_DECODE)
1145b8e80941Smrg            in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]",
1146b8e80941Smrg                           weights_quant[i], weights_quant[i+1], weights_quant[i+2]);
1147b8e80941Smrg
1148b8e80941Smrg         offset -= 7 + wt_bits * 3;
1149b8e80941Smrg         bits_left -= 7 + wt_bits * 3;
1150b8e80941Smrg      }
1151b8e80941Smrg
1152b8e80941Smrg   } else {
1153b8e80941Smrg      int offset = 128;
1154b8e80941Smrg      assert((weight_bits % wt_bits) == 0);
1155b8e80941Smrg      for (int i = 0; i < num_weights; ++i) {
1156b8e80941Smrg         weights_quant[i] = in.get_bits_rev(offset, wt_bits);
1157b8e80941Smrg
1158b8e80941Smrg         if (VERBOSE_DECODE)
1159b8e80941Smrg            in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]);
1160b8e80941Smrg
1161b8e80941Smrg         offset -= wt_bits;
1162b8e80941Smrg      }
1163b8e80941Smrg   }
1164b8e80941Smrg}
1165b8e80941Smrg
1166b8e80941Smrgvoid Block::unquantise_weights()
1167b8e80941Smrg{
1168b8e80941Smrg   assert(num_weights <= (int)ARRAY_SIZE(weights_quant));
1169b8e80941Smrg   assert(num_weights <= (int)ARRAY_SIZE(weights));
1170b8e80941Smrg
1171b8e80941Smrg   memset(weights, 0, sizeof(weights));
1172b8e80941Smrg
1173b8e80941Smrg   for (int i = 0; i < num_weights; ++i) {
1174b8e80941Smrg
1175b8e80941Smrg      uint8_t v = weights_quant[i];
1176b8e80941Smrg      uint8_t w;
1177b8e80941Smrg
1178b8e80941Smrg      if (wt_trits) {
1179b8e80941Smrg
1180b8e80941Smrg         if (wt_bits == 0) {
1181b8e80941Smrg            w = v * 32;
1182b8e80941Smrg         } else {
1183b8e80941Smrg            uint8_t A, B, C, D;
1184b8e80941Smrg            A = (v & 0x1) ? 0x7F : 0x00;
1185b8e80941Smrg            switch (wt_bits) {
1186b8e80941Smrg            case 1:
1187b8e80941Smrg               B = 0;
1188b8e80941Smrg               C = 50;
1189b8e80941Smrg               D = v >> 1;
1190b8e80941Smrg               break;
1191b8e80941Smrg            case 2:
1192b8e80941Smrg               B = (v & 0x2) ? 0x45 : 0x00;
1193b8e80941Smrg               C = 23;
1194b8e80941Smrg               D = v >> 2;
1195b8e80941Smrg               break;
1196b8e80941Smrg            case 3:
1197b8e80941Smrg               B = ((v & 0x6) >> 1) | ((v & 0x6) << 4);
1198b8e80941Smrg               C = 11;
1199b8e80941Smrg               D = v >> 3;
1200b8e80941Smrg               break;
1201b8e80941Smrg            default:
1202b8e80941Smrg               unreachable("");
1203b8e80941Smrg            }
1204b8e80941Smrg            uint16_t T = D * C + B;
1205b8e80941Smrg            T = T ^ A;
1206b8e80941Smrg            T = (A & 0x20) | (T >> 2);
1207b8e80941Smrg            assert(T < 64);
1208b8e80941Smrg            if (T > 32)
1209b8e80941Smrg               T++;
1210b8e80941Smrg            w = T;
1211b8e80941Smrg         }
1212b8e80941Smrg
1213b8e80941Smrg      } else if (wt_quints) {
1214b8e80941Smrg
1215b8e80941Smrg         if (wt_bits == 0) {
1216b8e80941Smrg            w = v * 16;
1217b8e80941Smrg         } else {
1218b8e80941Smrg            uint8_t A, B, C, D;
1219b8e80941Smrg            A = (v & 0x1) ? 0x7F : 0x00;
1220b8e80941Smrg            switch (wt_bits) {
1221b8e80941Smrg            case 1:
1222b8e80941Smrg               B = 0;
1223b8e80941Smrg               C = 28;
1224b8e80941Smrg               D = v >> 1;
1225b8e80941Smrg               break;
1226b8e80941Smrg            case 2:
1227b8e80941Smrg               B = (v & 0x2) ? 0x42 : 0x00;
1228b8e80941Smrg               C = 13;
1229b8e80941Smrg               D = v >> 2;
1230b8e80941Smrg               break;
1231b8e80941Smrg            default:
1232b8e80941Smrg               unreachable("");
1233b8e80941Smrg            }
1234b8e80941Smrg            uint16_t T = D * C + B;
1235b8e80941Smrg            T = T ^ A;
1236b8e80941Smrg            T = (A & 0x20) | (T >> 2);
1237b8e80941Smrg            assert(T < 64);
1238b8e80941Smrg            if (T > 32)
1239b8e80941Smrg               T++;
1240b8e80941Smrg            w = T;
1241b8e80941Smrg         }
1242b8e80941Smrg         weights[i] = w;
1243b8e80941Smrg
1244b8e80941Smrg      } else {
1245b8e80941Smrg
1246b8e80941Smrg         switch (wt_bits) {
1247b8e80941Smrg         case 1: w = v ? 0x3F : 0x00; break;
1248b8e80941Smrg         case 2: w = v | (v << 2) | (v << 4); break;
1249b8e80941Smrg         case 3: w = v | (v << 3); break;
1250b8e80941Smrg         case 4: w = (v >> 2) | (v << 2); break;
1251b8e80941Smrg         case 5: w = (v >> 4) | (v << 1); break;
1252b8e80941Smrg         default: unreachable("");
1253b8e80941Smrg         }
1254b8e80941Smrg         assert(w < 64);
1255b8e80941Smrg         if (w > 32)
1256b8e80941Smrg            w++;
1257b8e80941Smrg      }
1258b8e80941Smrg      weights[i] = w;
1259b8e80941Smrg   }
1260b8e80941Smrg}
1261b8e80941Smrg
1262b8e80941Smrgvoid Block::compute_infill_weights(int block_w, int block_h, int block_d)
1263b8e80941Smrg{
1264b8e80941Smrg   int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1);
1265b8e80941Smrg   int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1);
1266b8e80941Smrg   int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1);
1267b8e80941Smrg   for (int r = 0; r < block_d; ++r) {
1268b8e80941Smrg      for (int t = 0; t < block_h; ++t) {
1269b8e80941Smrg         for (int s = 0; s < block_w; ++s) {
1270b8e80941Smrg            int cs = Ds * s;
1271b8e80941Smrg            int ct = Dt * t;
1272b8e80941Smrg            int cr = Dr * r;
1273b8e80941Smrg            int gs = (cs * (wt_w - 1) + 32) >> 6;
1274b8e80941Smrg            int gt = (ct * (wt_h - 1) + 32) >> 6;
1275b8e80941Smrg            int gr = (cr * (wt_d - 1) + 32) >> 6;
1276b8e80941Smrg            assert(gs >= 0 && gs <= 176);
1277b8e80941Smrg            assert(gt >= 0 && gt <= 176);
1278b8e80941Smrg            assert(gr >= 0 && gr <= 176);
1279b8e80941Smrg            int js = gs >> 4;
1280b8e80941Smrg            int fs = gs & 0xf;
1281b8e80941Smrg            int jt = gt >> 4;
1282b8e80941Smrg            int ft = gt & 0xf;
1283b8e80941Smrg            int jr = gr >> 4;
1284b8e80941Smrg            int fr = gr & 0xf;
1285b8e80941Smrg
1286b8e80941Smrg            /* TODO: 3D */
1287b8e80941Smrg            (void)jr;
1288b8e80941Smrg            (void)fr;
1289b8e80941Smrg
1290b8e80941Smrg            int w11 = (fs * ft + 8) >> 4;
1291b8e80941Smrg            int w10 = ft - w11;
1292b8e80941Smrg            int w01 = fs - w11;
1293b8e80941Smrg            int w00 = 16 - fs - ft + w11;
1294b8e80941Smrg
1295b8e80941Smrg            if (dual_plane) {
1296b8e80941Smrg               int p00, p01, p10, p11, i0, i1;
1297b8e80941Smrg               int v0 = js + jt * wt_w;
1298b8e80941Smrg               p00 = weights[(v0) * 2];
1299b8e80941Smrg               p01 = weights[(v0 + 1) * 2];
1300b8e80941Smrg               p10 = weights[(v0 + wt_w) * 2];
1301b8e80941Smrg               p11 = weights[(v0 + wt_w + 1) * 2];
1302b8e80941Smrg               i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1303b8e80941Smrg               p00 = weights[(v0) * 2 + 1];
1304b8e80941Smrg               p01 = weights[(v0 + 1) * 2 + 1];
1305b8e80941Smrg               p10 = weights[(v0 + wt_w) * 2 + 1];
1306b8e80941Smrg               p11 = weights[(v0 + wt_w + 1) * 2 + 1];
1307b8e80941Smrg               assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights));
1308b8e80941Smrg               i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1309b8e80941Smrg               assert(0 <= i0 && i0 <= 64);
1310b8e80941Smrg               infill_weights[0][s + t*block_w + r*block_w*block_h] = i0;
1311b8e80941Smrg               infill_weights[1][s + t*block_w + r*block_w*block_h] = i1;
1312b8e80941Smrg            } else {
1313b8e80941Smrg               int p00, p01, p10, p11, i;
1314b8e80941Smrg               int v0 = js + jt * wt_w;
1315b8e80941Smrg               p00 = weights[v0];
1316b8e80941Smrg               p01 = weights[v0 + 1];
1317b8e80941Smrg               p10 = weights[v0 + wt_w];
1318b8e80941Smrg               p11 = weights[v0 + wt_w + 1];
1319b8e80941Smrg               assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights));
1320b8e80941Smrg               i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1321b8e80941Smrg               assert(0 <= i && i <= 64);
1322b8e80941Smrg               infill_weights[0][s + t*block_w + r*block_w*block_h] = i;
1323b8e80941Smrg            }
1324b8e80941Smrg         }
1325b8e80941Smrg      }
1326b8e80941Smrg   }
1327b8e80941Smrg}
1328b8e80941Smrg
1329b8e80941Smrgvoid Block::unquantise_colour_endpoints()
1330b8e80941Smrg{
1331b8e80941Smrg   assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant));
1332b8e80941Smrg   assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints));
1333b8e80941Smrg
1334b8e80941Smrg   for (int i = 0; i < num_cem_values; ++i) {
1335b8e80941Smrg      uint8_t v = colour_endpoints_quant[i];
1336b8e80941Smrg
1337b8e80941Smrg      if (ce_trits) {
1338b8e80941Smrg         uint16_t A, B, C, D;
1339b8e80941Smrg         uint16_t t;
1340b8e80941Smrg         A = (v & 0x1) ? 0x1FF : 0x000;
1341b8e80941Smrg         switch (ce_bits) {
1342b8e80941Smrg         case 1:
1343b8e80941Smrg            B = 0;
1344b8e80941Smrg            C = 204;
1345b8e80941Smrg            D = v >> 1;
1346b8e80941Smrg            break;
1347b8e80941Smrg         case 2:
1348b8e80941Smrg            B = (v & 0x2) ? 0x116 : 0x000;
1349b8e80941Smrg            C = 93;
1350b8e80941Smrg            D = v >> 2;
1351b8e80941Smrg            break;
1352b8e80941Smrg         case 3:
1353b8e80941Smrg            t = ((v >> 1) & 0x3);
1354b8e80941Smrg            B = t | (t << 2) | (t << 7);
1355b8e80941Smrg            C = 44;
1356b8e80941Smrg            D = v >> 3;
1357b8e80941Smrg            break;
1358b8e80941Smrg         case 4:
1359b8e80941Smrg            t = ((v >> 1) & 0x7);
1360b8e80941Smrg            B = t | (t << 6);
1361b8e80941Smrg            C = 22;
1362b8e80941Smrg            D = v >> 4;
1363b8e80941Smrg            break;
1364b8e80941Smrg         case 5:
1365b8e80941Smrg            t = ((v >> 1) & 0xF);
1366b8e80941Smrg            B = (t >> 2) | (t << 5);
1367b8e80941Smrg            C = 11;
1368b8e80941Smrg            D = v >> 5;
1369b8e80941Smrg            break;
1370b8e80941Smrg         case 6:
1371b8e80941Smrg            B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1);
1372b8e80941Smrg            C = 5;
1373b8e80941Smrg            D = v >> 6;
1374b8e80941Smrg            break;
1375b8e80941Smrg         default:
1376b8e80941Smrg            unreachable("");
1377b8e80941Smrg         }
1378b8e80941Smrg         uint16_t T = D * C + B;
1379b8e80941Smrg         T = T ^ A;
1380b8e80941Smrg         T = (A & 0x80) | (T >> 2);
1381b8e80941Smrg         assert(T < 256);
1382b8e80941Smrg         colour_endpoints[i] = T;
1383b8e80941Smrg      } else if (ce_quints) {
1384b8e80941Smrg         uint16_t A, B, C, D;
1385b8e80941Smrg         uint16_t t;
1386b8e80941Smrg         A = (v & 0x1) ? 0x1FF : 0x000;
1387b8e80941Smrg         switch (ce_bits) {
1388b8e80941Smrg         case 1:
1389b8e80941Smrg            B = 0;
1390b8e80941Smrg            C = 113;
1391b8e80941Smrg            D = v >> 1;
1392b8e80941Smrg            break;
1393b8e80941Smrg         case 2:
1394b8e80941Smrg            B = (v & 0x2) ? 0x10C : 0x000;
1395b8e80941Smrg            C = 54;
1396b8e80941Smrg            D = v >> 2;
1397b8e80941Smrg            break;
1398b8e80941Smrg         case 3:
1399b8e80941Smrg            t = ((v >> 1) & 0x3);
1400b8e80941Smrg            B = (t >> 1) | (t << 1) | (t << 7);
1401b8e80941Smrg            C = 26;
1402b8e80941Smrg            D = v >> 3;
1403b8e80941Smrg            break;
1404b8e80941Smrg         case 4:
1405b8e80941Smrg            t = ((v >> 1) & 0x7);
1406b8e80941Smrg            B = (t >> 1) | (t << 6);
1407b8e80941Smrg            C = 13;
1408b8e80941Smrg            D = v >> 4;
1409b8e80941Smrg            break;
1410b8e80941Smrg         case 5:
1411b8e80941Smrg            t = ((v >> 1) & 0xF);
1412b8e80941Smrg            B = (t >> 4) | (t << 5);
1413b8e80941Smrg            C = 6;
1414b8e80941Smrg            D = v >> 5;
1415b8e80941Smrg            break;
1416b8e80941Smrg         default:
1417b8e80941Smrg            unreachable("");
1418b8e80941Smrg         }
1419b8e80941Smrg         uint16_t T = D * C + B;
1420b8e80941Smrg         T = T ^ A;
1421b8e80941Smrg         T = (A & 0x80) | (T >> 2);
1422b8e80941Smrg         assert(T < 256);
1423b8e80941Smrg         colour_endpoints[i] = T;
1424b8e80941Smrg      } else {
1425b8e80941Smrg         switch (ce_bits) {
1426b8e80941Smrg         case 1: v = v ? 0xFF : 0x00; break;
1427b8e80941Smrg         case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break;
1428b8e80941Smrg         case 3: v = (v << 5) | (v << 2) | (v >> 1); break;
1429b8e80941Smrg         case 4: v = (v << 4) | v; break;
1430b8e80941Smrg         case 5: v = (v << 3) | (v >> 2); break;
1431b8e80941Smrg         case 6: v = (v << 2) | (v >> 4); break;
1432b8e80941Smrg         case 7: v = (v << 1) | (v >> 6); break;
1433b8e80941Smrg         case 8: break;
1434b8e80941Smrg         default: unreachable("");
1435b8e80941Smrg         }
1436b8e80941Smrg         colour_endpoints[i] = v;
1437b8e80941Smrg      }
1438b8e80941Smrg   }
1439b8e80941Smrg}
1440b8e80941Smrg
1441b8e80941Smrgdecode_error::type Block::decode(const Decoder &decoder, InputBitVector in)
1442b8e80941Smrg{
1443b8e80941Smrg   decode_error::type err;
1444b8e80941Smrg
1445b8e80941Smrg   is_error = false;
1446b8e80941Smrg   bogus_colour_endpoints = false;
1447b8e80941Smrg   bogus_weights = false;
1448b8e80941Smrg   is_void_extent = false;
1449b8e80941Smrg
1450b8e80941Smrg   wt_d = 1;
1451b8e80941Smrg   /* TODO: 3D */
1452b8e80941Smrg
1453b8e80941Smrg   /* TODO: test for all the illegal encodings */
1454b8e80941Smrg
1455b8e80941Smrg   if (VERBOSE_DECODE)
1456b8e80941Smrg      in.printf_bits(0, 128);
1457b8e80941Smrg
1458b8e80941Smrg   err = decode_block_mode(in);
1459b8e80941Smrg   if (err != decode_error::ok)
1460b8e80941Smrg      return err;
1461b8e80941Smrg
1462b8e80941Smrg   if (is_void_extent)
1463b8e80941Smrg      return decode_error::ok;
1464b8e80941Smrg
1465b8e80941Smrg   /* TODO: 3D */
1466b8e80941Smrg
1467b8e80941Smrg   calculate_from_weights();
1468b8e80941Smrg
1469b8e80941Smrg   if (VERBOSE_DECODE)
1470b8e80941Smrg      printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
1471b8e80941Smrg             wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits);
1472b8e80941Smrg
1473b8e80941Smrg   if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d)
1474b8e80941Smrg      return decode_error::weight_grid_exceeds_block_size;
1475b8e80941Smrg
1476b8e80941Smrg   num_parts = in.get_bits(11, 2) + 1;
1477b8e80941Smrg
1478b8e80941Smrg   if (VERBOSE_DECODE)
1479b8e80941Smrg      in.printf_bits(11, 2, "partitions = %d", num_parts);
1480b8e80941Smrg
1481b8e80941Smrg   if (dual_plane && num_parts > 3)
1482b8e80941Smrg      return decode_error::dual_plane_and_too_many_partitions;
1483b8e80941Smrg
1484b8e80941Smrg   decode_cem(in);
1485b8e80941Smrg
1486b8e80941Smrg   if (VERBOSE_DECODE)
1487b8e80941Smrg      printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class);
1488b8e80941Smrg
1489b8e80941Smrg   int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits;
1490b8e80941Smrg   num_cem_values = num_cem_pairs * 2;
1491b8e80941Smrg
1492b8e80941Smrg   calculate_remaining_bits();
1493b8e80941Smrg   err = calculate_colour_endpoints_size();
1494b8e80941Smrg   if (err != decode_error::ok)
1495b8e80941Smrg      return err;
1496b8e80941Smrg
1497b8e80941Smrg   if (VERBOSE_DECODE)
1498b8e80941Smrg      in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits,
1499b8e80941Smrg                     "endpoint data (%d bits, %d vals, %dt %dq %db)",
1500b8e80941Smrg                     colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits);
1501b8e80941Smrg
1502b8e80941Smrg   unpack_colour_endpoints(in);
1503b8e80941Smrg
1504b8e80941Smrg   if (VERBOSE_DECODE) {
1505b8e80941Smrg      printf("cem values raw =[");
1506b8e80941Smrg      for (int i = 0; i < num_cem_values; i++) {
1507b8e80941Smrg         if (i)
1508b8e80941Smrg            printf(", ");
1509b8e80941Smrg         printf("%3d", colour_endpoints_quant[i]);
1510b8e80941Smrg      }
1511b8e80941Smrg      printf("]\n");
1512b8e80941Smrg   }
1513b8e80941Smrg
1514b8e80941Smrg   if (num_cem_values > 18)
1515b8e80941Smrg      return decode_error::invalid_colour_endpoints_count;
1516b8e80941Smrg
1517b8e80941Smrg   unquantise_colour_endpoints();
1518b8e80941Smrg
1519b8e80941Smrg   if (VERBOSE_DECODE) {
1520b8e80941Smrg      printf("cem values norm=[");
1521b8e80941Smrg      for (int i = 0; i < num_cem_values; i++) {
1522b8e80941Smrg         if (i)
1523b8e80941Smrg            printf(", ");
1524b8e80941Smrg         printf("%3d", colour_endpoints[i]);
1525b8e80941Smrg      }
1526b8e80941Smrg      printf("]\n");
1527b8e80941Smrg   }
1528b8e80941Smrg
1529b8e80941Smrg   decode_colour_endpoints();
1530b8e80941Smrg
1531b8e80941Smrg   if (dual_plane) {
1532b8e80941Smrg      int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2;
1533b8e80941Smrg      colour_component_selector = in.get_bits(ccs_offset, 2);
1534b8e80941Smrg
1535b8e80941Smrg      if (VERBOSE_DECODE)
1536b8e80941Smrg         in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector);
1537b8e80941Smrg   } else {
1538b8e80941Smrg      colour_component_selector = 0;
1539b8e80941Smrg   }
1540b8e80941Smrg
1541b8e80941Smrg
1542b8e80941Smrg   if (VERBOSE_DECODE)
1543b8e80941Smrg      in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits);
1544b8e80941Smrg
1545b8e80941Smrg   if (num_weights > 64)
1546b8e80941Smrg      return decode_error::invalid_num_weights;
1547b8e80941Smrg
1548b8e80941Smrg   if (weight_bits < 24 || weight_bits > 96)
1549b8e80941Smrg      return decode_error::invalid_weight_bits;
1550b8e80941Smrg
1551b8e80941Smrg   unpack_weights(in);
1552b8e80941Smrg
1553b8e80941Smrg   unquantise_weights();
1554b8e80941Smrg
1555b8e80941Smrg   if (VERBOSE_DECODE) {
1556b8e80941Smrg      printf("weights=[");
1557b8e80941Smrg      for (int i = 0; i < num_weights; ++i) {
1558b8e80941Smrg         if (i)
1559b8e80941Smrg            printf(", ");
1560b8e80941Smrg         printf("%d", weights[i]);
1561b8e80941Smrg      }
1562b8e80941Smrg      printf("]\n");
1563b8e80941Smrg
1564b8e80941Smrg      for (int plane = 0; plane <= dual_plane; ++plane) {
1565b8e80941Smrg         printf("weights (plane %d):\n", plane);
1566b8e80941Smrg         int i = 0;
1567b8e80941Smrg         (void)i;
1568b8e80941Smrg
1569b8e80941Smrg         for (int r = 0; r < wt_d; ++r) {
1570b8e80941Smrg            for (int t = 0; t < wt_h; ++t) {
1571b8e80941Smrg               for (int s = 0; s < wt_w; ++s) {
1572b8e80941Smrg                  printf("%3d", weights[i++ * (1 + dual_plane) + plane]);
1573b8e80941Smrg               }
1574b8e80941Smrg               printf("\n");
1575b8e80941Smrg            }
1576b8e80941Smrg            if (r < wt_d - 1)
1577b8e80941Smrg               printf("\n");
1578b8e80941Smrg         }
1579b8e80941Smrg      }
1580b8e80941Smrg   }
1581b8e80941Smrg
1582b8e80941Smrg   compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d);
1583b8e80941Smrg
1584b8e80941Smrg   if (VERBOSE_DECODE) {
1585b8e80941Smrg      for (int plane = 0; plane <= dual_plane; ++plane) {
1586b8e80941Smrg         printf("infilled weights (plane %d):\n", plane);
1587b8e80941Smrg         int i = 0;
1588b8e80941Smrg         (void)i;
1589b8e80941Smrg
1590b8e80941Smrg         for (int r = 0; r < decoder.block_d; ++r) {
1591b8e80941Smrg            for (int t = 0; t < decoder.block_h; ++t) {
1592b8e80941Smrg               for (int s = 0; s < decoder.block_w; ++s) {
1593b8e80941Smrg                  printf("%3d", infill_weights[plane][i++]);
1594b8e80941Smrg               }
1595b8e80941Smrg               printf("\n");
1596b8e80941Smrg            }
1597b8e80941Smrg            if (r < decoder.block_d - 1)
1598b8e80941Smrg               printf("\n");
1599b8e80941Smrg         }
1600b8e80941Smrg      }
1601b8e80941Smrg   }
1602b8e80941Smrg   if (VERBOSE_DECODE)
1603b8e80941Smrg      printf("\n");
1604b8e80941Smrg
1605b8e80941Smrg   return decode_error::ok;
1606b8e80941Smrg}
1607b8e80941Smrg
1608b8e80941Smrgvoid Block::write_decoded(const Decoder &decoder, uint16_t *output)
1609b8e80941Smrg{
1610b8e80941Smrg   /* sRGB can only be stored as unorm8. */
1611b8e80941Smrg   assert(!decoder.srgb || decoder.output_unorm8);
1612b8e80941Smrg
1613b8e80941Smrg   if (is_void_extent) {
1614b8e80941Smrg      for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) {
1615b8e80941Smrg         if (decoder.output_unorm8) {
1616b8e80941Smrg            if (decoder.srgb) {
1617b8e80941Smrg               output[idx*4+0] = void_extent_colour_r >> 8;
1618b8e80941Smrg               output[idx*4+1] = void_extent_colour_g >> 8;
1619b8e80941Smrg               output[idx*4+2] = void_extent_colour_b >> 8;
1620b8e80941Smrg            } else {
1621b8e80941Smrg               output[idx*4+0] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_r);
1622b8e80941Smrg               output[idx*4+1] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_g);
1623b8e80941Smrg               output[idx*4+2] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_b);
1624b8e80941Smrg            }
1625b8e80941Smrg            output[idx*4+3] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_a);
1626b8e80941Smrg         } else {
1627b8e80941Smrg            /* Store the color as FP16. */
1628b8e80941Smrg            output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r);
1629b8e80941Smrg            output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g);
1630b8e80941Smrg            output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b);
1631b8e80941Smrg            output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a);
1632b8e80941Smrg         }
1633b8e80941Smrg      }
1634b8e80941Smrg      return;
1635b8e80941Smrg   }
1636b8e80941Smrg
1637b8e80941Smrg   int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31;
1638b8e80941Smrg
1639b8e80941Smrg   int idx = 0;
1640b8e80941Smrg   for (int z = 0; z < decoder.block_d; ++z) {
1641b8e80941Smrg      for (int y = 0; y < decoder.block_h; ++y) {
1642b8e80941Smrg         for (int x = 0; x < decoder.block_w; ++x) {
1643b8e80941Smrg
1644b8e80941Smrg            int partition;
1645b8e80941Smrg            if (num_parts > 1) {
1646b8e80941Smrg               partition = select_partition(partition_index, x, y, z, num_parts, small_block);
1647b8e80941Smrg               assert(partition < num_parts);
1648b8e80941Smrg            } else {
1649b8e80941Smrg               partition = 0;
1650b8e80941Smrg            }
1651b8e80941Smrg
1652b8e80941Smrg            /* TODO: HDR */
1653b8e80941Smrg
1654b8e80941Smrg            uint8x4_t e0 = endpoints_decoded[0][partition];
1655b8e80941Smrg            uint8x4_t e1 = endpoints_decoded[1][partition];
1656b8e80941Smrg            uint16_t c0[4], c1[4];
1657b8e80941Smrg
1658b8e80941Smrg            /* Expand to 16 bits. */
1659b8e80941Smrg            if (decoder.srgb) {
1660b8e80941Smrg               c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80);
1661b8e80941Smrg               c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80);
1662b8e80941Smrg               c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80);
1663b8e80941Smrg               c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80);
1664b8e80941Smrg
1665b8e80941Smrg               c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80);
1666b8e80941Smrg               c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80);
1667b8e80941Smrg               c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80);
1668b8e80941Smrg               c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80);
1669b8e80941Smrg            } else {
1670b8e80941Smrg               c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]);
1671b8e80941Smrg               c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]);
1672b8e80941Smrg               c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]);
1673b8e80941Smrg               c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]);
1674b8e80941Smrg
1675b8e80941Smrg               c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]);
1676b8e80941Smrg               c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]);
1677b8e80941Smrg               c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]);
1678b8e80941Smrg               c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]);
1679b8e80941Smrg            }
1680b8e80941Smrg
1681b8e80941Smrg            int w[4];
1682b8e80941Smrg            if (dual_plane) {
1683b8e80941Smrg               int w0 = infill_weights[0][idx];
1684b8e80941Smrg               int w1 = infill_weights[1][idx];
1685b8e80941Smrg               w[0] = w[1] = w[2] = w[3] = w0;
1686b8e80941Smrg               w[colour_component_selector] = w1;
1687b8e80941Smrg            } else {
1688b8e80941Smrg               int w0 = infill_weights[0][idx];
1689b8e80941Smrg               w[0] = w[1] = w[2] = w[3] = w0;
1690b8e80941Smrg            }
1691b8e80941Smrg
1692b8e80941Smrg            /* Interpolate to produce UNORM16, applying weights. */
1693b8e80941Smrg            uint16_t c[4] = {
1694b8e80941Smrg               (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6),
1695b8e80941Smrg               (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6),
1696b8e80941Smrg               (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6),
1697b8e80941Smrg               (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6),
1698b8e80941Smrg            };
1699b8e80941Smrg
1700b8e80941Smrg            if (decoder.output_unorm8) {
1701b8e80941Smrg               if (decoder.srgb) {
1702b8e80941Smrg                  output[idx*4+0] = c[0] >> 8;
1703b8e80941Smrg                  output[idx*4+1] = c[1] >> 8;
1704b8e80941Smrg                  output[idx*4+2] = c[2] >> 8;
1705b8e80941Smrg               } else {
1706b8e80941Smrg                  output[idx*4+0] = c[0] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[0]);
1707b8e80941Smrg                  output[idx*4+1] = c[1] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[1]);
1708b8e80941Smrg                  output[idx*4+2] = c[2] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[2]);
1709b8e80941Smrg               }
1710b8e80941Smrg               output[idx*4+3] = c[3] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c[3]);
1711b8e80941Smrg            } else {
1712b8e80941Smrg               /* Store the color as FP16. */
1713b8e80941Smrg               output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]);
1714b8e80941Smrg               output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]);
1715b8e80941Smrg               output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]);
1716b8e80941Smrg               output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]);
1717b8e80941Smrg            }
1718b8e80941Smrg
1719b8e80941Smrg            idx++;
1720b8e80941Smrg         }
1721b8e80941Smrg      }
1722b8e80941Smrg   }
1723b8e80941Smrg}
1724b8e80941Smrg
1725b8e80941Smrgvoid Block::calculate_from_weights()
1726b8e80941Smrg{
1727b8e80941Smrg   wt_trits = 0;
1728b8e80941Smrg   wt_quints = 0;
1729b8e80941Smrg   wt_bits = 0;
1730b8e80941Smrg   switch (high_prec) {
1731b8e80941Smrg   case 0:
1732b8e80941Smrg      switch (wt_range) {
1733b8e80941Smrg      case 0x2: wt_max = 1; wt_bits = 1; break;
1734b8e80941Smrg      case 0x3: wt_max = 2; wt_trits = 1; break;
1735b8e80941Smrg      case 0x4: wt_max = 3; wt_bits = 2; break;
1736b8e80941Smrg      case 0x5: wt_max = 4; wt_quints = 1; break;
1737b8e80941Smrg      case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break;
1738b8e80941Smrg      case 0x7: wt_max = 7; wt_bits = 3; break;
1739b8e80941Smrg      default: abort();
1740b8e80941Smrg      }
1741b8e80941Smrg      break;
1742b8e80941Smrg   case 1:
1743b8e80941Smrg      switch (wt_range) {
1744b8e80941Smrg      case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break;
1745b8e80941Smrg      case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break;
1746b8e80941Smrg      case 0x4: wt_max = 15; wt_bits = 4; break;
1747b8e80941Smrg      case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break;
1748b8e80941Smrg      case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break;
1749b8e80941Smrg      case 0x7: wt_max = 31; wt_bits = 5; break;
1750b8e80941Smrg      default: abort();
1751b8e80941Smrg      }
1752b8e80941Smrg      break;
1753b8e80941Smrg   }
1754b8e80941Smrg
1755b8e80941Smrg   assert(wt_trits || wt_quints || wt_bits);
1756b8e80941Smrg
1757b8e80941Smrg   num_weights = wt_w * wt_h * wt_d;
1758b8e80941Smrg
1759b8e80941Smrg   if (dual_plane)
1760b8e80941Smrg      num_weights *= 2;
1761b8e80941Smrg
1762b8e80941Smrg   weight_bits =
1763b8e80941Smrg         (num_weights * 8 * wt_trits + 4) / 5
1764b8e80941Smrg         + (num_weights * 7 * wt_quints + 2) / 3
1765b8e80941Smrg         +  num_weights * wt_bits;
1766b8e80941Smrg}
1767b8e80941Smrg
1768b8e80941Smrgvoid Block::calculate_remaining_bits()
1769b8e80941Smrg{
1770b8e80941Smrg   int config_bits;
1771b8e80941Smrg   if (num_parts > 1) {
1772b8e80941Smrg      if (!is_multi_cem)
1773b8e80941Smrg         config_bits = 29;
1774b8e80941Smrg      else
1775b8e80941Smrg         config_bits = 25 + 3 * num_parts;
1776b8e80941Smrg   } else {
1777b8e80941Smrg      config_bits = 17;
1778b8e80941Smrg   }
1779b8e80941Smrg
1780b8e80941Smrg   if (dual_plane)
1781b8e80941Smrg      config_bits += 2;
1782b8e80941Smrg
1783b8e80941Smrg   remaining_bits = 128 - config_bits - weight_bits;
1784b8e80941Smrg}
1785b8e80941Smrg
1786b8e80941Smrgdecode_error::type Block::calculate_colour_endpoints_size()
1787b8e80941Smrg{
1788b8e80941Smrg   /* Specified as illegal */
1789b8e80941Smrg   if (remaining_bits < (13 * num_cem_values + 4) / 5) {
1790b8e80941Smrg      colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0;
1791b8e80941Smrg      return decode_error::invalid_colour_endpoints_size;
1792b8e80941Smrg   }
1793b8e80941Smrg
1794b8e80941Smrg   /* Find the largest cem_ranges that fits within remaining_bits */
1795b8e80941Smrg   for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) {
1796b8e80941Smrg      int cem_bits;
1797b8e80941Smrg      cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5
1798b8e80941Smrg                 + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3
1799b8e80941Smrg                 +  num_cem_values * cem_ranges[i].b;
1800b8e80941Smrg
1801b8e80941Smrg      if (cem_bits <= remaining_bits)
1802b8e80941Smrg      {
1803b8e80941Smrg         colour_endpoint_bits = cem_bits;
1804b8e80941Smrg         ce_max = cem_ranges[i].max;
1805b8e80941Smrg         ce_trits = cem_ranges[i].t;
1806b8e80941Smrg         ce_quints = cem_ranges[i].q;
1807b8e80941Smrg         ce_bits = cem_ranges[i].b;
1808b8e80941Smrg         return decode_error::ok;
1809b8e80941Smrg      }
1810b8e80941Smrg   }
1811b8e80941Smrg
1812b8e80941Smrg   assert(0);
1813b8e80941Smrg   return decode_error::invalid_colour_endpoints_size;
1814b8e80941Smrg}
1815b8e80941Smrg
1816b8e80941Smrg/**
1817b8e80941Smrg * Decode ASTC 2D LDR texture data.
1818b8e80941Smrg *
1819b8e80941Smrg * \param src_width in pixels
1820b8e80941Smrg * \param src_height in pixels
1821b8e80941Smrg * \param dst_stride in bytes
1822b8e80941Smrg */
1823b8e80941Smrgextern "C" void
1824b8e80941Smrg_mesa_unpack_astc_2d_ldr(uint8_t *dst_row,
1825b8e80941Smrg                         unsigned dst_stride,
1826b8e80941Smrg                         const uint8_t *src_row,
1827b8e80941Smrg                         unsigned src_stride,
1828b8e80941Smrg                         unsigned src_width,
1829b8e80941Smrg                         unsigned src_height,
1830b8e80941Smrg                         mesa_format format)
1831b8e80941Smrg{
1832b8e80941Smrg   assert(_mesa_is_format_astc_2d(format));
1833b8e80941Smrg   bool srgb = _mesa_get_format_color_encoding(format) == GL_SRGB;
1834b8e80941Smrg
1835b8e80941Smrg   unsigned blk_w, blk_h;
1836b8e80941Smrg   _mesa_get_format_block_size(format, &blk_w, &blk_h);
1837b8e80941Smrg
1838b8e80941Smrg   const unsigned block_size = 16;
1839b8e80941Smrg   unsigned x_blocks = (src_width + blk_w - 1) / blk_w;
1840b8e80941Smrg   unsigned y_blocks = (src_height + blk_h - 1) / blk_h;
1841b8e80941Smrg
1842b8e80941Smrg   Decoder dec(blk_w, blk_h, 1, srgb, true);
1843b8e80941Smrg
1844b8e80941Smrg   for (unsigned y = 0; y < y_blocks; ++y) {
1845b8e80941Smrg      for (unsigned x = 0; x < x_blocks; ++x) {
1846b8e80941Smrg         /* Same size as the largest block. */
1847b8e80941Smrg         uint16_t block_out[12 * 12 * 4];
1848b8e80941Smrg
1849b8e80941Smrg         dec.decode(src_row + x * block_size, block_out);
1850b8e80941Smrg
1851b8e80941Smrg         /* This can be smaller with NPOT dimensions. */
1852b8e80941Smrg         unsigned dst_blk_w = MIN2(blk_w, src_width  - x*blk_w);
1853b8e80941Smrg         unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h);
1854b8e80941Smrg
1855b8e80941Smrg         for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) {
1856b8e80941Smrg            for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) {
1857b8e80941Smrg               uint8_t *dst = dst_row + sub_y * dst_stride +
1858b8e80941Smrg                              (x * blk_w + sub_x) * 4;
1859b8e80941Smrg               const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4];
1860b8e80941Smrg
1861b8e80941Smrg               dst[0] = src[0];
1862b8e80941Smrg               dst[1] = src[1];
1863b8e80941Smrg               dst[2] = src[2];
1864b8e80941Smrg               dst[3] = src[3];
1865b8e80941Smrg            }
1866b8e80941Smrg         }
1867b8e80941Smrg      }
1868b8e80941Smrg      src_row += src_stride;
1869b8e80941Smrg      dst_row += dst_stride * blk_h;
1870b8e80941Smrg   }
1871b8e80941Smrg}
1872