/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23b8e80941Smrg
/*
 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
 */
27b8e80941Smrg
28b8e80941Smrg#ifndef TEXCOMPRESS_BPTC_TMP_H
29b8e80941Smrg#define TEXCOMPRESS_BPTC_TMP_H
30b8e80941Smrg
31b8e80941Smrg#include "util/format_srgb.h"
32b8e80941Smrg#include "util/half_float.h"
33b8e80941Smrg#include "macros.h"
34b8e80941Smrg
/* Edge length of a block in texels; a block holds BLOCK_SIZE * BLOCK_SIZE
 * texels.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and each anchor-index row. */
#define N_PARTITIONS 64
/* Presumably the size in bytes of one compressed block; not referenced in
 * this part of the file -- confirm against the users further down.
 */
#define BLOCK_BYTES 16
38b8e80941Smrg
/* Bit layout of one UNORM block mode. The field order matters: the
 * bptc_unorm_modes table initialises this struct positionally.
 */
struct bptc_unorm_mode {
   /* Number of endpoint pairs (1, 2 or 3); also selects the partition table */
   int n_subsets;
   /* Width of the partition number field (0 when there is none) */
   int n_partition_bits;
   /* True if a 2-bit channel rotation field follows the partition number */
   bool has_rotation_bits;
   /* True if a 1-bit index selection field is present */
   bool has_index_selection_bit;
   /* Stored bits per RGB endpoint component, not counting any p-bit */
   int n_color_bits;
   /* Stored bits per alpha endpoint, not counting any p-bit; 0 = no alpha */
   int n_alpha_bits;
   /* True if every endpoint carries its own p-bit */
   bool has_endpoint_pbits;
   /* True if the two endpoints of a subset share a single p-bit */
   bool has_shared_pbits;
   /* Bits per texel in the primary index array */
   int n_index_bits;
   /* Bits per texel in the secondary index array; 0 = no secondary array */
   int n_secondary_index_bits;
};
51b8e80941Smrg
/* One run of bits in a float-mode block. The field order matters: the
 * bptc_float_modes table initialises this struct positionally.
 *
 * NOTE(review): the code that consumes these entries is not in this part of
 * the file; the descriptions below are inferred from the field names and the
 * table contents and should be confirmed against the decoder.
 */
struct bptc_float_bitfield {
   /* Endpoint number (0-3 in the tables); the tables end each list with an
    * entry whose endpoint is -1, presumably as a terminator
    */
   int8_t endpoint;
   /* Colour component number (0-2 in the tables) */
   uint8_t component;
   /* Presumably the lowest destination bit within the endpoint value */
   uint8_t offset;
   /* Number of bits in this run */
   uint8_t n_bits;
   /* Presumably set when the bits are stored in reversed order */
   bool reverse;
};
59b8e80941Smrg
60b8e80941Smrgstruct bptc_float_mode {
61b8e80941Smrg   bool reserved;
62b8e80941Smrg   bool transformed_endpoints;
63b8e80941Smrg   int n_partition_bits;
64b8e80941Smrg   int n_endpoint_bits;
65b8e80941Smrg   int n_index_bits;
66b8e80941Smrg   int n_delta_bits[3];
67b8e80941Smrg   struct bptc_float_bitfield bitfields[24];
68b8e80941Smrg};
69b8e80941Smrg
/* State for writing a bit stream one byte at a time.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field descriptions are inferred from their names and types.
 */
struct bit_writer {
   /* Presumably the partially filled byte being assembled */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf */
   int pos;
   /* Presumably where the next completed byte is stored */
   uint8_t *dst;
};
75b8e80941Smrg
76b8e80941Smrgstatic const struct bptc_unorm_mode
77b8e80941Smrgbptc_unorm_modes[] = {
78b8e80941Smrg   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
79b8e80941Smrg   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
80b8e80941Smrg   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81b8e80941Smrg   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
82b8e80941Smrg   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
83b8e80941Smrg   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
84b8e80941Smrg   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
85b8e80941Smrg   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
86b8e80941Smrg};
87b8e80941Smrg
88b8e80941Smrgstatic const struct bptc_float_mode
89b8e80941Smrgbptc_float_modes[] = {
90b8e80941Smrg   /* 00 */
91b8e80941Smrg   { false, true, 5, 10, 3, { 5, 5, 5 },
92b8e80941Smrg     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93b8e80941Smrg       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94b8e80941Smrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95b8e80941Smrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96b8e80941Smrg       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97b8e80941Smrg       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98b8e80941Smrg       { 3, 2, 3, 1, false },
99b8e80941Smrg       { -1 } }
100b8e80941Smrg   },
101b8e80941Smrg   /* 01 */
102b8e80941Smrg   { false, true, 5, 7, 3, { 6, 6, 6 },
103b8e80941Smrg     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104b8e80941Smrg       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105b8e80941Smrg       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106b8e80941Smrg       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107b8e80941Smrg       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108b8e80941Smrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109b8e80941Smrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110b8e80941Smrg       { 2, 0, 0, 6, false },
111b8e80941Smrg       { 3, 0, 0, 6, false },
112b8e80941Smrg       { -1 } }
113b8e80941Smrg   },
114b8e80941Smrg   /* 00010 */
115b8e80941Smrg   { false, true, 5, 11, 3, { 5, 4, 4 },
116b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117b8e80941Smrg       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118b8e80941Smrg       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119b8e80941Smrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120b8e80941Smrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121b8e80941Smrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122b8e80941Smrg       { -1 } }
123b8e80941Smrg   },
124b8e80941Smrg   /* 00011 */
125b8e80941Smrg   { false, false, 0, 10, 4, { 10, 10, 10 },
126b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127b8e80941Smrg       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128b8e80941Smrg       { -1 } }
129b8e80941Smrg   },
130b8e80941Smrg   /* 00110 */
131b8e80941Smrg   { false, true, 5, 11, 3, { 4, 5, 4 },
132b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133b8e80941Smrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134b8e80941Smrg       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135b8e80941Smrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136b8e80941Smrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137b8e80941Smrg       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138b8e80941Smrg       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139b8e80941Smrg       { -1 } }
140b8e80941Smrg   },
141b8e80941Smrg   /* 00111 */
142b8e80941Smrg   { false, true, 0, 11, 4, { 9, 9, 9 },
143b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144b8e80941Smrg       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145b8e80941Smrg       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146b8e80941Smrg       { -1 } }
147b8e80941Smrg   },
148b8e80941Smrg   /* 01010 */
149b8e80941Smrg   { false, true, 5, 11, 3, { 4, 4, 5 },
150b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151b8e80941Smrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152b8e80941Smrg       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153b8e80941Smrg       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154b8e80941Smrg       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155b8e80941Smrg       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156b8e80941Smrg       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157b8e80941Smrg       { -1 } }
158b8e80941Smrg   },
159b8e80941Smrg   /* 01011 */
160b8e80941Smrg   { false, true, 0, 12, 4, { 8, 8, 8 },
161b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162b8e80941Smrg       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163b8e80941Smrg       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164b8e80941Smrg       { -1 } }
165b8e80941Smrg   },
166b8e80941Smrg   /* 01110 */
167b8e80941Smrg   { false, true, 5, 9, 3, { 5, 5, 5 },
168b8e80941Smrg     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169b8e80941Smrg       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170b8e80941Smrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171b8e80941Smrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172b8e80941Smrg       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173b8e80941Smrg       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174b8e80941Smrg       { 3, 2, 3, 1, false },
175b8e80941Smrg       { -1 } }
176b8e80941Smrg   },
177b8e80941Smrg   /* 01111 */
178b8e80941Smrg   { false, true, 0, 16, 4, { 4, 4, 4 },
179b8e80941Smrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180b8e80941Smrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181b8e80941Smrg       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182b8e80941Smrg       { -1 } }
183b8e80941Smrg   },
184b8e80941Smrg   /* 10010 */
185b8e80941Smrg   { false, true, 5, 8, 3, { 6, 5, 5 },
186b8e80941Smrg     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187b8e80941Smrg       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188b8e80941Smrg       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189b8e80941Smrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190b8e80941Smrg       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191b8e80941Smrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192b8e80941Smrg       { 3, 0, 0, 6, false },
193b8e80941Smrg       { -1 } }
194b8e80941Smrg   },
195b8e80941Smrg   /* 10011 */
196b8e80941Smrg   { true /* reserved */ },
197b8e80941Smrg   /* 10110 */
198b8e80941Smrg   { false, true, 5, 8, 3, { 5, 6, 5 },
199b8e80941Smrg     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200b8e80941Smrg       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201b8e80941Smrg       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202b8e80941Smrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203b8e80941Smrg       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204b8e80941Smrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205b8e80941Smrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206b8e80941Smrg       { -1 } }
207b8e80941Smrg   },
208b8e80941Smrg   /* 10111 */
209b8e80941Smrg   { true /* reserved */ },
210b8e80941Smrg   /* 11010 */
211b8e80941Smrg   { false, true, 5, 8, 3, { 5, 5, 6 },
212b8e80941Smrg     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213b8e80941Smrg       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214b8e80941Smrg       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215b8e80941Smrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216b8e80941Smrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217b8e80941Smrg       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218b8e80941Smrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219b8e80941Smrg       { -1 } }
220b8e80941Smrg   },
221b8e80941Smrg   /* 11011 */
222b8e80941Smrg   { true /* reserved */ },
223b8e80941Smrg   /* 11110 */
224b8e80941Smrg   { false, false, 5, 6, 3, { 6, 6, 6 },
225b8e80941Smrg     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226b8e80941Smrg       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227b8e80941Smrg       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228b8e80941Smrg       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229b8e80941Smrg       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230b8e80941Smrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231b8e80941Smrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232b8e80941Smrg       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233b8e80941Smrg       { -1 } }
234b8e80941Smrg   },
235b8e80941Smrg   /* 11111 */
236b8e80941Smrg   { true /* reserved */ },
237b8e80941Smrg};
238b8e80941Smrg
239b8e80941Smrg/* This partition table is used when the mode has two subsets. Each
240b8e80941Smrg * partition is represented by a 32-bit value which gives 2 bits per texel
241b8e80941Smrg * within the block. The value of the two bits represents which subset to use
242b8e80941Smrg * (0 or 1).
243b8e80941Smrg */
244b8e80941Smrgstatic const uint32_t
245b8e80941Smrgpartition_table1[N_PARTITIONS] = {
246b8e80941Smrg   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247b8e80941Smrg   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248b8e80941Smrg   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249b8e80941Smrg   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250b8e80941Smrg   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251b8e80941Smrg   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252b8e80941Smrg   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253b8e80941Smrg   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254b8e80941Smrg   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255b8e80941Smrg   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256b8e80941Smrg   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257b8e80941Smrg   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258b8e80941Smrg   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259b8e80941Smrg   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260b8e80941Smrg   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261b8e80941Smrg   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
262b8e80941Smrg};
263b8e80941Smrg
264b8e80941Smrg/* This partition table is used when the mode has three subsets. In this case
265b8e80941Smrg * the values can be 0, 1 or 2.
266b8e80941Smrg */
267b8e80941Smrgstatic const uint32_t
268b8e80941Smrgpartition_table2[N_PARTITIONS] = {
269b8e80941Smrg   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270b8e80941Smrg   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271b8e80941Smrg   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272b8e80941Smrg   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273b8e80941Smrg   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274b8e80941Smrg   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275b8e80941Smrg   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276b8e80941Smrg   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277b8e80941Smrg   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278b8e80941Smrg   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279b8e80941Smrg   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280b8e80941Smrg   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281b8e80941Smrg   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282b8e80941Smrg   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283b8e80941Smrg   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284b8e80941Smrg   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
285b8e80941Smrg};
286b8e80941Smrg
287b8e80941Smrgstatic const uint8_t
288b8e80941Smrganchor_indices[][N_PARTITIONS] = {
289b8e80941Smrg   /* Anchor index values for the second subset of two-subset partitioning */
290b8e80941Smrg   {
291b8e80941Smrg      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292b8e80941Smrg      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293b8e80941Smrg      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294b8e80941Smrg      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
295b8e80941Smrg   },
296b8e80941Smrg
297b8e80941Smrg   /* Anchor index values for the second subset of three-subset partitioning */
298b8e80941Smrg   {
299b8e80941Smrg      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300b8e80941Smrg      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301b8e80941Smrg      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302b8e80941Smrg      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
303b8e80941Smrg   },
304b8e80941Smrg
305b8e80941Smrg   /* Anchor index values for the third subset of three-subset
306b8e80941Smrg    * partitioning
307b8e80941Smrg    */
308b8e80941Smrg   {
309b8e80941Smrg      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310b8e80941Smrg      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311b8e80941Smrg      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312b8e80941Smrg      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
313b8e80941Smrg   }
314b8e80941Smrg};
315b8e80941Smrg
/* Reads an n_bits wide field that starts offset bits into block.
 *
 * Bits are numbered from the least-significant bit of block[0] upwards; the
 * bit at offset becomes bit 0 of the result. A field may span several bytes.
 *
 * Returns the field value, or 0 when n_bits is not positive. In that case
 * block is not read at all, so an offset one past the end of the data is
 * safe. The result is an int, so n_bits must be small enough to fit in one.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Take whatever is left of the current byte, capped at what we need */
      int n_bits_in_byte = 8 - bit_index;

      if (n_bits_in_byte > n_bits)
         n_bits_in_byte = n_bits;

      result |= ((block[byte_index] >> bit_index) &
                 ((1 << n_bits_in_byte) - 1)) << bit;

      n_bits -= n_bits_in_byte;
      bit += n_bits_in_byte;

      /* Every byte after the first is consumed from its lowest bit */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
342b8e80941Smrg
/* Expands an n-bit quantity into a byte by copying the most-significant
 * bits into the unused least-significant bits.
 *
 * The value is moved to the top of the byte and its bit pattern is repeated
 * downwards until all eight bits are filled. For widths of 4 to 8 this is
 * the same as byte << (8 - n_bits) | byte >> (2 * n_bits - 8); narrower
 * widths need more than two copies and would make that second shift count
 * negative, so they are handled by the loop.
 *
 * n_bits of 8 or more returns byte unchanged; n_bits of 0 or less returns 0.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   unsigned top;
   unsigned filled;
   int shift;

   if (n_bits <= 0)
      return 0;
   if (n_bits >= 8)
      return byte;

   top = (unsigned) byte << (8 - n_bits);
   filled = top;

   for (shift = n_bits; shift < 8; shift += n_bits)
      filled |= top >> shift;

   return (uint8_t) filled;
}
352b8e80941Smrg
353b8e80941Smrgstatic int
354b8e80941Smrgextract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355b8e80941Smrg                        const uint8_t *block,
356b8e80941Smrg                        int bit_offset,
357b8e80941Smrg                        uint8_t endpoints[][4])
358b8e80941Smrg{
359b8e80941Smrg   int component;
360b8e80941Smrg   int subset;
361b8e80941Smrg   int endpoint;
362b8e80941Smrg   int pbit;
363b8e80941Smrg   int n_components;
364b8e80941Smrg
365b8e80941Smrg   /* Extract each color component */
366b8e80941Smrg   for (component = 0; component < 3; component++) {
367b8e80941Smrg      for (subset = 0; subset < mode->n_subsets; subset++) {
368b8e80941Smrg         for (endpoint = 0; endpoint < 2; endpoint++) {
369b8e80941Smrg            endpoints[subset * 2 + endpoint][component] =
370b8e80941Smrg               extract_bits(block, bit_offset, mode->n_color_bits);
371b8e80941Smrg            bit_offset += mode->n_color_bits;
372b8e80941Smrg         }
373b8e80941Smrg      }
374b8e80941Smrg   }
375b8e80941Smrg
376b8e80941Smrg   /* Extract the alpha values */
377b8e80941Smrg   if (mode->n_alpha_bits > 0) {
378b8e80941Smrg      for (subset = 0; subset < mode->n_subsets; subset++) {
379b8e80941Smrg         for (endpoint = 0; endpoint < 2; endpoint++) {
380b8e80941Smrg            endpoints[subset * 2 + endpoint][3] =
381b8e80941Smrg               extract_bits(block, bit_offset, mode->n_alpha_bits);
382b8e80941Smrg            bit_offset += mode->n_alpha_bits;
383b8e80941Smrg         }
384b8e80941Smrg      }
385b8e80941Smrg
386b8e80941Smrg      n_components = 4;
387b8e80941Smrg   } else {
388b8e80941Smrg      for (subset = 0; subset < mode->n_subsets; subset++)
389b8e80941Smrg         for (endpoint = 0; endpoint < 2; endpoint++)
390b8e80941Smrg            endpoints[subset * 2 + endpoint][3] = 255;
391b8e80941Smrg
392b8e80941Smrg      n_components = 3;
393b8e80941Smrg   }
394b8e80941Smrg
395b8e80941Smrg   /* Add in the p-bits */
396b8e80941Smrg   if (mode->has_endpoint_pbits) {
397b8e80941Smrg      for (subset = 0; subset < mode->n_subsets; subset++) {
398b8e80941Smrg         for (endpoint = 0; endpoint < 2; endpoint++) {
399b8e80941Smrg            pbit = extract_bits(block, bit_offset, 1);
400b8e80941Smrg            bit_offset += 1;
401b8e80941Smrg
402b8e80941Smrg            for (component = 0; component < n_components; component++) {
403b8e80941Smrg               endpoints[subset * 2 + endpoint][component] <<= 1;
404b8e80941Smrg               endpoints[subset * 2 + endpoint][component] |= pbit;
405b8e80941Smrg            }
406b8e80941Smrg         }
407b8e80941Smrg      }
408b8e80941Smrg   } else if (mode->has_shared_pbits) {
409b8e80941Smrg      for (subset = 0; subset < mode->n_subsets; subset++) {
410b8e80941Smrg         pbit = extract_bits(block, bit_offset, 1);
411b8e80941Smrg         bit_offset += 1;
412b8e80941Smrg
413b8e80941Smrg         for (endpoint = 0; endpoint < 2; endpoint++) {
414b8e80941Smrg            for (component = 0; component < n_components; component++) {
415b8e80941Smrg               endpoints[subset * 2 + endpoint][component] <<= 1;
416b8e80941Smrg               endpoints[subset * 2 + endpoint][component] |= pbit;
417b8e80941Smrg            }
418b8e80941Smrg         }
419b8e80941Smrg      }
420b8e80941Smrg   }
421b8e80941Smrg
422b8e80941Smrg   /* Expand the n-bit values to a byte */
423b8e80941Smrg   for (subset = 0; subset < mode->n_subsets; subset++) {
424b8e80941Smrg      for (endpoint = 0; endpoint < 2; endpoint++) {
425b8e80941Smrg         for (component = 0; component < 3; component++) {
426b8e80941Smrg            endpoints[subset * 2 + endpoint][component] =
427b8e80941Smrg               expand_component(endpoints[subset * 2 + endpoint][component],
428b8e80941Smrg                                mode->n_color_bits +
429b8e80941Smrg                                mode->has_endpoint_pbits +
430b8e80941Smrg                                mode->has_shared_pbits);
431b8e80941Smrg         }
432b8e80941Smrg
433b8e80941Smrg         if (mode->n_alpha_bits > 0) {
434b8e80941Smrg            endpoints[subset * 2 + endpoint][3] =
435b8e80941Smrg               expand_component(endpoints[subset * 2 + endpoint][3],
436b8e80941Smrg                                mode->n_alpha_bits +
437b8e80941Smrg                                mode->has_endpoint_pbits +
438b8e80941Smrg                                mode->has_shared_pbits);
439b8e80941Smrg         }
440b8e80941Smrg      }
441b8e80941Smrg   }
442b8e80941Smrg
443b8e80941Smrg   return bit_offset;
444b8e80941Smrg}
445b8e80941Smrg
446b8e80941Smrgstatic bool
447b8e80941Smrgis_anchor(int n_subsets,
448b8e80941Smrg          int partition_num,
449b8e80941Smrg          int texel)
450b8e80941Smrg{
451b8e80941Smrg   if (texel == 0)
452b8e80941Smrg      return true;
453b8e80941Smrg
454b8e80941Smrg   switch (n_subsets) {
455b8e80941Smrg   case 1:
456b8e80941Smrg      return false;
457b8e80941Smrg   case 2:
458b8e80941Smrg      return anchor_indices[0][partition_num] == texel;
459b8e80941Smrg   case 3:
460b8e80941Smrg      return (anchor_indices[1][partition_num] == texel ||
461b8e80941Smrg              anchor_indices[2][partition_num] == texel);
462b8e80941Smrg   default:
463b8e80941Smrg      assert(false);
464b8e80941Smrg      return false;
465b8e80941Smrg   }
466b8e80941Smrg}
467b8e80941Smrg
468b8e80941Smrgstatic int
469b8e80941Smrgcount_anchors_before_texel(int n_subsets,
470b8e80941Smrg                           int partition_num,
471b8e80941Smrg                           int texel)
472b8e80941Smrg{
473b8e80941Smrg   int count = 1;
474b8e80941Smrg
475b8e80941Smrg   if (texel == 0)
476b8e80941Smrg      return 0;
477b8e80941Smrg
478b8e80941Smrg   switch (n_subsets) {
479b8e80941Smrg   case 1:
480b8e80941Smrg      break;
481b8e80941Smrg   case 2:
482b8e80941Smrg      if (texel > anchor_indices[0][partition_num])
483b8e80941Smrg         count++;
484b8e80941Smrg      break;
485b8e80941Smrg   case 3:
486b8e80941Smrg      if (texel > anchor_indices[1][partition_num])
487b8e80941Smrg         count++;
488b8e80941Smrg      if (texel > anchor_indices[2][partition_num])
489b8e80941Smrg         count++;
490b8e80941Smrg      break;
491b8e80941Smrg   default:
492b8e80941Smrg      assert(false);
493b8e80941Smrg      return 0;
494b8e80941Smrg   }
495b8e80941Smrg
496b8e80941Smrg   return count;
497b8e80941Smrg}
498b8e80941Smrg
/* Blends two endpoint values using a texel's palette index.
 *
 * a, b:       endpoint values
 * index:      palette index, 0 to (1 << index_bits) - 1
 * index_bits: width of the index; must be 2, 3 or 4, the only widths that
 *             have a weight table
 *
 * The index selects a weight from 0 to 64 and the result is
 * ((64 - weight) * a + weight * b + 32) >> 6, so index 0 yields a and the
 * highest index yields b.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t weights2[] = { 0, 21, 43, 64 };
   static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t weights4[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   static const uint8_t *const weights[] = {
      NULL, NULL, weights2, weights3, weights4
   };
   int weight;

   /* Widths 0 and 1 map to NULL and anything above 4 is off the table */
   assert(index_bits >= 2 && index_bits <= 4);

   weight = weights[index_bits][index];

   return ((64 - weight) * a + weight * b + 32) >> 6;
}
517b8e80941Smrg
/* Undoes the channel rotation of a decoded texel, in place.
 *
 * rotation: 0 leaves the texel alone; 1, 2 or 3 swaps alpha (result[3])
 *           with result[0], result[1] or result[2] respectively
 * result:   four component values, R G B A
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   uint8_t t;

   if (rotation == 0)
      return;

   /* Rotation values 1-3 name components 0-2 */
   rotation--;

   t = result[rotation];
   result[rotation] = result[3];
   result[3] = t;
}
533b8e80941Smrg
534b8e80941Smrgstatic void
535b8e80941Smrgfetch_rgba_unorm_from_block(const uint8_t *block,
536b8e80941Smrg                            uint8_t *result,
537b8e80941Smrg                            int texel)
538b8e80941Smrg{
539b8e80941Smrg   int mode_num = ffs(block[0]);
540b8e80941Smrg   const struct bptc_unorm_mode *mode;
541b8e80941Smrg   int bit_offset, secondary_bit_offset;
542b8e80941Smrg   int partition_num;
543b8e80941Smrg   int subset_num;
544b8e80941Smrg   int rotation;
545b8e80941Smrg   int index_selection;
546b8e80941Smrg   int index_bits;
547b8e80941Smrg   int indices[2];
548b8e80941Smrg   int index;
549b8e80941Smrg   int anchors_before_texel;
550b8e80941Smrg   bool anchor;
551b8e80941Smrg   uint8_t endpoints[3 * 2][4];
552b8e80941Smrg   uint32_t subsets;
553b8e80941Smrg   int component;
554b8e80941Smrg
555b8e80941Smrg   if (mode_num == 0) {
556b8e80941Smrg      /* According to the spec this mode is reserved and shouldn't be used. */
557b8e80941Smrg      memset(result, 0, 3);
558b8e80941Smrg      result[3] = 0xff;
559b8e80941Smrg      return;
560b8e80941Smrg   }
561b8e80941Smrg
562b8e80941Smrg   mode = bptc_unorm_modes + mode_num - 1;
563b8e80941Smrg   bit_offset = mode_num;
564b8e80941Smrg
565b8e80941Smrg   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
566b8e80941Smrg   bit_offset += mode->n_partition_bits;
567b8e80941Smrg
568b8e80941Smrg   switch (mode->n_subsets) {
569b8e80941Smrg   case 1:
570b8e80941Smrg      subsets = 0;
571b8e80941Smrg      break;
572b8e80941Smrg   case 2:
573b8e80941Smrg      subsets = partition_table1[partition_num];
574b8e80941Smrg      break;
575b8e80941Smrg   case 3:
576b8e80941Smrg      subsets = partition_table2[partition_num];
577b8e80941Smrg      break;
578b8e80941Smrg   default:
579b8e80941Smrg      assert(false);
580b8e80941Smrg      return;
581b8e80941Smrg   }
582b8e80941Smrg
583b8e80941Smrg   if (mode->has_rotation_bits) {
584b8e80941Smrg      rotation = extract_bits(block, bit_offset, 2);
585b8e80941Smrg      bit_offset += 2;
586b8e80941Smrg   } else {
587b8e80941Smrg      rotation = 0;
588b8e80941Smrg   }
589b8e80941Smrg
590b8e80941Smrg   if (mode->has_index_selection_bit) {
591b8e80941Smrg      index_selection = extract_bits(block, bit_offset, 1);
592b8e80941Smrg      bit_offset++;
593b8e80941Smrg   } else {
594b8e80941Smrg      index_selection = 0;
595b8e80941Smrg   }
596b8e80941Smrg
597b8e80941Smrg   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598b8e80941Smrg
599b8e80941Smrg   anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
600b8e80941Smrg                                                     partition_num, texel);
601b8e80941Smrg
602b8e80941Smrg   /* Calculate the offset to the secondary index */
603b8e80941Smrg   secondary_bit_offset = (bit_offset +
604b8e80941Smrg                           BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605b8e80941Smrg                           mode->n_subsets +
606b8e80941Smrg                           mode->n_secondary_index_bits * texel -
607b8e80941Smrg                           anchors_before_texel);
608b8e80941Smrg
609b8e80941Smrg   /* Calculate the offset to the primary index for this texel */
610b8e80941Smrg   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611b8e80941Smrg
612b8e80941Smrg   subset_num = (subsets >> (texel * 2)) & 3;
613b8e80941Smrg
614b8e80941Smrg   anchor = is_anchor(mode->n_subsets, partition_num, texel);
615b8e80941Smrg
616b8e80941Smrg   index_bits = mode->n_index_bits;
617b8e80941Smrg   if (anchor)
618b8e80941Smrg      index_bits--;
619b8e80941Smrg   indices[0] = extract_bits(block, bit_offset, index_bits);
620b8e80941Smrg
621b8e80941Smrg   if (mode->n_secondary_index_bits) {
622b8e80941Smrg      index_bits = mode->n_secondary_index_bits;
623b8e80941Smrg      if (anchor)
624b8e80941Smrg         index_bits--;
625b8e80941Smrg      indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
626b8e80941Smrg   }
627b8e80941Smrg
628b8e80941Smrg   index = indices[index_selection];
629b8e80941Smrg   index_bits = (index_selection ?
630b8e80941Smrg                 mode->n_secondary_index_bits :
631b8e80941Smrg                 mode->n_index_bits);
632b8e80941Smrg
633b8e80941Smrg   for (component = 0; component < 3; component++)
634b8e80941Smrg      result[component] = interpolate(endpoints[subset_num * 2][component],
635b8e80941Smrg                                      endpoints[subset_num * 2 + 1][component],
636b8e80941Smrg                                      index,
637b8e80941Smrg                                      index_bits);
638b8e80941Smrg
639b8e80941Smrg   /* Alpha uses the opposite index from the color components */
640b8e80941Smrg   if (mode->n_secondary_index_bits && !index_selection) {
641b8e80941Smrg      index = indices[1];
642b8e80941Smrg      index_bits = mode->n_secondary_index_bits;
643b8e80941Smrg   } else {
644b8e80941Smrg      index = indices[0];
645b8e80941Smrg      index_bits = mode->n_index_bits;
646b8e80941Smrg   }
647b8e80941Smrg
648b8e80941Smrg   result[3] = interpolate(endpoints[subset_num * 2][3],
649b8e80941Smrg                           endpoints[subset_num * 2 + 1][3],
650b8e80941Smrg                           index,
651b8e80941Smrg                           index_bits);
652b8e80941Smrg
653b8e80941Smrg   apply_rotation(rotation, result);
654b8e80941Smrg}
655b8e80941Smrg
656b8e80941Smrg#ifdef BPTC_BLOCK_DECODE
657b8e80941Smrgstatic void
658b8e80941Smrgdecompress_rgba_unorm_block(int src_width, int src_height,
659b8e80941Smrg                            const uint8_t *block,
660b8e80941Smrg                            uint8_t *dst_row, int dst_rowstride)
661b8e80941Smrg{
662b8e80941Smrg   int mode_num = ffs(block[0]);
663b8e80941Smrg   const struct bptc_unorm_mode *mode;
664b8e80941Smrg   int bit_offset, secondary_bit_offset;
665b8e80941Smrg   int partition_num;
666b8e80941Smrg   int subset_num;
667b8e80941Smrg   int rotation;
668b8e80941Smrg   int index_selection;
669b8e80941Smrg   int index_bits;
670b8e80941Smrg   int indices[2];
671b8e80941Smrg   int index;
672b8e80941Smrg   int anchors_before_texel;
673b8e80941Smrg   bool anchor;
674b8e80941Smrg   uint8_t endpoints[3 * 2][4];
675b8e80941Smrg   uint32_t subsets;
676b8e80941Smrg   int component;
677b8e80941Smrg   unsigned x, y;
678b8e80941Smrg
679b8e80941Smrg   if (mode_num == 0) {
680b8e80941Smrg      /* According to the spec this mode is reserved and shouldn't be used. */
681b8e80941Smrg      for(y = 0; y < src_height; y += 1) {
682b8e80941Smrg         uint8_t *result = dst_row;
683b8e80941Smrg         memset(result, 0, 4 * src_width);
684b8e80941Smrg         for(x = 0; x < src_width; x += 1) {
685b8e80941Smrg            result[3] = 0xff;
686b8e80941Smrg            result += 4;
687b8e80941Smrg         }
688b8e80941Smrg         dst_row += dst_rowstride;
689b8e80941Smrg      }
690b8e80941Smrg      return;
691b8e80941Smrg   }
692b8e80941Smrg
693b8e80941Smrg   mode = bptc_unorm_modes + mode_num - 1;
694b8e80941Smrg   bit_offset = mode_num;
695b8e80941Smrg
696b8e80941Smrg   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
697b8e80941Smrg   bit_offset += mode->n_partition_bits;
698b8e80941Smrg
699b8e80941Smrg   switch (mode->n_subsets) {
700b8e80941Smrg   case 1:
701b8e80941Smrg      subsets = 0;
702b8e80941Smrg      break;
703b8e80941Smrg   case 2:
704b8e80941Smrg      subsets = partition_table1[partition_num];
705b8e80941Smrg      break;
706b8e80941Smrg   case 3:
707b8e80941Smrg      subsets = partition_table2[partition_num];
708b8e80941Smrg      break;
709b8e80941Smrg   default:
710b8e80941Smrg      assert(false);
711b8e80941Smrg      return;
712b8e80941Smrg   }
713b8e80941Smrg
714b8e80941Smrg   if (mode->has_rotation_bits) {
715b8e80941Smrg      rotation = extract_bits(block, bit_offset, 2);
716b8e80941Smrg      bit_offset += 2;
717b8e80941Smrg   } else {
718b8e80941Smrg      rotation = 0;
719b8e80941Smrg   }
720b8e80941Smrg
721b8e80941Smrg   if (mode->has_index_selection_bit) {
722b8e80941Smrg      index_selection = extract_bits(block, bit_offset, 1);
723b8e80941Smrg      bit_offset++;
724b8e80941Smrg   } else {
725b8e80941Smrg      index_selection = 0;
726b8e80941Smrg   }
727b8e80941Smrg
728b8e80941Smrg   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
729b8e80941Smrg
730b8e80941Smrg   for(y = 0; y < src_height; y += 1) {
731b8e80941Smrg      uint8_t *result = dst_row;
732b8e80941Smrg      for(x = 0; x < src_width; x += 1) {
733b8e80941Smrg         int texel;
734b8e80941Smrg         texel = x + y * 4;
735b8e80941Smrg
736b8e80941Smrg         anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
737b8e80941Smrg                                                           partition_num,
738b8e80941Smrg                                                           texel);
739b8e80941Smrg
740b8e80941Smrg         /* Calculate the offset to the secondary index */
741b8e80941Smrg         secondary_bit_offset = (bit_offset +
742b8e80941Smrg                                 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
743b8e80941Smrg                                 mode->n_subsets +
744b8e80941Smrg                                 mode->n_secondary_index_bits * texel -
745b8e80941Smrg                                 anchors_before_texel);
746b8e80941Smrg
747b8e80941Smrg         /* Calculate the offset to the primary index for this texel */
748b8e80941Smrg         bit_offset += mode->n_index_bits * texel - anchors_before_texel;
749b8e80941Smrg
750b8e80941Smrg         subset_num = (subsets >> (texel * 2)) & 3;
751b8e80941Smrg
752b8e80941Smrg         anchor = is_anchor(mode->n_subsets, partition_num, texel);
753b8e80941Smrg
754b8e80941Smrg         index_bits = mode->n_index_bits;
755b8e80941Smrg         if (anchor)
756b8e80941Smrg            index_bits--;
757b8e80941Smrg         indices[0] = extract_bits(block, bit_offset, index_bits);
758b8e80941Smrg
759b8e80941Smrg         if (mode->n_secondary_index_bits) {
760b8e80941Smrg            index_bits = mode->n_secondary_index_bits;
761b8e80941Smrg            if (anchor)
762b8e80941Smrg               index_bits--;
763b8e80941Smrg            indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
764b8e80941Smrg         }
765b8e80941Smrg
766b8e80941Smrg         index = indices[index_selection];
767b8e80941Smrg         index_bits = (index_selection ?
768b8e80941Smrg                       mode->n_secondary_index_bits :
769b8e80941Smrg                       mode->n_index_bits);
770b8e80941Smrg
771b8e80941Smrg         for (component = 0; component < 3; component++)
772b8e80941Smrg            result[component] = interpolate(endpoints[subset_num * 2][component],
773b8e80941Smrg                                            endpoints[subset_num * 2 + 1][component],
774b8e80941Smrg                                            index,
775b8e80941Smrg                                            index_bits);
776b8e80941Smrg
777b8e80941Smrg         /* Alpha uses the opposite index from the color components */
778b8e80941Smrg         if (mode->n_secondary_index_bits && !index_selection) {
779b8e80941Smrg            index = indices[1];
780b8e80941Smrg            index_bits = mode->n_secondary_index_bits;
781b8e80941Smrg         } else {
782b8e80941Smrg            index = indices[0];
783b8e80941Smrg            index_bits = mode->n_index_bits;
784b8e80941Smrg         }
785b8e80941Smrg
786b8e80941Smrg         result[3] = interpolate(endpoints[subset_num * 2][3],
787b8e80941Smrg                                 endpoints[subset_num * 2 + 1][3],
788b8e80941Smrg                                 index,
789b8e80941Smrg                                 index_bits);
790b8e80941Smrg
791b8e80941Smrg         apply_rotation(rotation, result);
792b8e80941Smrg         result += 4;
793b8e80941Smrg      }
794b8e80941Smrg      dst_row += dst_rowstride;
795b8e80941Smrg   }
796b8e80941Smrg}
797b8e80941Smrg
798b8e80941Smrgstatic void
799b8e80941Smrgdecompress_rgba_unorm(int width, int height,
800b8e80941Smrg                      const uint8_t *src, int src_rowstride,
801b8e80941Smrg                      uint8_t *dst, int dst_rowstride)
802b8e80941Smrg{
803b8e80941Smrg   int src_row_diff;
804b8e80941Smrg   int y, x;
805b8e80941Smrg
806b8e80941Smrg   if (src_rowstride >= width * 4)
807b8e80941Smrg      src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
808b8e80941Smrg   else
809b8e80941Smrg      src_row_diff = 0;
810b8e80941Smrg
811b8e80941Smrg   for (y = 0; y < height; y += BLOCK_SIZE) {
812b8e80941Smrg      for (x = 0; x < width; x += BLOCK_SIZE) {
813b8e80941Smrg         decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
814b8e80941Smrg                                     MIN2(height - y, BLOCK_SIZE),
815b8e80941Smrg                                     src,
816b8e80941Smrg                                     dst + x * 4 + y * dst_rowstride,
817b8e80941Smrg                                     dst_rowstride);
818b8e80941Smrg         src += BLOCK_BYTES;
819b8e80941Smrg      }
820b8e80941Smrg      src += src_row_diff;
821b8e80941Smrg   }
822b8e80941Smrg}
823b8e80941Smrg#endif // BPTC_BLOCK_DECODE
824b8e80941Smrg
/*
 * Sign-extends the low n_bits of value to a full 32-bit signed integer.
 *
 * If bit (n_bits - 1) of value is set, all bits from n_bits upwards are set
 * as well; otherwise value is returned unchanged.  An n_bits outside 1..31
 * leaves value untouched, since there is nothing to extend.
 *
 * The bit manipulation is done on an unsigned copy because left-shifting a
 * negative signed value is undefined behaviour in C.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   uint32_t bits = (uint32_t) value;

   if (n_bits <= 0 || n_bits >= 32)
      return value;

   if (bits & (UINT32_C(1) << (n_bits - 1)))
      bits |= ~UINT32_C(0) << n_bits;

   return (int32_t) bits;
}
835b8e80941Smrg
/*
 * Expands a signed endpoint value that was stored with n_endpoint_bits bits.
 *
 * Values stored with 16 or more bits, and zero, are returned as they are.
 * Otherwise the magnitude is scaled so that the largest storable magnitude
 * becomes 0x7fff, and the original sign is reapplied.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int expanded;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      expanded = 0x7fff;
   else
      expanded = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -expanded : expanded;
}
864b8e80941Smrg
/*
 * Expands an unsigned endpoint value that was stored with n_endpoint_bits
 * bits.
 *
 * Values stored with 15 or more bits, and zero, are returned as they are.
 * The all-ones value maps to 0xffff; everything else is scaled up with
 * rounding.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int all_ones;
   int rounded;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   all_ones = (1 << n_endpoint_bits) - 1;
   if (value == all_ones)
      return 0xffff;

   rounded = (value << 15) + 0x4000;
   return rounded >> (n_endpoint_bits - 1);
}
879b8e80941Smrg
880b8e80941Smrgstatic int
881b8e80941Smrgextract_float_endpoints(const struct bptc_float_mode *mode,
882b8e80941Smrg                        const uint8_t *block,
883b8e80941Smrg                        int bit_offset,
884b8e80941Smrg                        int32_t endpoints[][3],
885b8e80941Smrg                        bool is_signed)
886b8e80941Smrg{
887b8e80941Smrg   const struct bptc_float_bitfield *bitfield;
888b8e80941Smrg   int endpoint, component;
889b8e80941Smrg   int n_endpoints;
890b8e80941Smrg   int value;
891b8e80941Smrg   int i;
892b8e80941Smrg
893b8e80941Smrg   if (mode->n_partition_bits)
894b8e80941Smrg      n_endpoints = 4;
895b8e80941Smrg   else
896b8e80941Smrg      n_endpoints = 2;
897b8e80941Smrg
898b8e80941Smrg   memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
899b8e80941Smrg
900b8e80941Smrg   for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
901b8e80941Smrg      value = extract_bits(block, bit_offset, bitfield->n_bits);
902b8e80941Smrg      bit_offset += bitfield->n_bits;
903b8e80941Smrg
904b8e80941Smrg      if (bitfield->reverse) {
905b8e80941Smrg         for (i = 0; i < bitfield->n_bits; i++) {
906b8e80941Smrg            if (value & (1 << i))
907b8e80941Smrg               endpoints[bitfield->endpoint][bitfield->component] |=
908b8e80941Smrg                  1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
909b8e80941Smrg         }
910b8e80941Smrg      } else {
911b8e80941Smrg         endpoints[bitfield->endpoint][bitfield->component] |=
912b8e80941Smrg            value << bitfield->offset;
913b8e80941Smrg      }
914b8e80941Smrg   }
915b8e80941Smrg
916b8e80941Smrg   if (mode->transformed_endpoints) {
917b8e80941Smrg      /* The endpoints are specified as signed offsets from e0 */
918b8e80941Smrg      for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
919b8e80941Smrg         for (component = 0; component < 3; component++) {
920b8e80941Smrg            value = sign_extend(endpoints[endpoint][component],
921b8e80941Smrg                                mode->n_delta_bits[component]);
922b8e80941Smrg            endpoints[endpoint][component] =
923b8e80941Smrg               ((endpoints[0][component] + value) &
924b8e80941Smrg                ((1 << mode->n_endpoint_bits) - 1));
925b8e80941Smrg         }
926b8e80941Smrg      }
927b8e80941Smrg   }
928b8e80941Smrg
929b8e80941Smrg   if (is_signed) {
930b8e80941Smrg      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
931b8e80941Smrg         for (component = 0; component < 3; component++) {
932b8e80941Smrg            value = sign_extend(endpoints[endpoint][component],
933b8e80941Smrg                                mode->n_endpoint_bits);
934b8e80941Smrg            endpoints[endpoint][component] =
935b8e80941Smrg               signed_unquantize(value, mode->n_endpoint_bits);
936b8e80941Smrg         }
937b8e80941Smrg      }
938b8e80941Smrg   } else {
939b8e80941Smrg      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
940b8e80941Smrg         for (component = 0; component < 3; component++) {
941b8e80941Smrg            endpoints[endpoint][component] =
942b8e80941Smrg               unsigned_unquantize(endpoints[endpoint][component],
943b8e80941Smrg                                   mode->n_endpoint_bits);
944b8e80941Smrg         }
945b8e80941Smrg      }
946b8e80941Smrg   }
947b8e80941Smrg
948b8e80941Smrg   return bit_offset;
949b8e80941Smrg}
950b8e80941Smrg
/*
 * Final scaling step for an interpolated unsigned value: multiplies by
 * 31/64 using integer arithmetic.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
956b8e80941Smrg
/*
 * Final scaling step for an interpolated signed value: the magnitude is
 * multiplied by 31/32 using integer arithmetic, and bit 15 is set in the
 * result when the input was negative.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
965b8e80941Smrg
966b8e80941Smrgstatic void
967b8e80941Smrgfetch_rgb_float_from_block(const uint8_t *block,
968b8e80941Smrg                           float *result,
969b8e80941Smrg                           int texel,
970b8e80941Smrg                           bool is_signed)
971b8e80941Smrg{
972b8e80941Smrg   int mode_num;
973b8e80941Smrg   const struct bptc_float_mode *mode;
974b8e80941Smrg   int bit_offset;
975b8e80941Smrg   int partition_num;
976b8e80941Smrg   int subset_num;
977b8e80941Smrg   int index_bits;
978b8e80941Smrg   int index;
979b8e80941Smrg   int anchors_before_texel;
980b8e80941Smrg   int32_t endpoints[2 * 2][3];
981b8e80941Smrg   uint32_t subsets;
982b8e80941Smrg   int n_subsets;
983b8e80941Smrg   int component;
984b8e80941Smrg   int32_t value;
985b8e80941Smrg
986b8e80941Smrg   if (block[0] & 0x2) {
987b8e80941Smrg      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
988b8e80941Smrg      bit_offset = 5;
989b8e80941Smrg   } else {
990b8e80941Smrg      mode_num = block[0] & 3;
991b8e80941Smrg      bit_offset = 2;
992b8e80941Smrg   }
993b8e80941Smrg
994b8e80941Smrg   mode = bptc_float_modes + mode_num;
995b8e80941Smrg
996b8e80941Smrg   if (mode->reserved) {
997b8e80941Smrg      memset(result, 0, sizeof result[0] * 3);
998b8e80941Smrg      result[3] = 1.0f;
999b8e80941Smrg      return;
1000b8e80941Smrg   }
1001b8e80941Smrg
1002b8e80941Smrg   bit_offset = extract_float_endpoints(mode, block, bit_offset,
1003b8e80941Smrg                                        endpoints, is_signed);
1004b8e80941Smrg
1005b8e80941Smrg   if (mode->n_partition_bits) {
1006b8e80941Smrg      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1007b8e80941Smrg      bit_offset += mode->n_partition_bits;
1008b8e80941Smrg
1009b8e80941Smrg      subsets = partition_table1[partition_num];
1010b8e80941Smrg      n_subsets = 2;
1011b8e80941Smrg   } else {
1012b8e80941Smrg      partition_num = 0;
1013b8e80941Smrg      subsets = 0;
1014b8e80941Smrg      n_subsets = 1;
1015b8e80941Smrg   }
1016b8e80941Smrg
1017b8e80941Smrg   anchors_before_texel =
1018b8e80941Smrg      count_anchors_before_texel(n_subsets, partition_num, texel);
1019b8e80941Smrg
1020b8e80941Smrg   /* Calculate the offset to the primary index for this texel */
1021b8e80941Smrg   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1022b8e80941Smrg
1023b8e80941Smrg   subset_num = (subsets >> (texel * 2)) & 3;
1024b8e80941Smrg
1025b8e80941Smrg   index_bits = mode->n_index_bits;
1026b8e80941Smrg   if (is_anchor(n_subsets, partition_num, texel))
1027b8e80941Smrg      index_bits--;
1028b8e80941Smrg   index = extract_bits(block, bit_offset, index_bits);
1029b8e80941Smrg
1030b8e80941Smrg   for (component = 0; component < 3; component++) {
1031b8e80941Smrg      value = interpolate(endpoints[subset_num * 2][component],
1032b8e80941Smrg                          endpoints[subset_num * 2 + 1][component],
1033b8e80941Smrg                          index,
1034b8e80941Smrg                          mode->n_index_bits);
1035b8e80941Smrg
1036b8e80941Smrg      if (is_signed)
1037b8e80941Smrg         value = finish_signed_unquantize(value);
1038b8e80941Smrg      else
1039b8e80941Smrg         value = finish_unsigned_unquantize(value);
1040b8e80941Smrg
1041b8e80941Smrg      result[component] = _mesa_half_to_float(value);
1042b8e80941Smrg   }
1043b8e80941Smrg
1044b8e80941Smrg   result[3] = 1.0f;
1045b8e80941Smrg}
1046b8e80941Smrg
1047b8e80941Smrg#ifdef BPTC_BLOCK_DECODE
1048b8e80941Smrgstatic void
1049b8e80941Smrgdecompress_rgb_float_block(unsigned src_width, unsigned src_height,
1050b8e80941Smrg                           const uint8_t *block,
1051b8e80941Smrg                           float *dst_row, unsigned dst_rowstride,
1052b8e80941Smrg                           bool is_signed)
1053b8e80941Smrg{
1054b8e80941Smrg   int mode_num;
1055b8e80941Smrg   const struct bptc_float_mode *mode;
1056b8e80941Smrg   int bit_offset;
1057b8e80941Smrg   int partition_num;
1058b8e80941Smrg   int subset_num;
1059b8e80941Smrg   int index_bits;
1060b8e80941Smrg   int index;
1061b8e80941Smrg   int anchors_before_texel;
1062b8e80941Smrg   int32_t endpoints[2 * 2][3];
1063b8e80941Smrg   uint32_t subsets;
1064b8e80941Smrg   int n_subsets;
1065b8e80941Smrg   int component;
1066b8e80941Smrg   int32_t value;
1067b8e80941Smrg   unsigned x, y;
1068b8e80941Smrg
1069b8e80941Smrg   if (block[0] & 0x2) {
1070b8e80941Smrg      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1071b8e80941Smrg      bit_offset = 5;
1072b8e80941Smrg   } else {
1073b8e80941Smrg      mode_num = block[0] & 3;
1074b8e80941Smrg      bit_offset = 2;
1075b8e80941Smrg   }
1076b8e80941Smrg
1077b8e80941Smrg   mode = bptc_float_modes + mode_num;
1078b8e80941Smrg
1079b8e80941Smrg   if (mode->reserved) {
1080b8e80941Smrg      for(y = 0; y < src_height; y += 1) {
1081b8e80941Smrg         float *result = dst_row;
1082b8e80941Smrg         memset(result, 0, sizeof result[0] * 4 * src_width);
1083b8e80941Smrg         for(x = 0; x < src_width; x += 1) {
1084b8e80941Smrg            result[3] = 1.0f;
1085b8e80941Smrg            result += 4;
1086b8e80941Smrg         }
1087b8e80941Smrg         dst_row += dst_rowstride / sizeof dst_row[0];
1088b8e80941Smrg      }
1089b8e80941Smrg      return;
1090b8e80941Smrg   }
1091b8e80941Smrg
1092b8e80941Smrg   bit_offset = extract_float_endpoints(mode, block, bit_offset,
1093b8e80941Smrg                                        endpoints, is_signed);
1094b8e80941Smrg
1095b8e80941Smrg   if (mode->n_partition_bits) {
1096b8e80941Smrg      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1097b8e80941Smrg      bit_offset += mode->n_partition_bits;
1098b8e80941Smrg
1099b8e80941Smrg      subsets = partition_table1[partition_num];
1100b8e80941Smrg      n_subsets = 2;
1101b8e80941Smrg   } else {
1102b8e80941Smrg      partition_num = 0;
1103b8e80941Smrg      subsets = 0;
1104b8e80941Smrg      n_subsets = 1;
1105b8e80941Smrg   }
1106b8e80941Smrg
1107b8e80941Smrg   for(y = 0; y < src_height; y += 1) {
1108b8e80941Smrg      float *result = dst_row;
1109b8e80941Smrg      for(x = 0; x < src_width; x += 1) {
1110b8e80941Smrg         int texel;
1111b8e80941Smrg
1112b8e80941Smrg         texel = x + y * 4;
1113b8e80941Smrg
1114b8e80941Smrg         anchors_before_texel =
1115b8e80941Smrg            count_anchors_before_texel(n_subsets, partition_num, texel);
1116b8e80941Smrg
1117b8e80941Smrg         /* Calculate the offset to the primary index for this texel */
1118b8e80941Smrg         bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1119b8e80941Smrg
1120b8e80941Smrg         subset_num = (subsets >> (texel * 2)) & 3;
1121b8e80941Smrg
1122b8e80941Smrg         index_bits = mode->n_index_bits;
1123b8e80941Smrg         if (is_anchor(n_subsets, partition_num, texel))
1124b8e80941Smrg            index_bits--;
1125b8e80941Smrg         index = extract_bits(block, bit_offset, index_bits);
1126b8e80941Smrg
1127b8e80941Smrg         for (component = 0; component < 3; component++) {
1128b8e80941Smrg            value = interpolate(endpoints[subset_num * 2][component],
1129b8e80941Smrg                                endpoints[subset_num * 2 + 1][component],
1130b8e80941Smrg                                index,
1131b8e80941Smrg                                mode->n_index_bits);
1132b8e80941Smrg
1133b8e80941Smrg            if (is_signed)
1134b8e80941Smrg               value = finish_signed_unquantize(value);
1135b8e80941Smrg            else
1136b8e80941Smrg               value = finish_unsigned_unquantize(value);
1137b8e80941Smrg
1138b8e80941Smrg            result[component] = _mesa_half_to_float(value);
1139b8e80941Smrg         }
1140b8e80941Smrg
1141b8e80941Smrg         result[3] = 1.0f;
1142b8e80941Smrg         result += 4;
1143b8e80941Smrg      }
1144b8e80941Smrg      dst_row += dst_rowstride / sizeof dst_row[0];
1145b8e80941Smrg   }
1146b8e80941Smrg}
1147b8e80941Smrg
1148b8e80941Smrgstatic void
1149b8e80941Smrgdecompress_rgb_float(int width, int height,
1150b8e80941Smrg                      const uint8_t *src, int src_rowstride,
1151b8e80941Smrg                      float *dst, int dst_rowstride, bool is_signed)
1152b8e80941Smrg{
1153b8e80941Smrg   int src_row_diff;
1154b8e80941Smrg   int y, x;
1155b8e80941Smrg
1156b8e80941Smrg   if (src_rowstride >= width * 4)
1157b8e80941Smrg      src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1158b8e80941Smrg   else
1159b8e80941Smrg      src_row_diff = 0;
1160b8e80941Smrg
1161b8e80941Smrg   for (y = 0; y < height; y += BLOCK_SIZE) {
1162b8e80941Smrg      for (x = 0; x < width; x += BLOCK_SIZE) {
1163b8e80941Smrg         decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1164b8e80941Smrg                                    MIN2(height - y, BLOCK_SIZE),
1165b8e80941Smrg                                    src,
1166b8e80941Smrg                                    (dst + x * 4 +
1167b8e80941Smrg                                     (y * dst_rowstride / sizeof dst[0])),
1168b8e80941Smrg                                    dst_rowstride, is_signed);
1169b8e80941Smrg         src += BLOCK_BYTES;
1170b8e80941Smrg      }
1171b8e80941Smrg      src += src_row_diff;
1172b8e80941Smrg   }
1173b8e80941Smrg}
1174b8e80941Smrg#endif // BPTC_BLOCK_DECODE
1175b8e80941Smrg
1176b8e80941Smrgstatic void
1177b8e80941Smrgwrite_bits(struct bit_writer *writer, int n_bits, int value)
1178b8e80941Smrg{
1179b8e80941Smrg   do {
1180b8e80941Smrg      if (n_bits + writer->pos >= 8) {
1181b8e80941Smrg         *(writer->dst++) = writer->buf | (value << writer->pos);
1182b8e80941Smrg         writer->buf = 0;
1183b8e80941Smrg         value >>= (8 - writer->pos);
1184b8e80941Smrg         n_bits -= (8 - writer->pos);
1185b8e80941Smrg         writer->pos = 0;
1186b8e80941Smrg      } else {
1187b8e80941Smrg         writer->buf |= value << writer->pos;
1188b8e80941Smrg         writer->pos += n_bits;
1189b8e80941Smrg         break;
1190b8e80941Smrg      }
1191b8e80941Smrg   } while (n_bits > 0);
1192b8e80941Smrg}
1193b8e80941Smrg
/*
 * Computes per-texel averages over a rectangle of 8-bit RGBA texels.
 *
 * width, height: size of the rectangle in texels.
 * src, src_rowstride: first texel (4 bytes each) and the byte stride between
 *    rows.
 * average_luminance: receives the average of (byte0 + byte1 + byte2); note
 *    that the three color bytes are summed, not averaged.
 * average_alpha: receives the average of byte 3.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int color_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         color_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = color_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1214b8e80941Smrg
/*
 * Picks two RGBA endpoints for a rectangle of 8-bit RGBA texels.
 *
 * width, height: size of the rectangle in texels.
 * src, src_rowstride: first texel (4 bytes each) and the byte stride between
 *    rows.
 * average_luminance: threshold for the sum of the three color bytes.
 * average_alpha: threshold for the alpha byte.
 * endpoints: receives endpoints[0] and endpoints[1], 4 components each.
 *
 * Color and alpha are handled independently.  Texels below the threshold are
 * averaged into endpoint 0 and the remaining ones into endpoint 1; if all
 * texels land on the same side both endpoints get the overall average.
 * Finally the endpoints are swapped where necessary so that the first texel
 * is closer to endpoint 0.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Classify by the alpha byte itself (byte 3), which is also the
          * byte being accumulated below. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* Everything is on one side: use the overall average for both */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1311b8e80941Smrg
1312b8e80941Smrgstatic void
1313b8e80941Smrgwrite_rgb_indices_unorm(struct bit_writer *writer,
1314b8e80941Smrg                        int src_width, int src_height,
1315b8e80941Smrg                        const uint8_t *src, int src_rowstride,
1316b8e80941Smrg                        uint8_t endpoints[][4])
1317b8e80941Smrg{
1318b8e80941Smrg   int luminance;
1319b8e80941Smrg   int endpoint_luminances[2];
1320b8e80941Smrg   int endpoint;
1321b8e80941Smrg   int index;
1322b8e80941Smrg   int y, x;
1323b8e80941Smrg
1324b8e80941Smrg   for (endpoint = 0; endpoint < 2; endpoint++) {
1325b8e80941Smrg      endpoint_luminances[endpoint] =
1326b8e80941Smrg         endpoints[endpoint][0] +
1327b8e80941Smrg         endpoints[endpoint][1] +
1328b8e80941Smrg         endpoints[endpoint][2];
1329b8e80941Smrg   }
1330b8e80941Smrg
1331b8e80941Smrg   /* If the endpoints have the same luminance then we'll just use index 0 for
1332b8e80941Smrg    * all of the texels */
1333b8e80941Smrg   if (endpoint_luminances[0] == endpoint_luminances[1]) {
1334b8e80941Smrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1335b8e80941Smrg      return;
1336b8e80941Smrg   }
1337b8e80941Smrg
1338b8e80941Smrg   for (y = 0; y < src_height; y++) {
1339b8e80941Smrg      for (x = 0; x < src_width; x++) {
1340b8e80941Smrg         luminance = src[0] + src[1] + src[2];
1341b8e80941Smrg
1342b8e80941Smrg         index = ((luminance - endpoint_luminances[0]) * 3 /
1343b8e80941Smrg                  (endpoint_luminances[1] - endpoint_luminances[0]));
1344b8e80941Smrg         if (index < 0)
1345b8e80941Smrg            index = 0;
1346b8e80941Smrg         else if (index > 3)
1347b8e80941Smrg            index = 3;
1348b8e80941Smrg
1349b8e80941Smrg         assert(x != 0 || y != 0 || index < 2);
1350b8e80941Smrg
1351b8e80941Smrg         write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1352b8e80941Smrg
1353b8e80941Smrg         src += 4;
1354b8e80941Smrg      }
1355b8e80941Smrg
1356b8e80941Smrg      /* Pad the indices out to the block size */
1357b8e80941Smrg      if (src_width < BLOCK_SIZE)
1358b8e80941Smrg         write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1359b8e80941Smrg
1360b8e80941Smrg      src += src_rowstride - src_width * 4;
1361b8e80941Smrg   }
1362b8e80941Smrg
1363b8e80941Smrg   /* Pad the indices out to the block size */
1364b8e80941Smrg   if (src_height < BLOCK_SIZE)
1365b8e80941Smrg      write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1366b8e80941Smrg}
1367b8e80941Smrg
1368b8e80941Smrgstatic void
1369b8e80941Smrgwrite_alpha_indices_unorm(struct bit_writer *writer,
1370b8e80941Smrg                          int src_width, int src_height,
1371b8e80941Smrg                          const uint8_t *src, int src_rowstride,
1372b8e80941Smrg                          uint8_t endpoints[][4])
1373b8e80941Smrg{
1374b8e80941Smrg   int index;
1375b8e80941Smrg   int y, x;
1376b8e80941Smrg
1377b8e80941Smrg   /* If the endpoints have the same alpha then we'll just use index 0 for
1378b8e80941Smrg    * all of the texels */
1379b8e80941Smrg   if (endpoints[0][3] == endpoints[1][3]) {
1380b8e80941Smrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1381b8e80941Smrg      return;
1382b8e80941Smrg   }
1383b8e80941Smrg
1384b8e80941Smrg   for (y = 0; y < src_height; y++) {
1385b8e80941Smrg      for (x = 0; x < src_width; x++) {
1386b8e80941Smrg         index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1387b8e80941Smrg                  ((int) endpoints[1][3] - endpoints[0][3]));
1388b8e80941Smrg         if (index < 0)
1389b8e80941Smrg            index = 0;
1390b8e80941Smrg         else if (index > 7)
1391b8e80941Smrg            index = 7;
1392b8e80941Smrg
1393b8e80941Smrg         assert(x != 0 || y != 0 || index < 4);
1394b8e80941Smrg
1395b8e80941Smrg         /* The first index has one less bit */
1396b8e80941Smrg         write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1397b8e80941Smrg
1398b8e80941Smrg         src += 4;
1399b8e80941Smrg      }
1400b8e80941Smrg
1401b8e80941Smrg      /* Pad the indices out to the block size */
1402b8e80941Smrg      if (src_width < BLOCK_SIZE)
1403b8e80941Smrg         write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1404b8e80941Smrg
1405b8e80941Smrg      src += src_rowstride - src_width * 4;
1406b8e80941Smrg   }
1407b8e80941Smrg
1408b8e80941Smrg   /* Pad the indices out to the block size */
1409b8e80941Smrg   if (src_height < BLOCK_SIZE)
1410b8e80941Smrg      write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1411b8e80941Smrg}
1412b8e80941Smrg
1413b8e80941Smrgstatic void
1414b8e80941Smrgcompress_rgba_unorm_block(int src_width, int src_height,
1415b8e80941Smrg                          const uint8_t *src, int src_rowstride,
1416b8e80941Smrg                          uint8_t *dst)
1417b8e80941Smrg{
1418b8e80941Smrg   int average_luminance, average_alpha;
1419b8e80941Smrg   uint8_t endpoints[2][4];
1420b8e80941Smrg   struct bit_writer writer;
1421b8e80941Smrg   int component, endpoint;
1422b8e80941Smrg
1423b8e80941Smrg   get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1424b8e80941Smrg                                     &average_luminance, &average_alpha);
1425b8e80941Smrg   get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1426b8e80941Smrg                            average_luminance, average_alpha,
1427b8e80941Smrg                            endpoints);
1428b8e80941Smrg
1429b8e80941Smrg   writer.dst = dst;
1430b8e80941Smrg   writer.pos = 0;
1431b8e80941Smrg   writer.buf = 0;
1432b8e80941Smrg
1433b8e80941Smrg   write_bits(&writer, 5, 0x10); /* mode 4 */
1434b8e80941Smrg   write_bits(&writer, 2, 0); /* rotation 0 */
1435b8e80941Smrg   write_bits(&writer, 1, 0); /* index selection bit */
1436b8e80941Smrg
1437b8e80941Smrg   /* Write the color endpoints */
1438b8e80941Smrg   for (component = 0; component < 3; component++)
1439b8e80941Smrg      for (endpoint = 0; endpoint < 2; endpoint++)
1440b8e80941Smrg         write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1441b8e80941Smrg
1442b8e80941Smrg   /* Write the alpha endpoints */
1443b8e80941Smrg   for (endpoint = 0; endpoint < 2; endpoint++)
1444b8e80941Smrg      write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1445b8e80941Smrg
1446b8e80941Smrg   write_rgb_indices_unorm(&writer,
1447b8e80941Smrg                           src_width, src_height,
1448b8e80941Smrg                           src, src_rowstride,
1449b8e80941Smrg                           endpoints);
1450b8e80941Smrg   write_alpha_indices_unorm(&writer,
1451b8e80941Smrg                             src_width, src_height,
1452b8e80941Smrg                             src, src_rowstride,
1453b8e80941Smrg                             endpoints);
1454b8e80941Smrg}
1455b8e80941Smrg
1456b8e80941Smrgstatic void
1457b8e80941Smrgcompress_rgba_unorm(int width, int height,
1458b8e80941Smrg                    const uint8_t *src, int src_rowstride,
1459b8e80941Smrg                    uint8_t *dst, int dst_rowstride)
1460b8e80941Smrg{
1461b8e80941Smrg   int dst_row_diff;
1462b8e80941Smrg   int y, x;
1463b8e80941Smrg
1464b8e80941Smrg   if (dst_rowstride >= width * 4)
1465b8e80941Smrg      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1466b8e80941Smrg   else
1467b8e80941Smrg      dst_row_diff = 0;
1468b8e80941Smrg
1469b8e80941Smrg   for (y = 0; y < height; y += BLOCK_SIZE) {
1470b8e80941Smrg      for (x = 0; x < width; x += BLOCK_SIZE) {
1471b8e80941Smrg         compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1472b8e80941Smrg                                   MIN2(height - y, BLOCK_SIZE),
1473b8e80941Smrg                                   src + x * 4 + y * src_rowstride,
1474b8e80941Smrg                                   src_rowstride,
1475b8e80941Smrg                                   dst);
1476b8e80941Smrg         dst += BLOCK_BYTES;
1477b8e80941Smrg      }
1478b8e80941Smrg      dst += dst_row_diff;
1479b8e80941Smrg   }
1480b8e80941Smrg}
1481b8e80941Smrg
/*
 * Returns the mean of (r + g + b) over a width x height region of RGB float
 * texels (3 floats per texel).
 *
 * src_rowstride is the distance in bytes between the starts of two rows. It
 * may be negative for images stored bottom-up.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   /* Number of floats to skip from the end of one row to the start of the
    * next. Computed in signed arithmetic: mixing the int stride with the
    * unsigned result of sizeof would turn a negative stride into a huge
    * unsigned offset. */
   const int row_skip =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float luminance_sum = 0;
   int y, x;

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         src += 3;
      }
      src += row_skip;
   }

   return luminance_sum / (width * height);
}
1499b8e80941Smrg
/*
 * Clamps value to the range used for endpoints: at most 65504, and at least
 * -65504 when is_signed is true or 0 otherwise. Values already inside the
 * range are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1518b8e80941Smrg
/*
 * Chooses the two RGB endpoints for a block of float texels.
 *
 * The texels (3 floats each, src_rowstride bytes between rows, which may be
 * negative) are split into those whose luminance (r + g + b) is below
 * average_luminance and the rest. endpoints[0] receives the mean colour of
 * the first group and endpoints[1] the mean colour of the second. If either
 * group is empty both endpoints are set to the mean of the whole block.
 *
 * The endpoints are then clamped with clamp_value() and, if necessary,
 * swapped so that the first texel lies on the endpoints[0] side of the
 * midpoint between the two endpoint luminances.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   /* Number of floats to skip from the end of one row to the start of the
    * next. Computed in signed arithmetic: mixing the int stride with the
    * unsigned result of sizeof would turn a negative stride into a huge
    * unsigned offset. */
   const int row_skip =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float endpoint_luminances[2];
   float midpoint;
   float sums[2][3];
   int endpoint, component;
   float luminance;
   float temp[3];
   const float *p = src;
   int left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   /* Accumulate the colour of each texel into the group it belongs to */
   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         p += 3;
      }

      p += row_skip;
   }

   if (left_endpoint_count == 0 ||
       left_endpoint_count == width * height) {
      /* One group is empty: use the overall mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / left_endpoint_count;
         endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (endpoint = 0; endpoint < 2; endpoint++) {
      for (component = 0; component < 3; component++) {
         endpoints[endpoint][component] =
            clamp_value(endpoints[endpoint][component], is_signed);
      }
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   /* Swap when the first texel and endpoint 0 are on opposite sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1594b8e80941Smrg
1595b8e80941Smrgstatic void
1596b8e80941Smrgwrite_rgb_indices_float(struct bit_writer *writer,
1597b8e80941Smrg                        int src_width, int src_height,
1598b8e80941Smrg                        const float *src, int src_rowstride,
1599b8e80941Smrg                        float endpoints[][3])
1600b8e80941Smrg{
1601b8e80941Smrg   float luminance;
1602b8e80941Smrg   float endpoint_luminances[2];
1603b8e80941Smrg   int endpoint;
1604b8e80941Smrg   int index;
1605b8e80941Smrg   int y, x;
1606b8e80941Smrg
1607b8e80941Smrg   for (endpoint = 0; endpoint < 2; endpoint++) {
1608b8e80941Smrg      endpoint_luminances[endpoint] =
1609b8e80941Smrg         endpoints[endpoint][0] +
1610b8e80941Smrg         endpoints[endpoint][1] +
1611b8e80941Smrg         endpoints[endpoint][2];
1612b8e80941Smrg   }
1613b8e80941Smrg
1614b8e80941Smrg   /* If the endpoints have the same luminance then we'll just use index 0 for
1615b8e80941Smrg    * all of the texels */
1616b8e80941Smrg   if (endpoint_luminances[0] == endpoint_luminances[1]) {
1617b8e80941Smrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1618b8e80941Smrg      return;
1619b8e80941Smrg   }
1620b8e80941Smrg
1621b8e80941Smrg   for (y = 0; y < src_height; y++) {
1622b8e80941Smrg      for (x = 0; x < src_width; x++) {
1623b8e80941Smrg         luminance = src[0] + src[1] + src[2];
1624b8e80941Smrg
1625b8e80941Smrg         index = ((luminance - endpoint_luminances[0]) * 15 /
1626b8e80941Smrg                  (endpoint_luminances[1] - endpoint_luminances[0]));
1627b8e80941Smrg         if (index < 0)
1628b8e80941Smrg            index = 0;
1629b8e80941Smrg         else if (index > 15)
1630b8e80941Smrg            index = 15;
1631b8e80941Smrg
1632b8e80941Smrg         assert(x != 0 || y != 0 || index < 8);
1633b8e80941Smrg
1634b8e80941Smrg         write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1635b8e80941Smrg
1636b8e80941Smrg         src += 3;
1637b8e80941Smrg      }
1638b8e80941Smrg
1639b8e80941Smrg      /* Pad the indices out to the block size */
1640b8e80941Smrg      if (src_width < BLOCK_SIZE)
1641b8e80941Smrg         write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1642b8e80941Smrg
1643b8e80941Smrg      src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1644b8e80941Smrg   }
1645b8e80941Smrg
1646b8e80941Smrg   /* Pad the indices out to the block size */
1647b8e80941Smrg   if (src_height < BLOCK_SIZE)
1648b8e80941Smrg      write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1649b8e80941Smrg}
1650b8e80941Smrg
/*
 * Converts one endpoint component to the integer that is stored in the
 * block.
 *
 * The value is first converted to a half float bit pattern with
 * _mesa_float_to_half(). For unsigned formats anything <= 0 becomes 0 and
 * other values are scaled by 64/31 and shifted right by 6. For signed
 * formats the sign bit is removed, the magnitude is scaled by 32/31 and
 * shifted right by 6, and a negative result is returned as its negation
 * masked to the low 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative;
   int bits;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      bits = _mesa_float_to_half(value);

      return (64 * bits / 31) >> 6;
   }

   bits = _mesa_float_to_half(value);

   /* Separate the sign from the magnitude */
   negative = (bits & 0x8000) != 0;
   if (negative)
      bits &= 0x7fff;

   bits = (32 * bits / 31) >> 6;

   /* Negative values are stored as a 10-bit two's complement number */
   if (negative)
      bits = -bits & ((1 << 10) - 1);

   return bits;
}
1680b8e80941Smrg
1681b8e80941Smrgstatic void
1682b8e80941Smrgcompress_rgb_float_block(int src_width, int src_height,
1683b8e80941Smrg                         const float *src, int src_rowstride,
1684b8e80941Smrg                         uint8_t *dst,
1685b8e80941Smrg                         bool is_signed)
1686b8e80941Smrg{
1687b8e80941Smrg   float average_luminance;
1688b8e80941Smrg   float endpoints[2][3];
1689b8e80941Smrg   struct bit_writer writer;
1690b8e80941Smrg   int component, endpoint;
1691b8e80941Smrg   int endpoint_value;
1692b8e80941Smrg
1693b8e80941Smrg   average_luminance =
1694b8e80941Smrg      get_average_luminance_float(src_width, src_height, src, src_rowstride);
1695b8e80941Smrg   get_endpoints_float(src_width, src_height, src, src_rowstride,
1696b8e80941Smrg                       average_luminance, endpoints, is_signed);
1697b8e80941Smrg
1698b8e80941Smrg   writer.dst = dst;
1699b8e80941Smrg   writer.pos = 0;
1700b8e80941Smrg   writer.buf = 0;
1701b8e80941Smrg
1702b8e80941Smrg   write_bits(&writer, 5, 3); /* mode 3 */
1703b8e80941Smrg
1704b8e80941Smrg   /* Write the endpoints */
1705b8e80941Smrg   for (endpoint = 0; endpoint < 2; endpoint++) {
1706b8e80941Smrg      for (component = 0; component < 3; component++) {
1707b8e80941Smrg         endpoint_value =
1708b8e80941Smrg            get_endpoint_value(endpoints[endpoint][component], is_signed);
1709b8e80941Smrg         write_bits(&writer, 10, endpoint_value);
1710b8e80941Smrg      }
1711b8e80941Smrg   }
1712b8e80941Smrg
1713b8e80941Smrg   write_rgb_indices_float(&writer,
1714b8e80941Smrg                           src_width, src_height,
1715b8e80941Smrg                           src, src_rowstride,
1716b8e80941Smrg                           endpoints);
1717b8e80941Smrg}
1718b8e80941Smrg
1719b8e80941Smrgstatic void
1720b8e80941Smrgcompress_rgb_float(int width, int height,
1721b8e80941Smrg                   const float *src, int src_rowstride,
1722b8e80941Smrg                   uint8_t *dst, int dst_rowstride,
1723b8e80941Smrg                   bool is_signed)
1724b8e80941Smrg{
1725b8e80941Smrg   int dst_row_diff;
1726b8e80941Smrg   int y, x;
1727b8e80941Smrg
1728b8e80941Smrg   if (dst_rowstride >= width * 4)
1729b8e80941Smrg      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1730b8e80941Smrg   else
1731b8e80941Smrg      dst_row_diff = 0;
1732b8e80941Smrg
1733b8e80941Smrg   for (y = 0; y < height; y += BLOCK_SIZE) {
1734b8e80941Smrg      for (x = 0; x < width; x += BLOCK_SIZE) {
1735b8e80941Smrg         compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1736b8e80941Smrg                                  MIN2(height - y, BLOCK_SIZE),
1737b8e80941Smrg                                  src + x * 3 +
1738b8e80941Smrg                                  y * src_rowstride / sizeof (float),
1739b8e80941Smrg                                  src_rowstride,
1740b8e80941Smrg                                  dst,
1741b8e80941Smrg                                  is_signed);
1742b8e80941Smrg         dst += BLOCK_BYTES;
1743b8e80941Smrg      }
1744b8e80941Smrg      dst += dst_row_diff;
1745b8e80941Smrg   }
1746b8e80941Smrg}
1747b8e80941Smrg
1748b8e80941Smrg#endif
1749