/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
2301e04c3fSmrg
/*
 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
 */
2701e04c3fSmrg
2801e04c3fSmrg#ifndef TEXCOMPRESS_BPTC_TMP_H
2901e04c3fSmrg#define TEXCOMPRESS_BPTC_TMP_H
3001e04c3fSmrg
3101e04c3fSmrg#include "util/format_srgb.h"
3201e04c3fSmrg#include "util/half_float.h"
3301e04c3fSmrg#include "macros.h"
3401e04c3fSmrg
/* Edge length of a block in texels; a block holds BLOCK_SIZE * BLOCK_SIZE
 * texels, each of which has an index in the block data.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and in each row of
 * anchor_indices.
 */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size in bytes of one compressed block; it is
 * not referenced by the part of the file reviewed here.
 */
#define BLOCK_BYTES 16
3801e04c3fSmrg
/* Static description of the bit layout of one BPTC UNORM block mode.
 *
 * The member order must not change: bptc_unorm_modes[] initializes these
 * members positionally.
 */
struct bptc_unorm_mode {
   /* Number of subsets the texels of the block are divided into (1 to 3). */
   int n_subsets;
   /* Width in bits of the partition number field; 0 when there is none. */
   int n_partition_bits;
   /* Whether the block stores a 2-bit rotation value. */
   bool has_rotation_bits;
   /* Whether the block stores a 1-bit index selection value. */
   bool has_index_selection_bit;
   /* Bits stored for each colour component of each endpoint. */
   int n_color_bits;
   /* Bits stored for the alpha of each endpoint; 0 means alpha is not stored
    * and the decoded endpoints get an alpha of 255.
    */
   int n_alpha_bits;
   /* Whether each endpoint is followed by its own p-bit. */
   bool has_endpoint_pbits;
   /* Whether both endpoints of a subset share one p-bit. */
   bool has_shared_pbits;
   /* Width in bits of a primary index. */
   int n_index_bits;
   /* Width in bits of a secondary index; 0 when the mode has none. */
   int n_secondary_index_bits;
};
5101e04c3fSmrg
/* One entry of the bit layout list of a float mode (the bitfields member of
 * struct bptc_float_mode). The member order must not change because the
 * tables initialize these members positionally.
 *
 * NOTE(review): the code consuming these entries is not in the part of the
 * file reviewed here; the member descriptions are inferred from the names
 * and the table contents and should be confirmed against the decoder.
 */
struct bptc_float_bitfield {
   /* Presumably the endpoint the bits belong to. Every list in
    * bptc_float_modes[] ends with an entry whose endpoint is -1.
    */
   int8_t endpoint;
   /* Presumably the colour component the bits belong to. */
   uint8_t component;
   /* Presumably the position of the field's first bit within the value. */
   uint8_t offset;
   /* Presumably the width of the field in bits. */
   uint8_t n_bits;
   /* Presumably set when the bits are stored in reversed order. */
   bool reverse;
};
5901e04c3fSmrg
6001e04c3fSmrgstruct bptc_float_mode {
6101e04c3fSmrg   bool reserved;
6201e04c3fSmrg   bool transformed_endpoints;
6301e04c3fSmrg   int n_partition_bits;
6401e04c3fSmrg   int n_endpoint_bits;
6501e04c3fSmrg   int n_index_bits;
6601e04c3fSmrg   int n_delta_bits[3];
6701e04c3fSmrg   struct bptc_float_bitfield bitfields[24];
6801e04c3fSmrg};
6901e04c3fSmrg
/* State of a bit stream writer.
 *
 * NOTE(review): the functions operating on this structure are not in the
 * part of the file reviewed here; the member descriptions are inferred from
 * the names and types and should be confirmed.
 */
struct bit_writer {
   /* Presumably the byte currently being assembled. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably where the next completed byte is stored. */
   uint8_t *dst;
};
7501e04c3fSmrg
7601e04c3fSmrgstatic const struct bptc_unorm_mode
7701e04c3fSmrgbptc_unorm_modes[] = {
7801e04c3fSmrg   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
7901e04c3fSmrg   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
8001e04c3fSmrg   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
8101e04c3fSmrg   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
8201e04c3fSmrg   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
8301e04c3fSmrg   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
8401e04c3fSmrg   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
8501e04c3fSmrg   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
8601e04c3fSmrg};
8701e04c3fSmrg
8801e04c3fSmrgstatic const struct bptc_float_mode
8901e04c3fSmrgbptc_float_modes[] = {
9001e04c3fSmrg   /* 00 */
9101e04c3fSmrg   { false, true, 5, 10, 3, { 5, 5, 5 },
9201e04c3fSmrg     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
9301e04c3fSmrg       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
9401e04c3fSmrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
9501e04c3fSmrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
9601e04c3fSmrg       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
9701e04c3fSmrg       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
9801e04c3fSmrg       { 3, 2, 3, 1, false },
9901e04c3fSmrg       { -1 } }
10001e04c3fSmrg   },
10101e04c3fSmrg   /* 01 */
10201e04c3fSmrg   { false, true, 5, 7, 3, { 6, 6, 6 },
10301e04c3fSmrg     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
10401e04c3fSmrg       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
10501e04c3fSmrg       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
10601e04c3fSmrg       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
10701e04c3fSmrg       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
10801e04c3fSmrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
10901e04c3fSmrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
11001e04c3fSmrg       { 2, 0, 0, 6, false },
11101e04c3fSmrg       { 3, 0, 0, 6, false },
11201e04c3fSmrg       { -1 } }
11301e04c3fSmrg   },
11401e04c3fSmrg   /* 00010 */
11501e04c3fSmrg   { false, true, 5, 11, 3, { 5, 4, 4 },
11601e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
11701e04c3fSmrg       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
11801e04c3fSmrg       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
11901e04c3fSmrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
12001e04c3fSmrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
12101e04c3fSmrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
12201e04c3fSmrg       { -1 } }
12301e04c3fSmrg   },
12401e04c3fSmrg   /* 00011 */
12501e04c3fSmrg   { false, false, 0, 10, 4, { 10, 10, 10 },
12601e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
12701e04c3fSmrg       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
12801e04c3fSmrg       { -1 } }
12901e04c3fSmrg   },
13001e04c3fSmrg   /* 00110 */
13101e04c3fSmrg   { false, true, 5, 11, 3, { 4, 5, 4 },
13201e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
13301e04c3fSmrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
13401e04c3fSmrg       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
13501e04c3fSmrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
13601e04c3fSmrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
13701e04c3fSmrg       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
13801e04c3fSmrg       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
13901e04c3fSmrg       { -1 } }
14001e04c3fSmrg   },
14101e04c3fSmrg   /* 00111 */
14201e04c3fSmrg   { false, true, 0, 11, 4, { 9, 9, 9 },
14301e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
14401e04c3fSmrg       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
14501e04c3fSmrg       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
14601e04c3fSmrg       { -1 } }
14701e04c3fSmrg   },
14801e04c3fSmrg   /* 01010 */
14901e04c3fSmrg   { false, true, 5, 11, 3, { 4, 4, 5 },
15001e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
15101e04c3fSmrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
15201e04c3fSmrg       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
15301e04c3fSmrg       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
15401e04c3fSmrg       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
15501e04c3fSmrg       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
15601e04c3fSmrg       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
15701e04c3fSmrg       { -1 } }
15801e04c3fSmrg   },
15901e04c3fSmrg   /* 01011 */
16001e04c3fSmrg   { false, true, 0, 12, 4, { 8, 8, 8 },
16101e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
16201e04c3fSmrg       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
16301e04c3fSmrg       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
16401e04c3fSmrg       { -1 } }
16501e04c3fSmrg   },
16601e04c3fSmrg   /* 01110 */
16701e04c3fSmrg   { false, true, 5, 9, 3, { 5, 5, 5 },
16801e04c3fSmrg     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
16901e04c3fSmrg       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
17001e04c3fSmrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
17101e04c3fSmrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
17201e04c3fSmrg       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
17301e04c3fSmrg       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
17401e04c3fSmrg       { 3, 2, 3, 1, false },
17501e04c3fSmrg       { -1 } }
17601e04c3fSmrg   },
17701e04c3fSmrg   /* 01111 */
17801e04c3fSmrg   { false, true, 0, 16, 4, { 4, 4, 4 },
17901e04c3fSmrg     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
18001e04c3fSmrg       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
18101e04c3fSmrg       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
18201e04c3fSmrg       { -1 } }
18301e04c3fSmrg   },
18401e04c3fSmrg   /* 10010 */
18501e04c3fSmrg   { false, true, 5, 8, 3, { 6, 5, 5 },
18601e04c3fSmrg     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
18701e04c3fSmrg       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
18801e04c3fSmrg       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
18901e04c3fSmrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
19001e04c3fSmrg       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
19101e04c3fSmrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
19201e04c3fSmrg       { 3, 0, 0, 6, false },
19301e04c3fSmrg       { -1 } }
19401e04c3fSmrg   },
19501e04c3fSmrg   /* 10011 */
19601e04c3fSmrg   { true /* reserved */ },
19701e04c3fSmrg   /* 10110 */
19801e04c3fSmrg   { false, true, 5, 8, 3, { 5, 6, 5 },
19901e04c3fSmrg     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
20001e04c3fSmrg       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
20101e04c3fSmrg       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
20201e04c3fSmrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
20301e04c3fSmrg       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
20401e04c3fSmrg       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
20501e04c3fSmrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
20601e04c3fSmrg       { -1 } }
20701e04c3fSmrg   },
20801e04c3fSmrg   /* 10111 */
20901e04c3fSmrg   { true /* reserved */ },
21001e04c3fSmrg   /* 11010 */
21101e04c3fSmrg   { false, true, 5, 8, 3, { 5, 5, 6 },
21201e04c3fSmrg     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
21301e04c3fSmrg       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
21401e04c3fSmrg       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
21501e04c3fSmrg       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
21601e04c3fSmrg       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
21701e04c3fSmrg       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
21801e04c3fSmrg       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
21901e04c3fSmrg       { -1 } }
22001e04c3fSmrg   },
22101e04c3fSmrg   /* 11011 */
22201e04c3fSmrg   { true /* reserved */ },
22301e04c3fSmrg   /* 11110 */
22401e04c3fSmrg   { false, false, 5, 6, 3, { 6, 6, 6 },
22501e04c3fSmrg     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
22601e04c3fSmrg       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
22701e04c3fSmrg       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
22801e04c3fSmrg       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
22901e04c3fSmrg       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
23001e04c3fSmrg       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
23101e04c3fSmrg       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
23201e04c3fSmrg       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
23301e04c3fSmrg       { -1 } }
23401e04c3fSmrg   },
23501e04c3fSmrg   /* 11111 */
23601e04c3fSmrg   { true /* reserved */ },
23701e04c3fSmrg};
23801e04c3fSmrg
23901e04c3fSmrg/* This partition table is used when the mode has two subsets. Each
24001e04c3fSmrg * partition is represented by a 32-bit value which gives 2 bits per texel
24101e04c3fSmrg * within the block. The value of the two bits represents which subset to use
24201e04c3fSmrg * (0 or 1).
24301e04c3fSmrg */
24401e04c3fSmrgstatic const uint32_t
24501e04c3fSmrgpartition_table1[N_PARTITIONS] = {
24601e04c3fSmrg   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
24701e04c3fSmrg   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
24801e04c3fSmrg   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
24901e04c3fSmrg   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
25001e04c3fSmrg   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
25101e04c3fSmrg   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
25201e04c3fSmrg   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
25301e04c3fSmrg   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
25401e04c3fSmrg   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
25501e04c3fSmrg   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
25601e04c3fSmrg   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
25701e04c3fSmrg   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
25801e04c3fSmrg   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
25901e04c3fSmrg   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
26001e04c3fSmrg   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
26101e04c3fSmrg   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
26201e04c3fSmrg};
26301e04c3fSmrg
26401e04c3fSmrg/* This partition table is used when the mode has three subsets. In this case
26501e04c3fSmrg * the values can be 0, 1 or 2.
26601e04c3fSmrg */
26701e04c3fSmrgstatic const uint32_t
26801e04c3fSmrgpartition_table2[N_PARTITIONS] = {
26901e04c3fSmrg   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
27001e04c3fSmrg   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
27101e04c3fSmrg   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
27201e04c3fSmrg   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
27301e04c3fSmrg   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
27401e04c3fSmrg   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
27501e04c3fSmrg   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
27601e04c3fSmrg   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
27701e04c3fSmrg   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
27801e04c3fSmrg   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
27901e04c3fSmrg   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
28001e04c3fSmrg   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
28101e04c3fSmrg   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
28201e04c3fSmrg   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
28301e04c3fSmrg   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
28401e04c3fSmrg   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
28501e04c3fSmrg};
28601e04c3fSmrg
28701e04c3fSmrgstatic const uint8_t
28801e04c3fSmrganchor_indices[][N_PARTITIONS] = {
28901e04c3fSmrg   /* Anchor index values for the second subset of two-subset partitioning */
29001e04c3fSmrg   {
29101e04c3fSmrg      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
29201e04c3fSmrg      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
29301e04c3fSmrg      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
29401e04c3fSmrg      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
29501e04c3fSmrg   },
29601e04c3fSmrg
29701e04c3fSmrg   /* Anchor index values for the second subset of three-subset partitioning */
29801e04c3fSmrg   {
29901e04c3fSmrg      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
30001e04c3fSmrg      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
30101e04c3fSmrg      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
30201e04c3fSmrg      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
30301e04c3fSmrg   },
30401e04c3fSmrg
30501e04c3fSmrg   /* Anchor index values for the third subset of three-subset
30601e04c3fSmrg    * partitioning
30701e04c3fSmrg    */
30801e04c3fSmrg   {
30901e04c3fSmrg      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
31001e04c3fSmrg      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
31101e04c3fSmrg      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
31201e04c3fSmrg      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
31301e04c3fSmrg   }
31401e04c3fSmrg};
31501e04c3fSmrg
/*
 * Reads a bit field of n_bits bits out of block.
 *
 * block:  the bytes to read from
 * offset: position of the first bit of the field, counted from the
 *         least-significant bit of block[0]
 * n_bits: width of the field in bits
 *
 * Bits are taken from the low end of each byte upwards and bytes are visited
 * in increasing order; the first bit read ends up in bit 0 of the result.
 * Returns 0 without reading from block when n_bits is not positive.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int n_bits_in_byte;
   int result = 0;
   int bit = 0;

   /* An empty field has no bits to fetch. Returning here avoids reading a
    * byte that may lie past the end of the block and avoids building a mask
    * from a negative shift count.
    */
   if (n_bits <= 0)
      return 0;

   /* The first byte contributes at most the bits above bit_index. */
   n_bits_in_byte = 8 - bit_index;
   if (n_bits_in_byte > n_bits)
      n_bits_in_byte = n_bits;

   while (true) {
      result |= ((block[byte_index] >> bit_index) &
                 ((1 << n_bits_in_byte) - 1)) << bit;

      n_bits -= n_bits_in_byte;

      if (n_bits <= 0)
         return result;

      /* Continue at the bottom of the next byte. */
      bit += n_bits_in_byte;
      byte_index++;
      bit_index = 0;
      n_bits_in_byte = n_bits < 8 ? n_bits : 8;
   }
}
34201e04c3fSmrg
/*
 * Expands an n_bits wide value to a full byte.
 *
 * byte:   the value, held in the low n_bits bits
 * n_bits: width of the value; both shift counts below are only non-negative
 *         for widths from 4 to 8
 *
 * Returns the value moved to the top of the byte with its most-significant
 * bits repeated in the bits that become free at the bottom.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   /* Expands a n-bit quantity into a byte by copying the most-significant
    * bits into the unused least-significant bits.
    */
   return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
}
35201e04c3fSmrg
35301e04c3fSmrgstatic int
35401e04c3fSmrgextract_unorm_endpoints(const struct bptc_unorm_mode *mode,
35501e04c3fSmrg                        const uint8_t *block,
35601e04c3fSmrg                        int bit_offset,
35701e04c3fSmrg                        uint8_t endpoints[][4])
35801e04c3fSmrg{
35901e04c3fSmrg   int component;
36001e04c3fSmrg   int subset;
36101e04c3fSmrg   int endpoint;
36201e04c3fSmrg   int pbit;
36301e04c3fSmrg   int n_components;
36401e04c3fSmrg
36501e04c3fSmrg   /* Extract each color component */
36601e04c3fSmrg   for (component = 0; component < 3; component++) {
36701e04c3fSmrg      for (subset = 0; subset < mode->n_subsets; subset++) {
36801e04c3fSmrg         for (endpoint = 0; endpoint < 2; endpoint++) {
36901e04c3fSmrg            endpoints[subset * 2 + endpoint][component] =
37001e04c3fSmrg               extract_bits(block, bit_offset, mode->n_color_bits);
37101e04c3fSmrg            bit_offset += mode->n_color_bits;
37201e04c3fSmrg         }
37301e04c3fSmrg      }
37401e04c3fSmrg   }
37501e04c3fSmrg
37601e04c3fSmrg   /* Extract the alpha values */
37701e04c3fSmrg   if (mode->n_alpha_bits > 0) {
37801e04c3fSmrg      for (subset = 0; subset < mode->n_subsets; subset++) {
37901e04c3fSmrg         for (endpoint = 0; endpoint < 2; endpoint++) {
38001e04c3fSmrg            endpoints[subset * 2 + endpoint][3] =
38101e04c3fSmrg               extract_bits(block, bit_offset, mode->n_alpha_bits);
38201e04c3fSmrg            bit_offset += mode->n_alpha_bits;
38301e04c3fSmrg         }
38401e04c3fSmrg      }
38501e04c3fSmrg
38601e04c3fSmrg      n_components = 4;
38701e04c3fSmrg   } else {
38801e04c3fSmrg      for (subset = 0; subset < mode->n_subsets; subset++)
38901e04c3fSmrg         for (endpoint = 0; endpoint < 2; endpoint++)
39001e04c3fSmrg            endpoints[subset * 2 + endpoint][3] = 255;
39101e04c3fSmrg
39201e04c3fSmrg      n_components = 3;
39301e04c3fSmrg   }
39401e04c3fSmrg
39501e04c3fSmrg   /* Add in the p-bits */
39601e04c3fSmrg   if (mode->has_endpoint_pbits) {
39701e04c3fSmrg      for (subset = 0; subset < mode->n_subsets; subset++) {
39801e04c3fSmrg         for (endpoint = 0; endpoint < 2; endpoint++) {
39901e04c3fSmrg            pbit = extract_bits(block, bit_offset, 1);
40001e04c3fSmrg            bit_offset += 1;
40101e04c3fSmrg
40201e04c3fSmrg            for (component = 0; component < n_components; component++) {
40301e04c3fSmrg               endpoints[subset * 2 + endpoint][component] <<= 1;
40401e04c3fSmrg               endpoints[subset * 2 + endpoint][component] |= pbit;
40501e04c3fSmrg            }
40601e04c3fSmrg         }
40701e04c3fSmrg      }
40801e04c3fSmrg   } else if (mode->has_shared_pbits) {
40901e04c3fSmrg      for (subset = 0; subset < mode->n_subsets; subset++) {
41001e04c3fSmrg         pbit = extract_bits(block, bit_offset, 1);
41101e04c3fSmrg         bit_offset += 1;
41201e04c3fSmrg
41301e04c3fSmrg         for (endpoint = 0; endpoint < 2; endpoint++) {
41401e04c3fSmrg            for (component = 0; component < n_components; component++) {
41501e04c3fSmrg               endpoints[subset * 2 + endpoint][component] <<= 1;
41601e04c3fSmrg               endpoints[subset * 2 + endpoint][component] |= pbit;
41701e04c3fSmrg            }
41801e04c3fSmrg         }
41901e04c3fSmrg      }
42001e04c3fSmrg   }
42101e04c3fSmrg
42201e04c3fSmrg   /* Expand the n-bit values to a byte */
42301e04c3fSmrg   for (subset = 0; subset < mode->n_subsets; subset++) {
42401e04c3fSmrg      for (endpoint = 0; endpoint < 2; endpoint++) {
42501e04c3fSmrg         for (component = 0; component < 3; component++) {
42601e04c3fSmrg            endpoints[subset * 2 + endpoint][component] =
42701e04c3fSmrg               expand_component(endpoints[subset * 2 + endpoint][component],
42801e04c3fSmrg                                mode->n_color_bits +
42901e04c3fSmrg                                mode->has_endpoint_pbits +
43001e04c3fSmrg                                mode->has_shared_pbits);
43101e04c3fSmrg         }
43201e04c3fSmrg
43301e04c3fSmrg         if (mode->n_alpha_bits > 0) {
43401e04c3fSmrg            endpoints[subset * 2 + endpoint][3] =
43501e04c3fSmrg               expand_component(endpoints[subset * 2 + endpoint][3],
43601e04c3fSmrg                                mode->n_alpha_bits +
43701e04c3fSmrg                                mode->has_endpoint_pbits +
43801e04c3fSmrg                                mode->has_shared_pbits);
43901e04c3fSmrg         }
44001e04c3fSmrg      }
44101e04c3fSmrg   }
44201e04c3fSmrg
44301e04c3fSmrg   return bit_offset;
44401e04c3fSmrg}
44501e04c3fSmrg
44601e04c3fSmrgstatic bool
44701e04c3fSmrgis_anchor(int n_subsets,
44801e04c3fSmrg          int partition_num,
44901e04c3fSmrg          int texel)
45001e04c3fSmrg{
45101e04c3fSmrg   if (texel == 0)
45201e04c3fSmrg      return true;
45301e04c3fSmrg
45401e04c3fSmrg   switch (n_subsets) {
45501e04c3fSmrg   case 1:
45601e04c3fSmrg      return false;
45701e04c3fSmrg   case 2:
45801e04c3fSmrg      return anchor_indices[0][partition_num] == texel;
45901e04c3fSmrg   case 3:
46001e04c3fSmrg      return (anchor_indices[1][partition_num] == texel ||
46101e04c3fSmrg              anchor_indices[2][partition_num] == texel);
46201e04c3fSmrg   default:
46301e04c3fSmrg      assert(false);
46401e04c3fSmrg      return false;
46501e04c3fSmrg   }
46601e04c3fSmrg}
46701e04c3fSmrg
46801e04c3fSmrgstatic int
46901e04c3fSmrgcount_anchors_before_texel(int n_subsets,
47001e04c3fSmrg                           int partition_num,
47101e04c3fSmrg                           int texel)
47201e04c3fSmrg{
47301e04c3fSmrg   int count = 1;
47401e04c3fSmrg
47501e04c3fSmrg   if (texel == 0)
47601e04c3fSmrg      return 0;
47701e04c3fSmrg
47801e04c3fSmrg   switch (n_subsets) {
47901e04c3fSmrg   case 1:
48001e04c3fSmrg      break;
48101e04c3fSmrg   case 2:
48201e04c3fSmrg      if (texel > anchor_indices[0][partition_num])
48301e04c3fSmrg         count++;
48401e04c3fSmrg      break;
48501e04c3fSmrg   case 3:
48601e04c3fSmrg      if (texel > anchor_indices[1][partition_num])
48701e04c3fSmrg         count++;
48801e04c3fSmrg      if (texel > anchor_indices[2][partition_num])
48901e04c3fSmrg         count++;
49001e04c3fSmrg      break;
49101e04c3fSmrg   default:
49201e04c3fSmrg      assert(false);
49301e04c3fSmrg      return 0;
49401e04c3fSmrg   }
49501e04c3fSmrg
49601e04c3fSmrg   return count;
49701e04c3fSmrg}
49801e04c3fSmrg
/*
 * Blends two endpoint values using the weight selected by an index.
 *
 * a, b:       the two endpoint values
 * index:      index of the texel; must be below 1 << index_bits
 * index_bits: width of the index; only 2, 3 and 4 have a weight table, any
 *             other value leads to an invalid table access
 *
 * The weights run from 0 (result is a) to 64 (result is b). Returns
 * ((64 - weight) * a + weight * b + 32) >> 6.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t weights2[] = { 0, 21, 43, 64 };
   static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t weights4[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index_bits; there are no tables for 0 and 1 bits. */
   static const uint8_t *weights[] = {
      NULL, NULL, weights2, weights3, weights4
   };
   int weight;

   weight = weights[index_bits][index];

   /* Adding 32 before dropping 6 bits rounds to the nearest integer. */
   return ((64 - weight) * a + weight * b + 32) >> 6;
}
51701e04c3fSmrg
/*
 * Applies the rotation of a block to a decoded texel.
 *
 * rotation: 0 leaves the texel unchanged; 1, 2 and 3 exchange component
 *           0, 1 and 2 respectively with component 3
 * result:   the four components of the texel, modified in place
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   uint8_t t;

   if (rotation == 0)
      return;

   /* Turn the rotation value into the index of the component to swap. */
   rotation--;

   t = result[rotation];
   result[rotation] = result[3];
   result[3] = t;
}
53301e04c3fSmrg
53401e04c3fSmrgstatic void
53501e04c3fSmrgfetch_rgba_unorm_from_block(const uint8_t *block,
53601e04c3fSmrg                            uint8_t *result,
53701e04c3fSmrg                            int texel)
53801e04c3fSmrg{
53901e04c3fSmrg   int mode_num = ffs(block[0]);
54001e04c3fSmrg   const struct bptc_unorm_mode *mode;
54101e04c3fSmrg   int bit_offset, secondary_bit_offset;
54201e04c3fSmrg   int partition_num;
54301e04c3fSmrg   int subset_num;
54401e04c3fSmrg   int rotation;
54501e04c3fSmrg   int index_selection;
54601e04c3fSmrg   int index_bits;
54701e04c3fSmrg   int indices[2];
54801e04c3fSmrg   int index;
54901e04c3fSmrg   int anchors_before_texel;
55001e04c3fSmrg   bool anchor;
55101e04c3fSmrg   uint8_t endpoints[3 * 2][4];
55201e04c3fSmrg   uint32_t subsets;
55301e04c3fSmrg   int component;
55401e04c3fSmrg
55501e04c3fSmrg   if (mode_num == 0) {
55601e04c3fSmrg      /* According to the spec this mode is reserved and shouldn't be used. */
5577ec681f3Smrg      memset(result, 0, 4);
55801e04c3fSmrg      return;
55901e04c3fSmrg   }
56001e04c3fSmrg
56101e04c3fSmrg   mode = bptc_unorm_modes + mode_num - 1;
56201e04c3fSmrg   bit_offset = mode_num;
56301e04c3fSmrg
56401e04c3fSmrg   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
56501e04c3fSmrg   bit_offset += mode->n_partition_bits;
56601e04c3fSmrg
56701e04c3fSmrg   switch (mode->n_subsets) {
56801e04c3fSmrg   case 1:
56901e04c3fSmrg      subsets = 0;
57001e04c3fSmrg      break;
57101e04c3fSmrg   case 2:
57201e04c3fSmrg      subsets = partition_table1[partition_num];
57301e04c3fSmrg      break;
57401e04c3fSmrg   case 3:
57501e04c3fSmrg      subsets = partition_table2[partition_num];
57601e04c3fSmrg      break;
57701e04c3fSmrg   default:
57801e04c3fSmrg      assert(false);
57901e04c3fSmrg      return;
58001e04c3fSmrg   }
58101e04c3fSmrg
58201e04c3fSmrg   if (mode->has_rotation_bits) {
58301e04c3fSmrg      rotation = extract_bits(block, bit_offset, 2);
58401e04c3fSmrg      bit_offset += 2;
58501e04c3fSmrg   } else {
58601e04c3fSmrg      rotation = 0;
58701e04c3fSmrg   }
58801e04c3fSmrg
58901e04c3fSmrg   if (mode->has_index_selection_bit) {
59001e04c3fSmrg      index_selection = extract_bits(block, bit_offset, 1);
59101e04c3fSmrg      bit_offset++;
59201e04c3fSmrg   } else {
59301e04c3fSmrg      index_selection = 0;
59401e04c3fSmrg   }
59501e04c3fSmrg
59601e04c3fSmrg   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
59701e04c3fSmrg
59801e04c3fSmrg   anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
59901e04c3fSmrg                                                     partition_num, texel);
60001e04c3fSmrg
60101e04c3fSmrg   /* Calculate the offset to the secondary index */
60201e04c3fSmrg   secondary_bit_offset = (bit_offset +
60301e04c3fSmrg                           BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
60401e04c3fSmrg                           mode->n_subsets +
60501e04c3fSmrg                           mode->n_secondary_index_bits * texel -
60601e04c3fSmrg                           anchors_before_texel);
60701e04c3fSmrg
60801e04c3fSmrg   /* Calculate the offset to the primary index for this texel */
60901e04c3fSmrg   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
61001e04c3fSmrg
61101e04c3fSmrg   subset_num = (subsets >> (texel * 2)) & 3;
61201e04c3fSmrg
61301e04c3fSmrg   anchor = is_anchor(mode->n_subsets, partition_num, texel);
61401e04c3fSmrg
61501e04c3fSmrg   index_bits = mode->n_index_bits;
61601e04c3fSmrg   if (anchor)
61701e04c3fSmrg      index_bits--;
61801e04c3fSmrg   indices[0] = extract_bits(block, bit_offset, index_bits);
61901e04c3fSmrg
62001e04c3fSmrg   if (mode->n_secondary_index_bits) {
62101e04c3fSmrg      index_bits = mode->n_secondary_index_bits;
62201e04c3fSmrg      if (anchor)
62301e04c3fSmrg         index_bits--;
62401e04c3fSmrg      indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
62501e04c3fSmrg   }
62601e04c3fSmrg
62701e04c3fSmrg   index = indices[index_selection];
62801e04c3fSmrg   index_bits = (index_selection ?
62901e04c3fSmrg                 mode->n_secondary_index_bits :
63001e04c3fSmrg                 mode->n_index_bits);
63101e04c3fSmrg
63201e04c3fSmrg   for (component = 0; component < 3; component++)
63301e04c3fSmrg      result[component] = interpolate(endpoints[subset_num * 2][component],
63401e04c3fSmrg                                      endpoints[subset_num * 2 + 1][component],
63501e04c3fSmrg                                      index,
63601e04c3fSmrg                                      index_bits);
63701e04c3fSmrg
63801e04c3fSmrg   /* Alpha uses the opposite index from the color components */
63901e04c3fSmrg   if (mode->n_secondary_index_bits && !index_selection) {
64001e04c3fSmrg      index = indices[1];
64101e04c3fSmrg      index_bits = mode->n_secondary_index_bits;
64201e04c3fSmrg   } else {
64301e04c3fSmrg      index = indices[0];
64401e04c3fSmrg      index_bits = mode->n_index_bits;
64501e04c3fSmrg   }
64601e04c3fSmrg
64701e04c3fSmrg   result[3] = interpolate(endpoints[subset_num * 2][3],
64801e04c3fSmrg                           endpoints[subset_num * 2 + 1][3],
64901e04c3fSmrg                           index,
65001e04c3fSmrg                           index_bits);
65101e04c3fSmrg
65201e04c3fSmrg   apply_rotation(rotation, result);
65301e04c3fSmrg}
65401e04c3fSmrg
65501e04c3fSmrg#ifdef BPTC_BLOCK_DECODE
65601e04c3fSmrgstatic void
65701e04c3fSmrgdecompress_rgba_unorm_block(int src_width, int src_height,
65801e04c3fSmrg                            const uint8_t *block,
65901e04c3fSmrg                            uint8_t *dst_row, int dst_rowstride)
66001e04c3fSmrg{
66101e04c3fSmrg   int mode_num = ffs(block[0]);
66201e04c3fSmrg   const struct bptc_unorm_mode *mode;
6637ec681f3Smrg   int bit_offset_head, bit_offset, secondary_bit_offset;
66401e04c3fSmrg   int partition_num;
66501e04c3fSmrg   int subset_num;
66601e04c3fSmrg   int rotation;
66701e04c3fSmrg   int index_selection;
66801e04c3fSmrg   int index_bits;
66901e04c3fSmrg   int indices[2];
67001e04c3fSmrg   int index;
67101e04c3fSmrg   int anchors_before_texel;
67201e04c3fSmrg   bool anchor;
67301e04c3fSmrg   uint8_t endpoints[3 * 2][4];
67401e04c3fSmrg   uint32_t subsets;
67501e04c3fSmrg   int component;
67601e04c3fSmrg   unsigned x, y;
67701e04c3fSmrg
67801e04c3fSmrg   if (mode_num == 0) {
67901e04c3fSmrg      /* According to the spec this mode is reserved and shouldn't be used. */
68001e04c3fSmrg      for(y = 0; y < src_height; y += 1) {
68101e04c3fSmrg         uint8_t *result = dst_row;
68201e04c3fSmrg         memset(result, 0, 4 * src_width);
68301e04c3fSmrg         dst_row += dst_rowstride;
68401e04c3fSmrg      }
68501e04c3fSmrg      return;
68601e04c3fSmrg   }
68701e04c3fSmrg
68801e04c3fSmrg   mode = bptc_unorm_modes + mode_num - 1;
6897ec681f3Smrg   bit_offset_head = mode_num;
69001e04c3fSmrg
6917ec681f3Smrg   partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
6927ec681f3Smrg   bit_offset_head += mode->n_partition_bits;
69301e04c3fSmrg
69401e04c3fSmrg   switch (mode->n_subsets) {
69501e04c3fSmrg   case 1:
69601e04c3fSmrg      subsets = 0;
69701e04c3fSmrg      break;
69801e04c3fSmrg   case 2:
69901e04c3fSmrg      subsets = partition_table1[partition_num];
70001e04c3fSmrg      break;
70101e04c3fSmrg   case 3:
70201e04c3fSmrg      subsets = partition_table2[partition_num];
70301e04c3fSmrg      break;
70401e04c3fSmrg   default:
70501e04c3fSmrg      assert(false);
70601e04c3fSmrg      return;
70701e04c3fSmrg   }
70801e04c3fSmrg
70901e04c3fSmrg   if (mode->has_rotation_bits) {
7107ec681f3Smrg      rotation = extract_bits(block, bit_offset_head, 2);
7117ec681f3Smrg      bit_offset_head += 2;
71201e04c3fSmrg   } else {
71301e04c3fSmrg      rotation = 0;
71401e04c3fSmrg   }
71501e04c3fSmrg
71601e04c3fSmrg   if (mode->has_index_selection_bit) {
7177ec681f3Smrg      index_selection = extract_bits(block, bit_offset_head, 1);
7187ec681f3Smrg      bit_offset_head++;
71901e04c3fSmrg   } else {
72001e04c3fSmrg      index_selection = 0;
72101e04c3fSmrg   }
72201e04c3fSmrg
7237ec681f3Smrg   bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
72401e04c3fSmrg
72501e04c3fSmrg   for(y = 0; y < src_height; y += 1) {
72601e04c3fSmrg      uint8_t *result = dst_row;
72701e04c3fSmrg      for(x = 0; x < src_width; x += 1) {
72801e04c3fSmrg         int texel;
72901e04c3fSmrg         texel = x + y * 4;
7307ec681f3Smrg         bit_offset = bit_offset_head;
73101e04c3fSmrg
73201e04c3fSmrg         anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
73301e04c3fSmrg                                                           partition_num,
73401e04c3fSmrg                                                           texel);
73501e04c3fSmrg
73601e04c3fSmrg         /* Calculate the offset to the secondary index */
73701e04c3fSmrg         secondary_bit_offset = (bit_offset +
73801e04c3fSmrg                                 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
73901e04c3fSmrg                                 mode->n_subsets +
74001e04c3fSmrg                                 mode->n_secondary_index_bits * texel -
74101e04c3fSmrg                                 anchors_before_texel);
74201e04c3fSmrg
74301e04c3fSmrg         /* Calculate the offset to the primary index for this texel */
74401e04c3fSmrg         bit_offset += mode->n_index_bits * texel - anchors_before_texel;
74501e04c3fSmrg
74601e04c3fSmrg         subset_num = (subsets >> (texel * 2)) & 3;
74701e04c3fSmrg
74801e04c3fSmrg         anchor = is_anchor(mode->n_subsets, partition_num, texel);
74901e04c3fSmrg
75001e04c3fSmrg         index_bits = mode->n_index_bits;
75101e04c3fSmrg         if (anchor)
75201e04c3fSmrg            index_bits--;
75301e04c3fSmrg         indices[0] = extract_bits(block, bit_offset, index_bits);
75401e04c3fSmrg
75501e04c3fSmrg         if (mode->n_secondary_index_bits) {
75601e04c3fSmrg            index_bits = mode->n_secondary_index_bits;
75701e04c3fSmrg            if (anchor)
75801e04c3fSmrg               index_bits--;
75901e04c3fSmrg            indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
76001e04c3fSmrg         }
76101e04c3fSmrg
76201e04c3fSmrg         index = indices[index_selection];
76301e04c3fSmrg         index_bits = (index_selection ?
76401e04c3fSmrg                       mode->n_secondary_index_bits :
76501e04c3fSmrg                       mode->n_index_bits);
76601e04c3fSmrg
76701e04c3fSmrg         for (component = 0; component < 3; component++)
76801e04c3fSmrg            result[component] = interpolate(endpoints[subset_num * 2][component],
76901e04c3fSmrg                                            endpoints[subset_num * 2 + 1][component],
77001e04c3fSmrg                                            index,
77101e04c3fSmrg                                            index_bits);
77201e04c3fSmrg
77301e04c3fSmrg         /* Alpha uses the opposite index from the color components */
77401e04c3fSmrg         if (mode->n_secondary_index_bits && !index_selection) {
77501e04c3fSmrg            index = indices[1];
77601e04c3fSmrg            index_bits = mode->n_secondary_index_bits;
77701e04c3fSmrg         } else {
77801e04c3fSmrg            index = indices[0];
77901e04c3fSmrg            index_bits = mode->n_index_bits;
78001e04c3fSmrg         }
78101e04c3fSmrg
78201e04c3fSmrg         result[3] = interpolate(endpoints[subset_num * 2][3],
78301e04c3fSmrg                                 endpoints[subset_num * 2 + 1][3],
78401e04c3fSmrg                                 index,
78501e04c3fSmrg                                 index_bits);
78601e04c3fSmrg
78701e04c3fSmrg         apply_rotation(rotation, result);
78801e04c3fSmrg         result += 4;
78901e04c3fSmrg      }
79001e04c3fSmrg      dst_row += dst_rowstride;
79101e04c3fSmrg   }
79201e04c3fSmrg}
79301e04c3fSmrg
79401e04c3fSmrgstatic void
79501e04c3fSmrgdecompress_rgba_unorm(int width, int height,
79601e04c3fSmrg                      const uint8_t *src, int src_rowstride,
79701e04c3fSmrg                      uint8_t *dst, int dst_rowstride)
79801e04c3fSmrg{
79901e04c3fSmrg   int src_row_diff;
80001e04c3fSmrg   int y, x;
80101e04c3fSmrg
80201e04c3fSmrg   if (src_rowstride >= width * 4)
80301e04c3fSmrg      src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
80401e04c3fSmrg   else
80501e04c3fSmrg      src_row_diff = 0;
80601e04c3fSmrg
80701e04c3fSmrg   for (y = 0; y < height; y += BLOCK_SIZE) {
80801e04c3fSmrg      for (x = 0; x < width; x += BLOCK_SIZE) {
80901e04c3fSmrg         decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
81001e04c3fSmrg                                     MIN2(height - y, BLOCK_SIZE),
81101e04c3fSmrg                                     src,
81201e04c3fSmrg                                     dst + x * 4 + y * dst_rowstride,
81301e04c3fSmrg                                     dst_rowstride);
81401e04c3fSmrg         src += BLOCK_BYTES;
81501e04c3fSmrg      }
81601e04c3fSmrg      src += src_row_diff;
81701e04c3fSmrg   }
81801e04c3fSmrg}
81901e04c3fSmrg#endif // BPTC_BLOCK_DECODE
82001e04c3fSmrg
/*
 * Interpret the low n_bits of value as a two's-complement number and return
 * it widened to 32 bits.  Bits above n_bits are ignored.  n_bits must be in
 * the range 1..31.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   assert(n_bits > 0 && n_bits < 32);

   const uint32_t sign_bit = (uint32_t)1 << (n_bits - 1);
   const uint32_t field = (uint32_t)value & ((sign_bit << 1) - 1);

   /* Flipping the sign bit and subtracting it back propagates it upwards. */
   return (int32_t)((field ^ sign_bit) - sign_bit);
}
83001e04c3fSmrg
/*
 * Expand a signed endpoint value stored with n_endpoint_bits of precision.
 *
 * Values with 16 or more bits, and zero, are returned unchanged.  Otherwise
 * the magnitude is scaled so that the largest magnitudes map to 0x7fff, and
 * the original sign is reapplied.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   const bool negative = value < 0;
   const int saturation_point = (1 << (n_endpoint_bits - 1)) - 1;
   int magnitude = negative ? -value : value;

   if (magnitude >= saturation_point)
      magnitude = 0x7fff;
   else
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -magnitude : magnitude;
}
85901e04c3fSmrg
/*
 * Expand an unsigned endpoint value stored with n_endpoint_bits of
 * precision.
 *
 * Values with 15 or more bits, and zero, are returned unchanged.  The
 * all-ones value maps to 0xffff; everything else is scaled up.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   const int all_ones = (1 << n_endpoint_bits) - 1;

   if (value == all_ones)
      return 0xffff;

   const int scaled = (value << 15) + 0x4000;

   return scaled >> (n_endpoint_bits - 1);
}
87401e04c3fSmrg
87501e04c3fSmrgstatic int
87601e04c3fSmrgextract_float_endpoints(const struct bptc_float_mode *mode,
87701e04c3fSmrg                        const uint8_t *block,
87801e04c3fSmrg                        int bit_offset,
87901e04c3fSmrg                        int32_t endpoints[][3],
88001e04c3fSmrg                        bool is_signed)
88101e04c3fSmrg{
88201e04c3fSmrg   const struct bptc_float_bitfield *bitfield;
88301e04c3fSmrg   int endpoint, component;
88401e04c3fSmrg   int n_endpoints;
88501e04c3fSmrg   int value;
88601e04c3fSmrg   int i;
88701e04c3fSmrg
88801e04c3fSmrg   if (mode->n_partition_bits)
88901e04c3fSmrg      n_endpoints = 4;
89001e04c3fSmrg   else
89101e04c3fSmrg      n_endpoints = 2;
89201e04c3fSmrg
89301e04c3fSmrg   memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
89401e04c3fSmrg
89501e04c3fSmrg   for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
89601e04c3fSmrg      value = extract_bits(block, bit_offset, bitfield->n_bits);
89701e04c3fSmrg      bit_offset += bitfield->n_bits;
89801e04c3fSmrg
89901e04c3fSmrg      if (bitfield->reverse) {
90001e04c3fSmrg         for (i = 0; i < bitfield->n_bits; i++) {
90101e04c3fSmrg            if (value & (1 << i))
90201e04c3fSmrg               endpoints[bitfield->endpoint][bitfield->component] |=
90301e04c3fSmrg                  1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
90401e04c3fSmrg         }
90501e04c3fSmrg      } else {
90601e04c3fSmrg         endpoints[bitfield->endpoint][bitfield->component] |=
90701e04c3fSmrg            value << bitfield->offset;
90801e04c3fSmrg      }
90901e04c3fSmrg   }
91001e04c3fSmrg
91101e04c3fSmrg   if (mode->transformed_endpoints) {
91201e04c3fSmrg      /* The endpoints are specified as signed offsets from e0 */
91301e04c3fSmrg      for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
91401e04c3fSmrg         for (component = 0; component < 3; component++) {
91501e04c3fSmrg            value = sign_extend(endpoints[endpoint][component],
91601e04c3fSmrg                                mode->n_delta_bits[component]);
91701e04c3fSmrg            endpoints[endpoint][component] =
91801e04c3fSmrg               ((endpoints[0][component] + value) &
91901e04c3fSmrg                ((1 << mode->n_endpoint_bits) - 1));
92001e04c3fSmrg         }
92101e04c3fSmrg      }
92201e04c3fSmrg   }
92301e04c3fSmrg
92401e04c3fSmrg   if (is_signed) {
92501e04c3fSmrg      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
92601e04c3fSmrg         for (component = 0; component < 3; component++) {
92701e04c3fSmrg            value = sign_extend(endpoints[endpoint][component],
92801e04c3fSmrg                                mode->n_endpoint_bits);
92901e04c3fSmrg            endpoints[endpoint][component] =
93001e04c3fSmrg               signed_unquantize(value, mode->n_endpoint_bits);
93101e04c3fSmrg         }
93201e04c3fSmrg      }
93301e04c3fSmrg   } else {
93401e04c3fSmrg      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
93501e04c3fSmrg         for (component = 0; component < 3; component++) {
93601e04c3fSmrg            endpoints[endpoint][component] =
93701e04c3fSmrg               unsigned_unquantize(endpoints[endpoint][component],
93801e04c3fSmrg                                   mode->n_endpoint_bits);
93901e04c3fSmrg         }
94001e04c3fSmrg      }
94101e04c3fSmrg   }
94201e04c3fSmrg
94301e04c3fSmrg   return bit_offset;
94401e04c3fSmrg}
94501e04c3fSmrg
/* Final scaling step for unsigned values: multiply by 31/64. */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
95101e04c3fSmrg
/*
 * Final scaling step for signed values: scale the magnitude by 31/32 and,
 * for negative inputs, set bit 15 of the result.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = (value < 0 ? -value : value) * 31 / 32;

   if (value < 0)
      return magnitude | 0x8000;

   return magnitude;
}
96001e04c3fSmrg
96101e04c3fSmrgstatic void
96201e04c3fSmrgfetch_rgb_float_from_block(const uint8_t *block,
96301e04c3fSmrg                           float *result,
96401e04c3fSmrg                           int texel,
96501e04c3fSmrg                           bool is_signed)
96601e04c3fSmrg{
96701e04c3fSmrg   int mode_num;
96801e04c3fSmrg   const struct bptc_float_mode *mode;
96901e04c3fSmrg   int bit_offset;
97001e04c3fSmrg   int partition_num;
97101e04c3fSmrg   int subset_num;
97201e04c3fSmrg   int index_bits;
97301e04c3fSmrg   int index;
97401e04c3fSmrg   int anchors_before_texel;
97501e04c3fSmrg   int32_t endpoints[2 * 2][3];
97601e04c3fSmrg   uint32_t subsets;
97701e04c3fSmrg   int n_subsets;
97801e04c3fSmrg   int component;
97901e04c3fSmrg   int32_t value;
98001e04c3fSmrg
98101e04c3fSmrg   if (block[0] & 0x2) {
98201e04c3fSmrg      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
98301e04c3fSmrg      bit_offset = 5;
98401e04c3fSmrg   } else {
98501e04c3fSmrg      mode_num = block[0] & 3;
98601e04c3fSmrg      bit_offset = 2;
98701e04c3fSmrg   }
98801e04c3fSmrg
98901e04c3fSmrg   mode = bptc_float_modes + mode_num;
99001e04c3fSmrg
99101e04c3fSmrg   if (mode->reserved) {
99201e04c3fSmrg      memset(result, 0, sizeof result[0] * 3);
99301e04c3fSmrg      result[3] = 1.0f;
99401e04c3fSmrg      return;
99501e04c3fSmrg   }
99601e04c3fSmrg
99701e04c3fSmrg   bit_offset = extract_float_endpoints(mode, block, bit_offset,
99801e04c3fSmrg                                        endpoints, is_signed);
99901e04c3fSmrg
100001e04c3fSmrg   if (mode->n_partition_bits) {
100101e04c3fSmrg      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
100201e04c3fSmrg      bit_offset += mode->n_partition_bits;
100301e04c3fSmrg
100401e04c3fSmrg      subsets = partition_table1[partition_num];
100501e04c3fSmrg      n_subsets = 2;
100601e04c3fSmrg   } else {
100701e04c3fSmrg      partition_num = 0;
100801e04c3fSmrg      subsets = 0;
100901e04c3fSmrg      n_subsets = 1;
101001e04c3fSmrg   }
101101e04c3fSmrg
101201e04c3fSmrg   anchors_before_texel =
101301e04c3fSmrg      count_anchors_before_texel(n_subsets, partition_num, texel);
101401e04c3fSmrg
101501e04c3fSmrg   /* Calculate the offset to the primary index for this texel */
101601e04c3fSmrg   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
101701e04c3fSmrg
101801e04c3fSmrg   subset_num = (subsets >> (texel * 2)) & 3;
101901e04c3fSmrg
102001e04c3fSmrg   index_bits = mode->n_index_bits;
102101e04c3fSmrg   if (is_anchor(n_subsets, partition_num, texel))
102201e04c3fSmrg      index_bits--;
102301e04c3fSmrg   index = extract_bits(block, bit_offset, index_bits);
102401e04c3fSmrg
102501e04c3fSmrg   for (component = 0; component < 3; component++) {
102601e04c3fSmrg      value = interpolate(endpoints[subset_num * 2][component],
102701e04c3fSmrg                          endpoints[subset_num * 2 + 1][component],
102801e04c3fSmrg                          index,
102901e04c3fSmrg                          mode->n_index_bits);
103001e04c3fSmrg
103101e04c3fSmrg      if (is_signed)
103201e04c3fSmrg         value = finish_signed_unquantize(value);
103301e04c3fSmrg      else
103401e04c3fSmrg         value = finish_unsigned_unquantize(value);
103501e04c3fSmrg
103601e04c3fSmrg      result[component] = _mesa_half_to_float(value);
103701e04c3fSmrg   }
103801e04c3fSmrg
103901e04c3fSmrg   result[3] = 1.0f;
104001e04c3fSmrg}
104101e04c3fSmrg
104201e04c3fSmrg#ifdef BPTC_BLOCK_DECODE
104301e04c3fSmrgstatic void
104401e04c3fSmrgdecompress_rgb_float_block(unsigned src_width, unsigned src_height,
104501e04c3fSmrg                           const uint8_t *block,
104601e04c3fSmrg                           float *dst_row, unsigned dst_rowstride,
104701e04c3fSmrg                           bool is_signed)
104801e04c3fSmrg{
104901e04c3fSmrg   int mode_num;
105001e04c3fSmrg   const struct bptc_float_mode *mode;
10517ec681f3Smrg   int bit_offset_head, bit_offset;
105201e04c3fSmrg   int partition_num;
105301e04c3fSmrg   int subset_num;
105401e04c3fSmrg   int index_bits;
105501e04c3fSmrg   int index;
105601e04c3fSmrg   int anchors_before_texel;
105701e04c3fSmrg   int32_t endpoints[2 * 2][3];
105801e04c3fSmrg   uint32_t subsets;
105901e04c3fSmrg   int n_subsets;
106001e04c3fSmrg   int component;
106101e04c3fSmrg   int32_t value;
106201e04c3fSmrg   unsigned x, y;
106301e04c3fSmrg
106401e04c3fSmrg   if (block[0] & 0x2) {
106501e04c3fSmrg      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
10667ec681f3Smrg      bit_offset_head = 5;
106701e04c3fSmrg   } else {
106801e04c3fSmrg      mode_num = block[0] & 3;
10697ec681f3Smrg      bit_offset_head = 2;
107001e04c3fSmrg   }
107101e04c3fSmrg
107201e04c3fSmrg   mode = bptc_float_modes + mode_num;
107301e04c3fSmrg
107401e04c3fSmrg   if (mode->reserved) {
107501e04c3fSmrg      for(y = 0; y < src_height; y += 1) {
107601e04c3fSmrg         float *result = dst_row;
107701e04c3fSmrg         memset(result, 0, sizeof result[0] * 4 * src_width);
107801e04c3fSmrg         for(x = 0; x < src_width; x += 1) {
107901e04c3fSmrg            result[3] = 1.0f;
108001e04c3fSmrg            result += 4;
108101e04c3fSmrg         }
108201e04c3fSmrg         dst_row += dst_rowstride / sizeof dst_row[0];
108301e04c3fSmrg      }
108401e04c3fSmrg      return;
108501e04c3fSmrg   }
108601e04c3fSmrg
10877ec681f3Smrg   bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
108801e04c3fSmrg                                        endpoints, is_signed);
108901e04c3fSmrg
109001e04c3fSmrg   if (mode->n_partition_bits) {
10917ec681f3Smrg      partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
10927ec681f3Smrg      bit_offset_head += mode->n_partition_bits;
109301e04c3fSmrg
109401e04c3fSmrg      subsets = partition_table1[partition_num];
109501e04c3fSmrg      n_subsets = 2;
109601e04c3fSmrg   } else {
109701e04c3fSmrg      partition_num = 0;
109801e04c3fSmrg      subsets = 0;
109901e04c3fSmrg      n_subsets = 1;
110001e04c3fSmrg   }
110101e04c3fSmrg
110201e04c3fSmrg   for(y = 0; y < src_height; y += 1) {
110301e04c3fSmrg      float *result = dst_row;
110401e04c3fSmrg      for(x = 0; x < src_width; x += 1) {
110501e04c3fSmrg         int texel;
110601e04c3fSmrg
11077ec681f3Smrg         bit_offset = bit_offset_head;
11087ec681f3Smrg
110901e04c3fSmrg         texel = x + y * 4;
111001e04c3fSmrg
111101e04c3fSmrg         anchors_before_texel =
111201e04c3fSmrg            count_anchors_before_texel(n_subsets, partition_num, texel);
111301e04c3fSmrg
111401e04c3fSmrg         /* Calculate the offset to the primary index for this texel */
111501e04c3fSmrg         bit_offset += mode->n_index_bits * texel - anchors_before_texel;
111601e04c3fSmrg
111701e04c3fSmrg         subset_num = (subsets >> (texel * 2)) & 3;
111801e04c3fSmrg
111901e04c3fSmrg         index_bits = mode->n_index_bits;
112001e04c3fSmrg         if (is_anchor(n_subsets, partition_num, texel))
112101e04c3fSmrg            index_bits--;
112201e04c3fSmrg         index = extract_bits(block, bit_offset, index_bits);
112301e04c3fSmrg
112401e04c3fSmrg         for (component = 0; component < 3; component++) {
112501e04c3fSmrg            value = interpolate(endpoints[subset_num * 2][component],
112601e04c3fSmrg                                endpoints[subset_num * 2 + 1][component],
112701e04c3fSmrg                                index,
112801e04c3fSmrg                                mode->n_index_bits);
112901e04c3fSmrg
113001e04c3fSmrg            if (is_signed)
113101e04c3fSmrg               value = finish_signed_unquantize(value);
113201e04c3fSmrg            else
113301e04c3fSmrg               value = finish_unsigned_unquantize(value);
113401e04c3fSmrg
113501e04c3fSmrg            result[component] = _mesa_half_to_float(value);
113601e04c3fSmrg         }
113701e04c3fSmrg
113801e04c3fSmrg         result[3] = 1.0f;
113901e04c3fSmrg         result += 4;
114001e04c3fSmrg      }
114101e04c3fSmrg      dst_row += dst_rowstride / sizeof dst_row[0];
114201e04c3fSmrg   }
114301e04c3fSmrg}
114401e04c3fSmrg
114501e04c3fSmrgstatic void
114601e04c3fSmrgdecompress_rgb_float(int width, int height,
114701e04c3fSmrg                      const uint8_t *src, int src_rowstride,
114801e04c3fSmrg                      float *dst, int dst_rowstride, bool is_signed)
114901e04c3fSmrg{
115001e04c3fSmrg   int src_row_diff;
115101e04c3fSmrg   int y, x;
115201e04c3fSmrg
115301e04c3fSmrg   if (src_rowstride >= width * 4)
115401e04c3fSmrg      src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
115501e04c3fSmrg   else
115601e04c3fSmrg      src_row_diff = 0;
115701e04c3fSmrg
115801e04c3fSmrg   for (y = 0; y < height; y += BLOCK_SIZE) {
115901e04c3fSmrg      for (x = 0; x < width; x += BLOCK_SIZE) {
116001e04c3fSmrg         decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
116101e04c3fSmrg                                    MIN2(height - y, BLOCK_SIZE),
116201e04c3fSmrg                                    src,
116301e04c3fSmrg                                    (dst + x * 4 +
116401e04c3fSmrg                                     (y * dst_rowstride / sizeof dst[0])),
116501e04c3fSmrg                                    dst_rowstride, is_signed);
116601e04c3fSmrg         src += BLOCK_BYTES;
116701e04c3fSmrg      }
116801e04c3fSmrg      src += src_row_diff;
116901e04c3fSmrg   }
117001e04c3fSmrg}
117101e04c3fSmrg#endif // BPTC_BLOCK_DECODE
117201e04c3fSmrg
117301e04c3fSmrgstatic void
117401e04c3fSmrgwrite_bits(struct bit_writer *writer, int n_bits, int value)
117501e04c3fSmrg{
117601e04c3fSmrg   do {
117701e04c3fSmrg      if (n_bits + writer->pos >= 8) {
117801e04c3fSmrg         *(writer->dst++) = writer->buf | (value << writer->pos);
117901e04c3fSmrg         writer->buf = 0;
118001e04c3fSmrg         value >>= (8 - writer->pos);
118101e04c3fSmrg         n_bits -= (8 - writer->pos);
118201e04c3fSmrg         writer->pos = 0;
118301e04c3fSmrg      } else {
118401e04c3fSmrg         writer->buf |= value << writer->pos;
118501e04c3fSmrg         writer->pos += n_bits;
118601e04c3fSmrg         break;
118701e04c3fSmrg      }
118801e04c3fSmrg   } while (n_bits > 0);
118901e04c3fSmrg}
119001e04c3fSmrg
/*
 * Compute the average luminance (R + G + B) and the average alpha over a
 * width x height region of four-byte texels whose rows are src_rowstride
 * bytes apart.  Both averages use integer division.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
121101e04c3fSmrg
/*
 * Choose two RGBA endpoints for a region of texels.
 *
 * Texels are split into two groups by comparing their luminance (R + G + B)
 * against average_luminance; the RGB endpoints are the per-group averages.
 * Alpha is handled independently in the same way, splitting on
 * average_alpha.  When one of the groups is empty both endpoints are set to
 * the average over all texels.
 *
 * src points at width x height four-byte texels whose rows are src_rowstride
 * bytes apart.  The results are stored in endpoints[0] and endpoints[1];
 * they are swapped if necessary so that the first texel lies on the
 * endpoints[0] side of the midpoint.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Classify on the alpha channel itself (p[3]); the alpha
          * endpoints must not depend on the blue channel. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* All texels fell in one group: use the overall average for both. */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
130801e04c3fSmrg
130901e04c3fSmrgstatic void
131001e04c3fSmrgwrite_rgb_indices_unorm(struct bit_writer *writer,
131101e04c3fSmrg                        int src_width, int src_height,
131201e04c3fSmrg                        const uint8_t *src, int src_rowstride,
131301e04c3fSmrg                        uint8_t endpoints[][4])
131401e04c3fSmrg{
131501e04c3fSmrg   int luminance;
131601e04c3fSmrg   int endpoint_luminances[2];
131701e04c3fSmrg   int endpoint;
131801e04c3fSmrg   int index;
131901e04c3fSmrg   int y, x;
132001e04c3fSmrg
132101e04c3fSmrg   for (endpoint = 0; endpoint < 2; endpoint++) {
132201e04c3fSmrg      endpoint_luminances[endpoint] =
132301e04c3fSmrg         endpoints[endpoint][0] +
132401e04c3fSmrg         endpoints[endpoint][1] +
132501e04c3fSmrg         endpoints[endpoint][2];
132601e04c3fSmrg   }
132701e04c3fSmrg
132801e04c3fSmrg   /* If the endpoints have the same luminance then we'll just use index 0 for
132901e04c3fSmrg    * all of the texels */
133001e04c3fSmrg   if (endpoint_luminances[0] == endpoint_luminances[1]) {
133101e04c3fSmrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
133201e04c3fSmrg      return;
133301e04c3fSmrg   }
133401e04c3fSmrg
133501e04c3fSmrg   for (y = 0; y < src_height; y++) {
133601e04c3fSmrg      for (x = 0; x < src_width; x++) {
133701e04c3fSmrg         luminance = src[0] + src[1] + src[2];
133801e04c3fSmrg
133901e04c3fSmrg         index = ((luminance - endpoint_luminances[0]) * 3 /
134001e04c3fSmrg                  (endpoint_luminances[1] - endpoint_luminances[0]));
134101e04c3fSmrg         if (index < 0)
134201e04c3fSmrg            index = 0;
134301e04c3fSmrg         else if (index > 3)
134401e04c3fSmrg            index = 3;
134501e04c3fSmrg
134601e04c3fSmrg         assert(x != 0 || y != 0 || index < 2);
134701e04c3fSmrg
134801e04c3fSmrg         write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
134901e04c3fSmrg
135001e04c3fSmrg         src += 4;
135101e04c3fSmrg      }
135201e04c3fSmrg
135301e04c3fSmrg      /* Pad the indices out to the block size */
135401e04c3fSmrg      if (src_width < BLOCK_SIZE)
135501e04c3fSmrg         write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
135601e04c3fSmrg
135701e04c3fSmrg      src += src_rowstride - src_width * 4;
135801e04c3fSmrg   }
135901e04c3fSmrg
136001e04c3fSmrg   /* Pad the indices out to the block size */
136101e04c3fSmrg   if (src_height < BLOCK_SIZE)
136201e04c3fSmrg      write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
136301e04c3fSmrg}
136401e04c3fSmrg
136501e04c3fSmrgstatic void
136601e04c3fSmrgwrite_alpha_indices_unorm(struct bit_writer *writer,
136701e04c3fSmrg                          int src_width, int src_height,
136801e04c3fSmrg                          const uint8_t *src, int src_rowstride,
136901e04c3fSmrg                          uint8_t endpoints[][4])
137001e04c3fSmrg{
137101e04c3fSmrg   int index;
137201e04c3fSmrg   int y, x;
137301e04c3fSmrg
137401e04c3fSmrg   /* If the endpoints have the same alpha then we'll just use index 0 for
137501e04c3fSmrg    * all of the texels */
137601e04c3fSmrg   if (endpoints[0][3] == endpoints[1][3]) {
137701e04c3fSmrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
137801e04c3fSmrg      return;
137901e04c3fSmrg   }
138001e04c3fSmrg
138101e04c3fSmrg   for (y = 0; y < src_height; y++) {
138201e04c3fSmrg      for (x = 0; x < src_width; x++) {
138301e04c3fSmrg         index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
138401e04c3fSmrg                  ((int) endpoints[1][3] - endpoints[0][3]));
138501e04c3fSmrg         if (index < 0)
138601e04c3fSmrg            index = 0;
138701e04c3fSmrg         else if (index > 7)
138801e04c3fSmrg            index = 7;
138901e04c3fSmrg
139001e04c3fSmrg         assert(x != 0 || y != 0 || index < 4);
139101e04c3fSmrg
139201e04c3fSmrg         /* The first index has one less bit */
139301e04c3fSmrg         write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
139401e04c3fSmrg
139501e04c3fSmrg         src += 4;
139601e04c3fSmrg      }
139701e04c3fSmrg
139801e04c3fSmrg      /* Pad the indices out to the block size */
139901e04c3fSmrg      if (src_width < BLOCK_SIZE)
140001e04c3fSmrg         write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
140101e04c3fSmrg
140201e04c3fSmrg      src += src_rowstride - src_width * 4;
140301e04c3fSmrg   }
140401e04c3fSmrg
140501e04c3fSmrg   /* Pad the indices out to the block size */
140601e04c3fSmrg   if (src_height < BLOCK_SIZE)
140701e04c3fSmrg      write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
140801e04c3fSmrg}
140901e04c3fSmrg
141001e04c3fSmrgstatic void
141101e04c3fSmrgcompress_rgba_unorm_block(int src_width, int src_height,
141201e04c3fSmrg                          const uint8_t *src, int src_rowstride,
141301e04c3fSmrg                          uint8_t *dst)
141401e04c3fSmrg{
141501e04c3fSmrg   int average_luminance, average_alpha;
141601e04c3fSmrg   uint8_t endpoints[2][4];
141701e04c3fSmrg   struct bit_writer writer;
141801e04c3fSmrg   int component, endpoint;
141901e04c3fSmrg
142001e04c3fSmrg   get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
142101e04c3fSmrg                                     &average_luminance, &average_alpha);
142201e04c3fSmrg   get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
142301e04c3fSmrg                            average_luminance, average_alpha,
142401e04c3fSmrg                            endpoints);
142501e04c3fSmrg
142601e04c3fSmrg   writer.dst = dst;
142701e04c3fSmrg   writer.pos = 0;
142801e04c3fSmrg   writer.buf = 0;
142901e04c3fSmrg
143001e04c3fSmrg   write_bits(&writer, 5, 0x10); /* mode 4 */
143101e04c3fSmrg   write_bits(&writer, 2, 0); /* rotation 0 */
143201e04c3fSmrg   write_bits(&writer, 1, 0); /* index selection bit */
143301e04c3fSmrg
143401e04c3fSmrg   /* Write the color endpoints */
143501e04c3fSmrg   for (component = 0; component < 3; component++)
143601e04c3fSmrg      for (endpoint = 0; endpoint < 2; endpoint++)
143701e04c3fSmrg         write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
143801e04c3fSmrg
143901e04c3fSmrg   /* Write the alpha endpoints */
144001e04c3fSmrg   for (endpoint = 0; endpoint < 2; endpoint++)
144101e04c3fSmrg      write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
144201e04c3fSmrg
144301e04c3fSmrg   write_rgb_indices_unorm(&writer,
144401e04c3fSmrg                           src_width, src_height,
144501e04c3fSmrg                           src, src_rowstride,
144601e04c3fSmrg                           endpoints);
144701e04c3fSmrg   write_alpha_indices_unorm(&writer,
144801e04c3fSmrg                             src_width, src_height,
144901e04c3fSmrg                             src, src_rowstride,
145001e04c3fSmrg                             endpoints);
145101e04c3fSmrg}
145201e04c3fSmrg
145301e04c3fSmrgstatic void
145401e04c3fSmrgcompress_rgba_unorm(int width, int height,
145501e04c3fSmrg                    const uint8_t *src, int src_rowstride,
145601e04c3fSmrg                    uint8_t *dst, int dst_rowstride)
145701e04c3fSmrg{
145801e04c3fSmrg   int dst_row_diff;
145901e04c3fSmrg   int y, x;
146001e04c3fSmrg
146101e04c3fSmrg   if (dst_rowstride >= width * 4)
146201e04c3fSmrg      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
146301e04c3fSmrg   else
146401e04c3fSmrg      dst_row_diff = 0;
146501e04c3fSmrg
146601e04c3fSmrg   for (y = 0; y < height; y += BLOCK_SIZE) {
146701e04c3fSmrg      for (x = 0; x < width; x += BLOCK_SIZE) {
146801e04c3fSmrg         compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
146901e04c3fSmrg                                   MIN2(height - y, BLOCK_SIZE),
147001e04c3fSmrg                                   src + x * 4 + y * src_rowstride,
147101e04c3fSmrg                                   src_rowstride,
147201e04c3fSmrg                                   dst);
147301e04c3fSmrg         dst += BLOCK_BYTES;
147401e04c3fSmrg      }
147501e04c3fSmrg      dst += dst_row_diff;
147601e04c3fSmrg   }
147701e04c3fSmrg}
147801e04c3fSmrg
/*
 * Return the mean luminance of a width x height region of RGB float texels,
 * where the luminance of a texel is simply the sum of its three components.
 *
 * src points at the first texel (3 floats per texel) and src_rowstride is
 * the distance in bytes between the starts of consecutive rows.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   /* Number of floats to skip from the end of one row to the start of the
    * next. This is computed in signed arithmetic: mixing the int stride
    * with the unsigned result of sizeof would turn a stride smaller than
    * the packed row size (e.g. a negative one) into a huge positive
    * offset. */
   const int row_gap =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float luminance_sum = 0;
   int y, x;

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         src += 3;
      }
      src += row_gap;
   }

   return luminance_sum / (width * height);
}
149601e04c3fSmrg
/*
 * Clamp value to the range [-65504, 65504] when is_signed is set and to
 * [0, 65504] otherwise. Values already inside the range (and NaN, which
 * fails every comparison) are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   return value < lower ? lower : value;
}
151501e04c3fSmrg
/*
 * Choose the two RGB endpoints for a block of float texels.
 *
 * The texels of the width x height region are split into two groups
 * according to whether their luminance (sum of the three components) is
 * below average_luminance. Each endpoint is the per-component mean of one
 * group; if either group is empty both endpoints are set to the mean of the
 * whole region. The results are then passed through clamp_value() and, if
 * necessary, swapped so that the first texel lies on the same side of the
 * luminance midpoint as endpoint 0.
 *
 * src points at the first texel (3 floats per texel), src_rowstride is the
 * distance in bytes between rows, and the result is stored in endpoints[0]
 * and endpoints[1].
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   /* Floats to skip between the end of one row and the start of the next.
    * Kept in signed arithmetic so that a stride smaller than the packed row
    * size (e.g. a negative one) is not turned into a huge unsigned offset
    * by the size_t result of sizeof. */
   const int row_gap =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float endpoint_luminances[2];
   float midpoint;
   float sums[2][3];
   int endpoint, component;
   float luminance;
   float temp[3];
   const float *p = src;
   int left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   /* Accumulate per-component sums for the darker (0) and brighter (1)
    * groups of texels */
   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         p += 3;
      }

      p += row_gap;
   }

   if (left_endpoint_count == 0 ||
       left_endpoint_count == width * height) {
      /* One group is empty: use the overall mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / left_endpoint_count;
         endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (endpoint = 0; endpoint < 2; endpoint++) {
      for (component = 0; component < 3; component++) {
         endpoints[endpoint][component] =
            clamp_value(endpoints[endpoint][component], is_signed);
      }
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   /* Swap when the first texel and endpoint 0 are on opposite sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
159101e04c3fSmrg
159201e04c3fSmrgstatic void
159301e04c3fSmrgwrite_rgb_indices_float(struct bit_writer *writer,
159401e04c3fSmrg                        int src_width, int src_height,
159501e04c3fSmrg                        const float *src, int src_rowstride,
159601e04c3fSmrg                        float endpoints[][3])
159701e04c3fSmrg{
159801e04c3fSmrg   float luminance;
159901e04c3fSmrg   float endpoint_luminances[2];
160001e04c3fSmrg   int endpoint;
160101e04c3fSmrg   int index;
160201e04c3fSmrg   int y, x;
160301e04c3fSmrg
160401e04c3fSmrg   for (endpoint = 0; endpoint < 2; endpoint++) {
160501e04c3fSmrg      endpoint_luminances[endpoint] =
160601e04c3fSmrg         endpoints[endpoint][0] +
160701e04c3fSmrg         endpoints[endpoint][1] +
160801e04c3fSmrg         endpoints[endpoint][2];
160901e04c3fSmrg   }
161001e04c3fSmrg
161101e04c3fSmrg   /* If the endpoints have the same luminance then we'll just use index 0 for
161201e04c3fSmrg    * all of the texels */
161301e04c3fSmrg   if (endpoint_luminances[0] == endpoint_luminances[1]) {
161401e04c3fSmrg      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
161501e04c3fSmrg      return;
161601e04c3fSmrg   }
161701e04c3fSmrg
161801e04c3fSmrg   for (y = 0; y < src_height; y++) {
161901e04c3fSmrg      for (x = 0; x < src_width; x++) {
162001e04c3fSmrg         luminance = src[0] + src[1] + src[2];
162101e04c3fSmrg
162201e04c3fSmrg         index = ((luminance - endpoint_luminances[0]) * 15 /
162301e04c3fSmrg                  (endpoint_luminances[1] - endpoint_luminances[0]));
162401e04c3fSmrg         if (index < 0)
162501e04c3fSmrg            index = 0;
162601e04c3fSmrg         else if (index > 15)
162701e04c3fSmrg            index = 15;
162801e04c3fSmrg
162901e04c3fSmrg         assert(x != 0 || y != 0 || index < 8);
163001e04c3fSmrg
163101e04c3fSmrg         write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
163201e04c3fSmrg
163301e04c3fSmrg         src += 3;
163401e04c3fSmrg      }
163501e04c3fSmrg
163601e04c3fSmrg      /* Pad the indices out to the block size */
163701e04c3fSmrg      if (src_width < BLOCK_SIZE)
163801e04c3fSmrg         write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
163901e04c3fSmrg
164001e04c3fSmrg      src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
164101e04c3fSmrg   }
164201e04c3fSmrg
164301e04c3fSmrg   /* Pad the indices out to the block size */
164401e04c3fSmrg   if (src_height < BLOCK_SIZE)
164501e04c3fSmrg      write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
164601e04c3fSmrg}
164701e04c3fSmrg
/*
 * Convert a float endpoint component into the integer value that gets
 * stored in the block.
 *
 * Unsigned: non-positive values yield 0; otherwise the half-float bit
 * pattern of value is scaled by 64/31 and shifted right by 6 bits.
 *
 * Signed: the sign bit is split off the half-float bit pattern, the
 * remaining magnitude is scaled by 32/31 and shifted right by 6 bits, and a
 * negative result is returned as a 10-bit two's complement number.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   const int ten_bit_mask = (1 << 10) - 1;
   bool negative;
   int bits;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      bits = _mesa_float_to_half(value);

      return (64 * bits / 31) >> 6;
   }

   bits = _mesa_float_to_half(value);

   /* Work on the magnitude and remember the sign for later */
   negative = (bits & 0x8000) != 0;
   if (negative)
      bits &= 0x7fff;

   bits = (32 * bits / 31) >> 6;

   return negative ? (-bits & ten_bit_mask) : bits;
}
167701e04c3fSmrg
167801e04c3fSmrgstatic void
167901e04c3fSmrgcompress_rgb_float_block(int src_width, int src_height,
168001e04c3fSmrg                         const float *src, int src_rowstride,
168101e04c3fSmrg                         uint8_t *dst,
168201e04c3fSmrg                         bool is_signed)
168301e04c3fSmrg{
168401e04c3fSmrg   float average_luminance;
168501e04c3fSmrg   float endpoints[2][3];
168601e04c3fSmrg   struct bit_writer writer;
168701e04c3fSmrg   int component, endpoint;
168801e04c3fSmrg   int endpoint_value;
168901e04c3fSmrg
169001e04c3fSmrg   average_luminance =
169101e04c3fSmrg      get_average_luminance_float(src_width, src_height, src, src_rowstride);
169201e04c3fSmrg   get_endpoints_float(src_width, src_height, src, src_rowstride,
169301e04c3fSmrg                       average_luminance, endpoints, is_signed);
169401e04c3fSmrg
169501e04c3fSmrg   writer.dst = dst;
169601e04c3fSmrg   writer.pos = 0;
169701e04c3fSmrg   writer.buf = 0;
169801e04c3fSmrg
169901e04c3fSmrg   write_bits(&writer, 5, 3); /* mode 3 */
170001e04c3fSmrg
170101e04c3fSmrg   /* Write the endpoints */
170201e04c3fSmrg   for (endpoint = 0; endpoint < 2; endpoint++) {
170301e04c3fSmrg      for (component = 0; component < 3; component++) {
170401e04c3fSmrg         endpoint_value =
170501e04c3fSmrg            get_endpoint_value(endpoints[endpoint][component], is_signed);
170601e04c3fSmrg         write_bits(&writer, 10, endpoint_value);
170701e04c3fSmrg      }
170801e04c3fSmrg   }
170901e04c3fSmrg
171001e04c3fSmrg   write_rgb_indices_float(&writer,
171101e04c3fSmrg                           src_width, src_height,
171201e04c3fSmrg                           src, src_rowstride,
171301e04c3fSmrg                           endpoints);
171401e04c3fSmrg}
171501e04c3fSmrg
171601e04c3fSmrgstatic void
171701e04c3fSmrgcompress_rgb_float(int width, int height,
171801e04c3fSmrg                   const float *src, int src_rowstride,
171901e04c3fSmrg                   uint8_t *dst, int dst_rowstride,
172001e04c3fSmrg                   bool is_signed)
172101e04c3fSmrg{
172201e04c3fSmrg   int dst_row_diff;
172301e04c3fSmrg   int y, x;
172401e04c3fSmrg
172501e04c3fSmrg   if (dst_rowstride >= width * 4)
172601e04c3fSmrg      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
172701e04c3fSmrg   else
172801e04c3fSmrg      dst_row_diff = 0;
172901e04c3fSmrg
173001e04c3fSmrg   for (y = 0; y < height; y += BLOCK_SIZE) {
173101e04c3fSmrg      for (x = 0; x < width; x += BLOCK_SIZE) {
173201e04c3fSmrg         compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
173301e04c3fSmrg                                  MIN2(height - y, BLOCK_SIZE),
173401e04c3fSmrg                                  src + x * 3 +
173501e04c3fSmrg                                  y * src_rowstride / sizeof (float),
173601e04c3fSmrg                                  src_rowstride,
173701e04c3fSmrg                                  dst,
173801e04c3fSmrg                                  is_signed);
173901e04c3fSmrg         dst += BLOCK_BYTES;
174001e04c3fSmrg      }
174101e04c3fSmrg      dst += dst_row_diff;
174201e04c3fSmrg   }
174301e04c3fSmrg}
174401e04c3fSmrg
174501e04c3fSmrg#endif
1746