/**************************************************************************
 *
 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
 * Copyright (c) 2008 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
23 * 24 **************************************************************************/ 25 26#include "util/format/u_format.h" 27#include "util/format/u_format_fxt1.h" 28#include "util/format/u_format_pack.h" 29#include "util/format_srgb.h" 30#include "util/u_math.h" 31 32#define RCOMP 0 33#define GCOMP 1 34#define BCOMP 2 35#define ACOMP 3 36 37#define FXT1_BLOCK_SIZE 16 38 39static void 40fxt1_encode (uint32_t width, uint32_t height, int32_t comps, 41 const void *source, int32_t srcRowStride, 42 void *dest, int32_t destRowStride); 43 44static void 45fxt1_decode_1 (const void *texture, int32_t stride, 46 int32_t i, int32_t j, uint8_t *rgba); 47 48/***************************************************************************\ 49 * FXT1 encoder 50 * 51 * The encoder was built by reversing the decoder, 52 * and is vaguely based on Texus2 by 3dfx. Note that this code 53 * is merely a proof of concept, since it is highly UNoptimized; 54 * moreover, it is sub-optimal due to initial conditions passed 55 * to Lloyd's algorithm (the interpolation modes are even worse). 
56\***************************************************************************/ 57 58 59#define MAX_COMP 4 /* ever needed maximum number of components in texel */ 60#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */ 61#define N_TEXELS 32 /* number of texels in a block (always 32) */ 62#define LL_N_REP 50 /* number of iterations in lloyd's vq */ 63#define LL_RMS_D 10 /* fault tolerance (maximum delta) */ 64#define LL_RMS_E 255 /* fault tolerance (maximum error) */ 65#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */ 66static const uint32_t zero = 0; 67#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0) 68 69/* 70 * Define a 64-bit unsigned integer type and macros 71 */ 72#if 1 73 74#define FX64_NATIVE 1 75 76typedef uint64_t Fx64; 77 78#define FX64_MOV32(a, b) a = b 79#define FX64_OR32(a, b) a |= b 80#define FX64_SHL(a, c) a <<= c 81 82#else 83 84#define FX64_NATIVE 0 85 86typedef struct { 87 uint32_t lo, hi; 88} Fx64; 89 90#define FX64_MOV32(a, b) a.lo = b 91#define FX64_OR32(a, b) a.lo |= b 92 93#define FX64_SHL(a, c) \ 94 do { \ 95 if ((c) >= 32) { \ 96 a.hi = a.lo << ((c) - 32); \ 97 a.lo = 0; \ 98 } else { \ 99 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \ 100 a.lo <<= (c); \ 101 } \ 102 } while (0) 103 104#endif 105 106 107#define F(i) (float)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */ 108#define SAFECDOT 1 /* for paranoids */ 109 110#define MAKEIVEC(NV, NC, IV, B, V0, V1) \ 111 do { \ 112 /* compute interpolation vector */ \ 113 float d2 = 0.0F; \ 114 float rd2; \ 115 \ 116 for (i = 0; i < NC; i++) { \ 117 IV[i] = (V1[i] - V0[i]) * F(i); \ 118 d2 += IV[i] * IV[i]; \ 119 } \ 120 rd2 = (float)NV / d2; \ 121 B = 0; \ 122 for (i = 0; i < NC; i++) { \ 123 IV[i] *= F(i); \ 124 B -= IV[i] * V0[i]; \ 125 IV[i] *= rd2; \ 126 } \ 127 B = B * rd2 + 0.5f; \ 128 } while (0) 129 130#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\ 131 do { \ 132 float dot = 0.0F; \ 133 for (i = 0; i < NC; i++) { \ 134 
dot += V[i] * IV[i]; \ 135 } \ 136 TEXEL = (int32_t)(dot + B); \ 137 if (SAFECDOT) { \ 138 if (TEXEL < 0) { \ 139 TEXEL = 0; \ 140 } else if (TEXEL > NV) { \ 141 TEXEL = NV; \ 142 } \ 143 } \ 144 } while (0) 145 146 147static int32_t 148fxt1_bestcol (float vec[][MAX_COMP], int32_t nv, 149 uint8_t input[MAX_COMP], int32_t nc) 150{ 151 int32_t i, j, best = -1; 152 float err = 1e9; /* big enough */ 153 154 for (j = 0; j < nv; j++) { 155 float e = 0.0F; 156 for (i = 0; i < nc; i++) { 157 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]); 158 } 159 if (e < err) { 160 err = e; 161 best = j; 162 } 163 } 164 165 return best; 166} 167 168 169static int32_t 170fxt1_worst (float vec[MAX_COMP], 171 uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n) 172{ 173 int32_t i, k, worst = -1; 174 float err = -1.0F; /* small enough */ 175 176 for (k = 0; k < n; k++) { 177 float e = 0.0F; 178 for (i = 0; i < nc; i++) { 179 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]); 180 } 181 if (e > err) { 182 err = e; 183 worst = k; 184 } 185 } 186 187 return worst; 188} 189 190 191static int32_t 192fxt1_variance (uint8_t input[N_TEXELS / 2][MAX_COMP], int32_t nc) 193{ 194 const int n = N_TEXELS / 2; 195 int32_t i, k, best = 0; 196 int32_t sx, sx2; 197 double var, maxvar = -1; /* small enough */ 198 double teenth = 1.0 / n; 199 200 for (i = 0; i < nc; i++) { 201 sx = sx2 = 0; 202 for (k = 0; k < n; k++) { 203 int32_t t = input[k][i]; 204 sx += t; 205 sx2 += t * t; 206 } 207 var = sx2 * teenth - sx * sx * teenth * teenth; 208 if (maxvar < var) { 209 maxvar = var; 210 best = i; 211 } 212 } 213 214 return best; 215} 216 217 218static int32_t 219fxt1_choose (float vec[][MAX_COMP], int32_t nv, 220 uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n) 221{ 222#if 0 223 /* Choose colors from a grid. 
224 */ 225 int32_t i, j; 226 227 for (j = 0; j < nv; j++) { 228 int32_t m = j * (n - 1) / (nv - 1); 229 for (i = 0; i < nc; i++) { 230 vec[j][i] = input[m][i]; 231 } 232 } 233#else 234 /* Our solution here is to find the darkest and brightest colors in 235 * the 8x4 tile and use those as the two representative colors. 236 * There are probably better algorithms to use (histogram-based). 237 */ 238 int32_t i, j, k; 239 int32_t minSum = 2000; /* big enough */ 240 int32_t maxSum = -1; /* small enough */ 241 int32_t minCol = 0; /* phoudoin: silent compiler! */ 242 int32_t maxCol = 0; /* phoudoin: silent compiler! */ 243 244 struct { 245 int32_t flag; 246 int32_t key; 247 int32_t freq; 248 int32_t idx; 249 } hist[N_TEXELS]; 250 int32_t lenh = 0; 251 252 memset(hist, 0, sizeof(hist)); 253 254 for (k = 0; k < n; k++) { 255 int32_t l; 256 int32_t key = 0; 257 int32_t sum = 0; 258 for (i = 0; i < nc; i++) { 259 key <<= 8; 260 key |= input[k][i]; 261 sum += input[k][i]; 262 } 263 for (l = 0; l < n; l++) { 264 if (!hist[l].flag) { 265 /* alloc new slot */ 266 hist[l].flag = !0; 267 hist[l].key = key; 268 hist[l].freq = 1; 269 hist[l].idx = k; 270 lenh = l + 1; 271 break; 272 } else if (hist[l].key == key) { 273 hist[l].freq++; 274 break; 275 } 276 } 277 if (minSum > sum) { 278 minSum = sum; 279 minCol = k; 280 } 281 if (maxSum < sum) { 282 maxSum = sum; 283 maxCol = k; 284 } 285 } 286 287 if (lenh <= nv) { 288 for (j = 0; j < lenh; j++) { 289 for (i = 0; i < nc; i++) { 290 vec[j][i] = (float)input[hist[j].idx][i]; 291 } 292 } 293 for (; j < nv; j++) { 294 for (i = 0; i < nc; i++) { 295 vec[j][i] = vec[0][i]; 296 } 297 } 298 return 0; 299 } 300 301 for (j = 0; j < nv; j++) { 302 for (i = 0; i < nc; i++) { 303 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1); 304 } 305 } 306#endif 307 308 return !0; 309} 310 311 312static int32_t 313fxt1_lloyd (float vec[][MAX_COMP], int32_t nv, 314 uint8_t input[N_TEXELS][MAX_COMP], int32_t 
nc, int32_t n) 315{ 316 /* Use the generalized lloyd's algorithm for VQ: 317 * find 4 color vectors. 318 * 319 * for each sample color 320 * sort to nearest vector. 321 * 322 * replace each vector with the centroid of its matching colors. 323 * 324 * repeat until RMS doesn't improve. 325 * 326 * if a color vector has no samples, or becomes the same as another 327 * vector, replace it with the color which is farthest from a sample. 328 * 329 * vec[][MAX_COMP] initial vectors and resulting colors 330 * nv number of resulting colors required 331 * input[N_TEXELS][MAX_COMP] input texels 332 * nc number of components in input / vec 333 * n number of input samples 334 */ 335 336 int32_t sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */ 337 int32_t cnt[MAX_VECT]; /* how many times a certain vector was chosen */ 338 float error, lasterror = 1e9; 339 340 int32_t i, j, k, rep; 341 342 /* the quantizer */ 343 for (rep = 0; rep < LL_N_REP; rep++) { 344 /* reset sums & counters */ 345 for (j = 0; j < nv; j++) { 346 for (i = 0; i < nc; i++) { 347 sum[j][i] = 0; 348 } 349 cnt[j] = 0; 350 } 351 error = 0; 352 353 /* scan whole block */ 354 for (k = 0; k < n; k++) { 355#if 1 356 int32_t best = -1; 357 float err = 1e9; /* big enough */ 358 /* determine best vector */ 359 for (j = 0; j < nv; j++) { 360 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) + 361 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) + 362 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]); 363 if (nc == 4) { 364 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]); 365 } 366 if (e < err) { 367 err = e; 368 best = j; 369 } 370 } 371#else 372 int32_t best = fxt1_bestcol(vec, nv, input[k], nc, &err); 373#endif 374 assert(best >= 0); 375 /* add in closest color */ 376 for (i = 0; i < nc; i++) { 377 sum[best][i] += input[k][i]; 378 } 379 /* mark this vector as used */ 380 cnt[best]++; 381 /* accumulate error */ 382 error += err; 383 } 384 385 /* check RMS */ 386 if ((error 
< LL_RMS_E) || 387 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) { 388 return !0; /* good match */ 389 } 390 lasterror = error; 391 392 /* move each vector to the barycenter of its closest colors */ 393 for (j = 0; j < nv; j++) { 394 if (cnt[j]) { 395 float div = 1.0F / cnt[j]; 396 for (i = 0; i < nc; i++) { 397 vec[j][i] = div * sum[j][i]; 398 } 399 } else { 400 /* this vec has no samples or is identical with a previous vec */ 401 int32_t worst = fxt1_worst(vec[j], input, nc, n); 402 for (i = 0; i < nc; i++) { 403 vec[j][i] = input[worst][i]; 404 } 405 } 406 } 407 } 408 409 return 0; /* could not converge fast enough */ 410} 411 412 413static void 414fxt1_quantize_CHROMA (uint32_t *cc, 415 uint8_t input[N_TEXELS][MAX_COMP]) 416{ 417 const int32_t n_vect = 4; /* 4 base vectors to find */ 418 const int32_t n_comp = 3; /* 3 components: R, G, B */ 419 float vec[MAX_VECT][MAX_COMP]; 420 int32_t i, j, k; 421 Fx64 hi; /* high quadword */ 422 uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */ 423 424 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) { 425 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS); 426 } 427 428 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */ 429 for (j = n_vect - 1; j >= 0; j--) { 430 for (i = 0; i < n_comp; i++) { 431 /* add in colors */ 432 FX64_SHL(hi, 5); 433 FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F)); 434 } 435 } 436 ((Fx64 *)cc)[1] = hi; 437 438 lohi = lolo = 0; 439 /* right microtile */ 440 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 441 lohi <<= 2; 442 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 443 } 444 /* left microtile */ 445 for (; k >= 0; k--) { 446 lolo <<= 2; 447 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 448 } 449 cc[1] = lohi; 450 cc[0] = lolo; 451} 452 453 454static void 455fxt1_quantize_ALPHA0 (uint32_t *cc, 456 uint8_t input[N_TEXELS][MAX_COMP], 457 uint8_t reord[N_TEXELS][MAX_COMP], int32_t n) 458{ 459 const int32_t n_vect = 3; /* 3 base vectors to find */ 460 
const int32_t n_comp = 4; /* 4 components: R, G, B, A */ 461 float vec[MAX_VECT][MAX_COMP]; 462 int32_t i, j, k; 463 Fx64 hi; /* high quadword */ 464 uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */ 465 466 /* the last vector indicates zero */ 467 for (i = 0; i < n_comp; i++) { 468 vec[n_vect][i] = 0; 469 } 470 471 /* the first n texels in reord are guaranteed to be non-zero */ 472 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) { 473 fxt1_lloyd(vec, n_vect, reord, n_comp, n); 474 } 475 476 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */ 477 for (j = n_vect - 1; j >= 0; j--) { 478 /* add in alphas */ 479 FX64_SHL(hi, 5); 480 FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F)); 481 } 482 for (j = n_vect - 1; j >= 0; j--) { 483 for (i = 0; i < n_comp - 1; i++) { 484 /* add in colors */ 485 FX64_SHL(hi, 5); 486 FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F)); 487 } 488 } 489 ((Fx64 *)cc)[1] = hi; 490 491 lohi = lolo = 0; 492 /* right microtile */ 493 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 494 lohi <<= 2; 495 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 496 } 497 /* left microtile */ 498 for (; k >= 0; k--) { 499 lolo <<= 2; 500 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 501 } 502 cc[1] = lohi; 503 cc[0] = lolo; 504} 505 506 507static void 508fxt1_quantize_ALPHA1 (uint32_t *cc, 509 uint8_t input[N_TEXELS][MAX_COMP]) 510{ 511 const int32_t n_vect = 3; /* highest vector number in each microtile */ 512 const int32_t n_comp = 4; /* 4 components: R, G, B, A */ 513 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */ 514 float b, iv[MAX_COMP]; /* interpolation vector */ 515 int32_t i, j, k; 516 Fx64 hi; /* high quadword */ 517 uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */ 518 519 int32_t minSum; 520 int32_t maxSum; 521 int32_t minColL = 0, maxColL = 0; 522 int32_t minColR = 0, maxColR = 0; 523 int32_t sumL = 0, sumR = 0; 524 int32_t nn_comp; 525 /* Our solution here is to find the darkest and brightest 
colors in 526 * the 4x4 tile and use those as the two representative colors. 527 * There are probably better algorithms to use (histogram-based). 528 */ 529 nn_comp = n_comp; 530 while ((minColL == maxColL) && nn_comp) { 531 minSum = 2000; /* big enough */ 532 maxSum = -1; /* small enough */ 533 for (k = 0; k < N_TEXELS / 2; k++) { 534 int32_t sum = 0; 535 for (i = 0; i < nn_comp; i++) { 536 sum += input[k][i]; 537 } 538 if (minSum > sum) { 539 minSum = sum; 540 minColL = k; 541 } 542 if (maxSum < sum) { 543 maxSum = sum; 544 maxColL = k; 545 } 546 sumL += sum; 547 } 548 549 nn_comp--; 550 } 551 552 nn_comp = n_comp; 553 while ((minColR == maxColR) && nn_comp) { 554 minSum = 2000; /* big enough */ 555 maxSum = -1; /* small enough */ 556 for (k = N_TEXELS / 2; k < N_TEXELS; k++) { 557 int32_t sum = 0; 558 for (i = 0; i < nn_comp; i++) { 559 sum += input[k][i]; 560 } 561 if (minSum > sum) { 562 minSum = sum; 563 minColR = k; 564 } 565 if (maxSum < sum) { 566 maxSum = sum; 567 maxColR = k; 568 } 569 sumR += sum; 570 } 571 572 nn_comp--; 573 } 574 575 /* choose the common vector (yuck!) 
*/ 576 { 577 int32_t j1, j2; 578 int32_t v1 = 0, v2 = 0; 579 float err = 1e9; /* big enough */ 580 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 581 for (i = 0; i < n_comp; i++) { 582 tv[0][i] = input[minColL][i]; 583 tv[1][i] = input[maxColL][i]; 584 tv[2][i] = input[minColR][i]; 585 tv[3][i] = input[maxColR][i]; 586 } 587 for (j1 = 0; j1 < 2; j1++) { 588 for (j2 = 2; j2 < 4; j2++) { 589 float e = 0.0F; 590 for (i = 0; i < n_comp; i++) { 591 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]); 592 } 593 if (e < err) { 594 err = e; 595 v1 = j1; 596 v2 = j2; 597 } 598 } 599 } 600 for (i = 0; i < n_comp; i++) { 601 vec[0][i] = tv[1 - v1][i]; 602 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); 603 vec[2][i] = tv[5 - v2][i]; 604 } 605 } 606 607 /* left microtile */ 608 cc[0] = 0; 609 if (minColL != maxColL) { 610 /* compute interpolation vector */ 611 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 612 613 /* add in texels */ 614 lolo = 0; 615 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 616 int32_t texel; 617 /* interpolate color */ 618 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 619 /* add in texel */ 620 lolo <<= 2; 621 lolo |= texel; 622 } 623 624 cc[0] = lolo; 625 } 626 627 /* right microtile */ 628 cc[1] = 0; 629 if (minColR != maxColR) { 630 /* compute interpolation vector */ 631 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]); 632 633 /* add in texels */ 634 lohi = 0; 635 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 636 int32_t texel; 637 /* interpolate color */ 638 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 639 /* add in texel */ 640 lohi <<= 2; 641 lohi |= texel; 642 } 643 644 cc[1] = lohi; 645 } 646 647 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */ 648 for (j = n_vect - 1; j >= 0; j--) { 649 /* add in alphas */ 650 FX64_SHL(hi, 5); 651 FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F)); 652 } 653 for (j = n_vect - 1; j >= 0; j--) { 654 for (i = 0; i < n_comp - 1; i++) { 655 /* add in colors */ 656 
FX64_SHL(hi, 5); 657 FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F)); 658 } 659 } 660 ((Fx64 *)cc)[1] = hi; 661} 662 663 664static void 665fxt1_quantize_HI (uint32_t *cc, 666 uint8_t input[N_TEXELS][MAX_COMP], 667 uint8_t reord[N_TEXELS][MAX_COMP], int32_t n) 668{ 669 const int32_t n_vect = 6; /* highest vector number */ 670 const int32_t n_comp = 3; /* 3 components: R, G, B */ 671 float b = 0.0F; /* phoudoin: silent compiler! */ 672 float iv[MAX_COMP]; /* interpolation vector */ 673 int32_t i, k; 674 uint32_t hihi; /* high quadword: hi dword */ 675 676 int32_t minSum = 2000; /* big enough */ 677 int32_t maxSum = -1; /* small enough */ 678 int32_t minCol = 0; /* phoudoin: silent compiler! */ 679 int32_t maxCol = 0; /* phoudoin: silent compiler! */ 680 681 /* Our solution here is to find the darkest and brightest colors in 682 * the 8x4 tile and use those as the two representative colors. 683 * There are probably better algorithms to use (histogram-based). 684 */ 685 for (k = 0; k < n; k++) { 686 int32_t sum = 0; 687 for (i = 0; i < n_comp; i++) { 688 sum += reord[k][i]; 689 } 690 if (minSum > sum) { 691 minSum = sum; 692 minCol = k; 693 } 694 if (maxSum < sum) { 695 maxSum = sum; 696 maxCol = k; 697 } 698 } 699 700 hihi = 0; /* cc-hi = "00" */ 701 for (i = 0; i < n_comp; i++) { 702 /* add in colors */ 703 hihi <<= 5; 704 hihi |= reord[maxCol][i] >> 3; 705 } 706 for (i = 0; i < n_comp; i++) { 707 /* add in colors */ 708 hihi <<= 5; 709 hihi |= reord[minCol][i] >> 3; 710 } 711 cc[3] = hihi; 712 cc[0] = cc[1] = cc[2] = 0; 713 714 /* compute interpolation vector */ 715 if (minCol != maxCol) { 716 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]); 717 } 718 719 /* add in texels */ 720 for (k = N_TEXELS - 1; k >= 0; k--) { 721 int32_t t = k * 3; 722 uint32_t *kk = (uint32_t *)((char *)cc + t / 8); 723 int32_t texel = n_vect + 1; /* transparent black */ 724 725 if (!ISTBLACK(input[k])) { 726 if (minCol != maxCol) { 727 /* interpolate color */ 728 CALCCDOT(texel, 
n_vect, n_comp, iv, b, input[k]); 729 /* add in texel */ 730 kk[0] |= texel << (t & 7); 731 } 732 } else { 733 /* add in texel */ 734 kk[0] |= texel << (t & 7); 735 } 736 } 737} 738 739 740static void 741fxt1_quantize_MIXED1 (uint32_t *cc, 742 uint8_t input[N_TEXELS][MAX_COMP]) 743{ 744 const int32_t n_vect = 2; /* highest vector number in each microtile */ 745 const int32_t n_comp = 3; /* 3 components: R, G, B */ 746 uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 747 float b, iv[MAX_COMP]; /* interpolation vector */ 748 int32_t i, j, k; 749 Fx64 hi; /* high quadword */ 750 uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */ 751 752 int32_t minSum; 753 int32_t maxSum; 754 int32_t minColL = 0, maxColL = -1; 755 int32_t minColR = 0, maxColR = -1; 756 757 /* Our solution here is to find the darkest and brightest colors in 758 * the 4x4 tile and use those as the two representative colors. 759 * There are probably better algorithms to use (histogram-based). 760 */ 761 minSum = 2000; /* big enough */ 762 maxSum = -1; /* small enough */ 763 for (k = 0; k < N_TEXELS / 2; k++) { 764 if (!ISTBLACK(input[k])) { 765 int32_t sum = 0; 766 for (i = 0; i < n_comp; i++) { 767 sum += input[k][i]; 768 } 769 if (minSum > sum) { 770 minSum = sum; 771 minColL = k; 772 } 773 if (maxSum < sum) { 774 maxSum = sum; 775 maxColL = k; 776 } 777 } 778 } 779 minSum = 2000; /* big enough */ 780 maxSum = -1; /* small enough */ 781 for (; k < N_TEXELS; k++) { 782 if (!ISTBLACK(input[k])) { 783 int32_t sum = 0; 784 for (i = 0; i < n_comp; i++) { 785 sum += input[k][i]; 786 } 787 if (minSum > sum) { 788 minSum = sum; 789 minColR = k; 790 } 791 if (maxSum < sum) { 792 maxSum = sum; 793 maxColR = k; 794 } 795 } 796 } 797 798 /* left microtile */ 799 if (maxColL == -1) { 800 /* all transparent black */ 801 cc[0] = ~0u; 802 for (i = 0; i < n_comp; i++) { 803 vec[0][i] = 0; 804 vec[1][i] = 0; 805 } 806 } else { 807 cc[0] = 0; 808 for (i = 0; i < n_comp; i++) { 809 vec[0][i] = 
input[minColL][i]; 810 vec[1][i] = input[maxColL][i]; 811 } 812 if (minColL != maxColL) { 813 /* compute interpolation vector */ 814 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 815 816 /* add in texels */ 817 lolo = 0; 818 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 819 int32_t texel = n_vect + 1; /* transparent black */ 820 if (!ISTBLACK(input[k])) { 821 /* interpolate color */ 822 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 823 } 824 /* add in texel */ 825 lolo <<= 2; 826 lolo |= texel; 827 } 828 cc[0] = lolo; 829 } 830 } 831 832 /* right microtile */ 833 if (maxColR == -1) { 834 /* all transparent black */ 835 cc[1] = ~0u; 836 for (i = 0; i < n_comp; i++) { 837 vec[2][i] = 0; 838 vec[3][i] = 0; 839 } 840 } else { 841 cc[1] = 0; 842 for (i = 0; i < n_comp; i++) { 843 vec[2][i] = input[minColR][i]; 844 vec[3][i] = input[maxColR][i]; 845 } 846 if (minColR != maxColR) { 847 /* compute interpolation vector */ 848 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); 849 850 /* add in texels */ 851 lohi = 0; 852 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 853 int32_t texel = n_vect + 1; /* transparent black */ 854 if (!ISTBLACK(input[k])) { 855 /* interpolate color */ 856 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 857 } 858 /* add in texel */ 859 lohi <<= 2; 860 lohi |= texel; 861 } 862 cc[1] = lohi; 863 } 864 } 865 866 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 867 for (j = 2 * 2 - 1; j >= 0; j--) { 868 for (i = 0; i < n_comp; i++) { 869 /* add in colors */ 870 FX64_SHL(hi, 5); 871 FX64_OR32(hi, vec[j][i] >> 3); 872 } 873 } 874 ((Fx64 *)cc)[1] = hi; 875} 876 877 878static void 879fxt1_quantize_MIXED0 (uint32_t *cc, 880 uint8_t input[N_TEXELS][MAX_COMP]) 881{ 882 const int32_t n_vect = 3; /* highest vector number in each microtile */ 883 const int32_t n_comp = 3; /* 3 components: R, G, B */ 884 uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 885 float b, iv[MAX_COMP]; /* interpolation 
vector */ 886 int32_t i, j, k; 887 Fx64 hi; /* high quadword */ 888 uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */ 889 890 int32_t minColL = 0, maxColL = 0; 891 int32_t minColR = 0, maxColR = 0; 892#if 0 893 int32_t minSum; 894 int32_t maxSum; 895 896 /* Our solution here is to find the darkest and brightest colors in 897 * the 4x4 tile and use those as the two representative colors. 898 * There are probably better algorithms to use (histogram-based). 899 */ 900 minSum = 2000; /* big enough */ 901 maxSum = -1; /* small enough */ 902 for (k = 0; k < N_TEXELS / 2; k++) { 903 int32_t sum = 0; 904 for (i = 0; i < n_comp; i++) { 905 sum += input[k][i]; 906 } 907 if (minSum > sum) { 908 minSum = sum; 909 minColL = k; 910 } 911 if (maxSum < sum) { 912 maxSum = sum; 913 maxColL = k; 914 } 915 } 916 minSum = 2000; /* big enough */ 917 maxSum = -1; /* small enough */ 918 for (; k < N_TEXELS; k++) { 919 int32_t sum = 0; 920 for (i = 0; i < n_comp; i++) { 921 sum += input[k][i]; 922 } 923 if (minSum > sum) { 924 minSum = sum; 925 minColR = k; 926 } 927 if (maxSum < sum) { 928 maxSum = sum; 929 maxColR = k; 930 } 931 } 932#else 933 int32_t minVal; 934 int32_t maxVal; 935 int32_t maxVarL = fxt1_variance(input, n_comp); 936 int32_t maxVarR = fxt1_variance(&input[N_TEXELS / 2], n_comp); 937 938 /* Scan the channel with max variance for lo & hi 939 * and use those as the two representative colors. 
940 */ 941 minVal = 2000; /* big enough */ 942 maxVal = -1; /* small enough */ 943 for (k = 0; k < N_TEXELS / 2; k++) { 944 int32_t t = input[k][maxVarL]; 945 if (minVal > t) { 946 minVal = t; 947 minColL = k; 948 } 949 if (maxVal < t) { 950 maxVal = t; 951 maxColL = k; 952 } 953 } 954 minVal = 2000; /* big enough */ 955 maxVal = -1; /* small enough */ 956 for (; k < N_TEXELS; k++) { 957 int32_t t = input[k][maxVarR]; 958 if (minVal > t) { 959 minVal = t; 960 minColR = k; 961 } 962 if (maxVal < t) { 963 maxVal = t; 964 maxColR = k; 965 } 966 } 967#endif 968 969 /* left microtile */ 970 cc[0] = 0; 971 for (i = 0; i < n_comp; i++) { 972 vec[0][i] = input[minColL][i]; 973 vec[1][i] = input[maxColL][i]; 974 } 975 if (minColL != maxColL) { 976 /* compute interpolation vector */ 977 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 978 979 /* add in texels */ 980 lolo = 0; 981 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 982 int32_t texel; 983 /* interpolate color */ 984 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 985 /* add in texel */ 986 lolo <<= 2; 987 lolo |= texel; 988 } 989 990 /* funky encoding for LSB of green */ 991 if ((int32_t)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) { 992 for (i = 0; i < n_comp; i++) { 993 vec[1][i] = input[minColL][i]; 994 vec[0][i] = input[maxColL][i]; 995 } 996 lolo = ~lolo; 997 } 998 999 cc[0] = lolo; 1000 } 1001 1002 /* right microtile */ 1003 cc[1] = 0; 1004 for (i = 0; i < n_comp; i++) { 1005 vec[2][i] = input[minColR][i]; 1006 vec[3][i] = input[maxColR][i]; 1007 } 1008 if (minColR != maxColR) { 1009 /* compute interpolation vector */ 1010 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); 1011 1012 /* add in texels */ 1013 lohi = 0; 1014 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 1015 int32_t texel; 1016 /* interpolate color */ 1017 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 1018 /* add in texel */ 1019 lohi <<= 2; 1020 lohi |= texel; 1021 } 1022 1023 /* funky encoding for LSB of green */ 1024 
if ((int32_t)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) { 1025 for (i = 0; i < n_comp; i++) { 1026 vec[3][i] = input[minColR][i]; 1027 vec[2][i] = input[maxColR][i]; 1028 } 1029 lohi = ~lohi; 1030 } 1031 1032 cc[1] = lohi; 1033 } 1034 1035 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 1036 for (j = 2 * 2 - 1; j >= 0; j--) { 1037 for (i = 0; i < n_comp; i++) { 1038 /* add in colors */ 1039 FX64_SHL(hi, 5); 1040 FX64_OR32(hi, vec[j][i] >> 3); 1041 } 1042 } 1043 ((Fx64 *)cc)[1] = hi; 1044} 1045 1046 1047static void 1048fxt1_quantize (uint32_t *cc, const uint8_t *lines[], int32_t comps) 1049{ 1050 int32_t trualpha; 1051 uint8_t reord[N_TEXELS][MAX_COMP]; 1052 1053 uint8_t input[N_TEXELS][MAX_COMP]; 1054 int32_t i, k, l; 1055 1056 if (comps == 3) { 1057 /* make the whole block opaque */ 1058 memset(input, -1, sizeof(input)); 1059 } 1060 1061 /* 8 texels each line */ 1062 for (l = 0; l < 4; l++) { 1063 for (k = 0; k < 4; k++) { 1064 for (i = 0; i < comps; i++) { 1065 input[k + l * 4][i] = *lines[l]++; 1066 } 1067 } 1068 for (; k < 8; k++) { 1069 for (i = 0; i < comps; i++) { 1070 input[k + l * 4 + 12][i] = *lines[l]++; 1071 } 1072 } 1073 } 1074 1075 /* block layout: 1076 * 00, 01, 02, 03, 08, 09, 0a, 0b 1077 * 10, 11, 12, 13, 18, 19, 1a, 1b 1078 * 04, 05, 06, 07, 0c, 0d, 0e, 0f 1079 * 14, 15, 16, 17, 1c, 1d, 1e, 1f 1080 */ 1081 1082 /* [dBorca] 1083 * stupidity flows forth from this 1084 */ 1085 l = N_TEXELS; 1086 trualpha = 0; 1087 if (comps == 4) { 1088 /* skip all transparent black texels */ 1089 l = 0; 1090 for (k = 0; k < N_TEXELS; k++) { 1091 /* test all components against 0 */ 1092 if (!ISTBLACK(input[k])) { 1093 /* texel is not transparent black */ 1094 memcpy(reord[l], input[k], 4); 1095 if (reord[l][ACOMP] < (255 - ALPHA_TS)) { 1096 /* non-opaque texel */ 1097 trualpha = !0; 1098 } 1099 l++; 1100 } 1101 } 1102 } 1103 1104#if 0 1105 if (trualpha) { 1106 fxt1_quantize_ALPHA0(cc, input, reord, l); 
1107 } else if (l == 0) { 1108 cc[0] = cc[1] = cc[2] = -1; 1109 cc[3] = 0; 1110 } else if (l < N_TEXELS) { 1111 fxt1_quantize_HI(cc, input, reord, l); 1112 } else { 1113 fxt1_quantize_CHROMA(cc, input); 1114 } 1115 (void)fxt1_quantize_ALPHA1; 1116 (void)fxt1_quantize_MIXED1; 1117 (void)fxt1_quantize_MIXED0; 1118#else 1119 if (trualpha) { 1120 fxt1_quantize_ALPHA1(cc, input); 1121 } else if (l == 0) { 1122 cc[0] = cc[1] = cc[2] = ~0u; 1123 cc[3] = 0; 1124 } else if (l < N_TEXELS) { 1125 fxt1_quantize_MIXED1(cc, input); 1126 } else { 1127 fxt1_quantize_MIXED0(cc, input); 1128 } 1129 (void)fxt1_quantize_ALPHA0; 1130 (void)fxt1_quantize_HI; 1131 (void)fxt1_quantize_CHROMA; 1132#endif 1133} 1134 1135 1136 1137/** 1138 * Upscale an image by replication, not (typical) stretching. 1139 * We use this when the image width or height is less than a 1140 * certain size (4, 8) and we need to upscale an image. 1141 */ 1142static void 1143upscale_teximage2d(int32_t inWidth, int32_t inHeight, 1144 int32_t outWidth, int32_t outHeight, 1145 int32_t comps, const uint8_t *src, int32_t srcRowStride, 1146 uint8_t *dest ) 1147{ 1148 int32_t i, j, k; 1149 1150 assert(outWidth >= inWidth); 1151 assert(outHeight >= inHeight); 1152#if 0 1153 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2); 1154 assert((outWidth & 3) == 0); 1155 assert((outHeight & 3) == 0); 1156#endif 1157 1158 for (i = 0; i < outHeight; i++) { 1159 const int32_t ii = i % inHeight; 1160 for (j = 0; j < outWidth; j++) { 1161 const int32_t jj = j % inWidth; 1162 for (k = 0; k < comps; k++) { 1163 dest[(i * outWidth + j) * comps + k] 1164 = src[ii * srcRowStride + jj * comps + k]; 1165 } 1166 } 1167 } 1168} 1169 1170 1171static void 1172fxt1_encode (uint32_t width, uint32_t height, int32_t comps, 1173 const void *source, int32_t srcRowStride, 1174 void *dest, int32_t destRowStride) 1175{ 1176 uint32_t x, y; 1177 const uint8_t *data; 1178 uint32_t *encoded = (uint32_t *)dest; 1179 void *newSource = NULL; 
1180 1181 assert(comps == 3 || comps == 4); 1182 1183 /* Replicate image if width is not M8 or height is not M4 */ 1184 if ((width & 7) | (height & 3)) { 1185 int32_t newWidth = (width + 7) & ~7; 1186 int32_t newHeight = (height + 3) & ~3; 1187 newSource = malloc(comps * newWidth * newHeight * sizeof(uint8_t)); 1188 if (!newSource) 1189 return; 1190 upscale_teximage2d(width, height, newWidth, newHeight, 1191 comps, (const uint8_t *) source, 1192 srcRowStride, (uint8_t *) newSource); 1193 source = newSource; 1194 width = newWidth; 1195 height = newHeight; 1196 srcRowStride = comps * newWidth; 1197 } 1198 1199 data = (const uint8_t *) source; 1200 destRowStride = (destRowStride - width * 2) / 4; 1201 for (y = 0; y < height; y += 4) { 1202 uint32_t offs = 0 + (y + 0) * srcRowStride; 1203 for (x = 0; x < width; x += 8) { 1204 const uint8_t *lines[4]; 1205 lines[0] = &data[offs]; 1206 lines[1] = lines[0] + srcRowStride; 1207 lines[2] = lines[1] + srcRowStride; 1208 lines[3] = lines[2] + srcRowStride; 1209 offs += 8 * comps; 1210 fxt1_quantize(encoded, lines, comps); 1211 /* 128 bits per 8x4 block */ 1212 encoded += 4; 1213 } 1214 encoded += destRowStride; 1215 } 1216 1217 free(newSource); 1218} 1219 1220 1221/***************************************************************************\ 1222 * FXT1 decoder 1223 * 1224 * The decoder is based on GL_3DFX_texture_compression_FXT1 1225 * specification and serves as a concept for the encoder. 
1226\***************************************************************************/ 1227 1228 1229/* lookup table for scaling 5 bit colors up to 8 bits */ 1230static const uint8_t _rgb_scale_5[] = { 1231 0, 8, 16, 25, 33, 41, 49, 58, 1232 66, 74, 82, 90, 99, 107, 115, 123, 1233 132, 140, 148, 156, 165, 173, 181, 189, 1234 197, 206, 214, 222, 230, 239, 247, 255 1235}; 1236 1237/* lookup table for scaling 6 bit colors up to 8 bits */ 1238static const uint8_t _rgb_scale_6[] = { 1239 0, 4, 8, 12, 16, 20, 24, 28, 1240 32, 36, 40, 45, 49, 53, 57, 61, 1241 65, 69, 73, 77, 81, 85, 89, 93, 1242 97, 101, 105, 109, 113, 117, 121, 125, 1243 130, 134, 138, 142, 146, 150, 154, 158, 1244 162, 166, 170, 174, 178, 182, 186, 190, 1245 194, 198, 202, 206, 210, 215, 219, 223, 1246 227, 231, 235, 239, 243, 247, 251, 255 1247}; 1248 1249 1250#define CC_SEL(cc, which) (((uint32_t *)(cc))[(which) / 32] >> ((which) & 31)) 1251#define UP5(c) _rgb_scale_5[(c) & 31] 1252#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] 1253#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) 1254 1255 1256static void 1257fxt1_decode_1HI (const uint8_t *code, int32_t t, uint8_t *rgba) 1258{ 1259 const uint32_t *cc; 1260 1261 t *= 3; 1262 cc = (const uint32_t *)(code + t / 8); 1263 t = (cc[0] >> (t & 7)) & 7; 1264 1265 if (t == 7) { 1266 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0; 1267 } else { 1268 uint8_t r, g, b; 1269 cc = (const uint32_t *)(code + 12); 1270 if (t == 0) { 1271 b = UP5(CC_SEL(cc, 0)); 1272 g = UP5(CC_SEL(cc, 5)); 1273 r = UP5(CC_SEL(cc, 10)); 1274 } else if (t == 6) { 1275 b = UP5(CC_SEL(cc, 15)); 1276 g = UP5(CC_SEL(cc, 20)); 1277 r = UP5(CC_SEL(cc, 25)); 1278 } else { 1279 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); 1280 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20))); 1281 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25))); 1282 } 1283 rgba[RCOMP] = r; 1284 rgba[GCOMP] = g; 1285 rgba[BCOMP] = b; 1286 rgba[ACOMP] = 
255; 1287 } 1288} 1289 1290 1291static void 1292fxt1_decode_1CHROMA (const uint8_t *code, int32_t t, uint8_t *rgba) 1293{ 1294 const uint32_t *cc; 1295 uint32_t kk; 1296 1297 cc = (const uint32_t *)code; 1298 if (t & 16) { 1299 cc++; 1300 t &= 15; 1301 } 1302 t = (cc[0] >> (t * 2)) & 3; 1303 1304 t *= 15; 1305 cc = (const uint32_t *)(code + 8 + t / 8); 1306 kk = cc[0] >> (t & 7); 1307 rgba[BCOMP] = UP5(kk); 1308 rgba[GCOMP] = UP5(kk >> 5); 1309 rgba[RCOMP] = UP5(kk >> 10); 1310 rgba[ACOMP] = 255; 1311} 1312 1313 1314static void 1315fxt1_decode_1MIXED (const uint8_t *code, int32_t t, uint8_t *rgba) 1316{ 1317 const uint32_t *cc; 1318 uint32_t col[2][3]; 1319 int32_t glsb, selb; 1320 1321 cc = (const uint32_t *)code; 1322 if (t & 16) { 1323 t &= 15; 1324 t = (cc[1] >> (t * 2)) & 3; 1325 /* col 2 */ 1326 col[0][BCOMP] = (*(const uint32_t *)(code + 11)) >> 6; 1327 col[0][GCOMP] = CC_SEL(cc, 99); 1328 col[0][RCOMP] = CC_SEL(cc, 104); 1329 /* col 3 */ 1330 col[1][BCOMP] = CC_SEL(cc, 109); 1331 col[1][GCOMP] = CC_SEL(cc, 114); 1332 col[1][RCOMP] = CC_SEL(cc, 119); 1333 glsb = CC_SEL(cc, 126); 1334 selb = CC_SEL(cc, 33); 1335 } else { 1336 t = (cc[0] >> (t * 2)) & 3; 1337 /* col 0 */ 1338 col[0][BCOMP] = CC_SEL(cc, 64); 1339 col[0][GCOMP] = CC_SEL(cc, 69); 1340 col[0][RCOMP] = CC_SEL(cc, 74); 1341 /* col 1 */ 1342 col[1][BCOMP] = CC_SEL(cc, 79); 1343 col[1][GCOMP] = CC_SEL(cc, 84); 1344 col[1][RCOMP] = CC_SEL(cc, 89); 1345 glsb = CC_SEL(cc, 125); 1346 selb = CC_SEL(cc, 1); 1347 } 1348 1349 if (CC_SEL(cc, 124) & 1) { 1350 /* alpha[0] == 1 */ 1351 1352 if (t == 3) { 1353 /* zero */ 1354 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0; 1355 } else { 1356 uint8_t r, g, b; 1357 if (t == 0) { 1358 b = UP5(col[0][BCOMP]); 1359 g = UP5(col[0][GCOMP]); 1360 r = UP5(col[0][RCOMP]); 1361 } else if (t == 2) { 1362 b = UP5(col[1][BCOMP]); 1363 g = UP6(col[1][GCOMP], glsb); 1364 r = UP5(col[1][RCOMP]); 1365 } else { 1366 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2; 1367 
g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2; 1368 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2; 1369 } 1370 rgba[RCOMP] = r; 1371 rgba[GCOMP] = g; 1372 rgba[BCOMP] = b; 1373 rgba[ACOMP] = 255; 1374 } 1375 } else { 1376 /* alpha[0] == 0 */ 1377 uint8_t r, g, b; 1378 if (t == 0) { 1379 b = UP5(col[0][BCOMP]); 1380 g = UP6(col[0][GCOMP], glsb ^ selb); 1381 r = UP5(col[0][RCOMP]); 1382 } else if (t == 3) { 1383 b = UP5(col[1][BCOMP]); 1384 g = UP6(col[1][GCOMP], glsb); 1385 r = UP5(col[1][RCOMP]); 1386 } else { 1387 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP])); 1388 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb), 1389 UP6(col[1][GCOMP], glsb)); 1390 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP])); 1391 } 1392 rgba[RCOMP] = r; 1393 rgba[GCOMP] = g; 1394 rgba[BCOMP] = b; 1395 rgba[ACOMP] = 255; 1396 } 1397} 1398 1399 1400static void 1401fxt1_decode_1ALPHA (const uint8_t *code, int32_t t, uint8_t *rgba) 1402{ 1403 const uint32_t *cc; 1404 uint8_t r, g, b, a; 1405 1406 cc = (const uint32_t *)code; 1407 if (CC_SEL(cc, 124) & 1) { 1408 /* lerp == 1 */ 1409 uint32_t col0[4]; 1410 1411 if (t & 16) { 1412 t &= 15; 1413 t = (cc[1] >> (t * 2)) & 3; 1414 /* col 2 */ 1415 col0[BCOMP] = (*(const uint32_t *)(code + 11)) >> 6; 1416 col0[GCOMP] = CC_SEL(cc, 99); 1417 col0[RCOMP] = CC_SEL(cc, 104); 1418 col0[ACOMP] = CC_SEL(cc, 119); 1419 } else { 1420 t = (cc[0] >> (t * 2)) & 3; 1421 /* col 0 */ 1422 col0[BCOMP] = CC_SEL(cc, 64); 1423 col0[GCOMP] = CC_SEL(cc, 69); 1424 col0[RCOMP] = CC_SEL(cc, 74); 1425 col0[ACOMP] = CC_SEL(cc, 109); 1426 } 1427 1428 if (t == 0) { 1429 b = UP5(col0[BCOMP]); 1430 g = UP5(col0[GCOMP]); 1431 r = UP5(col0[RCOMP]); 1432 a = UP5(col0[ACOMP]); 1433 } else if (t == 3) { 1434 b = UP5(CC_SEL(cc, 79)); 1435 g = UP5(CC_SEL(cc, 84)); 1436 r = UP5(CC_SEL(cc, 89)); 1437 a = UP5(CC_SEL(cc, 114)); 1438 } else { 1439 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79))); 1440 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84))); 1441 r 
= LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89))); 1442 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114))); 1443 } 1444 } else { 1445 /* lerp == 0 */ 1446 1447 if (t & 16) { 1448 cc++; 1449 t &= 15; 1450 } 1451 t = (cc[0] >> (t * 2)) & 3; 1452 1453 if (t == 3) { 1454 /* zero */ 1455 r = g = b = a = 0; 1456 } else { 1457 uint32_t kk; 1458 cc = (const uint32_t *)code; 1459 a = UP5(cc[3] >> (t * 5 + 13)); 1460 t *= 15; 1461 cc = (const uint32_t *)(code + 8 + t / 8); 1462 kk = cc[0] >> (t & 7); 1463 b = UP5(kk); 1464 g = UP5(kk >> 5); 1465 r = UP5(kk >> 10); 1466 } 1467 } 1468 rgba[RCOMP] = r; 1469 rgba[GCOMP] = g; 1470 rgba[BCOMP] = b; 1471 rgba[ACOMP] = a; 1472} 1473 1474 1475static void 1476fxt1_decode_1 (const void *texture, int32_t stride, /* in pixels */ 1477 int32_t i, int32_t j, uint8_t *rgba) 1478{ 1479 static void (*decode_1[]) (const uint8_t *, int32_t, uint8_t *) = { 1480 fxt1_decode_1HI, /* cc-high = "00?" */ 1481 fxt1_decode_1HI, /* cc-high = "00?" */ 1482 fxt1_decode_1CHROMA, /* cc-chroma = "010" */ 1483 fxt1_decode_1ALPHA, /* alpha = "011" */ 1484 fxt1_decode_1MIXED, /* mixed = "1??" */ 1485 fxt1_decode_1MIXED, /* mixed = "1??" */ 1486 fxt1_decode_1MIXED, /* mixed = "1??" */ 1487 fxt1_decode_1MIXED /* mixed = "1??" */ 1488 }; 1489 1490 const uint8_t *code = (const uint8_t *)texture + 1491 ((j / 4) * (stride / 8) + (i / 8)) * 16; 1492 int32_t mode = CC_SEL(code, 125); 1493 int32_t t = i & 7; 1494 1495 if (t & 4) { 1496 t += 12; 1497 } 1498 t += (j & 3) * 4; 1499 1500 decode_1[mode](code, t, rgba); 1501} 1502 1503/* 1504 * Pixel fetch within a block. 
1505 */ 1506 1507void 1508util_format_fxt1_rgb_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j) 1509{ 1510 fxt1_decode_1(src, 0, i, j, dst); 1511} 1512 1513void 1514util_format_fxt1_rgba_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j) 1515{ 1516 fxt1_decode_1(src, 0, i, j, dst); 1517 dst[3] = 0xff; 1518} 1519 1520void 1521util_format_fxt1_rgb_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j) 1522{ 1523 float *dst = in_dst; 1524 uint8_t tmp[4]; 1525 fxt1_decode_1(src, 0, i, j, tmp); 1526 dst[0] = ubyte_to_float(tmp[0]); 1527 dst[1] = ubyte_to_float(tmp[1]); 1528 dst[2] = ubyte_to_float(tmp[2]); 1529 dst[3] = 1.0; 1530} 1531 1532void 1533util_format_fxt1_rgba_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j) 1534{ 1535 float *dst = in_dst; 1536 uint8_t tmp[4]; 1537 fxt1_decode_1(src, 0, i, j, tmp); 1538 dst[0] = ubyte_to_float(tmp[0]); 1539 dst[1] = ubyte_to_float(tmp[1]); 1540 dst[2] = ubyte_to_float(tmp[2]); 1541 dst[3] = ubyte_to_float(tmp[3]); 1542} 1543 1544/* 1545 * Block decompression. 
1546 */ 1547 1548static inline void 1549util_format_fxtn_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, 1550 const uint8_t *restrict src_row, unsigned src_stride, 1551 unsigned width, unsigned height, 1552 boolean rgba) 1553{ 1554 const unsigned bw = 8, bh = 4, comps = 4; 1555 unsigned x, y, i, j; 1556 for (y = 0; y < height; y += bh) { 1557 const uint8_t *src = src_row; 1558 for (x = 0; x < width; x += bw) { 1559 for (j = 0; j < bh; ++j) { 1560 for (i = 0; i < bw; ++i) { 1561 uint8_t *dst = dst_row + (y + j) * dst_stride / sizeof(*dst_row) + (x + i) * comps; 1562 fxt1_decode_1(src, 0, i, j, dst); 1563 if (!rgba) 1564 dst[3] = 0xff; 1565 } 1566 } 1567 src += FXT1_BLOCK_SIZE; 1568 } 1569 src_row += src_stride; 1570 } 1571} 1572 1573void 1574util_format_fxt1_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, 1575 const uint8_t *restrict src_row, unsigned src_stride, 1576 unsigned width, unsigned height) 1577{ 1578 util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, 1579 src_row, src_stride, 1580 width, height, 1581 false); 1582} 1583 1584void 1585util_format_fxt1_rgba_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, 1586 const uint8_t *restrict src_row, unsigned src_stride, 1587 unsigned width, unsigned height) 1588{ 1589 util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, 1590 src_row, src_stride, 1591 width, height, 1592 true); 1593} 1594 1595static inline void 1596util_format_fxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, 1597 const uint8_t *restrict src_row, unsigned src_stride, 1598 unsigned width, unsigned height, 1599 boolean rgba) 1600{ 1601 const unsigned bw = 8, bh = 4, comps = 4; 1602 unsigned x, y, i, j; 1603 for (y = 0; y < height; y += 4) { 1604 const uint8_t *src = src_row; 1605 for (x = 0; x < width; x += 8) { 1606 for (j = 0; j < bh; ++j) { 1607 for (i = 0; i < bw; ++i) { 1608 float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i) * comps; 
1609 uint8_t tmp[4]; 1610 fxt1_decode_1(src, 0, i, j, tmp); 1611 dst[0] = ubyte_to_float(tmp[0]); 1612 dst[1] = ubyte_to_float(tmp[1]); 1613 dst[2] = ubyte_to_float(tmp[2]); 1614 if (rgba) 1615 dst[3] = ubyte_to_float(tmp[3]); 1616 else 1617 dst[3] = 1.0; 1618 } 1619 } 1620 src += FXT1_BLOCK_SIZE; 1621 } 1622 src_row += src_stride; 1623 } 1624} 1625 1626void 1627util_format_fxt1_rgb_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, 1628 const uint8_t *restrict src_row, unsigned src_stride, 1629 unsigned width, unsigned height) 1630{ 1631 util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride, 1632 src_row, src_stride, 1633 width, height, 1634 false); 1635} 1636 1637void 1638util_format_fxt1_rgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, 1639 const uint8_t *restrict src_row, unsigned src_stride, 1640 unsigned width, unsigned height) 1641{ 1642 util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride, 1643 src_row, src_stride, 1644 width, height, 1645 true); 1646} 1647 1648/* 1649 * Block compression. 1650 */ 1651 1652void 1653util_format_fxt1_rgb_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, 1654 const uint8_t *restrict src, unsigned src_stride, 1655 unsigned width, unsigned height) 1656{ 1657 /* The encoder for FXT1_RGB wants 24bpp packed rgb, so make a temporary to do that. 
1658 */ 1659 int temp_stride = width * 3; 1660 uint8_t *temp = malloc(height * temp_stride); 1661 if (!temp) 1662 return; 1663 1664 for (int y = 0; y < height; y++) { 1665 for (int x = 0; x < width; x++) { 1666 temp[y * temp_stride + x * 3 + 0] = src[x * 4 + 0]; 1667 temp[y * temp_stride + x * 3 + 1] = src[x * 4 + 1]; 1668 temp[y * temp_stride + x * 3 + 2] = src[x * 4 + 2]; 1669 } 1670 src += src_stride; 1671 } 1672 1673 fxt1_encode(width, height, 3, temp, temp_stride, dst_row, dst_stride); 1674 1675 free(temp); 1676} 1677 1678void 1679util_format_fxt1_rgba_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, 1680 const uint8_t *restrict src, unsigned src_stride, 1681 unsigned width, unsigned height) 1682{ 1683 fxt1_encode(width, height, 4, src, src_stride, dst_row, dst_stride); 1684} 1685 1686void 1687util_format_fxt1_rgb_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, 1688 const float *restrict src, unsigned src_stride, 1689 unsigned width, unsigned height) 1690{ 1691 int temp_stride = width * 4; 1692 uint8_t *temp = malloc(height * temp_stride); 1693 if (!temp) 1694 return; 1695 1696 util_format_r8g8b8a8_unorm_pack_rgba_float(temp, temp_stride, 1697 src, src_stride, 1698 width, height); 1699 1700 util_format_fxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, 1701 temp, temp_stride, 1702 width, height); 1703 1704 free(temp); 1705} 1706 1707void 1708util_format_fxt1_rgba_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, 1709 const float *restrict src, unsigned src_stride, 1710 unsigned width, unsigned height) 1711{ 1712 int temp_stride = width * 4; 1713 uint8_t *temp = malloc(height * temp_stride); 1714 if (!temp) 1715 return; 1716 1717 util_format_r8g8b8a8_unorm_pack_rgba_float(temp, temp_stride, 1718 src, src_stride, 1719 width, height); 1720 1721 util_format_fxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, 1722 temp, temp_stride, 1723 width, height); 1724 1725 free(temp); 1726} 1727