1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26/**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32#include "errors.h"
33#include "glheader.h"
34#include "imports.h"
35#include "image.h"
36#include "macros.h"
37#include "mipmap.h"
38#include "texcompress.h"
39#include "texcompress_fxt1.h"
40#include "texstore.h"
41#include "mtypes.h"
42
43
/*
 * Forward declaration of the block encoder.  The texstore entry points in
 * this file call it with comps = 3 (RGB) or 4 (RGBA), a pointer to the
 * GLubyte source pixels plus the source row stride, and the destination
 * buffer plus its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.
 * NOTE(review): presumably fetches texel (i, j) of `texture` into rgba[];
 * the definition is not next to this declaration -- confirm there.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
52
53
54/**
55 * Store user's image in rgb_fxt1 format.
56 */
57GLboolean
58_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59{
60   const GLubyte *pixels;
61   GLint srcRowStride;
62   GLubyte *dst;
63   const GLubyte *tempImage = NULL;
64
65   assert(dstFormat == MESA_FORMAT_RGB_FXT1);
66
67   if (srcFormat != GL_RGB ||
68       srcType != GL_UNSIGNED_BYTE ||
69       ctx->_ImageTransferState ||
70       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
71       srcPacking->SwapBytes) {
72      /* convert image to RGB/GLubyte */
73      GLubyte *tempImageSlices[1];
74      int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
75      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
76      if (!tempImage)
77         return GL_FALSE; /* out of memory */
78      tempImageSlices[0] = (GLubyte *) tempImage;
79      _mesa_texstore(ctx, dims,
80                     baseInternalFormat,
81                     MESA_FORMAT_RGB_UNORM8,
82                     rgbRowStride, tempImageSlices,
83                     srcWidth, srcHeight, srcDepth,
84                     srcFormat, srcType, srcAddr,
85                     srcPacking);
86      pixels = tempImage;
87      srcRowStride = 3 * srcWidth;
88      srcFormat = GL_RGB;
89   }
90   else {
91      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
92                                     srcFormat, srcType, 0, 0);
93
94      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95                                            srcType) / sizeof(GLubyte);
96   }
97
98   dst = dstSlices[0];
99
100   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
101               dst, dstRowStride);
102
103   free((void*) tempImage);
104
105   return GL_TRUE;
106}
107
108
109/**
110 * Store user's image in rgba_fxt1 format.
111 */
112GLboolean
113_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114{
115   const GLubyte *pixels;
116   GLint srcRowStride;
117   GLubyte *dst;
118   const GLubyte *tempImage = NULL;
119
120   assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
121
122   if (srcFormat != GL_RGBA ||
123       srcType != GL_UNSIGNED_BYTE ||
124       ctx->_ImageTransferState ||
125       srcPacking->SwapBytes) {
126      /* convert image to RGBA/GLubyte */
127      GLubyte *tempImageSlices[1];
128      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
129      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
130      if (!tempImage)
131         return GL_FALSE; /* out of memory */
132      tempImageSlices[0] = (GLubyte *) tempImage;
133      _mesa_texstore(ctx, dims,
134                     baseInternalFormat,
135                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
136                                           : MESA_FORMAT_A8B8G8R8_UNORM,
137                     rgbaRowStride, tempImageSlices,
138                     srcWidth, srcHeight, srcDepth,
139                     srcFormat, srcType, srcAddr,
140                     srcPacking);
141      pixels = tempImage;
142      srcRowStride = 4 * srcWidth;
143      srcFormat = GL_RGBA;
144   }
145   else {
146      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
147                                     srcFormat, srcType, 0, 0);
148
149      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
150                                            srcType) / sizeof(GLubyte);
151   }
152
153   dst = dstSlices[0];
154
155   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
156               dst, dstRowStride);
157
158   free((void*) tempImage);
159
160   return GL_TRUE;
161}
162
163
164/***************************************************************************\
165 * FXT1 encoder
166 *
167 * The encoder was built by reversing the decoder,
168 * and is vaguely based on Texus2 by 3dfx. Note that this code
169 * is merely a proof of concept, since it is highly UNoptimized;
170 * moreover, it is sub-optimal due to initial conditions passed
171 * to Lloyd's algorithm (the interpolation modes are even worse).
172\***************************************************************************/
173
174
#define MAX_COMP 4 /* largest number of components a texel ever has (R, G, B, A) */
#define MAX_VECT 4 /* largest number of base vectors any quantizer looks for */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* upper bound on the iterations of fxt1_lloyd() */
#define LL_RMS_D 10 /* fxt1_lloyd() stops once the error improves by less than this */
#define LL_RMS_E 255 /* fxt1_lloyd() stops once the accumulated error drops below this */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* All-zero reference word that ISTBLACK() compares texels against. */
static const GLuint zero = 0;
/* True when the first sizeof(zero) bytes of v are all zero.  It is applied
 * to GLubyte[MAX_COMP] texels, i.e. it detects transparent black
 * (presumably GLuint is four bytes wide, covering all four components).
 */
#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
184
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * Fx64 is used to assemble the upper 64 bits of a compressed block:
 *   FX64_MOV32(a, b)  load the 32-bit value b into a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The "#if 1" permanently selects the native uint64_t variant; the
 * two-word struct emulation below is kept as a fallback only.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* Note: only the low word is written, a.hi keeps whatever it held. */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/* c must be non-zero here: c == 0 would evaluate a.lo >> 32
 * (assuming a 32-bit GLuint). */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
221
222
/* Per-component weight used by MAKEIVEC; currently 1 for every component. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the segment V0 -> V1,
 * such that (dot(V, IV) + B), truncated to an integer, is the index in
 * 0..NV of the interpolation step nearest to V: V0 maps to 0.5 and V1
 * to NV + 0.5 (with F() == 1).
 *
 * NC is the number of components.  The macro uses a variable `i` from the
 * enclosing scope as its loop counter.  V0 and V1 must differ, otherwise
 * d2 is 0 and the division below is by zero.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Compute the interpolation index of color V: TEXEL = (GLint)(dot(V, IV) + B),
 * using the IV / B produced by MAKEIVEC.  With SAFECDOT set the result is
 * clamped to [0, NV].  Like MAKEIVEC it uses the enclosing scope's `i`.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
261
262
263static GLint
264fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
265              GLubyte input[MAX_COMP], GLint nc)
266{
267   GLint i, j, best = -1;
268   GLfloat err = 1e9; /* big enough */
269
270   for (j = 0; j < nv; j++) {
271      GLfloat e = 0.0F;
272      for (i = 0; i < nc; i++) {
273         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
274      }
275      if (e < err) {
276         err = e;
277         best = j;
278      }
279   }
280
281   return best;
282}
283
284
285static GLint
286fxt1_worst (GLfloat vec[MAX_COMP],
287            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
288{
289   GLint i, k, worst = -1;
290   GLfloat err = -1.0F; /* small enough */
291
292   for (k = 0; k < n; k++) {
293      GLfloat e = 0.0F;
294      for (i = 0; i < nc; i++) {
295         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
296      }
297      if (e > err) {
298         err = e;
299         worst = k;
300      }
301   }
302
303   return worst;
304}
305
306
307static GLint
308fxt1_variance (GLdouble variance[MAX_COMP],
309               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
310{
311   GLint i, k, best = 0;
312   GLint sx, sx2;
313   GLdouble var, maxvar = -1; /* small enough */
314   GLdouble teenth = 1.0 / n;
315
316   for (i = 0; i < nc; i++) {
317      sx = sx2 = 0;
318      for (k = 0; k < n; k++) {
319         GLint t = input[k][i];
320         sx += t;
321         sx2 += t * t;
322      }
323      var = sx2 * teenth - sx * sx * teenth * teenth;
324      if (maxvar < var) {
325         maxvar = var;
326         best = i;
327      }
328      if (variance) {
329         variance[i] = var;
330      }
331   }
332
333   return best;
334}
335
336
337static GLint
338fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
339             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
340{
341#if 0
342   /* Choose colors from a grid.
343    */
344   GLint i, j;
345
346   for (j = 0; j < nv; j++) {
347      GLint m = j * (n - 1) / (nv - 1);
348      for (i = 0; i < nc; i++) {
349         vec[j][i] = input[m][i];
350      }
351   }
352#else
353   /* Our solution here is to find the darkest and brightest colors in
354    * the 8x4 tile and use those as the two representative colors.
355    * There are probably better algorithms to use (histogram-based).
356    */
357   GLint i, j, k;
358   GLint minSum = 2000; /* big enough */
359   GLint maxSum = -1; /* small enough */
360   GLint minCol = 0; /* phoudoin: silent compiler! */
361   GLint maxCol = 0; /* phoudoin: silent compiler! */
362
363   struct {
364      GLint flag;
365      GLint key;
366      GLint freq;
367      GLint idx;
368   } hist[N_TEXELS];
369   GLint lenh = 0;
370
371   memset(hist, 0, sizeof(hist));
372
373   for (k = 0; k < n; k++) {
374      GLint l;
375      GLint key = 0;
376      GLint sum = 0;
377      for (i = 0; i < nc; i++) {
378         key <<= 8;
379         key |= input[k][i];
380         sum += input[k][i];
381      }
382      for (l = 0; l < n; l++) {
383         if (!hist[l].flag) {
384            /* alloc new slot */
385            hist[l].flag = !0;
386            hist[l].key = key;
387            hist[l].freq = 1;
388            hist[l].idx = k;
389            lenh = l + 1;
390            break;
391         } else if (hist[l].key == key) {
392            hist[l].freq++;
393            break;
394         }
395      }
396      if (minSum > sum) {
397         minSum = sum;
398         minCol = k;
399      }
400      if (maxSum < sum) {
401         maxSum = sum;
402         maxCol = k;
403      }
404   }
405
406   if (lenh <= nv) {
407      for (j = 0; j < lenh; j++) {
408         for (i = 0; i < nc; i++) {
409            vec[j][i] = (GLfloat)input[hist[j].idx][i];
410         }
411      }
412      for (; j < nv; j++) {
413         for (i = 0; i < nc; i++) {
414            vec[j][i] = vec[0][i];
415         }
416      }
417      return 0;
418   }
419
420   for (j = 0; j < nv; j++) {
421      for (i = 0; i < nc; i++) {
422         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
423      }
424   }
425#endif
426
427   return !0;
428}
429
430
431static GLint
432fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
433            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
434{
435   /* Use the generalized lloyd's algorithm for VQ:
436    *     find 4 color vectors.
437    *
438    *     for each sample color
439    *         sort to nearest vector.
440    *
441    *     replace each vector with the centroid of its matching colors.
442    *
443    *     repeat until RMS doesn't improve.
444    *
445    *     if a color vector has no samples, or becomes the same as another
446    *     vector, replace it with the color which is farthest from a sample.
447    *
448    * vec[][MAX_COMP]           initial vectors and resulting colors
449    * nv                        number of resulting colors required
450    * input[N_TEXELS][MAX_COMP] input texels
451    * nc                        number of components in input / vec
452    * n                         number of input samples
453    */
454
455   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
456   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
457   GLfloat error, lasterror = 1e9;
458
459   GLint i, j, k, rep;
460
461   /* the quantizer */
462   for (rep = 0; rep < LL_N_REP; rep++) {
463      /* reset sums & counters */
464      for (j = 0; j < nv; j++) {
465         for (i = 0; i < nc; i++) {
466            sum[j][i] = 0;
467         }
468         cnt[j] = 0;
469      }
470      error = 0;
471
472      /* scan whole block */
473      for (k = 0; k < n; k++) {
474#if 1
475         GLint best = -1;
476         GLfloat err = 1e9; /* big enough */
477         /* determine best vector */
478         for (j = 0; j < nv; j++) {
479            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
480                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
481                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
482            if (nc == 4) {
483               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
484            }
485            if (e < err) {
486               err = e;
487               best = j;
488            }
489         }
490#else
491         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
492#endif
493         assert(best >= 0);
494         /* add in closest color */
495         for (i = 0; i < nc; i++) {
496            sum[best][i] += input[k][i];
497         }
498         /* mark this vector as used */
499         cnt[best]++;
500         /* accumulate error */
501         error += err;
502      }
503
504      /* check RMS */
505      if ((error < LL_RMS_E) ||
506          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
507         return !0; /* good match */
508      }
509      lasterror = error;
510
511      /* move each vector to the barycenter of its closest colors */
512      for (j = 0; j < nv; j++) {
513         if (cnt[j]) {
514            GLfloat div = 1.0F / cnt[j];
515            for (i = 0; i < nc; i++) {
516               vec[j][i] = div * sum[j][i];
517            }
518         } else {
519            /* this vec has no samples or is identical with a previous vec */
520            GLint worst = fxt1_worst(vec[j], input, nc, n);
521            for (i = 0; i < nc; i++) {
522               vec[j][i] = input[worst][i];
523            }
524         }
525      }
526   }
527
528   return 0; /* could not converge fast enough */
529}
530
531
532static void
533fxt1_quantize_CHROMA (GLuint *cc,
534                      GLubyte input[N_TEXELS][MAX_COMP])
535{
536   const GLint n_vect = 4; /* 4 base vectors to find */
537   const GLint n_comp = 3; /* 3 components: R, G, B */
538   GLfloat vec[MAX_VECT][MAX_COMP];
539   GLint i, j, k;
540   Fx64 hi; /* high quadword */
541   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
542
543   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
544      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
545   }
546
547   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
548   for (j = n_vect - 1; j >= 0; j--) {
549      for (i = 0; i < n_comp; i++) {
550         /* add in colors */
551         FX64_SHL(hi, 5);
552         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
553      }
554   }
555   ((Fx64 *)cc)[1] = hi;
556
557   lohi = lolo = 0;
558   /* right microtile */
559   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
560      lohi <<= 2;
561      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
562   }
563   /* left microtile */
564   for (; k >= 0; k--) {
565      lolo <<= 2;
566      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
567   }
568   cc[1] = lohi;
569   cc[0] = lolo;
570}
571
572
573static void
574fxt1_quantize_ALPHA0 (GLuint *cc,
575                      GLubyte input[N_TEXELS][MAX_COMP],
576                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
577{
578   const GLint n_vect = 3; /* 3 base vectors to find */
579   const GLint n_comp = 4; /* 4 components: R, G, B, A */
580   GLfloat vec[MAX_VECT][MAX_COMP];
581   GLint i, j, k;
582   Fx64 hi; /* high quadword */
583   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
584
585   /* the last vector indicates zero */
586   for (i = 0; i < n_comp; i++) {
587      vec[n_vect][i] = 0;
588   }
589
590   /* the first n texels in reord are guaranteed to be non-zero */
591   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
592      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
593   }
594
595   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
596   for (j = n_vect - 1; j >= 0; j--) {
597      /* add in alphas */
598      FX64_SHL(hi, 5);
599      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
600   }
601   for (j = n_vect - 1; j >= 0; j--) {
602      for (i = 0; i < n_comp - 1; i++) {
603         /* add in colors */
604         FX64_SHL(hi, 5);
605         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
606      }
607   }
608   ((Fx64 *)cc)[1] = hi;
609
610   lohi = lolo = 0;
611   /* right microtile */
612   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
613      lohi <<= 2;
614      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
615   }
616   /* left microtile */
617   for (; k >= 0; k--) {
618      lolo <<= 2;
619      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
620   }
621   cc[1] = lohi;
622   cc[0] = lolo;
623}
624
625
/**
 * Encode one block in ALPHA mode with interpolation (lerp = 1).
 *
 * Three RGBA vectors are stored: vec[0] belongs to the left microtile
 * (texels 0..15), vec[2] to the right one (texels 16..31) and vec[1] is
 * shared.  Each texel gets a 2-bit index 0..3 along the segment from its
 * microtile's own vector (index 0) to the shared vector (index 3).
 *
 * \param cc     receives the block as four GLuint words: cc[0] / cc[1]
 *               hold the indices of the left / right microtile, the upper
 *               64 bits the mode value, alphas and colors
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    *
    * If all texels of a microtile have the same component sum, the scan
    * is repeated with one trailing component fewer in the sum, until two
    * different texels are found or no components are left.  Note that
    * sumL / sumR keep accumulating over every repetition; they only serve
    * as weights further down.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* same search for the right microtile */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      /* tv[0], tv[1]: left min / max; tv[2], tv[3]: right min / max */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the left extreme (v1) and right extreme (v2) closest together */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* The shared vector is the average of that pair weighted by
       * sumL / sumR; each microtile keeps its other extreme
       * (1 - v1 is the other left entry, 5 - v2 the other right entry).
       * NOTE(review): divides by (sumL + sumR), which is zero if every
       * texel of the block is zero -- confirm the caller rules that out.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0;
   /* with a single color every index stays 0 */
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      /* highest texel first, so texel 0 ends up in the lowest bits */
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* high quadword: mode value, three 5-bit alphas, then three 5-bit RGB
    * triples; vector 2 goes in first in both groups
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
781
782
/**
 * Encode one block in HI mode: two RGB colors and a 3-bit index per texel.
 * Indices 0..6 select a step between the two colors, index 7 marks
 * transparent black.
 *
 * \param cc     receives the block as four GLuint words: the 32 indices
 *               occupy the low 96 bits, cc[3] holds the two colors
 * \param input  the N_TEXELS texels of the block, in block order
 * \param reord  texels searched for the darkest / brightest color
 *               (presumably the block's non-transparent-black texels --
 *               confirm against the caller)
 * \param n      number of texels in \p reord to search
 */
static void
fxt1_quantize_HI (GLuint *cc,
                  GLubyte input[N_TEXELS][MAX_COMP],
                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
{
   const GLint n_vect = 6; /* highest vector number */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
   GLfloat iv[MAX_COMP];   /* interpolation vector */
   GLint i, k;
   GLuint hihi; /* high quadword: hi dword */

   GLint minSum = 2000; /* big enough */
   GLint maxSum = -1; /* small enough */
   GLint minCol = 0; /* phoudoin: silent compiler! */
   GLint maxCol = 0; /* phoudoin: silent compiler! */

   /* Our solution here is to find the darkest and brightest colors in
    * the 8x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   for (k = 0; k < n; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += reord[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   /* cc[3]: brightest color in bits 15..29, darkest in bits 0..14, each
    * component cut down to 5 bits; the two top bits stay 0 (the mode)
    */
   hihi = 0; /* cc-hi = "00" */
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[maxCol][i] >> 3;
   }
   for (i = 0; i < n_comp; i++) {
      /* add in colors */
      hihi <<= 5;
      hihi |= reord[minCol][i] >> 3;
   }
   cc[3] = hihi;
   /* the indices are OR'ed in below, so start from zero */
   cc[0] = cc[1] = cc[2] = 0;

   /* compute interpolation vector */
   if (minCol != maxCol) {
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
   }

   /* add in texels */
   /* Texel k owns the 3 bits starting at bit k * 3 of the block.  They
    * are set through a GLuint located at byte offset t / 8, shifted by
    * the remaining t & 7 bits.
    * NOTE(review): this is an unaligned GLuint access and assumes
    * little-endian byte order -- confirm for the targets in use.
    */
   for (k = N_TEXELS - 1; k >= 0; k--) {
      GLint t = k * 3;
      GLuint *kk = (GLuint *)((char *)cc + t / 8);
      GLint texel = n_vect + 1; /* transparent black */

      if (!ISTBLACK(input[k])) {
         /* with a single color the index simply stays 0 */
         if (minCol != maxCol) {
            /* interpolate color */
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            /* add in texel */
            kk[0] |= texel << (t & 7);
         }
      } else {
         /* add in texel */
         kk[0] |= texel << (t & 7);
      }
   }
}
857
858
/**
 * Encode one block in MIXED mode with transparency: each microtile gets
 * its own pair of RGB colors and a 2-bit index per texel.  Indices 0..2
 * select a step between the darkest and the brightest color of the
 * microtile, index 3 marks transparent black.
 *
 * \param cc     receives the block as four GLuint words: cc[0] / cc[1]
 *               hold the indices of texels 0..15 / 16..31, the upper 64
 *               bits the mode value and the four colors
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* max == -1 means: no texel other than transparent black was found */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    * Transparent black texels are left out of the search.
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   /* k continues at N_TEXELS / 2: the right microtile */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      /* every 2-bit index becomes 3 */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      /* NOTE(review): when min and max are the same texel, cc[0] stays 0,
       * so transparent black texels of this microtile get index 0 as
       * well -- confirm that this is intended.
       */
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         /* highest texel first, so texel 0 ends up in the lowest bits */
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* Leading 4-bit field: bits 3 and 0 are always set, bit 2 is a copy of
    * bit 2 of vec[3]'s green and bit 1 a copy of bit 2 of vec[1]'s green
    * (presumably the mode bit, the per-microtile green LSBs and the alpha
    * flag of the FXT1 MIXED layout -- confirm against the extension spec).
    * The four colors follow, 5 bits per component, vec[3] first.
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
995
996
997static void
998fxt1_quantize_MIXED0 (GLuint *cc,
999                      GLubyte input[N_TEXELS][MAX_COMP])
1000{
1001   const GLint n_vect = 3; /* highest vector number in each microtile */
1002   const GLint n_comp = 3; /* 3 components: R, G, B */
1003   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1004   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1005   GLint i, j, k;
1006   Fx64 hi; /* high quadword */
1007   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1008
1009   GLint minColL = 0, maxColL = 0;
1010   GLint minColR = 0, maxColR = 0;
1011#if 0
1012   GLint minSum;
1013   GLint maxSum;
1014
1015   /* Our solution here is to find the darkest and brightest colors in
1016    * the 4x4 tile and use those as the two representative colors.
1017    * There are probably better algorithms to use (histogram-based).
1018    */
1019   minSum = 2000; /* big enough */
1020   maxSum = -1; /* small enough */
1021   for (k = 0; k < N_TEXELS / 2; k++) {
1022      GLint sum = 0;
1023      for (i = 0; i < n_comp; i++) {
1024         sum += input[k][i];
1025      }
1026      if (minSum > sum) {
1027         minSum = sum;
1028         minColL = k;
1029      }
1030      if (maxSum < sum) {
1031         maxSum = sum;
1032         maxColL = k;
1033      }
1034   }
1035   minSum = 2000; /* big enough */
1036   maxSum = -1; /* small enough */
1037   for (; k < N_TEXELS; k++) {
1038      GLint sum = 0;
1039      for (i = 0; i < n_comp; i++) {
1040         sum += input[k][i];
1041      }
1042      if (minSum > sum) {
1043         minSum = sum;
1044         minColR = k;
1045      }
1046      if (maxSum < sum) {
1047         maxSum = sum;
1048         maxColR = k;
1049      }
1050   }
1051#else
1052   GLint minVal;
1053   GLint maxVal;
1054   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1055   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1056
1057   /* Scan the channel with max variance for lo & hi
1058    * and use those as the two representative colors.
1059    */
1060   minVal = 2000; /* big enough */
1061   maxVal = -1; /* small enough */
1062   for (k = 0; k < N_TEXELS / 2; k++) {
1063      GLint t = input[k][maxVarL];
1064      if (minVal > t) {
1065         minVal = t;
1066         minColL = k;
1067      }
1068      if (maxVal < t) {
1069         maxVal = t;
1070         maxColL = k;
1071      }
1072   }
1073   minVal = 2000; /* big enough */
1074   maxVal = -1; /* small enough */
1075   for (; k < N_TEXELS; k++) {
1076      GLint t = input[k][maxVarR];
1077      if (minVal > t) {
1078         minVal = t;
1079         minColR = k;
1080      }
1081      if (maxVal < t) {
1082         maxVal = t;
1083         maxColR = k;
1084      }
1085   }
1086#endif
1087
1088   /* left microtile */
1089   cc[0] = 0;
1090   for (i = 0; i < n_comp; i++) {
1091      vec[0][i] = input[minColL][i];
1092      vec[1][i] = input[maxColL][i];
1093   }
1094   if (minColL != maxColL) {
1095      /* compute interpolation vector */
1096      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1097
1098      /* add in texels */
1099      lolo = 0;
1100      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1101         GLint texel;
1102         /* interpolate color */
1103         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1104         /* add in texel */
1105         lolo <<= 2;
1106         lolo |= texel;
1107      }
1108
1109      /* funky encoding for LSB of green */
1110      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1111         for (i = 0; i < n_comp; i++) {
1112            vec[1][i] = input[minColL][i];
1113            vec[0][i] = input[maxColL][i];
1114         }
1115         lolo = ~lolo;
1116      }
1117
1118      cc[0] = lolo;
1119   }
1120
1121   /* right microtile */
1122   cc[1] = 0;
1123   for (i = 0; i < n_comp; i++) {
1124      vec[2][i] = input[minColR][i];
1125      vec[3][i] = input[maxColR][i];
1126   }
1127   if (minColR != maxColR) {
1128      /* compute interpolation vector */
1129      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1130
1131      /* add in texels */
1132      lohi = 0;
1133      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1134         GLint texel;
1135         /* interpolate color */
1136         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1137         /* add in texel */
1138         lohi <<= 2;
1139         lohi |= texel;
1140      }
1141
1142      /* funky encoding for LSB of green */
1143      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1144         for (i = 0; i < n_comp; i++) {
1145            vec[3][i] = input[minColR][i];
1146            vec[2][i] = input[maxColR][i];
1147         }
1148         lohi = ~lohi;
1149      }
1150
1151      cc[1] = lohi;
1152   }
1153
1154   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1155   for (j = 2 * 2 - 1; j >= 0; j--) {
1156      for (i = 0; i < n_comp; i++) {
1157         /* add in colors */
1158         FX64_SHL(hi, 5);
1159         FX64_OR32(hi, vec[j][i] >> 3);
1160      }
1161   }
1162   ((Fx64 *)cc)[1] = hi;
1163}
1164
1165
1166static void
1167fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1168{
1169   GLint trualpha;
1170   GLubyte reord[N_TEXELS][MAX_COMP];
1171
1172   GLubyte input[N_TEXELS][MAX_COMP];
1173   GLint i, k, l;
1174
1175   if (comps == 3) {
1176      /* make the whole block opaque */
1177      memset(input, -1, sizeof(input));
1178   }
1179
1180   /* 8 texels each line */
1181   for (l = 0; l < 4; l++) {
1182      for (k = 0; k < 4; k++) {
1183         for (i = 0; i < comps; i++) {
1184            input[k + l * 4][i] = *lines[l]++;
1185         }
1186      }
1187      for (; k < 8; k++) {
1188         for (i = 0; i < comps; i++) {
1189            input[k + l * 4 + 12][i] = *lines[l]++;
1190         }
1191      }
1192   }
1193
1194   /* block layout:
1195    * 00, 01, 02, 03, 08, 09, 0a, 0b
1196    * 10, 11, 12, 13, 18, 19, 1a, 1b
1197    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1198    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1199    */
1200
1201   /* [dBorca]
1202    * stupidity flows forth from this
1203    */
1204   l = N_TEXELS;
1205   trualpha = 0;
1206   if (comps == 4) {
1207      /* skip all transparent black texels */
1208      l = 0;
1209      for (k = 0; k < N_TEXELS; k++) {
1210         /* test all components against 0 */
1211         if (!ISTBLACK(input[k])) {
1212            /* texel is not transparent black */
1213            COPY_4UBV(reord[l], input[k]);
1214            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1215               /* non-opaque texel */
1216               trualpha = !0;
1217            }
1218            l++;
1219         }
1220      }
1221   }
1222
1223#if 0
1224   if (trualpha) {
1225      fxt1_quantize_ALPHA0(cc, input, reord, l);
1226   } else if (l == 0) {
1227      cc[0] = cc[1] = cc[2] = -1;
1228      cc[3] = 0;
1229   } else if (l < N_TEXELS) {
1230      fxt1_quantize_HI(cc, input, reord, l);
1231   } else {
1232      fxt1_quantize_CHROMA(cc, input);
1233   }
1234   (void)fxt1_quantize_ALPHA1;
1235   (void)fxt1_quantize_MIXED1;
1236   (void)fxt1_quantize_MIXED0;
1237#else
1238   if (trualpha) {
1239      fxt1_quantize_ALPHA1(cc, input);
1240   } else if (l == 0) {
1241      cc[0] = cc[1] = cc[2] = ~0u;
1242      cc[3] = 0;
1243   } else if (l < N_TEXELS) {
1244      fxt1_quantize_MIXED1(cc, input);
1245   } else {
1246      fxt1_quantize_MIXED0(cc, input);
1247   }
1248   (void)fxt1_quantize_ALPHA0;
1249   (void)fxt1_quantize_HI;
1250   (void)fxt1_quantize_CHROMA;
1251#endif
1252}
1253
1254
1255
1256/**
1257 * Upscale an image by replication, not (typical) stretching.
1258 * We use this when the image width or height is less than a
1259 * certain size (4, 8) and we need to upscale an image.
1260 */
1261static void
1262upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1263                   GLsizei outWidth, GLsizei outHeight,
1264                   GLint comps, const GLubyte *src, GLint srcRowStride,
1265                   GLubyte *dest )
1266{
1267   GLint i, j, k;
1268
1269   assert(outWidth >= inWidth);
1270   assert(outHeight >= inHeight);
1271#if 0
1272   assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1273   assert((outWidth & 3) == 0);
1274   assert((outHeight & 3) == 0);
1275#endif
1276
1277   for (i = 0; i < outHeight; i++) {
1278      const GLint ii = i % inHeight;
1279      for (j = 0; j < outWidth; j++) {
1280         const GLint jj = j % inWidth;
1281         for (k = 0; k < comps; k++) {
1282            dest[(i * outWidth + j) * comps + k]
1283               = src[ii * srcRowStride + jj * comps + k];
1284         }
1285      }
1286   }
1287}
1288
1289
1290static void
1291fxt1_encode (GLuint width, GLuint height, GLint comps,
1292             const void *source, GLint srcRowStride,
1293             void *dest, GLint destRowStride)
1294{
1295   GLuint x, y;
1296   const GLubyte *data;
1297   GLuint *encoded = (GLuint *)dest;
1298   void *newSource = NULL;
1299
1300   assert(comps == 3 || comps == 4);
1301
1302   /* Replicate image if width is not M8 or height is not M4 */
1303   if ((width & 7) | (height & 3)) {
1304      GLint newWidth = (width + 7) & ~7;
1305      GLint newHeight = (height + 3) & ~3;
1306      newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1307      if (!newSource) {
1308         GET_CURRENT_CONTEXT(ctx);
1309         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310         goto cleanUp;
1311      }
1312      upscale_teximage2d(width, height, newWidth, newHeight,
1313                         comps, (const GLubyte *) source,
1314                         srcRowStride, (GLubyte *) newSource);
1315      source = newSource;
1316      width = newWidth;
1317      height = newHeight;
1318      srcRowStride = comps * newWidth;
1319   }
1320
1321   data = (const GLubyte *) source;
1322   destRowStride = (destRowStride - width * 2) / 4;
1323   for (y = 0; y < height; y += 4) {
1324      GLuint offs = 0 + (y + 0) * srcRowStride;
1325      for (x = 0; x < width; x += 8) {
1326         const GLubyte *lines[4];
1327         lines[0] = &data[offs];
1328         lines[1] = lines[0] + srcRowStride;
1329         lines[2] = lines[1] + srcRowStride;
1330         lines[3] = lines[2] + srcRowStride;
1331         offs += 8 * comps;
1332         fxt1_quantize(encoded, lines, comps);
1333         /* 128 bits per 8x4 block */
1334         encoded += 4;
1335      }
1336      encoded += destRowStride;
1337   }
1338
1339 cleanUp:
1340   free(newSource);
1341}
1342
1343
1344/***************************************************************************\
1345 * FXT1 decoder
1346 *
1347 * The decoder is based on GL_3DFX_texture_compression_FXT1
1348 * specification and serves as a concept for the encoder.
1349\***************************************************************************/
1350
1351
1352/* lookup table for scaling 5 bit colors up to 8 bits */
1353static const GLubyte _rgb_scale_5[] = {
1354   0,   8,   16,  25,  33,  41,  49,  58,
1355   66,  74,  82,  90,  99,  107, 115, 123,
1356   132, 140, 148, 156, 165, 173, 181, 189,
1357   197, 206, 214, 222, 230, 239, 247, 255
1358};
1359
1360/* lookup table for scaling 6 bit colors up to 8 bits */
1361static const GLubyte _rgb_scale_6[] = {
1362   0,   4,   8,   12,  16,  20,  24,  28,
1363   32,  36,  40,  45,  49,  53,  57,  61,
1364   65,  69,  73,  77,  81,  85,  89,  93,
1365   97,  101, 105, 109, 113, 117, 121, 125,
1366   130, 134, 138, 142, 146, 150, 154, 158,
1367   162, 166, 170, 174, 178, 182, 186, 190,
1368   194, 198, 202, 206, 210, 215, 219, 223,
1369   227, 231, 235, 239, 243, 247, 251, 255
1370};
1371
1372
1373#define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
1374#define UP5(c) _rgb_scale_5[(c) & 31]
1375#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1376#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
1377
1378
1379static void
1380fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1381{
1382   const GLuint *cc;
1383
1384   t *= 3;
1385   cc = (const GLuint *)(code + t / 8);
1386   t = (cc[0] >> (t & 7)) & 7;
1387
1388   if (t == 7) {
1389      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1390   } else {
1391      GLubyte r, g, b;
1392      cc = (const GLuint *)(code + 12);
1393      if (t == 0) {
1394         b = UP5(CC_SEL(cc, 0));
1395         g = UP5(CC_SEL(cc, 5));
1396         r = UP5(CC_SEL(cc, 10));
1397      } else if (t == 6) {
1398         b = UP5(CC_SEL(cc, 15));
1399         g = UP5(CC_SEL(cc, 20));
1400         r = UP5(CC_SEL(cc, 25));
1401      } else {
1402         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1403         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1404         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1405      }
1406      rgba[RCOMP] = r;
1407      rgba[GCOMP] = g;
1408      rgba[BCOMP] = b;
1409      rgba[ACOMP] = 255;
1410   }
1411}
1412
1413
1414static void
1415fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1416{
1417   const GLuint *cc;
1418   GLuint kk;
1419
1420   cc = (const GLuint *)code;
1421   if (t & 16) {
1422      cc++;
1423      t &= 15;
1424   }
1425   t = (cc[0] >> (t * 2)) & 3;
1426
1427   t *= 15;
1428   cc = (const GLuint *)(code + 8 + t / 8);
1429   kk = cc[0] >> (t & 7);
1430   rgba[BCOMP] = UP5(kk);
1431   rgba[GCOMP] = UP5(kk >> 5);
1432   rgba[RCOMP] = UP5(kk >> 10);
1433   rgba[ACOMP] = 255;
1434}
1435
1436
1437static void
1438fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1439{
1440   const GLuint *cc;
1441   GLuint col[2][3];
1442   GLint glsb, selb;
1443
1444   cc = (const GLuint *)code;
1445   if (t & 16) {
1446      t &= 15;
1447      t = (cc[1] >> (t * 2)) & 3;
1448      /* col 2 */
1449      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1450      col[0][GCOMP] = CC_SEL(cc, 99);
1451      col[0][RCOMP] = CC_SEL(cc, 104);
1452      /* col 3 */
1453      col[1][BCOMP] = CC_SEL(cc, 109);
1454      col[1][GCOMP] = CC_SEL(cc, 114);
1455      col[1][RCOMP] = CC_SEL(cc, 119);
1456      glsb = CC_SEL(cc, 126);
1457      selb = CC_SEL(cc, 33);
1458   } else {
1459      t = (cc[0] >> (t * 2)) & 3;
1460      /* col 0 */
1461      col[0][BCOMP] = CC_SEL(cc, 64);
1462      col[0][GCOMP] = CC_SEL(cc, 69);
1463      col[0][RCOMP] = CC_SEL(cc, 74);
1464      /* col 1 */
1465      col[1][BCOMP] = CC_SEL(cc, 79);
1466      col[1][GCOMP] = CC_SEL(cc, 84);
1467      col[1][RCOMP] = CC_SEL(cc, 89);
1468      glsb = CC_SEL(cc, 125);
1469      selb = CC_SEL(cc, 1);
1470   }
1471
1472   if (CC_SEL(cc, 124) & 1) {
1473      /* alpha[0] == 1 */
1474
1475      if (t == 3) {
1476         /* zero */
1477         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1478      } else {
1479         GLubyte r, g, b;
1480         if (t == 0) {
1481            b = UP5(col[0][BCOMP]);
1482            g = UP5(col[0][GCOMP]);
1483            r = UP5(col[0][RCOMP]);
1484         } else if (t == 2) {
1485            b = UP5(col[1][BCOMP]);
1486            g = UP6(col[1][GCOMP], glsb);
1487            r = UP5(col[1][RCOMP]);
1488         } else {
1489            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1490            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1491            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1492         }
1493         rgba[RCOMP] = r;
1494         rgba[GCOMP] = g;
1495         rgba[BCOMP] = b;
1496         rgba[ACOMP] = 255;
1497      }
1498   } else {
1499      /* alpha[0] == 0 */
1500      GLubyte r, g, b;
1501      if (t == 0) {
1502         b = UP5(col[0][BCOMP]);
1503         g = UP6(col[0][GCOMP], glsb ^ selb);
1504         r = UP5(col[0][RCOMP]);
1505      } else if (t == 3) {
1506         b = UP5(col[1][BCOMP]);
1507         g = UP6(col[1][GCOMP], glsb);
1508         r = UP5(col[1][RCOMP]);
1509      } else {
1510         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1511         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1512                        UP6(col[1][GCOMP], glsb));
1513         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1514      }
1515      rgba[RCOMP] = r;
1516      rgba[GCOMP] = g;
1517      rgba[BCOMP] = b;
1518      rgba[ACOMP] = 255;
1519   }
1520}
1521
1522
1523static void
1524fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1525{
1526   const GLuint *cc;
1527   GLubyte r, g, b, a;
1528
1529   cc = (const GLuint *)code;
1530   if (CC_SEL(cc, 124) & 1) {
1531      /* lerp == 1 */
1532      GLuint col0[4];
1533
1534      if (t & 16) {
1535         t &= 15;
1536         t = (cc[1] >> (t * 2)) & 3;
1537         /* col 2 */
1538         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539         col0[GCOMP] = CC_SEL(cc, 99);
1540         col0[RCOMP] = CC_SEL(cc, 104);
1541         col0[ACOMP] = CC_SEL(cc, 119);
1542      } else {
1543         t = (cc[0] >> (t * 2)) & 3;
1544         /* col 0 */
1545         col0[BCOMP] = CC_SEL(cc, 64);
1546         col0[GCOMP] = CC_SEL(cc, 69);
1547         col0[RCOMP] = CC_SEL(cc, 74);
1548         col0[ACOMP] = CC_SEL(cc, 109);
1549      }
1550
1551      if (t == 0) {
1552         b = UP5(col0[BCOMP]);
1553         g = UP5(col0[GCOMP]);
1554         r = UP5(col0[RCOMP]);
1555         a = UP5(col0[ACOMP]);
1556      } else if (t == 3) {
1557         b = UP5(CC_SEL(cc, 79));
1558         g = UP5(CC_SEL(cc, 84));
1559         r = UP5(CC_SEL(cc, 89));
1560         a = UP5(CC_SEL(cc, 114));
1561      } else {
1562         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1563         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1564         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1565         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1566      }
1567   } else {
1568      /* lerp == 0 */
1569
1570      if (t & 16) {
1571         cc++;
1572         t &= 15;
1573      }
1574      t = (cc[0] >> (t * 2)) & 3;
1575
1576      if (t == 3) {
1577         /* zero */
1578         r = g = b = a = 0;
1579      } else {
1580         GLuint kk;
1581         cc = (const GLuint *)code;
1582         a = UP5(cc[3] >> (t * 5 + 13));
1583         t *= 15;
1584         cc = (const GLuint *)(code + 8 + t / 8);
1585         kk = cc[0] >> (t & 7);
1586         b = UP5(kk);
1587         g = UP5(kk >> 5);
1588         r = UP5(kk >> 10);
1589      }
1590   }
1591   rgba[RCOMP] = r;
1592   rgba[GCOMP] = g;
1593   rgba[BCOMP] = b;
1594   rgba[ACOMP] = a;
1595}
1596
1597
1598static void
1599fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1600               GLint i, GLint j, GLubyte *rgba)
1601{
1602   static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1603      fxt1_decode_1HI,     /* cc-high   = "00?" */
1604      fxt1_decode_1HI,     /* cc-high   = "00?" */
1605      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1606      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1607      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1609      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1610      fxt1_decode_1MIXED   /* mixed     = "1??" */
1611   };
1612
1613   const GLubyte *code = (const GLubyte *)texture +
1614                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1615   GLint mode = CC_SEL(code, 125);
1616   GLint t = i & 7;
1617
1618   if (t & 4) {
1619      t += 12;
1620   }
1621   t += (j & 3) * 4;
1622
1623   decode_1[mode](code, t, rgba);
1624}
1625
1626
1627
1628
1629static void
1630fetch_rgb_fxt1(const GLubyte *map,
1631               GLint rowStride, GLint i, GLint j, GLfloat *texel)
1632{
1633   GLubyte rgba[4];
1634   fxt1_decode_1(map, rowStride, i, j, rgba);
1635   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1636   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1637   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1638   texel[ACOMP] = 1.0F;
1639}
1640
1641
1642static void
1643fetch_rgba_fxt1(const GLubyte *map,
1644                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1645{
1646   GLubyte rgba[4];
1647   fxt1_decode_1(map, rowStride, i, j, rgba);
1648   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1649   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1650   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1651   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1652}
1653
1654
1655compressed_fetch_func
1656_mesa_get_fxt_fetch_func(mesa_format format)
1657{
1658   switch (format) {
1659   case MESA_FORMAT_RGB_FXT1:
1660      return fetch_rgb_fxt1;
1661   case MESA_FORMAT_RGBA_FXT1:
1662      return fetch_rgba_fxt1;
1663   default:
1664      return NULL;
1665   }
1666}
1667