1848b8605Smrg/*
2848b8605Smrg * Mesa 3-D graphics library
3848b8605Smrg *
4848b8605Smrg * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the "Software"),
8848b8605Smrg * to deal in the Software without restriction, including without limitation
9848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
11848b8605Smrg * Software is furnished to do so, subject to the following conditions:
12848b8605Smrg *
13848b8605Smrg * The above copyright notice and this permission notice shall be included
14848b8605Smrg * in all copies or substantial portions of the Software.
15848b8605Smrg *
16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE.
23848b8605Smrg */
24848b8605Smrg
25848b8605Smrg
26848b8605Smrg/**
27848b8605Smrg * \file texcompress_fxt1.c
28848b8605Smrg * GL_3DFX_texture_compression_FXT1 support.
29848b8605Smrg */
30848b8605Smrg
31848b8605Smrg
32b8e80941Smrg#include "errors.h"
33848b8605Smrg#include "glheader.h"
34848b8605Smrg#include "imports.h"
35848b8605Smrg#include "image.h"
36848b8605Smrg#include "macros.h"
37848b8605Smrg#include "mipmap.h"
38848b8605Smrg#include "texcompress.h"
39848b8605Smrg#include "texcompress_fxt1.h"
40848b8605Smrg#include "texstore.h"
41b8e80941Smrg#include "mtypes.h"
42848b8605Smrg
43848b8605Smrg
/*
 * Forward declaration of the block encoder (defined later in this file).
 *
 * As used by the texstore entry points below: width/height are the source
 * image size, comps is the number of byte components per source texel
 * (3 or 4 at the visible call sites), source/srcRowStride describe the
 * GLubyte input image and dest/destRowStride the compressed output.
 * NOTE(review): stride units (bytes vs. texels) are not visible from the
 * declaration alone -- confirm against the definition.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder (defined later in this
 * file).  Presumably decodes texel (i, j) of the compressed image
 * `texture` into the 4-byte `rgba` output -- confirm against the
 * definition.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
52848b8605Smrg
53848b8605Smrg
54848b8605Smrg/**
55848b8605Smrg * Store user's image in rgb_fxt1 format.
56848b8605Smrg */
57848b8605SmrgGLboolean
58848b8605Smrg_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
59848b8605Smrg{
60848b8605Smrg   const GLubyte *pixels;
61848b8605Smrg   GLint srcRowStride;
62848b8605Smrg   GLubyte *dst;
63848b8605Smrg   const GLubyte *tempImage = NULL;
64848b8605Smrg
65b8e80941Smrg   assert(dstFormat == MESA_FORMAT_RGB_FXT1);
66848b8605Smrg
67848b8605Smrg   if (srcFormat != GL_RGB ||
68848b8605Smrg       srcType != GL_UNSIGNED_BYTE ||
69848b8605Smrg       ctx->_ImageTransferState ||
70b8e80941Smrg       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
71848b8605Smrg       srcPacking->SwapBytes) {
72848b8605Smrg      /* convert image to RGB/GLubyte */
73b8e80941Smrg      GLubyte *tempImageSlices[1];
74b8e80941Smrg      int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
75b8e80941Smrg      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
76848b8605Smrg      if (!tempImage)
77848b8605Smrg         return GL_FALSE; /* out of memory */
78b8e80941Smrg      tempImageSlices[0] = (GLubyte *) tempImage;
79b8e80941Smrg      _mesa_texstore(ctx, dims,
80b8e80941Smrg                     baseInternalFormat,
81b8e80941Smrg                     MESA_FORMAT_RGB_UNORM8,
82b8e80941Smrg                     rgbRowStride, tempImageSlices,
83b8e80941Smrg                     srcWidth, srcHeight, srcDepth,
84b8e80941Smrg                     srcFormat, srcType, srcAddr,
85b8e80941Smrg                     srcPacking);
86848b8605Smrg      pixels = tempImage;
87848b8605Smrg      srcRowStride = 3 * srcWidth;
88848b8605Smrg      srcFormat = GL_RGB;
89848b8605Smrg   }
90848b8605Smrg   else {
91848b8605Smrg      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
92848b8605Smrg                                     srcFormat, srcType, 0, 0);
93848b8605Smrg
94848b8605Smrg      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
95848b8605Smrg                                            srcType) / sizeof(GLubyte);
96848b8605Smrg   }
97848b8605Smrg
98848b8605Smrg   dst = dstSlices[0];
99848b8605Smrg
100848b8605Smrg   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
101848b8605Smrg               dst, dstRowStride);
102848b8605Smrg
103848b8605Smrg   free((void*) tempImage);
104848b8605Smrg
105848b8605Smrg   return GL_TRUE;
106848b8605Smrg}
107848b8605Smrg
108848b8605Smrg
109848b8605Smrg/**
110848b8605Smrg * Store user's image in rgba_fxt1 format.
111848b8605Smrg */
112848b8605SmrgGLboolean
113848b8605Smrg_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114848b8605Smrg{
115848b8605Smrg   const GLubyte *pixels;
116848b8605Smrg   GLint srcRowStride;
117848b8605Smrg   GLubyte *dst;
118848b8605Smrg   const GLubyte *tempImage = NULL;
119848b8605Smrg
120b8e80941Smrg   assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
121848b8605Smrg
122848b8605Smrg   if (srcFormat != GL_RGBA ||
123848b8605Smrg       srcType != GL_UNSIGNED_BYTE ||
124848b8605Smrg       ctx->_ImageTransferState ||
125848b8605Smrg       srcPacking->SwapBytes) {
126848b8605Smrg      /* convert image to RGBA/GLubyte */
127b8e80941Smrg      GLubyte *tempImageSlices[1];
128b8e80941Smrg      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
129b8e80941Smrg      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
130848b8605Smrg      if (!tempImage)
131848b8605Smrg         return GL_FALSE; /* out of memory */
132b8e80941Smrg      tempImageSlices[0] = (GLubyte *) tempImage;
133b8e80941Smrg      _mesa_texstore(ctx, dims,
134b8e80941Smrg                     baseInternalFormat,
135b8e80941Smrg                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
136b8e80941Smrg                                           : MESA_FORMAT_A8B8G8R8_UNORM,
137b8e80941Smrg                     rgbaRowStride, tempImageSlices,
138b8e80941Smrg                     srcWidth, srcHeight, srcDepth,
139b8e80941Smrg                     srcFormat, srcType, srcAddr,
140b8e80941Smrg                     srcPacking);
141848b8605Smrg      pixels = tempImage;
142848b8605Smrg      srcRowStride = 4 * srcWidth;
143848b8605Smrg      srcFormat = GL_RGBA;
144848b8605Smrg   }
145848b8605Smrg   else {
146848b8605Smrg      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
147848b8605Smrg                                     srcFormat, srcType, 0, 0);
148848b8605Smrg
149848b8605Smrg      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
150848b8605Smrg                                            srcType) / sizeof(GLubyte);
151848b8605Smrg   }
152848b8605Smrg
153848b8605Smrg   dst = dstSlices[0];
154848b8605Smrg
155848b8605Smrg   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
156848b8605Smrg               dst, dstRowStride);
157848b8605Smrg
158848b8605Smrg   free((void*) tempImage);
159848b8605Smrg
160848b8605Smrg   return GL_TRUE;
161848b8605Smrg}
162848b8605Smrg
163848b8605Smrg
164848b8605Smrg/***************************************************************************\
165848b8605Smrg * FXT1 encoder
166848b8605Smrg *
167848b8605Smrg * The encoder was built by reversing the decoder,
168848b8605Smrg * and is vaguely based on Texus2 by 3dfx. Note that this code
169848b8605Smrg * is merely a proof of concept, since it is highly UNoptimized;
170848b8605Smrg * moreover, it is sub-optimal due to initial conditions passed
171848b8605Smrg * to Lloyd's algorithm (the interpolation modes are even worse).
172848b8605Smrg\***************************************************************************/
173848b8605Smrg
174848b8605Smrg
/* Encoder tuning constants and array bounds. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* All-zero reference word used by ISTBLACK below. */
static const GLuint zero = 0;
/* True when the first sizeof(GLuint) bytes of v are all zero; the name
 * suggests v is meant to be a texel that is "transparent black".
 */
#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
184848b8605Smrg
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * Fx64 holds the high quadword of an encoded block while it is assembled.
 *   FX64_MOV32(a, b)  set a to the 32-bit value b
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The "#if 1" selects the native uint64_t implementation; the struct-based
 * fallback below it is currently compiled out.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

/* Emulated 64-bit value as two 32-bit halves. */
typedef struct {
   GLuint lo, hi;
} Fx64;

/* NOTE(review): FX64_MOV32 only writes .lo and leaves .hi untouched. */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/* NOTE(review): with c == 0 the else-branch shifts a.lo right by 32, which
 * is out of range for a 32-bit GLuint; the visible callers only shift by 5.
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
221848b8605Smrg
222848b8605Smrg
/* Per-component weight applied by MAKEIVEC; currently always 1. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC: build an interpolation vector IV and bias B for the segment
 * V0 -> V1 so that, for a color V, (dot(V, IV) + B) is V's position along
 * the segment scaled to the range 0 .. NV (V0 maps to 0.5, V1 to NV + 0.5;
 * the 0.5 provides rounding when the result is truncated to an integer).
 *
 *   NV  highest index value to produce
 *   NC  number of components to use
 *   IV  output array of at least NC floats
 *   B   output float lvalue
 *   V0, V1  segment endpoints (indexable, NC components)
 *
 * The macro uses a variable named `i` from the enclosing scope as its loop
 * counter.  If V0 and V1 are identical over the NC components, d2 is zero
 * and the division below yields non-finite values.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT: compute the interpolation index of color V using the IV / B
 * pair produced by MAKEIVEC: TEXEL = (GLint)(dot(V, IV) + B), clamped to
 * [0, NV] when SAFECDOT is enabled.
 *
 * Like MAKEIVEC, this uses the enclosing scope's `i` as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
261848b8605Smrg
262848b8605Smrg
263848b8605Smrgstatic GLint
264848b8605Smrgfxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
265848b8605Smrg              GLubyte input[MAX_COMP], GLint nc)
266848b8605Smrg{
267848b8605Smrg   GLint i, j, best = -1;
268848b8605Smrg   GLfloat err = 1e9; /* big enough */
269848b8605Smrg
270848b8605Smrg   for (j = 0; j < nv; j++) {
271848b8605Smrg      GLfloat e = 0.0F;
272848b8605Smrg      for (i = 0; i < nc; i++) {
273848b8605Smrg         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
274848b8605Smrg      }
275848b8605Smrg      if (e < err) {
276848b8605Smrg         err = e;
277848b8605Smrg         best = j;
278848b8605Smrg      }
279848b8605Smrg   }
280848b8605Smrg
281848b8605Smrg   return best;
282848b8605Smrg}
283848b8605Smrg
284848b8605Smrg
285848b8605Smrgstatic GLint
286848b8605Smrgfxt1_worst (GLfloat vec[MAX_COMP],
287848b8605Smrg            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
288848b8605Smrg{
289848b8605Smrg   GLint i, k, worst = -1;
290848b8605Smrg   GLfloat err = -1.0F; /* small enough */
291848b8605Smrg
292848b8605Smrg   for (k = 0; k < n; k++) {
293848b8605Smrg      GLfloat e = 0.0F;
294848b8605Smrg      for (i = 0; i < nc; i++) {
295848b8605Smrg         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
296848b8605Smrg      }
297848b8605Smrg      if (e > err) {
298848b8605Smrg         err = e;
299848b8605Smrg         worst = k;
300848b8605Smrg      }
301848b8605Smrg   }
302848b8605Smrg
303848b8605Smrg   return worst;
304848b8605Smrg}
305848b8605Smrg
306848b8605Smrg
307848b8605Smrgstatic GLint
308848b8605Smrgfxt1_variance (GLdouble variance[MAX_COMP],
309848b8605Smrg               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
310848b8605Smrg{
311848b8605Smrg   GLint i, k, best = 0;
312848b8605Smrg   GLint sx, sx2;
313848b8605Smrg   GLdouble var, maxvar = -1; /* small enough */
314848b8605Smrg   GLdouble teenth = 1.0 / n;
315848b8605Smrg
316848b8605Smrg   for (i = 0; i < nc; i++) {
317848b8605Smrg      sx = sx2 = 0;
318848b8605Smrg      for (k = 0; k < n; k++) {
319848b8605Smrg         GLint t = input[k][i];
320848b8605Smrg         sx += t;
321848b8605Smrg         sx2 += t * t;
322848b8605Smrg      }
323848b8605Smrg      var = sx2 * teenth - sx * sx * teenth * teenth;
324848b8605Smrg      if (maxvar < var) {
325848b8605Smrg         maxvar = var;
326848b8605Smrg         best = i;
327848b8605Smrg      }
328848b8605Smrg      if (variance) {
329848b8605Smrg         variance[i] = var;
330848b8605Smrg      }
331848b8605Smrg   }
332848b8605Smrg
333848b8605Smrg   return best;
334848b8605Smrg}
335848b8605Smrg
336848b8605Smrg
337848b8605Smrgstatic GLint
338848b8605Smrgfxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
339848b8605Smrg             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
340848b8605Smrg{
341848b8605Smrg#if 0
342848b8605Smrg   /* Choose colors from a grid.
343848b8605Smrg    */
344848b8605Smrg   GLint i, j;
345848b8605Smrg
346848b8605Smrg   for (j = 0; j < nv; j++) {
347848b8605Smrg      GLint m = j * (n - 1) / (nv - 1);
348848b8605Smrg      for (i = 0; i < nc; i++) {
349848b8605Smrg         vec[j][i] = input[m][i];
350848b8605Smrg      }
351848b8605Smrg   }
352848b8605Smrg#else
353848b8605Smrg   /* Our solution here is to find the darkest and brightest colors in
354848b8605Smrg    * the 8x4 tile and use those as the two representative colors.
355848b8605Smrg    * There are probably better algorithms to use (histogram-based).
356848b8605Smrg    */
357848b8605Smrg   GLint i, j, k;
358848b8605Smrg   GLint minSum = 2000; /* big enough */
359848b8605Smrg   GLint maxSum = -1; /* small enough */
360848b8605Smrg   GLint minCol = 0; /* phoudoin: silent compiler! */
361848b8605Smrg   GLint maxCol = 0; /* phoudoin: silent compiler! */
362848b8605Smrg
363848b8605Smrg   struct {
364848b8605Smrg      GLint flag;
365848b8605Smrg      GLint key;
366848b8605Smrg      GLint freq;
367848b8605Smrg      GLint idx;
368848b8605Smrg   } hist[N_TEXELS];
369848b8605Smrg   GLint lenh = 0;
370848b8605Smrg
371848b8605Smrg   memset(hist, 0, sizeof(hist));
372848b8605Smrg
373848b8605Smrg   for (k = 0; k < n; k++) {
374848b8605Smrg      GLint l;
375848b8605Smrg      GLint key = 0;
376848b8605Smrg      GLint sum = 0;
377848b8605Smrg      for (i = 0; i < nc; i++) {
378848b8605Smrg         key <<= 8;
379848b8605Smrg         key |= input[k][i];
380848b8605Smrg         sum += input[k][i];
381848b8605Smrg      }
382848b8605Smrg      for (l = 0; l < n; l++) {
383848b8605Smrg         if (!hist[l].flag) {
384848b8605Smrg            /* alloc new slot */
385848b8605Smrg            hist[l].flag = !0;
386848b8605Smrg            hist[l].key = key;
387848b8605Smrg            hist[l].freq = 1;
388848b8605Smrg            hist[l].idx = k;
389848b8605Smrg            lenh = l + 1;
390848b8605Smrg            break;
391848b8605Smrg         } else if (hist[l].key == key) {
392848b8605Smrg            hist[l].freq++;
393848b8605Smrg            break;
394848b8605Smrg         }
395848b8605Smrg      }
396848b8605Smrg      if (minSum > sum) {
397848b8605Smrg         minSum = sum;
398848b8605Smrg         minCol = k;
399848b8605Smrg      }
400848b8605Smrg      if (maxSum < sum) {
401848b8605Smrg         maxSum = sum;
402848b8605Smrg         maxCol = k;
403848b8605Smrg      }
404848b8605Smrg   }
405848b8605Smrg
406848b8605Smrg   if (lenh <= nv) {
407848b8605Smrg      for (j = 0; j < lenh; j++) {
408848b8605Smrg         for (i = 0; i < nc; i++) {
409848b8605Smrg            vec[j][i] = (GLfloat)input[hist[j].idx][i];
410848b8605Smrg         }
411848b8605Smrg      }
412848b8605Smrg      for (; j < nv; j++) {
413848b8605Smrg         for (i = 0; i < nc; i++) {
414848b8605Smrg            vec[j][i] = vec[0][i];
415848b8605Smrg         }
416848b8605Smrg      }
417848b8605Smrg      return 0;
418848b8605Smrg   }
419848b8605Smrg
420848b8605Smrg   for (j = 0; j < nv; j++) {
421848b8605Smrg      for (i = 0; i < nc; i++) {
422848b8605Smrg         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
423848b8605Smrg      }
424848b8605Smrg   }
425848b8605Smrg#endif
426848b8605Smrg
427848b8605Smrg   return !0;
428848b8605Smrg}
429848b8605Smrg
430848b8605Smrg
431848b8605Smrgstatic GLint
432848b8605Smrgfxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
433848b8605Smrg            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
434848b8605Smrg{
435848b8605Smrg   /* Use the generalized lloyd's algorithm for VQ:
436848b8605Smrg    *     find 4 color vectors.
437848b8605Smrg    *
438848b8605Smrg    *     for each sample color
439848b8605Smrg    *         sort to nearest vector.
440848b8605Smrg    *
441848b8605Smrg    *     replace each vector with the centroid of its matching colors.
442848b8605Smrg    *
443848b8605Smrg    *     repeat until RMS doesn't improve.
444848b8605Smrg    *
445848b8605Smrg    *     if a color vector has no samples, or becomes the same as another
446848b8605Smrg    *     vector, replace it with the color which is farthest from a sample.
447848b8605Smrg    *
448848b8605Smrg    * vec[][MAX_COMP]           initial vectors and resulting colors
449848b8605Smrg    * nv                        number of resulting colors required
450848b8605Smrg    * input[N_TEXELS][MAX_COMP] input texels
451848b8605Smrg    * nc                        number of components in input / vec
452848b8605Smrg    * n                         number of input samples
453848b8605Smrg    */
454848b8605Smrg
455848b8605Smrg   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
456848b8605Smrg   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
457848b8605Smrg   GLfloat error, lasterror = 1e9;
458848b8605Smrg
459848b8605Smrg   GLint i, j, k, rep;
460848b8605Smrg
461848b8605Smrg   /* the quantizer */
462848b8605Smrg   for (rep = 0; rep < LL_N_REP; rep++) {
463848b8605Smrg      /* reset sums & counters */
464848b8605Smrg      for (j = 0; j < nv; j++) {
465848b8605Smrg         for (i = 0; i < nc; i++) {
466848b8605Smrg            sum[j][i] = 0;
467848b8605Smrg         }
468848b8605Smrg         cnt[j] = 0;
469848b8605Smrg      }
470848b8605Smrg      error = 0;
471848b8605Smrg
472848b8605Smrg      /* scan whole block */
473848b8605Smrg      for (k = 0; k < n; k++) {
474848b8605Smrg#if 1
475848b8605Smrg         GLint best = -1;
476848b8605Smrg         GLfloat err = 1e9; /* big enough */
477848b8605Smrg         /* determine best vector */
478848b8605Smrg         for (j = 0; j < nv; j++) {
479848b8605Smrg            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
480848b8605Smrg                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
481848b8605Smrg                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
482848b8605Smrg            if (nc == 4) {
483848b8605Smrg               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
484848b8605Smrg            }
485848b8605Smrg            if (e < err) {
486848b8605Smrg               err = e;
487848b8605Smrg               best = j;
488848b8605Smrg            }
489848b8605Smrg         }
490848b8605Smrg#else
491848b8605Smrg         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
492848b8605Smrg#endif
493848b8605Smrg         assert(best >= 0);
494848b8605Smrg         /* add in closest color */
495848b8605Smrg         for (i = 0; i < nc; i++) {
496848b8605Smrg            sum[best][i] += input[k][i];
497848b8605Smrg         }
498848b8605Smrg         /* mark this vector as used */
499848b8605Smrg         cnt[best]++;
500848b8605Smrg         /* accumulate error */
501848b8605Smrg         error += err;
502848b8605Smrg      }
503848b8605Smrg
504848b8605Smrg      /* check RMS */
505848b8605Smrg      if ((error < LL_RMS_E) ||
506848b8605Smrg          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
507848b8605Smrg         return !0; /* good match */
508848b8605Smrg      }
509848b8605Smrg      lasterror = error;
510848b8605Smrg
511848b8605Smrg      /* move each vector to the barycenter of its closest colors */
512848b8605Smrg      for (j = 0; j < nv; j++) {
513848b8605Smrg         if (cnt[j]) {
514848b8605Smrg            GLfloat div = 1.0F / cnt[j];
515848b8605Smrg            for (i = 0; i < nc; i++) {
516848b8605Smrg               vec[j][i] = div * sum[j][i];
517848b8605Smrg            }
518848b8605Smrg         } else {
519848b8605Smrg            /* this vec has no samples or is identical with a previous vec */
520848b8605Smrg            GLint worst = fxt1_worst(vec[j], input, nc, n);
521848b8605Smrg            for (i = 0; i < nc; i++) {
522848b8605Smrg               vec[j][i] = input[worst][i];
523848b8605Smrg            }
524848b8605Smrg         }
525848b8605Smrg      }
526848b8605Smrg   }
527848b8605Smrg
528848b8605Smrg   return 0; /* could not converge fast enough */
529848b8605Smrg}
530848b8605Smrg
531848b8605Smrg
532848b8605Smrgstatic void
533848b8605Smrgfxt1_quantize_CHROMA (GLuint *cc,
534848b8605Smrg                      GLubyte input[N_TEXELS][MAX_COMP])
535848b8605Smrg{
536848b8605Smrg   const GLint n_vect = 4; /* 4 base vectors to find */
537848b8605Smrg   const GLint n_comp = 3; /* 3 components: R, G, B */
538848b8605Smrg   GLfloat vec[MAX_VECT][MAX_COMP];
539848b8605Smrg   GLint i, j, k;
540848b8605Smrg   Fx64 hi; /* high quadword */
541848b8605Smrg   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
542848b8605Smrg
543848b8605Smrg   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
544848b8605Smrg      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
545848b8605Smrg   }
546848b8605Smrg
547848b8605Smrg   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
548848b8605Smrg   for (j = n_vect - 1; j >= 0; j--) {
549848b8605Smrg      for (i = 0; i < n_comp; i++) {
550848b8605Smrg         /* add in colors */
551848b8605Smrg         FX64_SHL(hi, 5);
552848b8605Smrg         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
553848b8605Smrg      }
554848b8605Smrg   }
555848b8605Smrg   ((Fx64 *)cc)[1] = hi;
556848b8605Smrg
557848b8605Smrg   lohi = lolo = 0;
558848b8605Smrg   /* right microtile */
559848b8605Smrg   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
560848b8605Smrg      lohi <<= 2;
561848b8605Smrg      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
562848b8605Smrg   }
563848b8605Smrg   /* left microtile */
564848b8605Smrg   for (; k >= 0; k--) {
565848b8605Smrg      lolo <<= 2;
566848b8605Smrg      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
567848b8605Smrg   }
568848b8605Smrg   cc[1] = lohi;
569848b8605Smrg   cc[0] = lolo;
570848b8605Smrg}
571848b8605Smrg
572848b8605Smrg
573848b8605Smrgstatic void
574848b8605Smrgfxt1_quantize_ALPHA0 (GLuint *cc,
575848b8605Smrg                      GLubyte input[N_TEXELS][MAX_COMP],
576848b8605Smrg                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
577848b8605Smrg{
578848b8605Smrg   const GLint n_vect = 3; /* 3 base vectors to find */
579848b8605Smrg   const GLint n_comp = 4; /* 4 components: R, G, B, A */
580848b8605Smrg   GLfloat vec[MAX_VECT][MAX_COMP];
581848b8605Smrg   GLint i, j, k;
582848b8605Smrg   Fx64 hi; /* high quadword */
583848b8605Smrg   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
584848b8605Smrg
585848b8605Smrg   /* the last vector indicates zero */
586848b8605Smrg   for (i = 0; i < n_comp; i++) {
587848b8605Smrg      vec[n_vect][i] = 0;
588848b8605Smrg   }
589848b8605Smrg
590848b8605Smrg   /* the first n texels in reord are guaranteed to be non-zero */
591848b8605Smrg   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
592848b8605Smrg      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
593848b8605Smrg   }
594848b8605Smrg
595848b8605Smrg   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
596848b8605Smrg   for (j = n_vect - 1; j >= 0; j--) {
597848b8605Smrg      /* add in alphas */
598848b8605Smrg      FX64_SHL(hi, 5);
599848b8605Smrg      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
600848b8605Smrg   }
601848b8605Smrg   for (j = n_vect - 1; j >= 0; j--) {
602848b8605Smrg      for (i = 0; i < n_comp - 1; i++) {
603848b8605Smrg         /* add in colors */
604848b8605Smrg         FX64_SHL(hi, 5);
605848b8605Smrg         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
606848b8605Smrg      }
607848b8605Smrg   }
608848b8605Smrg   ((Fx64 *)cc)[1] = hi;
609848b8605Smrg
610848b8605Smrg   lohi = lolo = 0;
611848b8605Smrg   /* right microtile */
612848b8605Smrg   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
613848b8605Smrg      lohi <<= 2;
614848b8605Smrg      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
615848b8605Smrg   }
616848b8605Smrg   /* left microtile */
617848b8605Smrg   for (; k >= 0; k--) {
618848b8605Smrg      lolo <<= 2;
619848b8605Smrg      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
620848b8605Smrg   }
621848b8605Smrg   cc[1] = lohi;
622848b8605Smrg   cc[0] = lolo;
623848b8605Smrg}
624848b8605Smrg
625848b8605Smrg
626848b8605Smrgstatic void
627848b8605Smrgfxt1_quantize_ALPHA1 (GLuint *cc,
628848b8605Smrg                      GLubyte input[N_TEXELS][MAX_COMP])
629848b8605Smrg{
630848b8605Smrg   const GLint n_vect = 3; /* highest vector number in each microtile */
631848b8605Smrg   const GLint n_comp = 4; /* 4 components: R, G, B, A */
632848b8605Smrg   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
633848b8605Smrg   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
634848b8605Smrg   GLint i, j, k;
635848b8605Smrg   Fx64 hi; /* high quadword */
636848b8605Smrg   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
637848b8605Smrg
638848b8605Smrg   GLint minSum;
639848b8605Smrg   GLint maxSum;
640848b8605Smrg   GLint minColL = 0, maxColL = 0;
641848b8605Smrg   GLint minColR = 0, maxColR = 0;
642848b8605Smrg   GLint sumL = 0, sumR = 0;
643848b8605Smrg   GLint nn_comp;
644848b8605Smrg   /* Our solution here is to find the darkest and brightest colors in
645848b8605Smrg    * the 4x4 tile and use those as the two representative colors.
646848b8605Smrg    * There are probably better algorithms to use (histogram-based).
647848b8605Smrg    */
648848b8605Smrg   nn_comp = n_comp;
649848b8605Smrg   while ((minColL == maxColL) && nn_comp) {
650848b8605Smrg       minSum = 2000; /* big enough */
651848b8605Smrg       maxSum = -1; /* small enough */
652848b8605Smrg       for (k = 0; k < N_TEXELS / 2; k++) {
653848b8605Smrg           GLint sum = 0;
654848b8605Smrg           for (i = 0; i < nn_comp; i++) {
655848b8605Smrg               sum += input[k][i];
656848b8605Smrg           }
657848b8605Smrg           if (minSum > sum) {
658848b8605Smrg               minSum = sum;
659848b8605Smrg               minColL = k;
660848b8605Smrg           }
661848b8605Smrg           if (maxSum < sum) {
662848b8605Smrg               maxSum = sum;
663848b8605Smrg               maxColL = k;
664848b8605Smrg           }
665848b8605Smrg           sumL += sum;
666848b8605Smrg       }
667848b8605Smrg
668848b8605Smrg       nn_comp--;
669848b8605Smrg   }
670848b8605Smrg
671848b8605Smrg   nn_comp = n_comp;
672848b8605Smrg   while ((minColR == maxColR) && nn_comp) {
673848b8605Smrg       minSum = 2000; /* big enough */
674848b8605Smrg       maxSum = -1; /* small enough */
675848b8605Smrg       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
676848b8605Smrg           GLint sum = 0;
677848b8605Smrg           for (i = 0; i < nn_comp; i++) {
678848b8605Smrg               sum += input[k][i];
679848b8605Smrg           }
680848b8605Smrg           if (minSum > sum) {
681848b8605Smrg               minSum = sum;
682848b8605Smrg               minColR = k;
683848b8605Smrg           }
684848b8605Smrg           if (maxSum < sum) {
685848b8605Smrg               maxSum = sum;
686848b8605Smrg               maxColR = k;
687848b8605Smrg           }
688848b8605Smrg           sumR += sum;
689848b8605Smrg       }
690848b8605Smrg
691848b8605Smrg       nn_comp--;
692848b8605Smrg   }
693848b8605Smrg
694848b8605Smrg   /* choose the common vector (yuck!) */
695848b8605Smrg   {
696848b8605Smrg      GLint j1, j2;
697848b8605Smrg      GLint v1 = 0, v2 = 0;
698848b8605Smrg      GLfloat err = 1e9; /* big enough */
699848b8605Smrg      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
700848b8605Smrg      for (i = 0; i < n_comp; i++) {
701848b8605Smrg         tv[0][i] = input[minColL][i];
702848b8605Smrg         tv[1][i] = input[maxColL][i];
703848b8605Smrg         tv[2][i] = input[minColR][i];
704848b8605Smrg         tv[3][i] = input[maxColR][i];
705848b8605Smrg      }
706848b8605Smrg      for (j1 = 0; j1 < 2; j1++) {
707848b8605Smrg         for (j2 = 2; j2 < 4; j2++) {
708848b8605Smrg            GLfloat e = 0.0F;
709848b8605Smrg            for (i = 0; i < n_comp; i++) {
710848b8605Smrg               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
711848b8605Smrg            }
712848b8605Smrg            if (e < err) {
713848b8605Smrg               err = e;
714848b8605Smrg               v1 = j1;
715848b8605Smrg               v2 = j2;
716848b8605Smrg            }
717848b8605Smrg         }
718848b8605Smrg      }
719848b8605Smrg      for (i = 0; i < n_comp; i++) {
720848b8605Smrg         vec[0][i] = tv[1 - v1][i];
721848b8605Smrg         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
722848b8605Smrg         vec[2][i] = tv[5 - v2][i];
723848b8605Smrg      }
724848b8605Smrg   }
725848b8605Smrg
726848b8605Smrg   /* left microtile */
727848b8605Smrg   cc[0] = 0;
728848b8605Smrg   if (minColL != maxColL) {
729848b8605Smrg      /* compute interpolation vector */
730848b8605Smrg      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
731848b8605Smrg
732848b8605Smrg      /* add in texels */
733848b8605Smrg      lolo = 0;
734848b8605Smrg      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
735848b8605Smrg         GLint texel;
736848b8605Smrg         /* interpolate color */
737848b8605Smrg         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
738848b8605Smrg         /* add in texel */
739848b8605Smrg         lolo <<= 2;
740848b8605Smrg         lolo |= texel;
741848b8605Smrg      }
742848b8605Smrg
743848b8605Smrg      cc[0] = lolo;
744848b8605Smrg   }
745848b8605Smrg
746848b8605Smrg   /* right microtile */
747848b8605Smrg   cc[1] = 0;
748848b8605Smrg   if (minColR != maxColR) {
749848b8605Smrg      /* compute interpolation vector */
750848b8605Smrg      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
751848b8605Smrg
752848b8605Smrg      /* add in texels */
753848b8605Smrg      lohi = 0;
754848b8605Smrg      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
755848b8605Smrg         GLint texel;
756848b8605Smrg         /* interpolate color */
757848b8605Smrg         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
758848b8605Smrg         /* add in texel */
759848b8605Smrg         lohi <<= 2;
760848b8605Smrg         lohi |= texel;
761848b8605Smrg      }
762848b8605Smrg
763848b8605Smrg      cc[1] = lohi;
764848b8605Smrg   }
765848b8605Smrg
766848b8605Smrg   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
767848b8605Smrg   for (j = n_vect - 1; j >= 0; j--) {
768848b8605Smrg      /* add in alphas */
769848b8605Smrg      FX64_SHL(hi, 5);
770848b8605Smrg      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
771848b8605Smrg   }
772848b8605Smrg   for (j = n_vect - 1; j >= 0; j--) {
773848b8605Smrg      for (i = 0; i < n_comp - 1; i++) {
774848b8605Smrg         /* add in colors */
775848b8605Smrg         FX64_SHL(hi, 5);
776848b8605Smrg         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
777848b8605Smrg      }
778848b8605Smrg   }
779848b8605Smrg   ((Fx64 *)cc)[1] = hi;
780848b8605Smrg}
781848b8605Smrg
782848b8605Smrg
783848b8605Smrgstatic void
784848b8605Smrgfxt1_quantize_HI (GLuint *cc,
785848b8605Smrg                  GLubyte input[N_TEXELS][MAX_COMP],
786848b8605Smrg                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
787848b8605Smrg{
788848b8605Smrg   const GLint n_vect = 6; /* highest vector number */
789848b8605Smrg   const GLint n_comp = 3; /* 3 components: R, G, B */
790848b8605Smrg   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
791848b8605Smrg   GLfloat iv[MAX_COMP];   /* interpolation vector */
792848b8605Smrg   GLint i, k;
793848b8605Smrg   GLuint hihi; /* high quadword: hi dword */
794848b8605Smrg
795848b8605Smrg   GLint minSum = 2000; /* big enough */
796848b8605Smrg   GLint maxSum = -1; /* small enough */
797848b8605Smrg   GLint minCol = 0; /* phoudoin: silent compiler! */
798848b8605Smrg   GLint maxCol = 0; /* phoudoin: silent compiler! */
799848b8605Smrg
800848b8605Smrg   /* Our solution here is to find the darkest and brightest colors in
801848b8605Smrg    * the 8x4 tile and use those as the two representative colors.
802848b8605Smrg    * There are probably better algorithms to use (histogram-based).
803848b8605Smrg    */
804848b8605Smrg   for (k = 0; k < n; k++) {
805848b8605Smrg      GLint sum = 0;
806848b8605Smrg      for (i = 0; i < n_comp; i++) {
807848b8605Smrg         sum += reord[k][i];
808848b8605Smrg      }
809848b8605Smrg      if (minSum > sum) {
810848b8605Smrg         minSum = sum;
811848b8605Smrg         minCol = k;
812848b8605Smrg      }
813848b8605Smrg      if (maxSum < sum) {
814848b8605Smrg         maxSum = sum;
815848b8605Smrg         maxCol = k;
816848b8605Smrg      }
817848b8605Smrg   }
818848b8605Smrg
819848b8605Smrg   hihi = 0; /* cc-hi = "00" */
820848b8605Smrg   for (i = 0; i < n_comp; i++) {
821848b8605Smrg      /* add in colors */
822848b8605Smrg      hihi <<= 5;
823848b8605Smrg      hihi |= reord[maxCol][i] >> 3;
824848b8605Smrg   }
825848b8605Smrg   for (i = 0; i < n_comp; i++) {
826848b8605Smrg      /* add in colors */
827848b8605Smrg      hihi <<= 5;
828848b8605Smrg      hihi |= reord[minCol][i] >> 3;
829848b8605Smrg   }
830848b8605Smrg   cc[3] = hihi;
831848b8605Smrg   cc[0] = cc[1] = cc[2] = 0;
832848b8605Smrg
833848b8605Smrg   /* compute interpolation vector */
834848b8605Smrg   if (minCol != maxCol) {
835848b8605Smrg      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
836848b8605Smrg   }
837848b8605Smrg
838848b8605Smrg   /* add in texels */
839848b8605Smrg   for (k = N_TEXELS - 1; k >= 0; k--) {
840848b8605Smrg      GLint t = k * 3;
841848b8605Smrg      GLuint *kk = (GLuint *)((char *)cc + t / 8);
842848b8605Smrg      GLint texel = n_vect + 1; /* transparent black */
843848b8605Smrg
844848b8605Smrg      if (!ISTBLACK(input[k])) {
845848b8605Smrg         if (minCol != maxCol) {
846848b8605Smrg            /* interpolate color */
847848b8605Smrg            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
848848b8605Smrg            /* add in texel */
849848b8605Smrg            kk[0] |= texel << (t & 7);
850848b8605Smrg         }
851848b8605Smrg      } else {
852848b8605Smrg         /* add in texel */
853848b8605Smrg         kk[0] |= texel << (t & 7);
854848b8605Smrg      }
855848b8605Smrg   }
856848b8605Smrg}
857848b8605Smrg
858848b8605Smrg
859848b8605Smrgstatic void
860848b8605Smrgfxt1_quantize_MIXED1 (GLuint *cc,
861848b8605Smrg                      GLubyte input[N_TEXELS][MAX_COMP])
862848b8605Smrg{
863848b8605Smrg   const GLint n_vect = 2; /* highest vector number in each microtile */
864848b8605Smrg   const GLint n_comp = 3; /* 3 components: R, G, B */
865848b8605Smrg   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
866848b8605Smrg   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
867848b8605Smrg   GLint i, j, k;
868848b8605Smrg   Fx64 hi; /* high quadword */
869848b8605Smrg   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
870848b8605Smrg
871848b8605Smrg   GLint minSum;
872848b8605Smrg   GLint maxSum;
873848b8605Smrg   GLint minColL = 0, maxColL = -1;
874848b8605Smrg   GLint minColR = 0, maxColR = -1;
875848b8605Smrg
876848b8605Smrg   /* Our solution here is to find the darkest and brightest colors in
877848b8605Smrg    * the 4x4 tile and use those as the two representative colors.
878848b8605Smrg    * There are probably better algorithms to use (histogram-based).
879848b8605Smrg    */
880848b8605Smrg   minSum = 2000; /* big enough */
881848b8605Smrg   maxSum = -1; /* small enough */
882848b8605Smrg   for (k = 0; k < N_TEXELS / 2; k++) {
883848b8605Smrg      if (!ISTBLACK(input[k])) {
884848b8605Smrg         GLint sum = 0;
885848b8605Smrg         for (i = 0; i < n_comp; i++) {
886848b8605Smrg            sum += input[k][i];
887848b8605Smrg         }
888848b8605Smrg         if (minSum > sum) {
889848b8605Smrg            minSum = sum;
890848b8605Smrg            minColL = k;
891848b8605Smrg         }
892848b8605Smrg         if (maxSum < sum) {
893848b8605Smrg            maxSum = sum;
894848b8605Smrg            maxColL = k;
895848b8605Smrg         }
896848b8605Smrg      }
897848b8605Smrg   }
898848b8605Smrg   minSum = 2000; /* big enough */
899848b8605Smrg   maxSum = -1; /* small enough */
900848b8605Smrg   for (; k < N_TEXELS; k++) {
901848b8605Smrg      if (!ISTBLACK(input[k])) {
902848b8605Smrg         GLint sum = 0;
903848b8605Smrg         for (i = 0; i < n_comp; i++) {
904848b8605Smrg            sum += input[k][i];
905848b8605Smrg         }
906848b8605Smrg         if (minSum > sum) {
907848b8605Smrg            minSum = sum;
908848b8605Smrg            minColR = k;
909848b8605Smrg         }
910848b8605Smrg         if (maxSum < sum) {
911848b8605Smrg            maxSum = sum;
912848b8605Smrg            maxColR = k;
913848b8605Smrg         }
914848b8605Smrg      }
915848b8605Smrg   }
916848b8605Smrg
917848b8605Smrg   /* left microtile */
918848b8605Smrg   if (maxColL == -1) {
919848b8605Smrg      /* all transparent black */
920848b8605Smrg      cc[0] = ~0u;
921848b8605Smrg      for (i = 0; i < n_comp; i++) {
922848b8605Smrg         vec[0][i] = 0;
923848b8605Smrg         vec[1][i] = 0;
924848b8605Smrg      }
925848b8605Smrg   } else {
926848b8605Smrg      cc[0] = 0;
927848b8605Smrg      for (i = 0; i < n_comp; i++) {
928848b8605Smrg         vec[0][i] = input[minColL][i];
929848b8605Smrg         vec[1][i] = input[maxColL][i];
930848b8605Smrg      }
931848b8605Smrg      if (minColL != maxColL) {
932848b8605Smrg         /* compute interpolation vector */
933848b8605Smrg         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
934848b8605Smrg
935848b8605Smrg         /* add in texels */
936848b8605Smrg         lolo = 0;
937848b8605Smrg         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
938848b8605Smrg            GLint texel = n_vect + 1; /* transparent black */
939848b8605Smrg            if (!ISTBLACK(input[k])) {
940848b8605Smrg               /* interpolate color */
941848b8605Smrg               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
942848b8605Smrg            }
943848b8605Smrg            /* add in texel */
944848b8605Smrg            lolo <<= 2;
945848b8605Smrg            lolo |= texel;
946848b8605Smrg         }
947848b8605Smrg         cc[0] = lolo;
948848b8605Smrg      }
949848b8605Smrg   }
950848b8605Smrg
951848b8605Smrg   /* right microtile */
952848b8605Smrg   if (maxColR == -1) {
953848b8605Smrg      /* all transparent black */
954848b8605Smrg      cc[1] = ~0u;
955848b8605Smrg      for (i = 0; i < n_comp; i++) {
956848b8605Smrg         vec[2][i] = 0;
957848b8605Smrg         vec[3][i] = 0;
958848b8605Smrg      }
959848b8605Smrg   } else {
960848b8605Smrg      cc[1] = 0;
961848b8605Smrg      for (i = 0; i < n_comp; i++) {
962848b8605Smrg         vec[2][i] = input[minColR][i];
963848b8605Smrg         vec[3][i] = input[maxColR][i];
964848b8605Smrg      }
965848b8605Smrg      if (minColR != maxColR) {
966848b8605Smrg         /* compute interpolation vector */
967848b8605Smrg         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
968848b8605Smrg
969848b8605Smrg         /* add in texels */
970848b8605Smrg         lohi = 0;
971848b8605Smrg         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
972848b8605Smrg            GLint texel = n_vect + 1; /* transparent black */
973848b8605Smrg            if (!ISTBLACK(input[k])) {
974848b8605Smrg               /* interpolate color */
975848b8605Smrg               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
976848b8605Smrg            }
977848b8605Smrg            /* add in texel */
978848b8605Smrg            lohi <<= 2;
979848b8605Smrg            lohi |= texel;
980848b8605Smrg         }
981848b8605Smrg         cc[1] = lohi;
982848b8605Smrg      }
983848b8605Smrg   }
984848b8605Smrg
985848b8605Smrg   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
986848b8605Smrg   for (j = 2 * 2 - 1; j >= 0; j--) {
987848b8605Smrg      for (i = 0; i < n_comp; i++) {
988848b8605Smrg         /* add in colors */
989848b8605Smrg         FX64_SHL(hi, 5);
990848b8605Smrg         FX64_OR32(hi, vec[j][i] >> 3);
991848b8605Smrg      }
992848b8605Smrg   }
993848b8605Smrg   ((Fx64 *)cc)[1] = hi;
994848b8605Smrg}
995848b8605Smrg
996848b8605Smrg
997848b8605Smrgstatic void
998848b8605Smrgfxt1_quantize_MIXED0 (GLuint *cc,
999848b8605Smrg                      GLubyte input[N_TEXELS][MAX_COMP])
1000848b8605Smrg{
1001848b8605Smrg   const GLint n_vect = 3; /* highest vector number in each microtile */
1002848b8605Smrg   const GLint n_comp = 3; /* 3 components: R, G, B */
1003848b8605Smrg   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1004848b8605Smrg   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1005848b8605Smrg   GLint i, j, k;
1006848b8605Smrg   Fx64 hi; /* high quadword */
1007848b8605Smrg   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1008848b8605Smrg
1009848b8605Smrg   GLint minColL = 0, maxColL = 0;
1010848b8605Smrg   GLint minColR = 0, maxColR = 0;
1011848b8605Smrg#if 0
1012848b8605Smrg   GLint minSum;
1013848b8605Smrg   GLint maxSum;
1014848b8605Smrg
1015848b8605Smrg   /* Our solution here is to find the darkest and brightest colors in
1016848b8605Smrg    * the 4x4 tile and use those as the two representative colors.
1017848b8605Smrg    * There are probably better algorithms to use (histogram-based).
1018848b8605Smrg    */
1019848b8605Smrg   minSum = 2000; /* big enough */
1020848b8605Smrg   maxSum = -1; /* small enough */
1021848b8605Smrg   for (k = 0; k < N_TEXELS / 2; k++) {
1022848b8605Smrg      GLint sum = 0;
1023848b8605Smrg      for (i = 0; i < n_comp; i++) {
1024848b8605Smrg         sum += input[k][i];
1025848b8605Smrg      }
1026848b8605Smrg      if (minSum > sum) {
1027848b8605Smrg         minSum = sum;
1028848b8605Smrg         minColL = k;
1029848b8605Smrg      }
1030848b8605Smrg      if (maxSum < sum) {
1031848b8605Smrg         maxSum = sum;
1032848b8605Smrg         maxColL = k;
1033848b8605Smrg      }
1034848b8605Smrg   }
1035848b8605Smrg   minSum = 2000; /* big enough */
1036848b8605Smrg   maxSum = -1; /* small enough */
1037848b8605Smrg   for (; k < N_TEXELS; k++) {
1038848b8605Smrg      GLint sum = 0;
1039848b8605Smrg      for (i = 0; i < n_comp; i++) {
1040848b8605Smrg         sum += input[k][i];
1041848b8605Smrg      }
1042848b8605Smrg      if (minSum > sum) {
1043848b8605Smrg         minSum = sum;
1044848b8605Smrg         minColR = k;
1045848b8605Smrg      }
1046848b8605Smrg      if (maxSum < sum) {
1047848b8605Smrg         maxSum = sum;
1048848b8605Smrg         maxColR = k;
1049848b8605Smrg      }
1050848b8605Smrg   }
1051848b8605Smrg#else
1052848b8605Smrg   GLint minVal;
1053848b8605Smrg   GLint maxVal;
1054848b8605Smrg   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1055848b8605Smrg   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1056848b8605Smrg
1057848b8605Smrg   /* Scan the channel with max variance for lo & hi
1058848b8605Smrg    * and use those as the two representative colors.
1059848b8605Smrg    */
1060848b8605Smrg   minVal = 2000; /* big enough */
1061848b8605Smrg   maxVal = -1; /* small enough */
1062848b8605Smrg   for (k = 0; k < N_TEXELS / 2; k++) {
1063848b8605Smrg      GLint t = input[k][maxVarL];
1064848b8605Smrg      if (minVal > t) {
1065848b8605Smrg         minVal = t;
1066848b8605Smrg         minColL = k;
1067848b8605Smrg      }
1068848b8605Smrg      if (maxVal < t) {
1069848b8605Smrg         maxVal = t;
1070848b8605Smrg         maxColL = k;
1071848b8605Smrg      }
1072848b8605Smrg   }
1073848b8605Smrg   minVal = 2000; /* big enough */
1074848b8605Smrg   maxVal = -1; /* small enough */
1075848b8605Smrg   for (; k < N_TEXELS; k++) {
1076848b8605Smrg      GLint t = input[k][maxVarR];
1077848b8605Smrg      if (minVal > t) {
1078848b8605Smrg         minVal = t;
1079848b8605Smrg         minColR = k;
1080848b8605Smrg      }
1081848b8605Smrg      if (maxVal < t) {
1082848b8605Smrg         maxVal = t;
1083848b8605Smrg         maxColR = k;
1084848b8605Smrg      }
1085848b8605Smrg   }
1086848b8605Smrg#endif
1087848b8605Smrg
1088848b8605Smrg   /* left microtile */
1089848b8605Smrg   cc[0] = 0;
1090848b8605Smrg   for (i = 0; i < n_comp; i++) {
1091848b8605Smrg      vec[0][i] = input[minColL][i];
1092848b8605Smrg      vec[1][i] = input[maxColL][i];
1093848b8605Smrg   }
1094848b8605Smrg   if (minColL != maxColL) {
1095848b8605Smrg      /* compute interpolation vector */
1096848b8605Smrg      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1097848b8605Smrg
1098848b8605Smrg      /* add in texels */
1099848b8605Smrg      lolo = 0;
1100848b8605Smrg      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1101848b8605Smrg         GLint texel;
1102848b8605Smrg         /* interpolate color */
1103848b8605Smrg         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1104848b8605Smrg         /* add in texel */
1105848b8605Smrg         lolo <<= 2;
1106848b8605Smrg         lolo |= texel;
1107848b8605Smrg      }
1108848b8605Smrg
1109848b8605Smrg      /* funky encoding for LSB of green */
1110848b8605Smrg      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1111848b8605Smrg         for (i = 0; i < n_comp; i++) {
1112848b8605Smrg            vec[1][i] = input[minColL][i];
1113848b8605Smrg            vec[0][i] = input[maxColL][i];
1114848b8605Smrg         }
1115848b8605Smrg         lolo = ~lolo;
1116848b8605Smrg      }
1117848b8605Smrg
1118848b8605Smrg      cc[0] = lolo;
1119848b8605Smrg   }
1120848b8605Smrg
1121848b8605Smrg   /* right microtile */
1122848b8605Smrg   cc[1] = 0;
1123848b8605Smrg   for (i = 0; i < n_comp; i++) {
1124848b8605Smrg      vec[2][i] = input[minColR][i];
1125848b8605Smrg      vec[3][i] = input[maxColR][i];
1126848b8605Smrg   }
1127848b8605Smrg   if (minColR != maxColR) {
1128848b8605Smrg      /* compute interpolation vector */
1129848b8605Smrg      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1130848b8605Smrg
1131848b8605Smrg      /* add in texels */
1132848b8605Smrg      lohi = 0;
1133848b8605Smrg      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1134848b8605Smrg         GLint texel;
1135848b8605Smrg         /* interpolate color */
1136848b8605Smrg         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1137848b8605Smrg         /* add in texel */
1138848b8605Smrg         lohi <<= 2;
1139848b8605Smrg         lohi |= texel;
1140848b8605Smrg      }
1141848b8605Smrg
1142848b8605Smrg      /* funky encoding for LSB of green */
1143848b8605Smrg      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1144848b8605Smrg         for (i = 0; i < n_comp; i++) {
1145848b8605Smrg            vec[3][i] = input[minColR][i];
1146848b8605Smrg            vec[2][i] = input[maxColR][i];
1147848b8605Smrg         }
1148848b8605Smrg         lohi = ~lohi;
1149848b8605Smrg      }
1150848b8605Smrg
1151848b8605Smrg      cc[1] = lohi;
1152848b8605Smrg   }
1153848b8605Smrg
1154848b8605Smrg   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1155848b8605Smrg   for (j = 2 * 2 - 1; j >= 0; j--) {
1156848b8605Smrg      for (i = 0; i < n_comp; i++) {
1157848b8605Smrg         /* add in colors */
1158848b8605Smrg         FX64_SHL(hi, 5);
1159848b8605Smrg         FX64_OR32(hi, vec[j][i] >> 3);
1160848b8605Smrg      }
1161848b8605Smrg   }
1162848b8605Smrg   ((Fx64 *)cc)[1] = hi;
1163848b8605Smrg}
1164848b8605Smrg
1165848b8605Smrg
1166848b8605Smrgstatic void
1167848b8605Smrgfxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1168848b8605Smrg{
1169848b8605Smrg   GLint trualpha;
1170848b8605Smrg   GLubyte reord[N_TEXELS][MAX_COMP];
1171848b8605Smrg
1172848b8605Smrg   GLubyte input[N_TEXELS][MAX_COMP];
1173848b8605Smrg   GLint i, k, l;
1174848b8605Smrg
1175848b8605Smrg   if (comps == 3) {
1176848b8605Smrg      /* make the whole block opaque */
1177848b8605Smrg      memset(input, -1, sizeof(input));
1178848b8605Smrg   }
1179848b8605Smrg
1180848b8605Smrg   /* 8 texels each line */
1181848b8605Smrg   for (l = 0; l < 4; l++) {
1182848b8605Smrg      for (k = 0; k < 4; k++) {
1183848b8605Smrg         for (i = 0; i < comps; i++) {
1184848b8605Smrg            input[k + l * 4][i] = *lines[l]++;
1185848b8605Smrg         }
1186848b8605Smrg      }
1187848b8605Smrg      for (; k < 8; k++) {
1188848b8605Smrg         for (i = 0; i < comps; i++) {
1189848b8605Smrg            input[k + l * 4 + 12][i] = *lines[l]++;
1190848b8605Smrg         }
1191848b8605Smrg      }
1192848b8605Smrg   }
1193848b8605Smrg
1194848b8605Smrg   /* block layout:
1195848b8605Smrg    * 00, 01, 02, 03, 08, 09, 0a, 0b
1196848b8605Smrg    * 10, 11, 12, 13, 18, 19, 1a, 1b
1197848b8605Smrg    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1198848b8605Smrg    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1199848b8605Smrg    */
1200848b8605Smrg
1201848b8605Smrg   /* [dBorca]
1202848b8605Smrg    * stupidity flows forth from this
1203848b8605Smrg    */
1204848b8605Smrg   l = N_TEXELS;
1205848b8605Smrg   trualpha = 0;
1206848b8605Smrg   if (comps == 4) {
1207848b8605Smrg      /* skip all transparent black texels */
1208848b8605Smrg      l = 0;
1209848b8605Smrg      for (k = 0; k < N_TEXELS; k++) {
1210848b8605Smrg         /* test all components against 0 */
1211848b8605Smrg         if (!ISTBLACK(input[k])) {
1212848b8605Smrg            /* texel is not transparent black */
1213848b8605Smrg            COPY_4UBV(reord[l], input[k]);
1214848b8605Smrg            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1215848b8605Smrg               /* non-opaque texel */
1216848b8605Smrg               trualpha = !0;
1217848b8605Smrg            }
1218848b8605Smrg            l++;
1219848b8605Smrg         }
1220848b8605Smrg      }
1221848b8605Smrg   }
1222848b8605Smrg
1223848b8605Smrg#if 0
1224848b8605Smrg   if (trualpha) {
1225848b8605Smrg      fxt1_quantize_ALPHA0(cc, input, reord, l);
1226848b8605Smrg   } else if (l == 0) {
1227848b8605Smrg      cc[0] = cc[1] = cc[2] = -1;
1228848b8605Smrg      cc[3] = 0;
1229848b8605Smrg   } else if (l < N_TEXELS) {
1230848b8605Smrg      fxt1_quantize_HI(cc, input, reord, l);
1231848b8605Smrg   } else {
1232848b8605Smrg      fxt1_quantize_CHROMA(cc, input);
1233848b8605Smrg   }
1234848b8605Smrg   (void)fxt1_quantize_ALPHA1;
1235848b8605Smrg   (void)fxt1_quantize_MIXED1;
1236848b8605Smrg   (void)fxt1_quantize_MIXED0;
1237848b8605Smrg#else
1238848b8605Smrg   if (trualpha) {
1239848b8605Smrg      fxt1_quantize_ALPHA1(cc, input);
1240848b8605Smrg   } else if (l == 0) {
1241848b8605Smrg      cc[0] = cc[1] = cc[2] = ~0u;
1242848b8605Smrg      cc[3] = 0;
1243848b8605Smrg   } else if (l < N_TEXELS) {
1244848b8605Smrg      fxt1_quantize_MIXED1(cc, input);
1245848b8605Smrg   } else {
1246848b8605Smrg      fxt1_quantize_MIXED0(cc, input);
1247848b8605Smrg   }
1248848b8605Smrg   (void)fxt1_quantize_ALPHA0;
1249848b8605Smrg   (void)fxt1_quantize_HI;
1250848b8605Smrg   (void)fxt1_quantize_CHROMA;
1251848b8605Smrg#endif
1252848b8605Smrg}
1253848b8605Smrg
1254848b8605Smrg
1255848b8605Smrg
1256848b8605Smrg/**
1257848b8605Smrg * Upscale an image by replication, not (typical) stretching.
1258848b8605Smrg * We use this when the image width or height is less than a
1259848b8605Smrg * certain size (4, 8) and we need to upscale an image.
1260848b8605Smrg */
1261848b8605Smrgstatic void
1262848b8605Smrgupscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1263848b8605Smrg                   GLsizei outWidth, GLsizei outHeight,
1264848b8605Smrg                   GLint comps, const GLubyte *src, GLint srcRowStride,
1265848b8605Smrg                   GLubyte *dest )
1266848b8605Smrg{
1267848b8605Smrg   GLint i, j, k;
1268848b8605Smrg
1269b8e80941Smrg   assert(outWidth >= inWidth);
1270b8e80941Smrg   assert(outHeight >= inHeight);
1271848b8605Smrg#if 0
1272b8e80941Smrg   assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1273b8e80941Smrg   assert((outWidth & 3) == 0);
1274b8e80941Smrg   assert((outHeight & 3) == 0);
1275848b8605Smrg#endif
1276848b8605Smrg
1277848b8605Smrg   for (i = 0; i < outHeight; i++) {
1278848b8605Smrg      const GLint ii = i % inHeight;
1279848b8605Smrg      for (j = 0; j < outWidth; j++) {
1280848b8605Smrg         const GLint jj = j % inWidth;
1281848b8605Smrg         for (k = 0; k < comps; k++) {
1282848b8605Smrg            dest[(i * outWidth + j) * comps + k]
1283848b8605Smrg               = src[ii * srcRowStride + jj * comps + k];
1284848b8605Smrg         }
1285848b8605Smrg      }
1286848b8605Smrg   }
1287848b8605Smrg}
1288848b8605Smrg
1289848b8605Smrg
1290848b8605Smrgstatic void
1291848b8605Smrgfxt1_encode (GLuint width, GLuint height, GLint comps,
1292848b8605Smrg             const void *source, GLint srcRowStride,
1293848b8605Smrg             void *dest, GLint destRowStride)
1294848b8605Smrg{
1295848b8605Smrg   GLuint x, y;
1296848b8605Smrg   const GLubyte *data;
1297848b8605Smrg   GLuint *encoded = (GLuint *)dest;
1298848b8605Smrg   void *newSource = NULL;
1299848b8605Smrg
1300848b8605Smrg   assert(comps == 3 || comps == 4);
1301848b8605Smrg
1302848b8605Smrg   /* Replicate image if width is not M8 or height is not M4 */
1303848b8605Smrg   if ((width & 7) | (height & 3)) {
1304848b8605Smrg      GLint newWidth = (width + 7) & ~7;
1305848b8605Smrg      GLint newHeight = (height + 3) & ~3;
1306848b8605Smrg      newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1307848b8605Smrg      if (!newSource) {
1308848b8605Smrg         GET_CURRENT_CONTEXT(ctx);
1309848b8605Smrg         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1310848b8605Smrg         goto cleanUp;
1311848b8605Smrg      }
1312848b8605Smrg      upscale_teximage2d(width, height, newWidth, newHeight,
1313848b8605Smrg                         comps, (const GLubyte *) source,
1314848b8605Smrg                         srcRowStride, (GLubyte *) newSource);
1315848b8605Smrg      source = newSource;
1316848b8605Smrg      width = newWidth;
1317848b8605Smrg      height = newHeight;
1318848b8605Smrg      srcRowStride = comps * newWidth;
1319848b8605Smrg   }
1320848b8605Smrg
1321848b8605Smrg   data = (const GLubyte *) source;
1322848b8605Smrg   destRowStride = (destRowStride - width * 2) / 4;
1323848b8605Smrg   for (y = 0; y < height; y += 4) {
1324848b8605Smrg      GLuint offs = 0 + (y + 0) * srcRowStride;
1325848b8605Smrg      for (x = 0; x < width; x += 8) {
1326848b8605Smrg         const GLubyte *lines[4];
1327848b8605Smrg         lines[0] = &data[offs];
1328848b8605Smrg         lines[1] = lines[0] + srcRowStride;
1329848b8605Smrg         lines[2] = lines[1] + srcRowStride;
1330848b8605Smrg         lines[3] = lines[2] + srcRowStride;
1331848b8605Smrg         offs += 8 * comps;
1332848b8605Smrg         fxt1_quantize(encoded, lines, comps);
1333848b8605Smrg         /* 128 bits per 8x4 block */
1334848b8605Smrg         encoded += 4;
1335848b8605Smrg      }
1336848b8605Smrg      encoded += destRowStride;
1337848b8605Smrg   }
1338848b8605Smrg
1339848b8605Smrg cleanUp:
1340848b8605Smrg   free(newSource);
1341848b8605Smrg}
1342848b8605Smrg
1343848b8605Smrg
1344848b8605Smrg/***************************************************************************\
1345848b8605Smrg * FXT1 decoder
1346848b8605Smrg *
1347848b8605Smrg * The decoder is based on GL_3DFX_texture_compression_FXT1
1348848b8605Smrg * specification and serves as a concept for the encoder.
1349848b8605Smrg\***************************************************************************/
1350848b8605Smrg
1351848b8605Smrg
1352848b8605Smrg/* lookup table for scaling 5 bit colors up to 8 bits */
1353848b8605Smrgstatic const GLubyte _rgb_scale_5[] = {
1354848b8605Smrg   0,   8,   16,  25,  33,  41,  49,  58,
1355848b8605Smrg   66,  74,  82,  90,  99,  107, 115, 123,
1356848b8605Smrg   132, 140, 148, 156, 165, 173, 181, 189,
1357848b8605Smrg   197, 206, 214, 222, 230, 239, 247, 255
1358848b8605Smrg};
1359848b8605Smrg
1360848b8605Smrg/* lookup table for scaling 6 bit colors up to 8 bits */
1361848b8605Smrgstatic const GLubyte _rgb_scale_6[] = {
1362848b8605Smrg   0,   4,   8,   12,  16,  20,  24,  28,
1363848b8605Smrg   32,  36,  40,  45,  49,  53,  57,  61,
1364848b8605Smrg   65,  69,  73,  77,  81,  85,  89,  93,
1365848b8605Smrg   97,  101, 105, 109, 113, 117, 121, 125,
1366848b8605Smrg   130, 134, 138, 142, 146, 150, 154, 158,
1367848b8605Smrg   162, 166, 170, 174, 178, 182, 186, 190,
1368848b8605Smrg   194, 198, 202, 206, 210, 215, 219, 223,
1369848b8605Smrg   227, 231, 235, 239, 243, 247, 251, 255
1370848b8605Smrg};
1371848b8605Smrg
1372848b8605Smrg
/* CC_SEL(cc, which): view cc as an array of 32-bit words and return the
 * word holding bit number `which`, shifted right so that bit becomes
 * bit 0.  The caller masks off the field it wants; a field must not
 * cross a 32-bit word boundary.  The block is only read, so the cast
 * keeps it const.
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))

/* UP5(c): expand the low 5 bits of c to an 8-bit value. */
#define UP5(c) (_rgb_scale_5[(c) & 31])

/* UP6(c, b): expand the 6-bit value made of the low 5 bits of c followed
 * by the low bit of b to an 8-bit value.
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])

/* LERP(n, t, c0, c1): rounded integer blend, c0 at t == 0 and c1 at
 * t == n.  The whole expansion is parenthesized so it can be used inside
 * a larger expression.
 */
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1377848b8605Smrg
1378848b8605Smrg
1379848b8605Smrgstatic void
1380848b8605Smrgfxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1381848b8605Smrg{
1382848b8605Smrg   const GLuint *cc;
1383848b8605Smrg
1384848b8605Smrg   t *= 3;
1385848b8605Smrg   cc = (const GLuint *)(code + t / 8);
1386848b8605Smrg   t = (cc[0] >> (t & 7)) & 7;
1387848b8605Smrg
1388848b8605Smrg   if (t == 7) {
1389848b8605Smrg      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1390848b8605Smrg   } else {
1391848b8605Smrg      GLubyte r, g, b;
1392848b8605Smrg      cc = (const GLuint *)(code + 12);
1393848b8605Smrg      if (t == 0) {
1394848b8605Smrg         b = UP5(CC_SEL(cc, 0));
1395848b8605Smrg         g = UP5(CC_SEL(cc, 5));
1396848b8605Smrg         r = UP5(CC_SEL(cc, 10));
1397848b8605Smrg      } else if (t == 6) {
1398848b8605Smrg         b = UP5(CC_SEL(cc, 15));
1399848b8605Smrg         g = UP5(CC_SEL(cc, 20));
1400848b8605Smrg         r = UP5(CC_SEL(cc, 25));
1401848b8605Smrg      } else {
1402848b8605Smrg         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1403848b8605Smrg         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1404848b8605Smrg         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1405848b8605Smrg      }
1406848b8605Smrg      rgba[RCOMP] = r;
1407848b8605Smrg      rgba[GCOMP] = g;
1408848b8605Smrg      rgba[BCOMP] = b;
1409848b8605Smrg      rgba[ACOMP] = 255;
1410848b8605Smrg   }
1411848b8605Smrg}
1412848b8605Smrg
1413848b8605Smrg
1414848b8605Smrgstatic void
1415848b8605Smrgfxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1416848b8605Smrg{
1417848b8605Smrg   const GLuint *cc;
1418848b8605Smrg   GLuint kk;
1419848b8605Smrg
1420848b8605Smrg   cc = (const GLuint *)code;
1421848b8605Smrg   if (t & 16) {
1422848b8605Smrg      cc++;
1423848b8605Smrg      t &= 15;
1424848b8605Smrg   }
1425848b8605Smrg   t = (cc[0] >> (t * 2)) & 3;
1426848b8605Smrg
1427848b8605Smrg   t *= 15;
1428848b8605Smrg   cc = (const GLuint *)(code + 8 + t / 8);
1429848b8605Smrg   kk = cc[0] >> (t & 7);
1430848b8605Smrg   rgba[BCOMP] = UP5(kk);
1431848b8605Smrg   rgba[GCOMP] = UP5(kk >> 5);
1432848b8605Smrg   rgba[RCOMP] = UP5(kk >> 10);
1433848b8605Smrg   rgba[ACOMP] = 255;
1434848b8605Smrg}
1435848b8605Smrg
1436848b8605Smrg
1437848b8605Smrgstatic void
1438848b8605Smrgfxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1439848b8605Smrg{
1440848b8605Smrg   const GLuint *cc;
1441848b8605Smrg   GLuint col[2][3];
1442848b8605Smrg   GLint glsb, selb;
1443848b8605Smrg
1444848b8605Smrg   cc = (const GLuint *)code;
1445848b8605Smrg   if (t & 16) {
1446848b8605Smrg      t &= 15;
1447848b8605Smrg      t = (cc[1] >> (t * 2)) & 3;
1448848b8605Smrg      /* col 2 */
1449848b8605Smrg      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1450848b8605Smrg      col[0][GCOMP] = CC_SEL(cc, 99);
1451848b8605Smrg      col[0][RCOMP] = CC_SEL(cc, 104);
1452848b8605Smrg      /* col 3 */
1453848b8605Smrg      col[1][BCOMP] = CC_SEL(cc, 109);
1454848b8605Smrg      col[1][GCOMP] = CC_SEL(cc, 114);
1455848b8605Smrg      col[1][RCOMP] = CC_SEL(cc, 119);
1456848b8605Smrg      glsb = CC_SEL(cc, 126);
1457848b8605Smrg      selb = CC_SEL(cc, 33);
1458848b8605Smrg   } else {
1459848b8605Smrg      t = (cc[0] >> (t * 2)) & 3;
1460848b8605Smrg      /* col 0 */
1461848b8605Smrg      col[0][BCOMP] = CC_SEL(cc, 64);
1462848b8605Smrg      col[0][GCOMP] = CC_SEL(cc, 69);
1463848b8605Smrg      col[0][RCOMP] = CC_SEL(cc, 74);
1464848b8605Smrg      /* col 1 */
1465848b8605Smrg      col[1][BCOMP] = CC_SEL(cc, 79);
1466848b8605Smrg      col[1][GCOMP] = CC_SEL(cc, 84);
1467848b8605Smrg      col[1][RCOMP] = CC_SEL(cc, 89);
1468848b8605Smrg      glsb = CC_SEL(cc, 125);
1469848b8605Smrg      selb = CC_SEL(cc, 1);
1470848b8605Smrg   }
1471848b8605Smrg
1472848b8605Smrg   if (CC_SEL(cc, 124) & 1) {
1473848b8605Smrg      /* alpha[0] == 1 */
1474848b8605Smrg
1475848b8605Smrg      if (t == 3) {
1476848b8605Smrg         /* zero */
1477848b8605Smrg         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1478848b8605Smrg      } else {
1479848b8605Smrg         GLubyte r, g, b;
1480848b8605Smrg         if (t == 0) {
1481848b8605Smrg            b = UP5(col[0][BCOMP]);
1482848b8605Smrg            g = UP5(col[0][GCOMP]);
1483848b8605Smrg            r = UP5(col[0][RCOMP]);
1484848b8605Smrg         } else if (t == 2) {
1485848b8605Smrg            b = UP5(col[1][BCOMP]);
1486848b8605Smrg            g = UP6(col[1][GCOMP], glsb);
1487848b8605Smrg            r = UP5(col[1][RCOMP]);
1488848b8605Smrg         } else {
1489848b8605Smrg            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1490848b8605Smrg            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1491848b8605Smrg            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1492848b8605Smrg         }
1493848b8605Smrg         rgba[RCOMP] = r;
1494848b8605Smrg         rgba[GCOMP] = g;
1495848b8605Smrg         rgba[BCOMP] = b;
1496848b8605Smrg         rgba[ACOMP] = 255;
1497848b8605Smrg      }
1498848b8605Smrg   } else {
1499848b8605Smrg      /* alpha[0] == 0 */
1500848b8605Smrg      GLubyte r, g, b;
1501848b8605Smrg      if (t == 0) {
1502848b8605Smrg         b = UP5(col[0][BCOMP]);
1503848b8605Smrg         g = UP6(col[0][GCOMP], glsb ^ selb);
1504848b8605Smrg         r = UP5(col[0][RCOMP]);
1505848b8605Smrg      } else if (t == 3) {
1506848b8605Smrg         b = UP5(col[1][BCOMP]);
1507848b8605Smrg         g = UP6(col[1][GCOMP], glsb);
1508848b8605Smrg         r = UP5(col[1][RCOMP]);
1509848b8605Smrg      } else {
1510848b8605Smrg         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1511848b8605Smrg         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1512848b8605Smrg                        UP6(col[1][GCOMP], glsb));
1513848b8605Smrg         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1514848b8605Smrg      }
1515848b8605Smrg      rgba[RCOMP] = r;
1516848b8605Smrg      rgba[GCOMP] = g;
1517848b8605Smrg      rgba[BCOMP] = b;
1518848b8605Smrg      rgba[ACOMP] = 255;
1519848b8605Smrg   }
1520848b8605Smrg}
1521848b8605Smrg
1522848b8605Smrg
1523848b8605Smrgstatic void
1524848b8605Smrgfxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1525848b8605Smrg{
1526848b8605Smrg   const GLuint *cc;
1527848b8605Smrg   GLubyte r, g, b, a;
1528848b8605Smrg
1529848b8605Smrg   cc = (const GLuint *)code;
1530848b8605Smrg   if (CC_SEL(cc, 124) & 1) {
1531848b8605Smrg      /* lerp == 1 */
1532848b8605Smrg      GLuint col0[4];
1533848b8605Smrg
1534848b8605Smrg      if (t & 16) {
1535848b8605Smrg         t &= 15;
1536848b8605Smrg         t = (cc[1] >> (t * 2)) & 3;
1537848b8605Smrg         /* col 2 */
1538848b8605Smrg         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1539848b8605Smrg         col0[GCOMP] = CC_SEL(cc, 99);
1540848b8605Smrg         col0[RCOMP] = CC_SEL(cc, 104);
1541848b8605Smrg         col0[ACOMP] = CC_SEL(cc, 119);
1542848b8605Smrg      } else {
1543848b8605Smrg         t = (cc[0] >> (t * 2)) & 3;
1544848b8605Smrg         /* col 0 */
1545848b8605Smrg         col0[BCOMP] = CC_SEL(cc, 64);
1546848b8605Smrg         col0[GCOMP] = CC_SEL(cc, 69);
1547848b8605Smrg         col0[RCOMP] = CC_SEL(cc, 74);
1548848b8605Smrg         col0[ACOMP] = CC_SEL(cc, 109);
1549848b8605Smrg      }
1550848b8605Smrg
1551848b8605Smrg      if (t == 0) {
1552848b8605Smrg         b = UP5(col0[BCOMP]);
1553848b8605Smrg         g = UP5(col0[GCOMP]);
1554848b8605Smrg         r = UP5(col0[RCOMP]);
1555848b8605Smrg         a = UP5(col0[ACOMP]);
1556848b8605Smrg      } else if (t == 3) {
1557848b8605Smrg         b = UP5(CC_SEL(cc, 79));
1558848b8605Smrg         g = UP5(CC_SEL(cc, 84));
1559848b8605Smrg         r = UP5(CC_SEL(cc, 89));
1560848b8605Smrg         a = UP5(CC_SEL(cc, 114));
1561848b8605Smrg      } else {
1562848b8605Smrg         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1563848b8605Smrg         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1564848b8605Smrg         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1565848b8605Smrg         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1566848b8605Smrg      }
1567848b8605Smrg   } else {
1568848b8605Smrg      /* lerp == 0 */
1569848b8605Smrg
1570848b8605Smrg      if (t & 16) {
1571848b8605Smrg         cc++;
1572848b8605Smrg         t &= 15;
1573848b8605Smrg      }
1574848b8605Smrg      t = (cc[0] >> (t * 2)) & 3;
1575848b8605Smrg
1576848b8605Smrg      if (t == 3) {
1577848b8605Smrg         /* zero */
1578848b8605Smrg         r = g = b = a = 0;
1579848b8605Smrg      } else {
1580848b8605Smrg         GLuint kk;
1581848b8605Smrg         cc = (const GLuint *)code;
1582848b8605Smrg         a = UP5(cc[3] >> (t * 5 + 13));
1583848b8605Smrg         t *= 15;
1584848b8605Smrg         cc = (const GLuint *)(code + 8 + t / 8);
1585848b8605Smrg         kk = cc[0] >> (t & 7);
1586848b8605Smrg         b = UP5(kk);
1587848b8605Smrg         g = UP5(kk >> 5);
1588848b8605Smrg         r = UP5(kk >> 10);
1589848b8605Smrg      }
1590848b8605Smrg   }
1591848b8605Smrg   rgba[RCOMP] = r;
1592848b8605Smrg   rgba[GCOMP] = g;
1593848b8605Smrg   rgba[BCOMP] = b;
1594848b8605Smrg   rgba[ACOMP] = a;
1595848b8605Smrg}
1596848b8605Smrg
1597848b8605Smrg
1598848b8605Smrgstatic void
1599848b8605Smrgfxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1600848b8605Smrg               GLint i, GLint j, GLubyte *rgba)
1601848b8605Smrg{
1602848b8605Smrg   static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1603848b8605Smrg      fxt1_decode_1HI,     /* cc-high   = "00?" */
1604848b8605Smrg      fxt1_decode_1HI,     /* cc-high   = "00?" */
1605848b8605Smrg      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1606848b8605Smrg      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1607848b8605Smrg      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608848b8605Smrg      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1609848b8605Smrg      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1610848b8605Smrg      fxt1_decode_1MIXED   /* mixed     = "1??" */
1611848b8605Smrg   };
1612848b8605Smrg
1613848b8605Smrg   const GLubyte *code = (const GLubyte *)texture +
1614848b8605Smrg                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1615848b8605Smrg   GLint mode = CC_SEL(code, 125);
1616848b8605Smrg   GLint t = i & 7;
1617848b8605Smrg
1618848b8605Smrg   if (t & 4) {
1619848b8605Smrg      t += 12;
1620848b8605Smrg   }
1621848b8605Smrg   t += (j & 3) * 4;
1622848b8605Smrg
1623848b8605Smrg   decode_1[mode](code, t, rgba);
1624848b8605Smrg}
1625848b8605Smrg
1626848b8605Smrg
1627848b8605Smrg
1628848b8605Smrg
1629848b8605Smrgstatic void
1630848b8605Smrgfetch_rgb_fxt1(const GLubyte *map,
1631848b8605Smrg               GLint rowStride, GLint i, GLint j, GLfloat *texel)
1632848b8605Smrg{
1633848b8605Smrg   GLubyte rgba[4];
1634848b8605Smrg   fxt1_decode_1(map, rowStride, i, j, rgba);
1635848b8605Smrg   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1636848b8605Smrg   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1637848b8605Smrg   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1638848b8605Smrg   texel[ACOMP] = 1.0F;
1639848b8605Smrg}
1640848b8605Smrg
1641848b8605Smrg
1642848b8605Smrgstatic void
1643848b8605Smrgfetch_rgba_fxt1(const GLubyte *map,
1644848b8605Smrg                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1645848b8605Smrg{
1646848b8605Smrg   GLubyte rgba[4];
1647848b8605Smrg   fxt1_decode_1(map, rowStride, i, j, rgba);
1648848b8605Smrg   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1649848b8605Smrg   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1650848b8605Smrg   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1651848b8605Smrg   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1652848b8605Smrg}
1653848b8605Smrg
1654848b8605Smrg
1655848b8605Smrgcompressed_fetch_func
1656848b8605Smrg_mesa_get_fxt_fetch_func(mesa_format format)
1657848b8605Smrg{
1658848b8605Smrg   switch (format) {
1659848b8605Smrg   case MESA_FORMAT_RGB_FXT1:
1660848b8605Smrg      return fetch_rgb_fxt1;
1661848b8605Smrg   case MESA_FORMAT_RGBA_FXT1:
1662848b8605Smrg      return fetch_rgba_fxt1;
1663848b8605Smrg   default:
1664848b8605Smrg      return NULL;
1665848b8605Smrg   }
1666848b8605Smrg}
1667