1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5 * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26
27#include "main/glheader.h"
28#include "main/context.h"
29
30#include "main/macros.h"
31#include "main/pixeltransfer.h"
32#include "main/samplerobj.h"
33#include "program/prog_instruction.h"
34
35#include "s_context.h"
36#include "s_texcombine.h"
37
38
/**
 * Pointer to float[4], used to walk an array of 4-component colors/texels.
 * With a float4_array p, p[i] is the i-th color and p[i][RCOMP..ACOMP]
 * are its components.
 * This type makes the code below more concise and avoids a lot of casting.
 */
typedef float (*float4_array)[4];
44
45
46/**
47 * Return array of texels for given unit.
48 */
49static inline float4_array
50get_texel_array(SWcontext *swrast, GLuint unit)
51{
52#ifdef _OPENMP
53   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
54#else
55   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
56#endif
57}
58
59
60
61/**
62 * Do texture application for:
63 *  GL_EXT_texture_env_combine
64 *  GL_ARB_texture_env_combine
65 *  GL_EXT_texture_env_dot3
66 *  GL_ARB_texture_env_dot3
67 *  GL_ATI_texture_env_combine3
68 *  GL_NV_texture_env_combine4
69 *  conventional GL texture env modes
70 *
71 * \param ctx          rendering context
72 * \param unit         the texture combiner unit
73 * \param primary_rgba incoming fragment color array
74 * \param texelBuffer  pointer to texel colors for all texture units
75 *
76 * \param span         two fields are used in this function:
77 *                       span->end: number of fragments to process
78 *                       span->array->rgba: incoming/result fragment colors
79 */
80static void
81texture_combine( struct gl_context *ctx, GLuint unit,
82                 const float4_array primary_rgba,
83                 const GLfloat *texelBuffer,
84                 SWspan *span )
85{
86   SWcontext *swrast = SWRAST_CONTEXT(ctx);
87   const struct gl_fixedfunc_texture_unit *textureUnit =
88      &ctx->Texture.FixedFuncUnit[unit];
89   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
90   float4_array argRGB[MAX_COMBINER_TERMS];
91   float4_array argA[MAX_COMBINER_TERMS];
92   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
93   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
94   const GLuint numArgsRGB = combine->_NumArgsRGB;
95   const GLuint numArgsA = combine->_NumArgsA;
96   float4_array ccolor[4], rgba;
97   GLuint i, term;
98   GLuint n = span->end;
99   GLchan (*rgbaChan)[4] = span->array->rgba;
100
101   /* alloc temp pixel buffers */
102   rgba = malloc(4 * n * sizeof(GLfloat));
103   if (!rgba) {
104      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
105      return;
106   }
107
108   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
109      ccolor[i] = malloc(4 * n * sizeof(GLfloat));
110      if (!ccolor[i]) {
111         while (i) {
112            free(ccolor[i]);
113            i--;
114         }
115         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
116         free(rgba);
117         return;
118      }
119   }
120
121   for (i = 0; i < n; i++) {
122      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
123      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
124      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
125      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
126   }
127
128   /*
129   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
130          combine->ModeRGB,
131          combine->ModeA,
132          combine->SourceRGB[0],
133          combine->SourceA[0],
134          combine->SourceRGB[1],
135          combine->SourceA[1]);
136   */
137
138   /*
139    * Do operand setup for up to 4 operands.  Loop over the terms.
140    */
141   for (term = 0; term < numArgsRGB; term++) {
142      const GLenum srcRGB = combine->SourceRGB[term];
143      const GLenum operandRGB = combine->OperandRGB[term];
144
145      switch (srcRGB) {
146         case GL_TEXTURE:
147            argRGB[term] = get_texel_array(swrast, unit);
148            break;
149         case GL_PRIMARY_COLOR:
150            argRGB[term] = primary_rgba;
151            break;
152         case GL_PREVIOUS:
153            argRGB[term] = rgba;
154            break;
155         case GL_CONSTANT:
156            {
157               float4_array c = ccolor[term];
158               GLfloat red   = textureUnit->EnvColor[0];
159               GLfloat green = textureUnit->EnvColor[1];
160               GLfloat blue  = textureUnit->EnvColor[2];
161               GLfloat alpha = textureUnit->EnvColor[3];
162               for (i = 0; i < n; i++) {
163                  ASSIGN_4V(c[i], red, green, blue, alpha);
164               }
165               argRGB[term] = ccolor[term];
166            }
167            break;
168	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
169	  */
170	 case GL_ZERO:
171            {
172               float4_array c = ccolor[term];
173               for (i = 0; i < n; i++) {
174                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
175               }
176               argRGB[term] = ccolor[term];
177            }
178            break;
179	 case GL_ONE:
180            {
181               float4_array c = ccolor[term];
182               for (i = 0; i < n; i++) {
183                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
184               }
185               argRGB[term] = ccolor[term];
186            }
187            break;
188         default:
189            /* ARB_texture_env_crossbar source */
190            {
191               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
192               assert(srcUnit < ctx->Const.MaxTextureUnits);
193               if (!ctx->Texture.Unit[srcUnit]._Current)
194                  goto end;
195               argRGB[term] = get_texel_array(swrast, srcUnit);
196            }
197      }
198
199      if (operandRGB != GL_SRC_COLOR) {
200         float4_array src = argRGB[term];
201         float4_array dst = ccolor[term];
202
203         /* point to new arg[term] storage */
204         argRGB[term] = ccolor[term];
205
206         switch (operandRGB) {
207         case GL_ONE_MINUS_SRC_COLOR:
208            for (i = 0; i < n; i++) {
209               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
210               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
211               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
212            }
213            break;
214         case GL_SRC_ALPHA:
215            for (i = 0; i < n; i++) {
216               dst[i][RCOMP] =
217               dst[i][GCOMP] =
218               dst[i][BCOMP] = src[i][ACOMP];
219            }
220            break;
221         case GL_ONE_MINUS_SRC_ALPHA:
222            for (i = 0; i < n; i++) {
223               dst[i][RCOMP] =
224               dst[i][GCOMP] =
225               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
226            }
227            break;
228         default:
229            _mesa_problem(ctx, "Bad operandRGB");
230         }
231      }
232   }
233
234   /*
235    * Set up the argA[term] pointers
236    */
237   for (term = 0; term < numArgsA; term++) {
238      const GLenum srcA = combine->SourceA[term];
239      const GLenum operandA = combine->OperandA[term];
240
241      switch (srcA) {
242         case GL_TEXTURE:
243            argA[term] = get_texel_array(swrast, unit);
244            break;
245         case GL_PRIMARY_COLOR:
246            argA[term] = primary_rgba;
247            break;
248         case GL_PREVIOUS:
249            argA[term] = rgba;
250            break;
251         case GL_CONSTANT:
252            {
253               float4_array c = ccolor[term];
254               GLfloat alpha = textureUnit->EnvColor[3];
255               for (i = 0; i < n; i++)
256                  c[i][ACOMP] = alpha;
257               argA[term] = ccolor[term];
258            }
259            break;
260	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
261	  */
262	 case GL_ZERO:
263            {
264               float4_array c = ccolor[term];
265               for (i = 0; i < n; i++)
266                  c[i][ACOMP] = 0.0F;
267               argA[term] = ccolor[term];
268            }
269            break;
270	 case GL_ONE:
271            {
272               float4_array c = ccolor[term];
273               for (i = 0; i < n; i++)
274                  c[i][ACOMP] = 1.0F;
275               argA[term] = ccolor[term];
276            }
277            break;
278         default:
279            /* ARB_texture_env_crossbar source */
280            {
281               const GLuint srcUnit = srcA - GL_TEXTURE0;
282               assert(srcUnit < ctx->Const.MaxTextureUnits);
283               if (!ctx->Texture.Unit[srcUnit]._Current)
284                  goto end;
285               argA[term] = get_texel_array(swrast, srcUnit);
286            }
287      }
288
289      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
290         float4_array src = argA[term];
291         float4_array dst = ccolor[term];
292         argA[term] = ccolor[term];
293         for (i = 0; i < n; i++) {
294            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
295         }
296      }
297   }
298
299   /* RGB channel combine */
300   {
301      float4_array arg0 = argRGB[0];
302      float4_array arg1 = argRGB[1];
303      float4_array arg2 = argRGB[2];
304      float4_array arg3 = argRGB[3];
305
306      switch (combine->ModeRGB) {
307      case GL_REPLACE:
308         for (i = 0; i < n; i++) {
309            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
310            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
311            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
312         }
313         break;
314      case GL_MODULATE:
315         for (i = 0; i < n; i++) {
316            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
317            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
318            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
319         }
320         break;
321      case GL_ADD:
322         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
323            /* (a * b) + (c * d) */
324            for (i = 0; i < n; i++) {
325               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
326                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
327               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
328                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
329               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
330                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
331            }
332         }
333         else {
334            /* 2-term addition */
335            for (i = 0; i < n; i++) {
336               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
337               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
338               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
339            }
340         }
341         break;
342      case GL_ADD_SIGNED:
343         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
344            /* (a * b) + (c * d) - 0.5 */
345            for (i = 0; i < n; i++) {
346               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
347                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
348               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
349                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
350               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
351                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
352            }
353         }
354         else {
355            for (i = 0; i < n; i++) {
356               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
357               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
358               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
359            }
360         }
361         break;
362      case GL_INTERPOLATE:
363         for (i = 0; i < n; i++) {
364            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
365                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
366            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
367                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
368            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
369                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
370         }
371         break;
372      case GL_SUBTRACT:
373         for (i = 0; i < n; i++) {
374            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
375            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
376            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
377         }
378         break;
379      case GL_DOT3_RGB_EXT:
380      case GL_DOT3_RGBA_EXT:
381         /* Do not scale the result by 1 2 or 4 */
382         for (i = 0; i < n; i++) {
383            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
384                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
385                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
386               * 4.0F;
387            dot = CLAMP(dot, 0.0F, 1.0F);
388            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
389         }
390         break;
391      case GL_DOT3_RGB:
392      case GL_DOT3_RGBA:
393         /* DO scale the result by 1 2 or 4 */
394         for (i = 0; i < n; i++) {
395            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
396                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
397                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
398               * 4.0F * scaleRGB;
399            dot = CLAMP(dot, 0.0F, 1.0F);
400            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
401         }
402         break;
403      case GL_MODULATE_ADD_ATI:
404         for (i = 0; i < n; i++) {
405            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
406                              arg1[i][RCOMP]) * scaleRGB;
407            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
408                              arg1[i][GCOMP]) * scaleRGB;
409            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
410                              arg1[i][BCOMP]) * scaleRGB;
411	 }
412         break;
413      case GL_MODULATE_SIGNED_ADD_ATI:
414         for (i = 0; i < n; i++) {
415            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
416                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
417            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
418                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
419            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
420                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
421	 }
422         break;
423      case GL_MODULATE_SUBTRACT_ATI:
424         for (i = 0; i < n; i++) {
425            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
426                              arg1[i][RCOMP]) * scaleRGB;
427            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
428                              arg1[i][GCOMP]) * scaleRGB;
429            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
430                              arg1[i][BCOMP]) * scaleRGB;
431	 }
432         break;
433      default:
434         _mesa_problem(ctx, "invalid combine mode");
435      }
436   }
437
438   /* Alpha channel combine */
439   {
440      float4_array arg0 = argA[0];
441      float4_array arg1 = argA[1];
442      float4_array arg2 = argA[2];
443      float4_array arg3 = argA[3];
444
445      switch (combine->ModeA) {
446      case GL_REPLACE:
447         for (i = 0; i < n; i++) {
448            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
449         }
450         break;
451      case GL_MODULATE:
452         for (i = 0; i < n; i++) {
453            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
454         }
455         break;
456      case GL_ADD:
457         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
458            /* (a * b) + (c * d) */
459            for (i = 0; i < n; i++) {
460               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
461                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
462            }
463         }
464         else {
465            /* two-term add */
466            for (i = 0; i < n; i++) {
467               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
468            }
469         }
470         break;
471      case GL_ADD_SIGNED:
472         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
473            /* (a * b) + (c * d) - 0.5 */
474            for (i = 0; i < n; i++) {
475               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
476                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
477                                 0.5F) * scaleA;
478            }
479         }
480         else {
481            /* a + b - 0.5 */
482            for (i = 0; i < n; i++) {
483               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
484            }
485         }
486         break;
487      case GL_INTERPOLATE:
488         for (i = 0; i < n; i++) {
489            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
490                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
491               * scaleA;
492         }
493         break;
494      case GL_SUBTRACT:
495         for (i = 0; i < n; i++) {
496            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
497         }
498         break;
499      case GL_MODULATE_ADD_ATI:
500         for (i = 0; i < n; i++) {
501            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
502                              + arg1[i][ACOMP]) * scaleA;
503         }
504         break;
505      case GL_MODULATE_SIGNED_ADD_ATI:
506         for (i = 0; i < n; i++) {
507            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
508                              arg1[i][ACOMP] - 0.5F) * scaleA;
509         }
510         break;
511      case GL_MODULATE_SUBTRACT_ATI:
512         for (i = 0; i < n; i++) {
513            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
514                              - arg1[i][ACOMP]) * scaleA;
515         }
516         break;
517      default:
518         _mesa_problem(ctx, "invalid combine mode");
519      }
520   }
521
522   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
523    * This is kind of a kludge.  It would have been better if the spec
524    * were written such that the GL_COMBINE_ALPHA value could be set to
525    * GL_DOT3.
526    */
527   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
528       combine->ModeRGB == GL_DOT3_RGBA) {
529      for (i = 0; i < n; i++) {
530	 rgba[i][ACOMP] = rgba[i][RCOMP];
531      }
532   }
533
534   for (i = 0; i < n; i++) {
535      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
536      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
537      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
538      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
539   }
540   /* The span->array->rgba values are of CHAN type so set
541    * span->array->ChanType field accordingly.
542    */
543   span->array->ChanType = CHAN_TYPE;
544
545end:
546   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
547      free(ccolor[i]);
548   }
549   free(rgba);
550}
551
552
553/**
554 * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
555 * See GL_EXT_texture_swizzle.
556 */
557static void
558swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
559{
560   const GLuint swzR = GET_SWZ(swizzle, 0);
561   const GLuint swzG = GET_SWZ(swizzle, 1);
562   const GLuint swzB = GET_SWZ(swizzle, 2);
563   const GLuint swzA = GET_SWZ(swizzle, 3);
564   GLfloat vector[6];
565   GLuint i;
566
567   vector[SWIZZLE_ZERO] = 0;
568   vector[SWIZZLE_ONE] = 1.0F;
569
570   for (i = 0; i < count; i++) {
571      vector[SWIZZLE_X] = texels[i][0];
572      vector[SWIZZLE_Y] = texels[i][1];
573      vector[SWIZZLE_Z] = texels[i][2];
574      vector[SWIZZLE_W] = texels[i][3];
575      texels[i][RCOMP] = vector[swzR];
576      texels[i][GCOMP] = vector[swzG];
577      texels[i][BCOMP] = vector[swzB];
578      texels[i][ACOMP] = vector[swzA];
579   }
580}
581
582
/**
 * Apply texture mapping to a span of fragments.
 *
 * Steps performed:
 *  - allocate swrast->TexelBuffer on first use,
 *  - optionally save the incoming fragment colors as floats
 *    (the GL_PRIMARY_COLOR combiner source),
 *  - sample every texture unit that has a current texture into that
 *    unit's texel array,
 *  - run texture_combine() for each such unit, which updates
 *    span->array->rgba.
 *
 * On allocation failure GL_OUT_OF_MEMORY is recorded and the function
 * returns without texturing the span.
 *
 * \param ctx   rendering context
 * \param span  the fragments to texture; span->end is the fragment count
 */
void
_swrast_texture_span( struct gl_context *ctx, SWspan *span )
{
   SWcontext *swrast = SWRAST_CONTEXT(ctx);
   float4_array primary_rgba;
   GLuint unit;

   /* Lazily create the texel buffer the first time a span is textured.
    * Note that the braces of the inner "if" below are only present in
    * OpenMP builds; the preprocessor blocks must stay paired.
    */
   if (!swrast->TexelBuffer) {
#ifdef _OPENMP
      const GLint maxThreads = omp_get_max_threads();

      /* TexelBuffer memory allocation needs to be done in a critical section
       * as this code runs in a parallel loop.
       * When entering the section, first check if TexelBuffer has been
       * initialized already by another thread while this thread was waiting.
       */
      #pragma omp critical
      if (!swrast->TexelBuffer) {
#else
      const GLint maxThreads = 1;
#endif

      /* TexelBuffer is also global and normally shared by all SWspan
       * instances; when running with multiple threads, create one per
       * thread.
       */
      swrast->TexelBuffer =
	 malloc(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits * maxThreads *
			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
#ifdef _OPENMP
      } /* critical section */
#endif

      /* NOTE(review): the message below names "texture_combine" although
       * this code lives in _swrast_texture_span - confirm whether that is
       * intentional.
       */
      if (!swrast->TexelBuffer) {
	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
	 return;
      }
   }

   /* Per-call float copy of the incoming colors; freed before returning */
   primary_rgba = malloc(span->end * 4 * sizeof(GLfloat));

   if (!primary_rgba) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
      return;
   }

   assert(span->end <= SWRAST_MAX_WIDTH);

   /*
    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
    * Only done when swrast->_TextureCombinePrimary is set; otherwise the
    * buffer contents are left uninitialized.
    */
   if (swrast->_TextureCombinePrimary) {
      GLuint i;
      for (i = 0; i < span->end; i++) {
         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
      }
   }

   /*
    * Must do all texture sampling before combining in order to
    * accommodate GL_ARB_texture_env_crossbar.
    */
   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
      if (texUnit->_Current) {
         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
            span->array->attribs[VARYING_SLOT_TEX0 + unit];
         const struct gl_texture_object *curObj = texUnit->_Current;
         const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
         GLfloat *lambda = span->array->lambda[unit];
         float4_array texels = get_texel_array(swrast, unit);

         /* adjust texture lod (lambda) */
         if (span->arrayMask & SPAN_LAMBDA) {
            if (texUnit->LodBias + samp->Attrib.LodBias != 0.0F) {
               /* apply LOD bias, but don't clamp yet */
               const GLfloat bias = CLAMP(texUnit->LodBias + samp->Attrib.LodBias,
                                          -ctx->Const.MaxTextureLodBias,
                                          ctx->Const.MaxTextureLodBias);
               GLuint i;
               for (i = 0; i < span->end; i++) {
                  lambda[i] += bias;
               }
            }

            /* skip the clamp loop when both limits are still -1000/1000 */
            if (samp->Attrib.MinLod != -1000.0F ||
                samp->Attrib.MaxLod != 1000.0F) {
               /* apply LOD clamping to lambda */
               const GLfloat min = samp->Attrib.MinLod;
               const GLfloat max = samp->Attrib.MaxLod;
               GLuint i;
               for (i = 0; i < span->end; i++) {
                  GLfloat l = lambda[i];
                  lambda[i] = CLAMP(l, min, max);
               }
            }
         }
         else if (samp->Attrib.MaxAnisotropy > 1.0F &&
                  samp->Attrib.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
            /* sample_lambda_2d_aniso is being used as texture_sample_func,
             * it requires the current SWspan *span as an additional parameter.
             * In order to keep the same function signature, the unused lambda
             * parameter will be modified to actually contain the SWspan pointer.
             * This is a Hack. To make it right, the texture_sample_func
             * signature and all implementing functions need to be modified.
             */
            /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
            lambda = (GLfloat *)span;
         }

         /* Sample the texture (span->end = number of fragments) */
         swrast->TextureSample[unit]( ctx, samp,
                                      ctx->Texture.Unit[unit]._Current,
                                      span->end, texcoords, lambda, texels );

         /* GL_EXT_texture_swizzle */
         if (curObj->Attrib._Swizzle != SWIZZLE_NOOP) {
            swizzle_texels(curObj->Attrib._Swizzle, span->end, texels);
         }
      }
   }

   /*
    * OK, now apply the texture (aka texture combine/blend).
    * We modify the span->color.rgba values.
    */
   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      if (ctx->Texture.Unit[unit]._Current)
         texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
   }

   free(primary_rgba);
}
722