17117f1b4Smrg/*
27117f1b4Smrg * Mesa 3-D graphics library
37117f1b4Smrg *
44a49301eSmrg * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
54a49301eSmrg * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
67117f1b4Smrg *
77117f1b4Smrg * Permission is hereby granted, free of charge, to any person obtaining a
87117f1b4Smrg * copy of this software and associated documentation files (the "Software"),
97117f1b4Smrg * to deal in the Software without restriction, including without limitation
107117f1b4Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
117117f1b4Smrg * and/or sell copies of the Software, and to permit persons to whom the
127117f1b4Smrg * Software is furnished to do so, subject to the following conditions:
137117f1b4Smrg *
147117f1b4Smrg * The above copyright notice and this permission notice shall be included
157117f1b4Smrg * in all copies or substantial portions of the Software.
167117f1b4Smrg *
177117f1b4Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
187117f1b4Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
197117f1b4Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20af69d88dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21af69d88dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22af69d88dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23af69d88dSmrg * OTHER DEALINGS IN THE SOFTWARE.
247117f1b4Smrg */
257117f1b4Smrg
267117f1b4Smrg
27c1f859d4Smrg#include "main/glheader.h"
28c1f859d4Smrg#include "main/context.h"
297ec681f3Smrg
3001e04c3fSmrg#include "main/macros.h"
313464ebd5Sriastradh#include "main/pixeltransfer.h"
32af69d88dSmrg#include "main/samplerobj.h"
333464ebd5Sriastradh#include "program/prog_instruction.h"
347117f1b4Smrg
357117f1b4Smrg#include "s_context.h"
367117f1b4Smrg#include "s_texcombine.h"
377117f1b4Smrg
387117f1b4Smrg
/**
 * Pointer to array of float[4].
 *
 * A value of this type is indexed as a[i][comp]: \c i selects one
 * 4-component color/texel and \c comp one of its four floats.
 * This type makes the code below more concise and avoids a lot of casting.
 */
typedef float (*float4_array)[4];
444a49301eSmrg
454a49301eSmrg
464a49301eSmrg/**
474a49301eSmrg * Return array of texels for given unit.
484a49301eSmrg */
49af69d88dSmrgstatic inline float4_array
504a49301eSmrgget_texel_array(SWcontext *swrast, GLuint unit)
514a49301eSmrg{
52af69d88dSmrg#ifdef _OPENMP
53af69d88dSmrg   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
54af69d88dSmrg#else
55af69d88dSmrg   return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
56af69d88dSmrg#endif
574a49301eSmrg}
584a49301eSmrg
597117f1b4Smrg
607117f1b4Smrg
617117f1b4Smrg/**
624a49301eSmrg * Do texture application for:
634a49301eSmrg *  GL_EXT_texture_env_combine
644a49301eSmrg *  GL_ARB_texture_env_combine
654a49301eSmrg *  GL_EXT_texture_env_dot3
664a49301eSmrg *  GL_ARB_texture_env_dot3
674a49301eSmrg *  GL_ATI_texture_env_combine3
684a49301eSmrg *  GL_NV_texture_env_combine4
694a49301eSmrg *  conventional GL texture env modes
707117f1b4Smrg *
717117f1b4Smrg * \param ctx          rendering context
724a49301eSmrg * \param unit         the texture combiner unit
737117f1b4Smrg * \param primary_rgba incoming fragment color array
747117f1b4Smrg * \param texelBuffer  pointer to texel colors for all texture units
757ec681f3Smrg *
76af69d88dSmrg * \param span         two fields are used in this function:
77af69d88dSmrg *                       span->end: number of fragments to process
78af69d88dSmrg *                       span->array->rgba: incoming/result fragment colors
797117f1b4Smrg */
807117f1b4Smrgstatic void
81af69d88dSmrgtexture_combine( struct gl_context *ctx, GLuint unit,
824a49301eSmrg                 const float4_array primary_rgba,
834a49301eSmrg                 const GLfloat *texelBuffer,
84af69d88dSmrg                 SWspan *span )
857117f1b4Smrg{
864a49301eSmrg   SWcontext *swrast = SWRAST_CONTEXT(ctx);
8701e04c3fSmrg   const struct gl_fixedfunc_texture_unit *textureUnit =
8801e04c3fSmrg      &ctx->Texture.FixedFuncUnit[unit];
894a49301eSmrg   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
904a49301eSmrg   float4_array argRGB[MAX_COMBINER_TERMS];
914a49301eSmrg   float4_array argA[MAX_COMBINER_TERMS];
924a49301eSmrg   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
934a49301eSmrg   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
944a49301eSmrg   const GLuint numArgsRGB = combine->_NumArgsRGB;
954a49301eSmrg   const GLuint numArgsA = combine->_NumArgsA;
963464ebd5Sriastradh   float4_array ccolor[4], rgba;
974a49301eSmrg   GLuint i, term;
98af69d88dSmrg   GLuint n = span->end;
99af69d88dSmrg   GLchan (*rgbaChan)[4] = span->array->rgba;
1004a49301eSmrg
1013464ebd5Sriastradh   /* alloc temp pixel buffers */
102af69d88dSmrg   rgba = malloc(4 * n * sizeof(GLfloat));
1033464ebd5Sriastradh   if (!rgba) {
1043464ebd5Sriastradh      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
1053464ebd5Sriastradh      return;
1063464ebd5Sriastradh   }
1073464ebd5Sriastradh
1083464ebd5Sriastradh   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
109af69d88dSmrg      ccolor[i] = malloc(4 * n * sizeof(GLfloat));
1103464ebd5Sriastradh      if (!ccolor[i]) {
1113464ebd5Sriastradh         while (i) {
1123464ebd5Sriastradh            free(ccolor[i]);
1133464ebd5Sriastradh            i--;
1143464ebd5Sriastradh         }
1153464ebd5Sriastradh         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
116af69d88dSmrg         free(rgba);
1173464ebd5Sriastradh         return;
1183464ebd5Sriastradh      }
1193464ebd5Sriastradh   }
1203464ebd5Sriastradh
1214a49301eSmrg   for (i = 0; i < n; i++) {
1224a49301eSmrg      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
1234a49301eSmrg      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
1244a49301eSmrg      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
1254a49301eSmrg      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
1264a49301eSmrg   }
1277117f1b4Smrg
1287117f1b4Smrg   /*
1297117f1b4Smrg   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
1304a49301eSmrg          combine->ModeRGB,
1314a49301eSmrg          combine->ModeA,
1324a49301eSmrg          combine->SourceRGB[0],
1334a49301eSmrg          combine->SourceA[0],
1344a49301eSmrg          combine->SourceRGB[1],
1354a49301eSmrg          combine->SourceA[1]);
1367117f1b4Smrg   */
1377117f1b4Smrg
1387117f1b4Smrg   /*
1394a49301eSmrg    * Do operand setup for up to 4 operands.  Loop over the terms.
1407117f1b4Smrg    */
1414a49301eSmrg   for (term = 0; term < numArgsRGB; term++) {
1424a49301eSmrg      const GLenum srcRGB = combine->SourceRGB[term];
1434a49301eSmrg      const GLenum operandRGB = combine->OperandRGB[term];
1447117f1b4Smrg
1457117f1b4Smrg      switch (srcRGB) {
1467117f1b4Smrg         case GL_TEXTURE:
1474a49301eSmrg            argRGB[term] = get_texel_array(swrast, unit);
1487117f1b4Smrg            break;
1497117f1b4Smrg         case GL_PRIMARY_COLOR:
1504a49301eSmrg            argRGB[term] = primary_rgba;
1517117f1b4Smrg            break;
1527117f1b4Smrg         case GL_PREVIOUS:
1534a49301eSmrg            argRGB[term] = rgba;
1547117f1b4Smrg            break;
1557117f1b4Smrg         case GL_CONSTANT:
1567117f1b4Smrg            {
1574a49301eSmrg               float4_array c = ccolor[term];
1584a49301eSmrg               GLfloat red   = textureUnit->EnvColor[0];
1594a49301eSmrg               GLfloat green = textureUnit->EnvColor[1];
1604a49301eSmrg               GLfloat blue  = textureUnit->EnvColor[2];
1614a49301eSmrg               GLfloat alpha = textureUnit->EnvColor[3];
1627117f1b4Smrg               for (i = 0; i < n; i++) {
1634a49301eSmrg                  ASSIGN_4V(c[i], red, green, blue, alpha);
1647117f1b4Smrg               }
1654a49301eSmrg               argRGB[term] = ccolor[term];
1667117f1b4Smrg            }
1677117f1b4Smrg            break;
1687117f1b4Smrg	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
1697117f1b4Smrg	  */
1707117f1b4Smrg	 case GL_ZERO:
1714a49301eSmrg            {
1724a49301eSmrg               float4_array c = ccolor[term];
1734a49301eSmrg               for (i = 0; i < n; i++) {
1744a49301eSmrg                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
1754a49301eSmrg               }
1764a49301eSmrg               argRGB[term] = ccolor[term];
1774a49301eSmrg            }
1787117f1b4Smrg            break;
1797117f1b4Smrg	 case GL_ONE:
1804a49301eSmrg            {
1814a49301eSmrg               float4_array c = ccolor[term];
1824a49301eSmrg               for (i = 0; i < n; i++) {
1834a49301eSmrg                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
1844a49301eSmrg               }
1854a49301eSmrg               argRGB[term] = ccolor[term];
1864a49301eSmrg            }
1877117f1b4Smrg            break;
1887117f1b4Smrg         default:
1897117f1b4Smrg            /* ARB_texture_env_crossbar source */
1907117f1b4Smrg            {
1917117f1b4Smrg               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
19201e04c3fSmrg               assert(srcUnit < ctx->Const.MaxTextureUnits);
193af69d88dSmrg               if (!ctx->Texture.Unit[srcUnit]._Current)
1943464ebd5Sriastradh                  goto end;
1954a49301eSmrg               argRGB[term] = get_texel_array(swrast, srcUnit);
1967117f1b4Smrg            }
1977117f1b4Smrg      }
1987117f1b4Smrg
1994a49301eSmrg      if (operandRGB != GL_SRC_COLOR) {
2004a49301eSmrg         float4_array src = argRGB[term];
2014a49301eSmrg         float4_array dst = ccolor[term];
2027117f1b4Smrg
2034a49301eSmrg         /* point to new arg[term] storage */
2044a49301eSmrg         argRGB[term] = ccolor[term];
2057117f1b4Smrg
2064a49301eSmrg         switch (operandRGB) {
2074a49301eSmrg         case GL_ONE_MINUS_SRC_COLOR:
2087117f1b4Smrg            for (i = 0; i < n; i++) {
2094a49301eSmrg               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
2104a49301eSmrg               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
2114a49301eSmrg               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
2127117f1b4Smrg            }
2134a49301eSmrg            break;
2144a49301eSmrg         case GL_SRC_ALPHA:
2157117f1b4Smrg            for (i = 0; i < n; i++) {
2164a49301eSmrg               dst[i][RCOMP] =
2174a49301eSmrg               dst[i][GCOMP] =
2187117f1b4Smrg               dst[i][BCOMP] = src[i][ACOMP];
2197117f1b4Smrg            }
2204a49301eSmrg            break;
2214a49301eSmrg         case GL_ONE_MINUS_SRC_ALPHA:
2227117f1b4Smrg            for (i = 0; i < n; i++) {
2234a49301eSmrg               dst[i][RCOMP] =
2244a49301eSmrg               dst[i][GCOMP] =
2254a49301eSmrg               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
2267117f1b4Smrg            }
2274a49301eSmrg            break;
2284a49301eSmrg         default:
2294a49301eSmrg            _mesa_problem(ctx, "Bad operandRGB");
2307117f1b4Smrg         }
2317117f1b4Smrg      }
2327117f1b4Smrg   }
2337117f1b4Smrg
2347117f1b4Smrg   /*
2354a49301eSmrg    * Set up the argA[term] pointers
2367117f1b4Smrg    */
2374a49301eSmrg   for (term = 0; term < numArgsA; term++) {
2384a49301eSmrg      const GLenum srcA = combine->SourceA[term];
2394a49301eSmrg      const GLenum operandA = combine->OperandA[term];
2407117f1b4Smrg
2417117f1b4Smrg      switch (srcA) {
2427117f1b4Smrg         case GL_TEXTURE:
2434a49301eSmrg            argA[term] = get_texel_array(swrast, unit);
2447117f1b4Smrg            break;
2457117f1b4Smrg         case GL_PRIMARY_COLOR:
2464a49301eSmrg            argA[term] = primary_rgba;
2477117f1b4Smrg            break;
2487117f1b4Smrg         case GL_PREVIOUS:
2494a49301eSmrg            argA[term] = rgba;
2507117f1b4Smrg            break;
2517117f1b4Smrg         case GL_CONSTANT:
2527117f1b4Smrg            {
2534a49301eSmrg               float4_array c = ccolor[term];
2544a49301eSmrg               GLfloat alpha = textureUnit->EnvColor[3];
2557117f1b4Smrg               for (i = 0; i < n; i++)
2567117f1b4Smrg                  c[i][ACOMP] = alpha;
2574a49301eSmrg               argA[term] = ccolor[term];
2587117f1b4Smrg            }
2597117f1b4Smrg            break;
2607117f1b4Smrg	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
2617117f1b4Smrg	  */
2627117f1b4Smrg	 case GL_ZERO:
2634a49301eSmrg            {
2644a49301eSmrg               float4_array c = ccolor[term];
2654a49301eSmrg               for (i = 0; i < n; i++)
2664a49301eSmrg                  c[i][ACOMP] = 0.0F;
2674a49301eSmrg               argA[term] = ccolor[term];
2684a49301eSmrg            }
2697117f1b4Smrg            break;
2707117f1b4Smrg	 case GL_ONE:
2714a49301eSmrg            {
2724a49301eSmrg               float4_array c = ccolor[term];
2734a49301eSmrg               for (i = 0; i < n; i++)
2744a49301eSmrg                  c[i][ACOMP] = 1.0F;
2754a49301eSmrg               argA[term] = ccolor[term];
2764a49301eSmrg            }
2777117f1b4Smrg            break;
2787117f1b4Smrg         default:
2797117f1b4Smrg            /* ARB_texture_env_crossbar source */
2807117f1b4Smrg            {
2817117f1b4Smrg               const GLuint srcUnit = srcA - GL_TEXTURE0;
28201e04c3fSmrg               assert(srcUnit < ctx->Const.MaxTextureUnits);
283af69d88dSmrg               if (!ctx->Texture.Unit[srcUnit]._Current)
2843464ebd5Sriastradh                  goto end;
2854a49301eSmrg               argA[term] = get_texel_array(swrast, srcUnit);
2867117f1b4Smrg            }
2877117f1b4Smrg      }
2887117f1b4Smrg
2894a49301eSmrg      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
2904a49301eSmrg         float4_array src = argA[term];
2914a49301eSmrg         float4_array dst = ccolor[term];
2924a49301eSmrg         argA[term] = ccolor[term];
2937117f1b4Smrg         for (i = 0; i < n; i++) {
2944a49301eSmrg            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
2957117f1b4Smrg         }
2967117f1b4Smrg      }
2977117f1b4Smrg   }
2987117f1b4Smrg
2994a49301eSmrg   /* RGB channel combine */
3004a49301eSmrg   {
3014a49301eSmrg      float4_array arg0 = argRGB[0];
3024a49301eSmrg      float4_array arg1 = argRGB[1];
3034a49301eSmrg      float4_array arg2 = argRGB[2];
3044a49301eSmrg      float4_array arg3 = argRGB[3];
3054a49301eSmrg
3064a49301eSmrg      switch (combine->ModeRGB) {
3077117f1b4Smrg      case GL_REPLACE:
3084a49301eSmrg         for (i = 0; i < n; i++) {
3094a49301eSmrg            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
3104a49301eSmrg            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
3114a49301eSmrg            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
3127117f1b4Smrg         }
3137117f1b4Smrg         break;
3147117f1b4Smrg      case GL_MODULATE:
3154a49301eSmrg         for (i = 0; i < n; i++) {
3164a49301eSmrg            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
3174a49301eSmrg            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
3184a49301eSmrg            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
3197117f1b4Smrg         }
3207117f1b4Smrg         break;
3217117f1b4Smrg      case GL_ADD:
3224a49301eSmrg         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
3234a49301eSmrg            /* (a * b) + (c * d) */
3247117f1b4Smrg            for (i = 0; i < n; i++) {
3254a49301eSmrg               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
3264a49301eSmrg                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
3274a49301eSmrg               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
3284a49301eSmrg                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
3294a49301eSmrg               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
3304a49301eSmrg                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
3314a49301eSmrg            }
3324a49301eSmrg         }
3334a49301eSmrg         else {
3344a49301eSmrg            /* 2-term addition */
3354a49301eSmrg            for (i = 0; i < n; i++) {
3364a49301eSmrg               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
3374a49301eSmrg               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
3384a49301eSmrg               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
3397117f1b4Smrg            }
3407117f1b4Smrg         }
3417117f1b4Smrg         break;
3427117f1b4Smrg      case GL_ADD_SIGNED:
3434a49301eSmrg         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
3444a49301eSmrg            /* (a * b) + (c * d) - 0.5 */
3457117f1b4Smrg            for (i = 0; i < n; i++) {
3464a49301eSmrg               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
347cdc920a0Smrg                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
3484a49301eSmrg               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
349cdc920a0Smrg                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
3504a49301eSmrg               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
351cdc920a0Smrg                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
3527117f1b4Smrg            }
3537117f1b4Smrg         }
3544a49301eSmrg         else {
3557117f1b4Smrg            for (i = 0; i < n; i++) {
356cdc920a0Smrg               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
357cdc920a0Smrg               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
358cdc920a0Smrg               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
3597117f1b4Smrg            }
3607117f1b4Smrg         }
3617117f1b4Smrg         break;
3624a49301eSmrg      case GL_INTERPOLATE:
3634a49301eSmrg         for (i = 0; i < n; i++) {
3644a49301eSmrg            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
3654a49301eSmrg                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
3664a49301eSmrg            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
3674a49301eSmrg                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
3684a49301eSmrg            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
3694a49301eSmrg                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
3704a49301eSmrg         }
3714a49301eSmrg         break;
3727117f1b4Smrg      case GL_SUBTRACT:
3734a49301eSmrg         for (i = 0; i < n; i++) {
3744a49301eSmrg            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
3754a49301eSmrg            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
3764a49301eSmrg            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
3777117f1b4Smrg         }
3787117f1b4Smrg         break;
3797117f1b4Smrg      case GL_DOT3_RGB_EXT:
3807117f1b4Smrg      case GL_DOT3_RGBA_EXT:
3814a49301eSmrg         /* Do not scale the result by 1 2 or 4 */
3824a49301eSmrg         for (i = 0; i < n; i++) {
3834a49301eSmrg            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
3844a49301eSmrg                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
3854a49301eSmrg                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
3864a49301eSmrg               * 4.0F;
3874a49301eSmrg            dot = CLAMP(dot, 0.0F, 1.0F);
3884a49301eSmrg            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
3897117f1b4Smrg         }
3907117f1b4Smrg         break;
3917117f1b4Smrg      case GL_DOT3_RGB:
3927117f1b4Smrg      case GL_DOT3_RGBA:
3934a49301eSmrg         /* DO scale the result by 1 2 or 4 */
3944a49301eSmrg         for (i = 0; i < n; i++) {
3954a49301eSmrg            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
3964a49301eSmrg                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
3974a49301eSmrg                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
3984a49301eSmrg               * 4.0F * scaleRGB;
399cdc920a0Smrg            dot = CLAMP(dot, 0.0F, 1.0F);
4004a49301eSmrg            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
4017117f1b4Smrg         }
4027117f1b4Smrg         break;
4037117f1b4Smrg      case GL_MODULATE_ADD_ATI:
4044a49301eSmrg         for (i = 0; i < n; i++) {
4054a49301eSmrg            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
4064a49301eSmrg                              arg1[i][RCOMP]) * scaleRGB;
4074a49301eSmrg            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
4084a49301eSmrg                              arg1[i][GCOMP]) * scaleRGB;
4094a49301eSmrg            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
4104a49301eSmrg                              arg1[i][BCOMP]) * scaleRGB;
4117117f1b4Smrg	 }
4127117f1b4Smrg         break;
4137117f1b4Smrg      case GL_MODULATE_SIGNED_ADD_ATI:
4144a49301eSmrg         for (i = 0; i < n; i++) {
4154a49301eSmrg            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
416cdc920a0Smrg                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
4174a49301eSmrg            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
418cdc920a0Smrg                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
4194a49301eSmrg            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
420cdc920a0Smrg                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
4217117f1b4Smrg	 }
4227117f1b4Smrg         break;
4237117f1b4Smrg      case GL_MODULATE_SUBTRACT_ATI:
4244a49301eSmrg         for (i = 0; i < n; i++) {
4254a49301eSmrg            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
4264a49301eSmrg                              arg1[i][RCOMP]) * scaleRGB;
4274a49301eSmrg            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
4284a49301eSmrg                              arg1[i][GCOMP]) * scaleRGB;
4294a49301eSmrg            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
4304a49301eSmrg                              arg1[i][BCOMP]) * scaleRGB;
4317117f1b4Smrg	 }
4327117f1b4Smrg         break;
4337117f1b4Smrg      default:
4347117f1b4Smrg         _mesa_problem(ctx, "invalid combine mode");
4354a49301eSmrg      }
4367117f1b4Smrg   }
4377117f1b4Smrg
4384a49301eSmrg   /* Alpha channel combine */
4394a49301eSmrg   {
4404a49301eSmrg      float4_array arg0 = argA[0];
4414a49301eSmrg      float4_array arg1 = argA[1];
4424a49301eSmrg      float4_array arg2 = argA[2];
4434a49301eSmrg      float4_array arg3 = argA[3];
4444a49301eSmrg
4454a49301eSmrg      switch (combine->ModeA) {
4467117f1b4Smrg      case GL_REPLACE:
4474a49301eSmrg         for (i = 0; i < n; i++) {
4484a49301eSmrg            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
4497117f1b4Smrg         }
4507117f1b4Smrg         break;
4517117f1b4Smrg      case GL_MODULATE:
4524a49301eSmrg         for (i = 0; i < n; i++) {
4534a49301eSmrg            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
4547117f1b4Smrg         }
4557117f1b4Smrg         break;
4567117f1b4Smrg      case GL_ADD:
4574a49301eSmrg         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
4584a49301eSmrg            /* (a * b) + (c * d) */
4597117f1b4Smrg            for (i = 0; i < n; i++) {
4604a49301eSmrg               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
4614a49301eSmrg                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
4624a49301eSmrg            }
4634a49301eSmrg         }
4644a49301eSmrg         else {
4654a49301eSmrg            /* two-term add */
4664a49301eSmrg            for (i = 0; i < n; i++) {
4674a49301eSmrg               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
4687117f1b4Smrg            }
4697117f1b4Smrg         }
4707117f1b4Smrg         break;
4717117f1b4Smrg      case GL_ADD_SIGNED:
4724a49301eSmrg         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
4734a49301eSmrg            /* (a * b) + (c * d) - 0.5 */
4744a49301eSmrg            for (i = 0; i < n; i++) {
4754a49301eSmrg               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
4764a49301eSmrg                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
477cdc920a0Smrg                                 0.5F) * scaleA;
4784a49301eSmrg            }
4794a49301eSmrg         }
4804a49301eSmrg         else {
4814a49301eSmrg            /* a + b - 0.5 */
4827117f1b4Smrg            for (i = 0; i < n; i++) {
4834a49301eSmrg               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
4847117f1b4Smrg            }
4857117f1b4Smrg         }
4867117f1b4Smrg         break;
4877117f1b4Smrg      case GL_INTERPOLATE:
4884a49301eSmrg         for (i = 0; i < n; i++) {
4894a49301eSmrg            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
4904a49301eSmrg                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
4914a49301eSmrg               * scaleA;
4927117f1b4Smrg         }
4937117f1b4Smrg         break;
4947117f1b4Smrg      case GL_SUBTRACT:
4954a49301eSmrg         for (i = 0; i < n; i++) {
4964a49301eSmrg            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
4977117f1b4Smrg         }
4987117f1b4Smrg         break;
4997117f1b4Smrg      case GL_MODULATE_ADD_ATI:
5004a49301eSmrg         for (i = 0; i < n; i++) {
5014a49301eSmrg            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
5024a49301eSmrg                              + arg1[i][ACOMP]) * scaleA;
5037117f1b4Smrg         }
5047117f1b4Smrg         break;
5057117f1b4Smrg      case GL_MODULATE_SIGNED_ADD_ATI:
5064a49301eSmrg         for (i = 0; i < n; i++) {
5074a49301eSmrg            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
5084a49301eSmrg                              arg1[i][ACOMP] - 0.5F) * scaleA;
5097117f1b4Smrg         }
5107117f1b4Smrg         break;
5117117f1b4Smrg      case GL_MODULATE_SUBTRACT_ATI:
5124a49301eSmrg         for (i = 0; i < n; i++) {
5134a49301eSmrg            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
5144a49301eSmrg                              - arg1[i][ACOMP]) * scaleA;
5157117f1b4Smrg         }
5167117f1b4Smrg         break;
5177117f1b4Smrg      default:
5187117f1b4Smrg         _mesa_problem(ctx, "invalid combine mode");
5194a49301eSmrg      }
5207117f1b4Smrg   }
5217117f1b4Smrg
5227117f1b4Smrg   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
5237117f1b4Smrg    * This is kind of a kludge.  It would have been better if the spec
5247117f1b4Smrg    * were written such that the GL_COMBINE_ALPHA value could be set to
5257117f1b4Smrg    * GL_DOT3.
5267117f1b4Smrg    */
5274a49301eSmrg   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
5284a49301eSmrg       combine->ModeRGB == GL_DOT3_RGBA) {
5297117f1b4Smrg      for (i = 0; i < n; i++) {
5307117f1b4Smrg	 rgba[i][ACOMP] = rgba[i][RCOMP];
5317117f1b4Smrg      }
5327117f1b4Smrg   }
5334a49301eSmrg
5344a49301eSmrg   for (i = 0; i < n; i++) {
5354a49301eSmrg      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
5364a49301eSmrg      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
5374a49301eSmrg      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
5384a49301eSmrg      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
5394a49301eSmrg   }
540af69d88dSmrg   /* The span->array->rgba values are of CHAN type so set
541af69d88dSmrg    * span->array->ChanType field accordingly.
542af69d88dSmrg    */
543af69d88dSmrg   span->array->ChanType = CHAN_TYPE;
5443464ebd5Sriastradh
5453464ebd5Sriastradhend:
5463464ebd5Sriastradh   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
5473464ebd5Sriastradh      free(ccolor[i]);
5483464ebd5Sriastradh   }
5493464ebd5Sriastradh   free(rgba);
5507117f1b4Smrg}
5517117f1b4Smrg
5527117f1b4Smrg
5537117f1b4Smrg/**
5544a49301eSmrg * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
5554a49301eSmrg * See GL_EXT_texture_swizzle.
5567117f1b4Smrg */
5577117f1b4Smrgstatic void
5584a49301eSmrgswizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
5597117f1b4Smrg{
5604a49301eSmrg   const GLuint swzR = GET_SWZ(swizzle, 0);
5614a49301eSmrg   const GLuint swzG = GET_SWZ(swizzle, 1);
5624a49301eSmrg   const GLuint swzB = GET_SWZ(swizzle, 2);
5634a49301eSmrg   const GLuint swzA = GET_SWZ(swizzle, 3);
5644a49301eSmrg   GLfloat vector[6];
5657117f1b4Smrg   GLuint i;
5667117f1b4Smrg
5674a49301eSmrg   vector[SWIZZLE_ZERO] = 0;
5684a49301eSmrg   vector[SWIZZLE_ONE] = 1.0F;
5694a49301eSmrg
5704a49301eSmrg   for (i = 0; i < count; i++) {
5714a49301eSmrg      vector[SWIZZLE_X] = texels[i][0];
5724a49301eSmrg      vector[SWIZZLE_Y] = texels[i][1];
5734a49301eSmrg      vector[SWIZZLE_Z] = texels[i][2];
5744a49301eSmrg      vector[SWIZZLE_W] = texels[i][3];
5754a49301eSmrg      texels[i][RCOMP] = vector[swzR];
5764a49301eSmrg      texels[i][GCOMP] = vector[swzG];
5774a49301eSmrg      texels[i][BCOMP] = vector[swzB];
5784a49301eSmrg      texels[i][ACOMP] = vector[swzA];
5797117f1b4Smrg   }
5807117f1b4Smrg}
5817117f1b4Smrg
5827117f1b4Smrg
5837117f1b4Smrg/**
5847117f1b4Smrg * Apply texture mapping to a span of fragments.
5857117f1b4Smrg */
5867117f1b4Smrgvoid
5873464ebd5Sriastradh_swrast_texture_span( struct gl_context *ctx, SWspan *span )
5887117f1b4Smrg{
5897117f1b4Smrg   SWcontext *swrast = SWRAST_CONTEXT(ctx);
5903464ebd5Sriastradh   float4_array primary_rgba;
5917117f1b4Smrg   GLuint unit;
5927117f1b4Smrg
593af69d88dSmrg   if (!swrast->TexelBuffer) {
594af69d88dSmrg#ifdef _OPENMP
595af69d88dSmrg      const GLint maxThreads = omp_get_max_threads();
596af69d88dSmrg
597af69d88dSmrg      /* TexelBuffer memory allocation needs to be done in a critical section
598af69d88dSmrg       * as this code runs in a parallel loop.
599af69d88dSmrg       * When entering the section, first check if TexelBuffer has been
600af69d88dSmrg       * initialized already by another thread while this thread was waiting.
601af69d88dSmrg       */
602af69d88dSmrg      #pragma omp critical
603af69d88dSmrg      if (!swrast->TexelBuffer) {
604af69d88dSmrg#else
605af69d88dSmrg      const GLint maxThreads = 1;
606af69d88dSmrg#endif
607af69d88dSmrg
608af69d88dSmrg      /* TexelBuffer is also global and normally shared by all SWspan
609af69d88dSmrg       * instances; when running with multiple threads, create one per
610af69d88dSmrg       * thread.
611af69d88dSmrg       */
612af69d88dSmrg      swrast->TexelBuffer =
613af69d88dSmrg	 malloc(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits * maxThreads *
614af69d88dSmrg			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
615af69d88dSmrg#ifdef _OPENMP
616af69d88dSmrg      } /* critical section */
617af69d88dSmrg#endif
618af69d88dSmrg
619af69d88dSmrg      if (!swrast->TexelBuffer) {
620af69d88dSmrg	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
621af69d88dSmrg	 return;
622af69d88dSmrg      }
623af69d88dSmrg   }
624af69d88dSmrg
625af69d88dSmrg   primary_rgba = malloc(span->end * 4 * sizeof(GLfloat));
6263464ebd5Sriastradh
6273464ebd5Sriastradh   if (!primary_rgba) {
6283464ebd5Sriastradh      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
6293464ebd5Sriastradh      return;
6303464ebd5Sriastradh   }
6313464ebd5Sriastradh
63201e04c3fSmrg   assert(span->end <= SWRAST_MAX_WIDTH);
6337117f1b4Smrg
6347117f1b4Smrg   /*
6357117f1b4Smrg    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
6367117f1b4Smrg    */
6374a49301eSmrg   if (swrast->_TextureCombinePrimary) {
6384a49301eSmrg      GLuint i;
6394a49301eSmrg      for (i = 0; i < span->end; i++) {
6404a49301eSmrg         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
6414a49301eSmrg         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
6424a49301eSmrg         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
6434a49301eSmrg         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
6444a49301eSmrg      }
6454a49301eSmrg   }
6464a49301eSmrg
6477117f1b4Smrg   /*
6487117f1b4Smrg    * Must do all texture sampling before combining in order to
64901e04c3fSmrg    * accommodate GL_ARB_texture_env_crossbar.
6507117f1b4Smrg    */
6517117f1b4Smrg   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
6524a49301eSmrg      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
653af69d88dSmrg      if (texUnit->_Current) {
6544a49301eSmrg         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
655af69d88dSmrg            span->array->attribs[VARYING_SLOT_TEX0 + unit];
6567117f1b4Smrg         const struct gl_texture_object *curObj = texUnit->_Current;
657af69d88dSmrg         const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
6587117f1b4Smrg         GLfloat *lambda = span->array->lambda[unit];
6594a49301eSmrg         float4_array texels = get_texel_array(swrast, unit);
6607117f1b4Smrg
6617117f1b4Smrg         /* adjust texture lod (lambda) */
6627117f1b4Smrg         if (span->arrayMask & SPAN_LAMBDA) {
6637ec681f3Smrg            if (texUnit->LodBias + samp->Attrib.LodBias != 0.0F) {
6647117f1b4Smrg               /* apply LOD bias, but don't clamp yet */
6657ec681f3Smrg               const GLfloat bias = CLAMP(texUnit->LodBias + samp->Attrib.LodBias,
6667117f1b4Smrg                                          -ctx->Const.MaxTextureLodBias,
6677117f1b4Smrg                                          ctx->Const.MaxTextureLodBias);
6687117f1b4Smrg               GLuint i;
6697117f1b4Smrg               for (i = 0; i < span->end; i++) {
6707117f1b4Smrg                  lambda[i] += bias;
6717117f1b4Smrg               }
6727117f1b4Smrg            }
6737117f1b4Smrg
6747ec681f3Smrg            if (samp->Attrib.MinLod != -1000.0F ||
6757ec681f3Smrg                samp->Attrib.MaxLod != 1000.0F) {
6767117f1b4Smrg               /* apply LOD clamping to lambda */
6777ec681f3Smrg               const GLfloat min = samp->Attrib.MinLod;
6787ec681f3Smrg               const GLfloat max = samp->Attrib.MaxLod;
6797117f1b4Smrg               GLuint i;
6807117f1b4Smrg               for (i = 0; i < span->end; i++) {
6817117f1b4Smrg                  GLfloat l = lambda[i];
6827117f1b4Smrg                  lambda[i] = CLAMP(l, min, max);
6837117f1b4Smrg               }
6847117f1b4Smrg            }
6857117f1b4Smrg         }
6867ec681f3Smrg         else if (samp->Attrib.MaxAnisotropy > 1.0F &&
6877ec681f3Smrg                  samp->Attrib.MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
6883464ebd5Sriastradh            /* sample_lambda_2d_aniso is being used as texture_sample_func,
6893464ebd5Sriastradh             * it requires the current SWspan *span as an additional parameter.
6903464ebd5Sriastradh             * In order to keep the same function signature, the unused lambda
6913464ebd5Sriastradh             * parameter will be modified to actually contain the SWspan pointer.
6923464ebd5Sriastradh             * This is a Hack. To make it right, the texture_sample_func
6933464ebd5Sriastradh             * signature and all implementing functions need to be modified.
6943464ebd5Sriastradh             */
6953464ebd5Sriastradh            /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
6963464ebd5Sriastradh            lambda = (GLfloat *)span;
6973464ebd5Sriastradh         }
6987117f1b4Smrg
6997117f1b4Smrg         /* Sample the texture (span->end = number of fragments) */
700af69d88dSmrg         swrast->TextureSample[unit]( ctx, samp,
701af69d88dSmrg                                      ctx->Texture.Unit[unit]._Current,
702af69d88dSmrg                                      span->end, texcoords, lambda, texels );
7037117f1b4Smrg
7044a49301eSmrg         /* GL_EXT_texture_swizzle */
7057ec681f3Smrg         if (curObj->Attrib._Swizzle != SWIZZLE_NOOP) {
7067ec681f3Smrg            swizzle_texels(curObj->Attrib._Swizzle, span->end, texels);
7077117f1b4Smrg         }
7087117f1b4Smrg      }
7097117f1b4Smrg   }
7107117f1b4Smrg
7117117f1b4Smrg   /*
7127117f1b4Smrg    * OK, now apply the texture (aka texture combine/blend).
7137117f1b4Smrg    * We modify the span->color.rgba values.
7147117f1b4Smrg    */
7157117f1b4Smrg   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
716af69d88dSmrg      if (ctx->Texture.Unit[unit]._Current)
717af69d88dSmrg         texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
7187117f1b4Smrg   }
7193464ebd5Sriastradh
7203464ebd5Sriastradh   free(primary_rgba);
7217117f1b4Smrg}
722