ff_fragment_shader.cpp revision 3464ebd5
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2009 VMware, Inc.  All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29extern "C" {
30#include "glheader.h"
31#include "imports.h"
32#include "mtypes.h"
33#include "program/program.h"
34#include "program/prog_parameter.h"
35#include "program/prog_cache.h"
36#include "program/prog_instruction.h"
37#include "program/prog_print.h"
38#include "program/prog_statevars.h"
39#include "program/programopt.h"
40#include "texenvprogram.h"
41}
42
43/*
44 * Note on texture units:
45 *
46 * The number of texture units supported by fixed-function fragment
47 * processing is MAX_TEXTURE_COORD_UNITS, not MAX_TEXTURE_IMAGE_UNITS.
48 * That's because there's a one-to-one correspondence between texture
49 * coordinates and samplers in fixed-function processing.
50 *
51 * Since fixed-function vertex processing is limited to MAX_TEXTURE_COORD_UNITS
52 * sets of texcoords, so is fixed-function fragment processing.
53 *
54 * We can safely use ctx->Const.MaxTextureUnits for loop bounds.
55 */
56
57
/**
 * List node that pairs a key with a fragment program.
 *
 * NOTE(review): nothing in the visible part of this file uses this
 * struct; the per-field notes are inferred from the field names and
 * types -- confirm against the cache code before relying on them.
 */
struct texenvprog_cache_item
{
   GLuint hash;                          /* presumably a hash of \c key */
   void *key;                            /* opaque key data */
   struct gl_fragment_program *data;     /* program stored for \c key */
   struct texenvprog_cache_item *next;   /* link to another item */
};
65
66static GLboolean
67texenv_doing_secondary_color(struct gl_context *ctx)
68{
69   if (ctx->Light.Enabled &&
70       (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR))
71      return GL_TRUE;
72
73   if (ctx->Fog.ColorSumEnabled)
74      return GL_TRUE;
75
76   return GL_FALSE;
77}
78
/**
 * Up to nine instructions per tex unit, plus fog, specular color.
 * emit_op() asserts that every emitted instruction index stays below
 * this bound.
 */
#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12)

/** Non-zero when the VERBOSE_DISASSEM bit is set in MESA_VERBOSE. */
#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)
85
/**
 * One combiner argument: which source feeds it (SRC_x) and how that
 * source is interpreted (OPR_x).
 *
 * With GCC the two values are packed into bitfields of a single byte
 * type; the SRC_x codes in this file go up to 15 (4 bits) and the OPR_x
 * codes up to 7 (3 bits).  Other compilers get one full GLubyte each.
 */
struct mode_opt {
#ifdef __GNUC__
   __extension__ GLubyte Source:4;  /**< SRC_x */
   __extension__ GLubyte Operand:3; /**< OPR_x */
#else
   GLubyte Source;  /**< SRC_x */
   GLubyte Operand; /**< OPR_x */
#endif
};
95
/**
 * Compact description of the fixed-function fragment state, filled in
 * by make_state_key().  That function zeroes the whole struct first and
 * returns a size that covers only the first nr_enabled_units entries of
 * unit[], which is why unit[] has to stay the last member.
 */
struct state_key {
   GLuint nr_enabled_units:8;  /**< index of the highest enabled unit, plus one */
   GLuint enabled_units:8;     /**< bit i is set when unit i is enabled */
   GLuint separate_specular:1; /**< texenv_doing_secondary_color() was true */
   GLuint fog_enabled:1;       /**< ctx->Fog.Enabled was set */
   GLuint fog_mode:2;          /**< FOG_x */
   GLuint inputs_available:12; /**< FRAG_BIT_x inputs both available and referenced */
   GLuint num_draw_buffers:4;  /**< ctx->DrawBuffer->_NumColorDrawBuffers */

   /* NOTE: This array of structs must be last! (see "keySize" below) */
   struct {
      GLuint enabled:1;        /**< this unit contributes to the program */
      GLuint source_index:3;   /**< TEXTURE_x_INDEX */
      GLuint shadow:1;         /**< GL_COMPARE_R_TO_TEXTURE on a depth or
                                *   depth/stencil base format */
      GLuint ScaleShiftRGB:2;  /**< copied from the combine state */
      GLuint ScaleShiftA:2;    /**< copied from the combine state */

      GLuint NumArgsRGB:3;  /**< up to MAX_COMBINER_TERMS */
      GLuint ModeRGB:5;     /**< MODE_x */

      GLuint NumArgsA:3;  /**< up to MAX_COMBINER_TERMS */
      GLuint ModeA:5;     /**< MODE_x */

      GLuint texture_cyl_wrap:1; /**< For gallium test/debug only */

      struct mode_opt OptRGB[MAX_COMBINER_TERMS]; /**< RGB combiner arguments */
      struct mode_opt OptA[MAX_COMBINER_TERMS];   /**< alpha combiner arguments */
   } unit[MAX_TEXTURE_UNITS];
};
125
126#define FOG_LINEAR  0
127#define FOG_EXP     1
128#define FOG_EXP2    2
129#define FOG_UNKNOWN 3
130
131static GLuint translate_fog_mode( GLenum mode )
132{
133   switch (mode) {
134   case GL_LINEAR: return FOG_LINEAR;
135   case GL_EXP: return FOG_EXP;
136   case GL_EXP2: return FOG_EXP2;
137   default: return FOG_UNKNOWN;
138   }
139}
140
141#define OPR_SRC_COLOR           0
142#define OPR_ONE_MINUS_SRC_COLOR 1
143#define OPR_SRC_ALPHA           2
144#define OPR_ONE_MINUS_SRC_ALPHA	3
145#define OPR_ZERO                4
146#define OPR_ONE                 5
147#define OPR_UNKNOWN             7
148
149static GLuint translate_operand( GLenum operand )
150{
151   switch (operand) {
152   case GL_SRC_COLOR: return OPR_SRC_COLOR;
153   case GL_ONE_MINUS_SRC_COLOR: return OPR_ONE_MINUS_SRC_COLOR;
154   case GL_SRC_ALPHA: return OPR_SRC_ALPHA;
155   case GL_ONE_MINUS_SRC_ALPHA: return OPR_ONE_MINUS_SRC_ALPHA;
156   case GL_ZERO: return OPR_ZERO;
157   case GL_ONE: return OPR_ONE;
158   default:
159      assert(0);
160      return OPR_UNKNOWN;
161   }
162}
163
164#define SRC_TEXTURE  0
165#define SRC_TEXTURE0 1
166#define SRC_TEXTURE1 2
167#define SRC_TEXTURE2 3
168#define SRC_TEXTURE3 4
169#define SRC_TEXTURE4 5
170#define SRC_TEXTURE5 6
171#define SRC_TEXTURE6 7
172#define SRC_TEXTURE7 8
173#define SRC_CONSTANT 9
174#define SRC_PRIMARY_COLOR 10
175#define SRC_PREVIOUS 11
176#define SRC_ZERO     12
177#define SRC_UNKNOWN  15
178
179static GLuint translate_source( GLenum src )
180{
181   switch (src) {
182   case GL_TEXTURE: return SRC_TEXTURE;
183   case GL_TEXTURE0:
184   case GL_TEXTURE1:
185   case GL_TEXTURE2:
186   case GL_TEXTURE3:
187   case GL_TEXTURE4:
188   case GL_TEXTURE5:
189   case GL_TEXTURE6:
190   case GL_TEXTURE7: return SRC_TEXTURE0 + (src - GL_TEXTURE0);
191   case GL_CONSTANT: return SRC_CONSTANT;
192   case GL_PRIMARY_COLOR: return SRC_PRIMARY_COLOR;
193   case GL_PREVIOUS: return SRC_PREVIOUS;
194   case GL_ZERO:
195      return SRC_ZERO;
196   default:
197      assert(0);
198      return SRC_UNKNOWN;
199   }
200}
201
202#define MODE_REPLACE                     0  /* r = a0 */
203#define MODE_MODULATE                    1  /* r = a0 * a1 */
204#define MODE_ADD                         2  /* r = a0 + a1 */
205#define MODE_ADD_SIGNED                  3  /* r = a0 + a1 - 0.5 */
206#define MODE_INTERPOLATE                 4  /* r = a0 * a2 + a1 * (1 - a2) */
207#define MODE_SUBTRACT                    5  /* r = a0 - a1 */
208#define MODE_DOT3_RGB                    6  /* r = a0 . a1 */
209#define MODE_DOT3_RGB_EXT                7  /* r = a0 . a1 */
210#define MODE_DOT3_RGBA                   8  /* r = a0 . a1 */
211#define MODE_DOT3_RGBA_EXT               9  /* r = a0 . a1 */
212#define MODE_MODULATE_ADD_ATI           10  /* r = a0 * a2 + a1 */
213#define MODE_MODULATE_SIGNED_ADD_ATI    11  /* r = a0 * a2 + a1 - 0.5 */
214#define MODE_MODULATE_SUBTRACT_ATI      12  /* r = a0 * a2 - a1 */
215#define MODE_ADD_PRODUCTS               13  /* r = a0 * a1 + a2 * a3 */
216#define MODE_ADD_PRODUCTS_SIGNED        14  /* r = a0 * a1 + a2 * a3 - 0.5 */
217#define MODE_BUMP_ENVMAP_ATI            15  /* special */
218#define MODE_UNKNOWN                    16
219
220/**
221 * Translate GL combiner state into a MODE_x value
222 */
223static GLuint translate_mode( GLenum envMode, GLenum mode )
224{
225   switch (mode) {
226   case GL_REPLACE: return MODE_REPLACE;
227   case GL_MODULATE: return MODE_MODULATE;
228   case GL_ADD:
229      if (envMode == GL_COMBINE4_NV)
230         return MODE_ADD_PRODUCTS;
231      else
232         return MODE_ADD;
233   case GL_ADD_SIGNED:
234      if (envMode == GL_COMBINE4_NV)
235         return MODE_ADD_PRODUCTS_SIGNED;
236      else
237         return MODE_ADD_SIGNED;
238   case GL_INTERPOLATE: return MODE_INTERPOLATE;
239   case GL_SUBTRACT: return MODE_SUBTRACT;
240   case GL_DOT3_RGB: return MODE_DOT3_RGB;
241   case GL_DOT3_RGB_EXT: return MODE_DOT3_RGB_EXT;
242   case GL_DOT3_RGBA: return MODE_DOT3_RGBA;
243   case GL_DOT3_RGBA_EXT: return MODE_DOT3_RGBA_EXT;
244   case GL_MODULATE_ADD_ATI: return MODE_MODULATE_ADD_ATI;
245   case GL_MODULATE_SIGNED_ADD_ATI: return MODE_MODULATE_SIGNED_ADD_ATI;
246   case GL_MODULATE_SUBTRACT_ATI: return MODE_MODULATE_SUBTRACT_ATI;
247   case GL_BUMP_ENVMAP_ATI: return MODE_BUMP_ENVMAP_ATI;
248   default:
249      assert(0);
250      return MODE_UNKNOWN;
251   }
252}
253
254
255/**
256 * Do we need to clamp the results of the given texture env/combine mode?
257 * If the inputs to the mode are in [0,1] we don't always have to clamp
258 * the results.
259 */
260static GLboolean
261need_saturate( GLuint mode )
262{
263   switch (mode) {
264   case MODE_REPLACE:
265   case MODE_MODULATE:
266   case MODE_INTERPOLATE:
267      return GL_FALSE;
268   case MODE_ADD:
269   case MODE_ADD_SIGNED:
270   case MODE_SUBTRACT:
271   case MODE_DOT3_RGB:
272   case MODE_DOT3_RGB_EXT:
273   case MODE_DOT3_RGBA:
274   case MODE_DOT3_RGBA_EXT:
275   case MODE_MODULATE_ADD_ATI:
276   case MODE_MODULATE_SIGNED_ADD_ATI:
277   case MODE_MODULATE_SUBTRACT_ATI:
278   case MODE_ADD_PRODUCTS:
279   case MODE_ADD_PRODUCTS_SIGNED:
280   case MODE_BUMP_ENVMAP_ATI:
281      return GL_TRUE;
282   default:
283      assert(0);
284      return GL_FALSE;
285   }
286}
287
288
289
290/**
291 * Translate TEXTURE_x_BIT to TEXTURE_x_INDEX.
292 */
293static GLuint translate_tex_src_bit( GLbitfield bit )
294{
295   ASSERT(bit);
296   return _mesa_ffs(bit) - 1;
297}
298
299
/* Eight consecutive bits starting at the TEX0 vertex attribute slot;
 * get_fp_input_mask() uses it to pick texcoord bits out of a VERT_BIT_x
 * mask.
 */
#define VERT_BIT_TEX_ANY    (0xff << VERT_ATTRIB_TEX0)
/* Same, for the TEX0 vertex result slot (a vertex program's
 * OutputsWritten mask).
 */
#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)
302
303/**
304 * Identify all possible varying inputs.  The fragment program will
305 * never reference non-varying inputs, but will track them via state
306 * constants instead.
307 *
308 * This function figures out all the inputs that the fragment program
309 * has access to.  The bitmask is later reduced to just those which
310 * are actually referenced.
311 */
312static GLbitfield get_fp_input_mask( struct gl_context *ctx )
313{
314   /* _NEW_PROGRAM */
315   const GLboolean vertexShader =
316      (ctx->Shader.CurrentVertexProgram &&
317       ctx->Shader.CurrentVertexProgram->LinkStatus &&
318       ctx->Shader.CurrentVertexProgram->VertexProgram);
319   const GLboolean vertexProgram = ctx->VertexProgram._Enabled;
320   GLbitfield fp_inputs = 0x0;
321
322   if (ctx->VertexProgram._Overriden) {
323      /* Somebody's messing with the vertex program and we don't have
324       * a clue what's happening.  Assume that it could be producing
325       * all possible outputs.
326       */
327      fp_inputs = ~0;
328   }
329   else if (ctx->RenderMode == GL_FEEDBACK) {
330      /* _NEW_RENDERMODE */
331      fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
332   }
333   else if (!(vertexProgram || vertexShader) ||
334            !ctx->VertexProgram._Current) {
335      /* Fixed function vertex logic */
336      /* _NEW_ARRAY */
337      GLbitfield varying_inputs = ctx->varying_vp_inputs;
338
339      /* These get generated in the setup routine regardless of the
340       * vertex program:
341       */
342      /* _NEW_POINT */
343      if (ctx->Point.PointSprite)
344         varying_inputs |= FRAG_BITS_TEX_ANY;
345
346      /* First look at what values may be computed by the generated
347       * vertex program:
348       */
349      /* _NEW_LIGHT */
350      if (ctx->Light.Enabled) {
351         fp_inputs |= FRAG_BIT_COL0;
352
353         if (texenv_doing_secondary_color(ctx))
354            fp_inputs |= FRAG_BIT_COL1;
355      }
356
357      /* _NEW_TEXTURE */
358      fp_inputs |= (ctx->Texture._TexGenEnabled |
359                    ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;
360
361      /* Then look at what might be varying as a result of enabled
362       * arrays, etc:
363       */
364      if (varying_inputs & VERT_BIT_COLOR0)
365         fp_inputs |= FRAG_BIT_COL0;
366      if (varying_inputs & VERT_BIT_COLOR1)
367         fp_inputs |= FRAG_BIT_COL1;
368
369      fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0)
370                    << FRAG_ATTRIB_TEX0);
371
372   }
373   else {
374      /* calculate from vp->outputs */
375      struct gl_vertex_program *vprog;
376      GLbitfield64 vp_outputs;
377
378      /* Choose GLSL vertex shader over ARB vertex program.  Need this
379       * since vertex shader state validation comes after fragment state
380       * validation (see additional comments in state.c).
381       */
382      if (vertexShader)
383         vprog = ctx->Shader.CurrentVertexProgram->VertexProgram;
384      else
385         vprog = ctx->VertexProgram.Current;
386
387      vp_outputs = vprog->Base.OutputsWritten;
388
389      /* These get generated in the setup routine regardless of the
390       * vertex program:
391       */
392      /* _NEW_POINT */
393      if (ctx->Point.PointSprite)
394         vp_outputs |= FRAG_BITS_TEX_ANY;
395
396      if (vp_outputs & (1 << VERT_RESULT_COL0))
397         fp_inputs |= FRAG_BIT_COL0;
398      if (vp_outputs & (1 << VERT_RESULT_COL1))
399         fp_inputs |= FRAG_BIT_COL1;
400
401      fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0)
402                    << FRAG_ATTRIB_TEX0);
403   }
404
405   return fp_inputs;
406}
407
408
409/**
410 * Examine current texture environment state and generate a unique
411 * key to identify it.
412 */
413static GLuint make_state_key( struct gl_context *ctx,  struct state_key *key )
414{
415   GLuint i, j;
416   GLbitfield inputs_referenced = FRAG_BIT_COL0;
417   const GLbitfield inputs_available = get_fp_input_mask( ctx );
418   GLuint keySize;
419
420   memset(key, 0, sizeof(*key));
421
422   /* _NEW_TEXTURE */
423   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
424      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
425      const struct gl_texture_object *texObj = texUnit->_Current;
426      const struct gl_tex_env_combine_state *comb = texUnit->_CurrentCombine;
427      GLenum format;
428
429      if (!texUnit->_ReallyEnabled || !texUnit->Enabled)
430         continue;
431
432      format = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
433
434      key->unit[i].enabled = 1;
435      key->enabled_units |= (1<<i);
436      key->nr_enabled_units = i + 1;
437      inputs_referenced |= FRAG_BIT_TEX(i);
438
439      key->unit[i].source_index =
440         translate_tex_src_bit(texUnit->_ReallyEnabled);
441
442      key->unit[i].shadow =
443         ((texObj->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE) &&
444          ((format == GL_DEPTH_COMPONENT) ||
445           (format == GL_DEPTH_STENCIL_EXT)));
446
447      key->unit[i].NumArgsRGB = comb->_NumArgsRGB;
448      key->unit[i].NumArgsA = comb->_NumArgsA;
449
450      key->unit[i].ModeRGB =
451	 translate_mode(texUnit->EnvMode, comb->ModeRGB);
452      key->unit[i].ModeA =
453	 translate_mode(texUnit->EnvMode, comb->ModeA);
454
455      key->unit[i].ScaleShiftRGB = comb->ScaleShiftRGB;
456      key->unit[i].ScaleShiftA = comb->ScaleShiftA;
457
458      for (j = 0; j < MAX_COMBINER_TERMS; j++) {
459         key->unit[i].OptRGB[j].Operand = translate_operand(comb->OperandRGB[j]);
460         key->unit[i].OptA[j].Operand = translate_operand(comb->OperandA[j]);
461         key->unit[i].OptRGB[j].Source = translate_source(comb->SourceRGB[j]);
462         key->unit[i].OptA[j].Source = translate_source(comb->SourceA[j]);
463      }
464
465      if (key->unit[i].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
466         /* requires some special translation */
467         key->unit[i].NumArgsRGB = 2;
468         key->unit[i].ScaleShiftRGB = 0;
469         key->unit[i].OptRGB[0].Operand = OPR_SRC_COLOR;
470         key->unit[i].OptRGB[0].Source = SRC_TEXTURE;
471         key->unit[i].OptRGB[1].Operand = OPR_SRC_COLOR;
472         key->unit[i].OptRGB[1].Source = texUnit->BumpTarget - GL_TEXTURE0 + SRC_TEXTURE0;
473       }
474
475      /* this is a back-door for enabling cylindrical texture wrap mode */
476      if (texObj->Priority == 0.125)
477         key->unit[i].texture_cyl_wrap = 1;
478   }
479
480   /* _NEW_LIGHT | _NEW_FOG */
481   if (texenv_doing_secondary_color(ctx)) {
482      key->separate_specular = 1;
483      inputs_referenced |= FRAG_BIT_COL1;
484   }
485
486   /* _NEW_FOG */
487   if (ctx->Fog.Enabled) {
488      key->fog_enabled = 1;
489      key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
490      inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
491   }
492
493   /* _NEW_BUFFERS */
494   key->num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
495
496   key->inputs_available = (inputs_available & inputs_referenced);
497
498   /* compute size of state key, ignoring unused texture units */
499   keySize = sizeof(*key) - sizeof(key->unit)
500      + key->nr_enabled_units * sizeof(key->unit[0]);
501
502   return keySize;
503}
504
505
/**
 * Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit.
 *
 * NOTE: These are passed by value extensively in this file rather
 * than as usual by pointer reference.  If this disturbs you, try
 * remembering they are just 32bits in size.
 *
 * GCC is smart enough to deal with these dword-sized structures in
 * much the same way as if I had defined them as dwords and was using
 * macros to access and set the fields.  This is much nicer and easier
 * to evolve.
 */
struct ureg {
   GLuint file:4;        /**< register file (PROGRAM_x) */
   GLuint idx:8;         /**< index within the file */
   GLuint negatebase:1;  /**< when set, emit_arg() negates all four components */
   GLuint swz:12;        /**< swizzle, as built by MAKE_SWIZZLE4 */
   GLuint pad:7;         /**< unused; brings the bitfields to 32 bits */
};
526
/** Placeholder register meaning "no register"; is_undef() tests its file. */
static const struct ureg undef = {
   PROGRAM_UNDEFINED,   /* file */
   255,                 /* idx */
   0,                   /* negatebase */
   0,                   /* swz */
   0                    /* pad */
};
534
535
/** State used to build the fragment program:
 */
struct texenv_fragment_program {
   struct gl_fragment_program *program;  /**< program being generated */
   struct state_key *state;              /**< key describing the state to implement */

   GLbitfield alu_temps;	/**< Track texture indirections, see spec. */
   GLbitfield temps_output;	/**< Track texture indirections, see spec. */
   GLbitfield temp_in_use;	/**< Tracks temporary regs which are in use. */
   GLboolean error;             /**< set by program_error() */

   struct ureg src_texture[MAX_TEXTURE_COORD_UNITS];
   /* Reg containing each texture unit's sampled texture color,
    * else undef.
    */

   struct ureg texcoord_tex[MAX_TEXTURE_COORD_UNITS];
   /* Reg containing texcoord for a texture unit,
    * needed for bump mapping, else undef.
    */

   struct ureg src_previous;	/**< Reg containing color from previous
				 * stage.  May need to be decl'd.
				 */

   GLuint last_tex_stage;	/**< Number of last enabled texture unit */

   /* Constants registered on first use by get_half(), get_one() and
    * get_zero(); undef until then.
    */
   struct ureg half;
   struct ureg one;
   struct ureg zero;
};
567
568
569
570static struct ureg make_ureg(GLuint file, GLuint idx)
571{
572   struct ureg reg;
573   reg.file = file;
574   reg.idx = idx;
575   reg.negatebase = 0;
576   reg.swz = SWIZZLE_NOOP;
577   reg.pad = 0;
578   return reg;
579}
580
581static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
582{
583   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
584			   GET_SWZ(reg.swz, y),
585			   GET_SWZ(reg.swz, z),
586			   GET_SWZ(reg.swz, w));
587
588   return reg;
589}
590
591static struct ureg swizzle1( struct ureg reg, int x )
592{
593   return swizzle(reg, x, x, x, x);
594}
595
596static struct ureg negate( struct ureg reg )
597{
598   reg.negatebase ^= 1;
599   return reg;
600}
601
602static GLboolean is_undef( struct ureg reg )
603{
604   return reg.file == PROGRAM_UNDEFINED;
605}
606
607
608static struct ureg get_temp( struct texenv_fragment_program *p )
609{
610   GLint bit;
611
612   /* First try and reuse temps which have been used already:
613    */
614   bit = _mesa_ffs( ~p->temp_in_use & p->alu_temps );
615
616   /* Then any unused temporary:
617    */
618   if (!bit)
619      bit = _mesa_ffs( ~p->temp_in_use );
620
621   if (!bit) {
622      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
623      exit(1);
624   }
625
626   if ((GLuint) bit > p->program->Base.NumTemporaries)
627      p->program->Base.NumTemporaries = bit;
628
629   p->temp_in_use |= 1<<(bit-1);
630   return make_ureg(PROGRAM_TEMPORARY, (bit-1));
631}
632
633static struct ureg get_tex_temp( struct texenv_fragment_program *p )
634{
635   int bit;
636
637   /* First try to find available temp not previously used (to avoid
638    * starting a new texture indirection).  According to the spec, the
639    * ~p->temps_output isn't necessary, but will keep it there for
640    * now:
641    */
642   bit = _mesa_ffs( ~p->temp_in_use & ~p->alu_temps & ~p->temps_output );
643
644   /* Then any unused temporary:
645    */
646   if (!bit)
647      bit = _mesa_ffs( ~p->temp_in_use );
648
649   if (!bit) {
650      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
651      exit(1);
652   }
653
654   if ((GLuint) bit > p->program->Base.NumTemporaries)
655      p->program->Base.NumTemporaries = bit;
656
657   p->temp_in_use |= 1<<(bit-1);
658   return make_ureg(PROGRAM_TEMPORARY, (bit-1));
659}
660
661
662/** Mark a temp reg as being no longer allocatable. */
663static void reserve_temp( struct texenv_fragment_program *p, struct ureg r )
664{
665   if (r.file == PROGRAM_TEMPORARY)
666      p->temps_output |= (1 << r.idx);
667}
668
669
670static void release_temps(struct gl_context *ctx, struct texenv_fragment_program *p )
671{
672   GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps;
673
674   /* KW: To support tex_env_crossbar, don't release the registers in
675    * temps_output.
676    */
677   if (max_temp >= sizeof(int) * 8)
678      p->temp_in_use = p->temps_output;
679   else
680      p->temp_in_use = ~((1<<max_temp)-1) | p->temps_output;
681}
682
683
684static struct ureg register_param5( struct texenv_fragment_program *p,
685				    GLint s0,
686				    GLint s1,
687				    GLint s2,
688				    GLint s3,
689				    GLint s4)
690{
691   int tokens[STATE_LENGTH];
692   GLuint idx;
693   tokens[0] = s0;
694   tokens[1] = s1;
695   tokens[2] = s2;
696   tokens[3] = s3;
697   tokens[4] = s4;
698   idx = _mesa_add_state_reference(p->program->Base.Parameters,
699				   (gl_state_index *)tokens);
700   return make_ureg(PROGRAM_STATE_VAR, idx);
701}
702
703
/* Shorthands for register_param5() that pass 0 for the unused trailing
 * state tokens.
 */
#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
708
709static GLuint frag_to_vert_attrib( GLuint attrib )
710{
711   switch (attrib) {
712   case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0;
713   case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1;
714   default:
715      assert(attrib >= FRAG_ATTRIB_TEX0);
716      assert(attrib <= FRAG_ATTRIB_TEX7);
717      return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0;
718   }
719}
720
721
722static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
723{
724   if (p->state->inputs_available & (1<<input)) {
725      p->program->Base.InputsRead |= (1 << input);
726      return make_ureg(PROGRAM_INPUT, input);
727   }
728   else {
729      GLuint idx = frag_to_vert_attrib( input );
730      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB_MAYBE_VP_CLAMPED, idx );
731   }
732}
733
734
735static void emit_arg( struct prog_src_register *reg,
736		      struct ureg ureg )
737{
738   reg->File = ureg.file;
739   reg->Index = ureg.idx;
740   reg->Swizzle = ureg.swz;
741   reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE;
742   reg->Abs = GL_FALSE;
743}
744
745static void emit_dst( struct prog_dst_register *dst,
746		      struct ureg ureg, GLuint mask )
747{
748   dst->File = ureg.file;
749   dst->Index = ureg.idx;
750   dst->WriteMask = mask;
751   dst->CondMask = COND_TR;  /* always pass cond test */
752   dst->CondSwizzle = SWIZZLE_NOOP;
753}
754
755static struct prog_instruction *
756emit_op(struct texenv_fragment_program *p,
757	enum prog_opcode op,
758	struct ureg dest,
759	GLuint mask,
760	GLboolean saturate,
761	struct ureg src0,
762	struct ureg src1,
763	struct ureg src2 )
764{
765   const GLuint nr = p->program->Base.NumInstructions++;
766   struct prog_instruction *inst = &p->program->Base.Instructions[nr];
767
768   assert(nr < MAX_INSTRUCTIONS);
769
770   _mesa_init_instructions(inst, 1);
771   inst->Opcode = op;
772
773   emit_arg( &inst->SrcReg[0], src0 );
774   emit_arg( &inst->SrcReg[1], src1 );
775   emit_arg( &inst->SrcReg[2], src2 );
776
777   inst->SaturateMode = saturate ? SATURATE_ZERO_ONE : SATURATE_OFF;
778
779   emit_dst( &inst->DstReg, dest, mask );
780
781#if 0
782   /* Accounting for indirection tracking:
783    */
784   if (dest.file == PROGRAM_TEMPORARY)
785      p->temps_output |= 1 << dest.idx;
786#endif
787
788   return inst;
789}
790
791
792static struct ureg emit_arith( struct texenv_fragment_program *p,
793			       enum prog_opcode op,
794			       struct ureg dest,
795			       GLuint mask,
796			       GLboolean saturate,
797			       struct ureg src0,
798			       struct ureg src1,
799			       struct ureg src2 )
800{
801   emit_op(p, op, dest, mask, saturate, src0, src1, src2);
802
803   /* Accounting for indirection tracking:
804    */
805   if (src0.file == PROGRAM_TEMPORARY)
806      p->alu_temps |= 1 << src0.idx;
807
808   if (!is_undef(src1) && src1.file == PROGRAM_TEMPORARY)
809      p->alu_temps |= 1 << src1.idx;
810
811   if (!is_undef(src2) && src2.file == PROGRAM_TEMPORARY)
812      p->alu_temps |= 1 << src2.idx;
813
814   if (dest.file == PROGRAM_TEMPORARY)
815      p->alu_temps |= 1 << dest.idx;
816
817   p->program->Base.NumAluInstructions++;
818   return dest;
819}
820
821static struct ureg emit_texld( struct texenv_fragment_program *p,
822			       enum prog_opcode op,
823			       struct ureg dest,
824			       GLuint destmask,
825			       GLuint tex_unit,
826			       GLuint tex_idx,
827                               GLuint tex_shadow,
828			       struct ureg coord )
829{
830   struct prog_instruction *inst = emit_op( p, op,
831					  dest, destmask,
832					  GL_FALSE,	/* don't saturate? */
833					  coord, 	/* arg 0? */
834					  undef,
835					  undef);
836
837   inst->TexSrcTarget = tex_idx;
838   inst->TexSrcUnit = tex_unit;
839   inst->TexShadow = tex_shadow;
840
841   p->program->Base.NumTexInstructions++;
842
843   /* Accounting for indirection tracking:
844    */
845   reserve_temp(p, dest);
846
847#if 0
848   /* Is this a texture indirection?
849    */
850   if ((coord.file == PROGRAM_TEMPORARY &&
851	(p->temps_output & (1<<coord.idx))) ||
852       (dest.file == PROGRAM_TEMPORARY &&
853	(p->alu_temps & (1<<dest.idx)))) {
854      p->program->Base.NumTexIndirections++;
855      p->temps_output = 1<<coord.idx;
856      p->alu_temps = 0;
857      assert(0);		/* KW: texture env crossbar */
858   }
859#endif
860
861   return dest;
862}
863
864
865static struct ureg register_const4f( struct texenv_fragment_program *p,
866				     GLfloat s0,
867				     GLfloat s1,
868				     GLfloat s2,
869				     GLfloat s3)
870{
871   GLfloat values[4];
872   GLuint idx, swizzle;
873   struct ureg r;
874   values[0] = s0;
875   values[1] = s1;
876   values[2] = s2;
877   values[3] = s3;
878   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
879                                     &swizzle );
880   r = make_ureg(PROGRAM_CONSTANT, idx);
881   r.swz = swizzle;
882   return r;
883}
884
/* Shorthands for register_const4f().  The scalar form replicates s0 to
 * all four components; the others fill missing components with 0 and
 * set w to 1.
 */
#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
889
890
891static struct ureg get_one( struct texenv_fragment_program *p )
892{
893   if (is_undef(p->one))
894      p->one = register_scalar_const(p, 1.0);
895   return p->one;
896}
897
898static struct ureg get_half( struct texenv_fragment_program *p )
899{
900   if (is_undef(p->half))
901      p->half = register_scalar_const(p, 0.5);
902   return p->half;
903}
904
905static struct ureg get_zero( struct texenv_fragment_program *p )
906{
907   if (is_undef(p->zero))
908      p->zero = register_scalar_const(p, 0.0);
909   return p->zero;
910}
911
912
913static void program_error( struct texenv_fragment_program *p, const char *msg )
914{
915   _mesa_problem(NULL, "%s", msg);
916   p->error = 1;
917}
918
919static struct ureg get_source( struct texenv_fragment_program *p,
920			       GLuint src, GLuint unit )
921{
922   switch (src) {
923   case SRC_TEXTURE:
924      assert(!is_undef(p->src_texture[unit]));
925      return p->src_texture[unit];
926
927   case SRC_TEXTURE0:
928   case SRC_TEXTURE1:
929   case SRC_TEXTURE2:
930   case SRC_TEXTURE3:
931   case SRC_TEXTURE4:
932   case SRC_TEXTURE5:
933   case SRC_TEXTURE6:
934   case SRC_TEXTURE7:
935      assert(!is_undef(p->src_texture[src - SRC_TEXTURE0]));
936      return p->src_texture[src - SRC_TEXTURE0];
937
938   case SRC_CONSTANT:
939      return register_param2(p, STATE_TEXENV_COLOR, unit);
940
941   case SRC_PRIMARY_COLOR:
942      return register_input(p, FRAG_ATTRIB_COL0);
943
944   case SRC_ZERO:
945      return get_zero(p);
946
947   case SRC_PREVIOUS:
948      if (is_undef(p->src_previous))
949	 return register_input(p, FRAG_ATTRIB_COL0);
950      else
951	 return p->src_previous;
952
953   default:
954      assert(0);
955      return undef;
956   }
957}
958
959static struct ureg emit_combine_source( struct texenv_fragment_program *p,
960					GLuint mask,
961					GLuint unit,
962					GLuint source,
963					GLuint operand )
964{
965   struct ureg arg, src, one;
966
967   src = get_source(p, source, unit);
968
969   switch (operand) {
970   case OPR_ONE_MINUS_SRC_COLOR:
971      /* Get unused tmp,
972       * Emit tmp = 1.0 - arg.xyzw
973       */
974      arg = get_temp( p );
975      one = get_one( p );
976      return emit_arith( p, OPCODE_SUB, arg, mask, 0, one, src, undef);
977
978   case OPR_SRC_ALPHA:
979      if (mask == WRITEMASK_W)
980	 return src;
981      else
982	 return swizzle1( src, SWIZZLE_W );
983   case OPR_ONE_MINUS_SRC_ALPHA:
984      /* Get unused tmp,
985       * Emit tmp = 1.0 - arg.wwww
986       */
987      arg = get_temp(p);
988      one = get_one(p);
989      return emit_arith(p, OPCODE_SUB, arg, mask, 0,
990			one, swizzle1(src, SWIZZLE_W), undef);
991   case OPR_ZERO:
992      return get_zero(p);
993   case OPR_ONE:
994      return get_one(p);
995   case OPR_SRC_COLOR:
996      return src;
997   default:
998      assert(0);
999      return src;
1000   }
1001}
1002
1003/**
1004 * Check if the RGB and Alpha sources and operands match for the given
1005 * texture unit's combinder state.  When the RGB and A sources and
1006 * operands match, we can emit fewer instructions.
1007 */
1008static GLboolean args_match( const struct state_key *key, GLuint unit )
1009{
1010   GLuint i, numArgs = key->unit[unit].NumArgsRGB;
1011
1012   for (i = 0; i < numArgs; i++) {
1013      if (key->unit[unit].OptA[i].Source != key->unit[unit].OptRGB[i].Source)
1014	 return GL_FALSE;
1015
1016      switch (key->unit[unit].OptA[i].Operand) {
1017      case OPR_SRC_ALPHA:
1018	 switch (key->unit[unit].OptRGB[i].Operand) {
1019	 case OPR_SRC_COLOR:
1020	 case OPR_SRC_ALPHA:
1021	    break;
1022	 default:
1023	    return GL_FALSE;
1024	 }
1025	 break;
1026      case OPR_ONE_MINUS_SRC_ALPHA:
1027	 switch (key->unit[unit].OptRGB[i].Operand) {
1028	 case OPR_ONE_MINUS_SRC_COLOR:
1029	 case OPR_ONE_MINUS_SRC_ALPHA:
1030	    break;
1031	 default:
1032	    return GL_FALSE;
1033	 }
1034	 break;
1035      default:
1036	 return GL_FALSE;	/* impossible */
1037      }
1038   }
1039
1040   return GL_TRUE;
1041}
1042
1043static struct ureg emit_combine( struct texenv_fragment_program *p,
1044				 struct ureg dest,
1045				 GLuint mask,
1046				 GLboolean saturate,
1047				 GLuint unit,
1048				 GLuint nr,
1049				 GLuint mode,
1050				 const struct mode_opt *opt)
1051{
1052   struct ureg src[MAX_COMBINER_TERMS];
1053   struct ureg tmp, half;
1054   GLuint i;
1055
1056   assert(nr <= MAX_COMBINER_TERMS);
1057
1058   for (i = 0; i < nr; i++)
1059      src[i] = emit_combine_source( p, mask, unit, opt[i].Source, opt[i].Operand );
1060
1061   switch (mode) {
1062   case MODE_REPLACE:
1063      if (mask == WRITEMASK_XYZW && !saturate)
1064	 return src[0];
1065      else
1066	 return emit_arith( p, OPCODE_MOV, dest, mask, saturate, src[0], undef, undef );
1067   case MODE_MODULATE:
1068      return emit_arith( p, OPCODE_MUL, dest, mask, saturate,
1069			 src[0], src[1], undef );
1070   case MODE_ADD:
1071      return emit_arith( p, OPCODE_ADD, dest, mask, saturate,
1072			 src[0], src[1], undef );
1073   case MODE_ADD_SIGNED:
1074      /* tmp = arg0 + arg1
1075       * result = tmp - .5
1076       */
1077      half = get_half(p);
1078      tmp = get_temp( p );
1079      emit_arith( p, OPCODE_ADD, tmp, mask, 0, src[0], src[1], undef );
1080      emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp, half, undef );
1081      return dest;
1082   case MODE_INTERPOLATE:
1083      /* Arg0 * (Arg2) + Arg1 * (1-Arg2) -- note arguments are reordered:
1084       */
1085      return emit_arith( p, OPCODE_LRP, dest, mask, saturate, src[2], src[0], src[1] );
1086
1087   case MODE_SUBTRACT:
1088      return emit_arith( p, OPCODE_SUB, dest, mask, saturate, src[0], src[1], undef );
1089
1090   case MODE_DOT3_RGBA:
1091   case MODE_DOT3_RGBA_EXT:
1092   case MODE_DOT3_RGB_EXT:
1093   case MODE_DOT3_RGB: {
1094      struct ureg tmp0 = get_temp( p );
1095      struct ureg tmp1 = get_temp( p );
1096      struct ureg neg1 = register_scalar_const(p, -1);
1097      struct ureg two  = register_scalar_const(p, 2);
1098
1099      /* tmp0 = 2*src0 - 1
1100       * tmp1 = 2*src1 - 1
1101       *
1102       * dst = tmp0 dot3 tmp1
1103       */
1104      emit_arith( p, OPCODE_MAD, tmp0, WRITEMASK_XYZW, 0,
1105		  two, src[0], neg1);
1106
1107      if (memcmp(&src[0], &src[1], sizeof(struct ureg)) == 0)
1108	 tmp1 = tmp0;
1109      else
1110	 emit_arith( p, OPCODE_MAD, tmp1, WRITEMASK_XYZW, 0,
1111		     two, src[1], neg1);
1112      emit_arith( p, OPCODE_DP3, dest, mask, saturate, tmp0, tmp1, undef);
1113      return dest;
1114   }
1115   case MODE_MODULATE_ADD_ATI:
1116      /* Arg0 * Arg2 + Arg1 */
1117      return emit_arith( p, OPCODE_MAD, dest, mask, saturate,
1118			 src[0], src[2], src[1] );
1119   case MODE_MODULATE_SIGNED_ADD_ATI: {
1120      /* Arg0 * Arg2 + Arg1 - 0.5 */
1121      struct ureg tmp0 = get_temp(p);
1122      half = get_half(p);
1123      emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[0], src[2], src[1] );
1124      emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
1125      return dest;
1126   }
1127   case MODE_MODULATE_SUBTRACT_ATI:
1128      /* Arg0 * Arg2 - Arg1 */
1129      emit_arith( p, OPCODE_MAD, dest, mask, 0, src[0], src[2], negate(src[1]) );
1130      return dest;
1131   case MODE_ADD_PRODUCTS:
1132      /* Arg0 * Arg1 + Arg2 * Arg3 */
1133      {
1134         struct ureg tmp0 = get_temp(p);
1135         emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
1136         emit_arith( p, OPCODE_MAD, dest, mask, saturate, src[2], src[3], tmp0 );
1137      }
1138      return dest;
1139   case MODE_ADD_PRODUCTS_SIGNED:
1140      /* Arg0 * Arg1 + Arg2 * Arg3 - 0.5 */
1141      {
1142         struct ureg tmp0 = get_temp(p);
1143         half = get_half(p);
1144         emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
1145         emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[2], src[3], tmp0 );
1146         emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
1147      }
1148      return dest;
1149   case MODE_BUMP_ENVMAP_ATI:
1150      /* special - not handled here */
1151      assert(0);
1152      return src[0];
1153   default:
1154      assert(0);
1155      return src[0];
1156   }
1157}
1158
1159
1160/**
1161 * Generate instructions for one texture unit's env/combiner mode.
1162 */
1163static struct ureg
1164emit_texenv(struct texenv_fragment_program *p, GLuint unit)
1165{
1166   const struct state_key *key = p->state;
1167   GLboolean rgb_saturate, alpha_saturate;
1168   GLuint rgb_shift, alpha_shift;
1169   struct ureg out, dest;
1170
1171   if (!key->unit[unit].enabled) {
1172      return get_source(p, SRC_PREVIOUS, 0);
1173   }
1174   if (key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
1175      /* this isn't really a env stage delivering a color and handled elsewhere */
1176      return get_source(p, SRC_PREVIOUS, 0);
1177   }
1178
1179   switch (key->unit[unit].ModeRGB) {
1180   case MODE_DOT3_RGB_EXT:
1181      alpha_shift = key->unit[unit].ScaleShiftA;
1182      rgb_shift = 0;
1183      break;
1184   case MODE_DOT3_RGBA_EXT:
1185      alpha_shift = 0;
1186      rgb_shift = 0;
1187      break;
1188   default:
1189      rgb_shift = key->unit[unit].ScaleShiftRGB;
1190      alpha_shift = key->unit[unit].ScaleShiftA;
1191      break;
1192   }
1193
1194   /* If we'll do rgb/alpha shifting don't saturate in emit_combine().
1195    * We don't want to clamp twice.
1196    */
1197   if (rgb_shift)
1198      rgb_saturate = GL_FALSE;  /* saturate after rgb shift */
1199   else if (need_saturate(key->unit[unit].ModeRGB))
1200      rgb_saturate = GL_TRUE;
1201   else
1202      rgb_saturate = GL_FALSE;
1203
1204   if (alpha_shift)
1205      alpha_saturate = GL_FALSE;  /* saturate after alpha shift */
1206   else if (need_saturate(key->unit[unit].ModeA))
1207      alpha_saturate = GL_TRUE;
1208   else
1209      alpha_saturate = GL_FALSE;
1210
1211   /* If this is the very last calculation (and various other conditions
1212    * are met), emit directly to the color output register.  Otherwise,
1213    * emit to a temporary register.
1214    */
1215   if (key->separate_specular ||
1216       unit != p->last_tex_stage ||
1217       alpha_shift ||
1218       key->num_draw_buffers != 1 ||
1219       rgb_shift)
1220      dest = get_temp( p );
1221   else
1222      dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
1223
1224   /* Emit the RGB and A combine ops
1225    */
1226   if (key->unit[unit].ModeRGB == key->unit[unit].ModeA &&
1227       args_match(key, unit)) {
1228      out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
1229			  unit,
1230			  key->unit[unit].NumArgsRGB,
1231			  key->unit[unit].ModeRGB,
1232			  key->unit[unit].OptRGB);
1233   }
1234   else if (key->unit[unit].ModeRGB == MODE_DOT3_RGBA_EXT ||
1235	    key->unit[unit].ModeRGB == MODE_DOT3_RGBA) {
1236      out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
1237			  unit,
1238			  key->unit[unit].NumArgsRGB,
1239			  key->unit[unit].ModeRGB,
1240			  key->unit[unit].OptRGB);
1241   }
1242   else {
1243      /* Need to do something to stop from re-emitting identical
1244       * argument calculations here:
1245       */
1246      out = emit_combine( p, dest, WRITEMASK_XYZ, rgb_saturate,
1247			  unit,
1248			  key->unit[unit].NumArgsRGB,
1249			  key->unit[unit].ModeRGB,
1250			  key->unit[unit].OptRGB);
1251      out = emit_combine( p, dest, WRITEMASK_W, alpha_saturate,
1252			  unit,
1253			  key->unit[unit].NumArgsA,
1254			  key->unit[unit].ModeA,
1255			  key->unit[unit].OptA);
1256   }
1257
1258   /* Deal with the final shift:
1259    */
1260   if (alpha_shift || rgb_shift) {
1261      struct ureg shift;
1262      GLboolean saturate = GL_TRUE;  /* always saturate at this point */
1263
1264      if (rgb_shift == alpha_shift) {
1265	 shift = register_scalar_const(p, (GLfloat)(1<<rgb_shift));
1266      }
1267      else {
1268	 shift = register_const4f(p,
1269				  (GLfloat)(1<<rgb_shift),
1270				  (GLfloat)(1<<rgb_shift),
1271				  (GLfloat)(1<<rgb_shift),
1272				  (GLfloat)(1<<alpha_shift));
1273      }
1274      return emit_arith( p, OPCODE_MUL, dest, WRITEMASK_XYZW,
1275			 saturate, out, shift, undef );
1276   }
1277   else
1278      return out;
1279}
1280
1281
1282/**
1283 * Generate instruction for getting a texture source term.
1284 */
1285static void load_texture( struct texenv_fragment_program *p, GLuint unit )
1286{
1287   if (is_undef(p->src_texture[unit])) {
1288      const GLuint texTarget = p->state->unit[unit].source_index;
1289      struct ureg texcoord;
1290      struct ureg tmp = get_tex_temp( p );
1291
1292      if (is_undef(p->texcoord_tex[unit])) {
1293         texcoord = register_input(p, FRAG_ATTRIB_TEX0+unit);
1294      }
1295      else {
1296         /* might want to reuse this reg for tex output actually */
1297         texcoord = p->texcoord_tex[unit];
1298      }
1299
1300      /* TODO: Use D0_MASK_XY where possible.
1301       */
1302      if (p->state->unit[unit].enabled) {
1303         GLboolean shadow = GL_FALSE;
1304
1305	 if (p->state->unit[unit].shadow) {
1306	    p->program->Base.ShadowSamplers |= 1 << unit;
1307            shadow = GL_TRUE;
1308         }
1309
1310	 p->src_texture[unit] = emit_texld( p, OPCODE_TXP,
1311					    tmp, WRITEMASK_XYZW,
1312					    unit, texTarget, shadow,
1313                                            texcoord );
1314
1315         p->program->Base.SamplersUsed |= (1 << unit);
1316         /* This identity mapping should already be in place
1317          * (see _mesa_init_program_struct()) but let's be safe.
1318          */
1319         p->program->Base.SamplerUnits[unit] = unit;
1320      }
1321      else
1322	 p->src_texture[unit] = get_zero(p);
1323
1324      if (p->state->unit[unit].texture_cyl_wrap) {
1325         /* set flag which is checked by Mesa->Gallium program translation */
1326         p->program->Base.InputFlags[0] |= PROG_PARAM_BIT_CYL_WRAP;
1327      }
1328
1329   }
1330}
1331
1332static GLboolean load_texenv_source( struct texenv_fragment_program *p,
1333				     GLuint src, GLuint unit )
1334{
1335   switch (src) {
1336   case SRC_TEXTURE:
1337      load_texture(p, unit);
1338      break;
1339
1340   case SRC_TEXTURE0:
1341   case SRC_TEXTURE1:
1342   case SRC_TEXTURE2:
1343   case SRC_TEXTURE3:
1344   case SRC_TEXTURE4:
1345   case SRC_TEXTURE5:
1346   case SRC_TEXTURE6:
1347   case SRC_TEXTURE7:
1348      load_texture(p, src - SRC_TEXTURE0);
1349      break;
1350
1351   default:
1352      /* not a texture src - do nothing */
1353      break;
1354   }
1355
1356   return GL_TRUE;
1357}
1358
1359
1360/**
1361 * Generate instructions for loading all texture source terms.
1362 */
1363static GLboolean
1364load_texunit_sources( struct texenv_fragment_program *p, GLuint unit )
1365{
1366   const struct state_key *key = p->state;
1367   GLuint i;
1368
1369   for (i = 0; i < key->unit[unit].NumArgsRGB; i++) {
1370      load_texenv_source( p, key->unit[unit].OptRGB[i].Source, unit );
1371   }
1372
1373   for (i = 0; i < key->unit[unit].NumArgsA; i++) {
1374      load_texenv_source( p, key->unit[unit].OptA[i].Source, unit );
1375   }
1376
1377   return GL_TRUE;
1378}
1379
1380/**
1381 * Generate instructions for loading bump map textures.
1382 */
1383static GLboolean
1384load_texunit_bumpmap( struct texenv_fragment_program *p, GLuint unit )
1385{
1386   const struct state_key *key = p->state;
1387   GLuint bumpedUnitNr = key->unit[unit].OptRGB[1].Source - SRC_TEXTURE0;
1388   struct ureg texcDst, bumpMapRes;
1389   struct ureg constdudvcolor = register_const4f(p, 0.0, 0.0, 0.0, 1.0);
1390   struct ureg texcSrc = register_input(p, FRAG_ATTRIB_TEX0 + bumpedUnitNr);
1391   struct ureg rotMat0 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_0, unit );
1392   struct ureg rotMat1 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_1, unit );
1393
1394   load_texenv_source( p, unit + SRC_TEXTURE0, unit );
1395
1396   bumpMapRes = get_source(p, key->unit[unit].OptRGB[0].Source, unit);
1397   texcDst = get_tex_temp( p );
1398   p->texcoord_tex[bumpedUnitNr] = texcDst;
1399
1400   /* Apply rot matrix and add coords to be available in next phase.
1401    * dest = (Arg0.xxxx * rotMat0 + Arg1) + (Arg0.yyyy * rotMat1)
1402    * note only 2 coords are affected the rest are left unchanged (mul by 0)
1403    */
1404   emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
1405               swizzle1(bumpMapRes, SWIZZLE_X), rotMat0, texcSrc );
1406   emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
1407               swizzle1(bumpMapRes, SWIZZLE_Y), rotMat1, texcDst );
1408
1409   /* Move 0,0,0,1 into bumpmap src if someone (crossbar) is foolish
1410    * enough to access this later, should optimize away.
1411    */
1412   emit_arith( p, OPCODE_MOV, bumpMapRes, WRITEMASK_XYZW, 0,
1413               constdudvcolor, undef, undef );
1414
1415   return GL_TRUE;
1416}
1417
1418/**
1419 * Generate a new fragment program which implements the context's
1420 * current texture env/combine mode.
1421 */
1422static void
1423create_new_program(struct gl_context *ctx, struct state_key *key,
1424                   struct gl_fragment_program *program)
1425{
1426   struct prog_instruction instBuffer[MAX_INSTRUCTIONS];
1427   struct texenv_fragment_program p;
1428   GLuint unit;
1429   struct ureg cf, out;
1430   int i;
1431
1432   memset(&p, 0, sizeof(p));
1433   p.state = key;
1434   p.program = program;
1435
1436   /* During code generation, use locally-allocated instruction buffer,
1437    * then alloc dynamic storage below.
1438    */
1439   p.program->Base.Instructions = instBuffer;
1440   p.program->Base.Target = GL_FRAGMENT_PROGRAM_ARB;
1441   p.program->Base.String = NULL;
1442   p.program->Base.NumTexIndirections = 1; /* is this right? */
1443   p.program->Base.NumTexInstructions = 0;
1444   p.program->Base.NumAluInstructions = 0;
1445   p.program->Base.NumInstructions = 0;
1446   p.program->Base.NumTemporaries = 0;
1447   p.program->Base.NumParameters = 0;
1448   p.program->Base.NumAttributes = 0;
1449   p.program->Base.NumAddressRegs = 0;
1450   p.program->Base.Parameters = _mesa_new_parameter_list();
1451   p.program->Base.InputsRead = 0x0;
1452
1453   if (key->num_draw_buffers == 1)
1454      p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR;
1455   else {
1456      for (i = 0; i < key->num_draw_buffers; i++)
1457	 p.program->Base.OutputsWritten |= (1 << (FRAG_RESULT_DATA0 + i));
1458   }
1459
1460   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
1461      p.src_texture[unit] = undef;
1462      p.texcoord_tex[unit] = undef;
1463   }
1464
1465   p.src_previous = undef;
1466   p.half = undef;
1467   p.zero = undef;
1468   p.one = undef;
1469
1470   p.last_tex_stage = 0;
1471   release_temps(ctx, &p);
1472
1473   if (key->enabled_units && key->num_draw_buffers) {
1474      GLboolean needbumpstage = GL_FALSE;
1475
1476      /* Zeroth pass - bump map textures first */
1477      for (unit = 0; unit < key->nr_enabled_units; unit++)
1478	 if (key->unit[unit].enabled &&
1479             key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
1480	    needbumpstage = GL_TRUE;
1481	    load_texunit_bumpmap( &p, unit );
1482	 }
1483      if (needbumpstage)
1484	 p.program->Base.NumTexIndirections++;
1485
1486      /* First pass - to support texture_env_crossbar, first identify
1487       * all referenced texture sources and emit texld instructions
1488       * for each:
1489       */
1490      for (unit = 0; unit < key->nr_enabled_units; unit++)
1491	 if (key->unit[unit].enabled) {
1492	    load_texunit_sources( &p, unit );
1493	    p.last_tex_stage = unit;
1494	 }
1495
1496      /* Second pass - emit combine instructions to build final color:
1497       */
1498      for (unit = 0; unit < key->nr_enabled_units; unit++)
1499	 if (key->unit[unit].enabled) {
1500	    p.src_previous = emit_texenv( &p, unit );
1501            reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */
1502	    release_temps(ctx, &p);	/* release all temps */
1503	 }
1504   }
1505
1506   cf = get_source( &p, SRC_PREVIOUS, 0 );
1507
1508   for (i = 0; i < key->num_draw_buffers; i++) {
1509      if (key->num_draw_buffers == 1)
1510	 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR );
1511      else {
1512	 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i );
1513      }
1514
1515      if (key->separate_specular) {
1516	 /* Emit specular add.
1517	  */
1518	 struct ureg s = register_input(&p, FRAG_ATTRIB_COL1);
1519	 emit_arith( &p, OPCODE_ADD, out, WRITEMASK_XYZ, 0, cf, s, undef );
1520	 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_W, 0, cf, undef, undef );
1521      }
1522      else if (memcmp(&cf, &out, sizeof(cf)) != 0) {
1523	 /* Will wind up in here if no texture enabled or a couple of
1524	  * other scenarios (GL_REPLACE for instance).
1525	  */
1526	 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_XYZW, 0, cf, undef, undef );
1527      }
1528   }
1529   /* Finish up:
1530    */
1531   emit_arith( &p, OPCODE_END, undef, WRITEMASK_XYZW, 0, undef, undef, undef);
1532
1533   /* Allocate final instruction array.  This has to be done before calling
1534    * _mesa_append_fog_code because that function frees the Base.Instructions.
1535    * At this point, Base.Instructions points to stack data, so it's a really
1536    * bad idea to free it.
1537    */
1538   p.program->Base.Instructions
1539      = _mesa_alloc_instructions(p.program->Base.NumInstructions);
1540   if (!p.program->Base.Instructions) {
1541      _mesa_error(ctx, GL_OUT_OF_MEMORY,
1542                  "generating tex env program");
1543      return;
1544   }
1545   _mesa_copy_instructions(p.program->Base.Instructions, instBuffer,
1546                           p.program->Base.NumInstructions);
1547
1548   /* Append fog code.  This must be done before checking the program against
1549    * the limits becuase it will potentially add some instructions.
1550    */
1551   if (key->fog_enabled) {
1552      _mesa_append_fog_code(ctx, p.program, ctx->Fog.Mode, GL_FALSE);
1553   }
1554
1555   if (p.program->Base.NumTexIndirections > ctx->Const.FragmentProgram.MaxTexIndirections)
1556      program_error(&p, "Exceeded max nr indirect texture lookups");
1557
1558   if (p.program->Base.NumTexInstructions > ctx->Const.FragmentProgram.MaxTexInstructions)
1559      program_error(&p, "Exceeded max TEX instructions");
1560
1561   if (p.program->Base.NumAluInstructions > ctx->Const.FragmentProgram.MaxAluInstructions)
1562      program_error(&p, "Exceeded max ALU instructions");
1563
1564   ASSERT(p.program->Base.NumInstructions <= MAX_INSTRUCTIONS);
1565
1566   /* Notify driver the fragment program has (actually) changed.
1567    */
1568   if (ctx->Driver.ProgramStringNotify) {
1569      GLboolean ok = ctx->Driver.ProgramStringNotify(ctx,
1570                                                     GL_FRAGMENT_PROGRAM_ARB,
1571                                                     &p.program->Base);
1572      /* Driver should be able to handle any texenv programs as long as
1573       * the driver correctly reported max number of texture units correctly,
1574       * etc.
1575       */
1576      ASSERT(ok);
1577      (void) ok; /* silence unused var warning */
1578   }
1579
1580   if (DISASSEM) {
1581      _mesa_print_program(&p.program->Base);
1582      printf("\n");
1583   }
1584}
1585
1586extern "C" {
1587
1588/**
1589 * Return a fragment program which implements the current
1590 * fixed-function texture, fog and color-sum operations.
1591 */
1592struct gl_fragment_program *
1593_mesa_get_fixed_func_fragment_program(struct gl_context *ctx)
1594{
1595   struct gl_fragment_program *prog;
1596   struct state_key key;
1597   GLuint keySize;
1598
1599   keySize = make_state_key(ctx, &key);
1600
1601   prog = (struct gl_fragment_program *)
1602      _mesa_search_program_cache(ctx->FragmentProgram.Cache,
1603                                 &key, keySize);
1604
1605   if (!prog) {
1606      prog = (struct gl_fragment_program *)
1607         ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
1608
1609      create_new_program(ctx, &key, prog);
1610
1611      _mesa_program_cache_insert(ctx, ctx->FragmentProgram.Cache,
1612                                 &key, keySize, &prog->Base);
1613   }
1614
1615   return prog;
1616}
1617
1618}
1619