ffvertex_prog.c revision 01e04c3f
1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * \file ffvertex_prog.c
30 *
31 * Create a vertex program to execute the current fixed function T&L pipeline.
32 * \author Keith Whitwell
33 */
34
35
36#include "main/errors.h"
37#include "main/glheader.h"
38#include "main/mtypes.h"
39#include "main/macros.h"
40#include "main/enums.h"
41#include "main/ffvertex_prog.h"
42#include "program/program.h"
43#include "program/prog_cache.h"
44#include "program/prog_instruction.h"
45#include "program/prog_parameter.h"
46#include "program/prog_print.h"
47#include "program/prog_statevars.h"
48#include "util/bitscan.h"
49
50
/** Max of number of lights and texture coord units */
#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)

/**
 * Snapshot of the fixed-function state filled in by make_state_key().
 *
 * make_state_key() zeroes the whole struct with memset() before setting
 * individual fields, so any field it does not touch reads as 0.
 */
struct state_key {
   /* Copy of ctx->varying_vp_inputs. */
   GLbitfield varying_vp_inputs;

   /* Taken from the current fragment program's info.inputs_read
    * (plus COL0/TEX0 in feedback mode); only 12 bits are kept.
    */
   unsigned fragprog_inputs_read:12;

   /* The light_* and material fields below are only filled in when
    * ctx->Light.Enabled is set.
    */
   unsigned light_color_material_mask:12;
   unsigned light_global_enabled:1;
   unsigned light_local_viewer:1;
   unsigned light_twoside:1;
   unsigned material_shininess_is_zero:1;
   unsigned need_eye_coords:1;
   unsigned normalize:1;
   unsigned rescale_normals:1;

   /* One of the FDM_x values; only set when the fragment program
    * reads VARYING_BIT_FOGC.
    */
   unsigned fog_distance_mode:2;
   unsigned separate_specular:1;
   unsigned point_attenuated:1;

   /* Shared per-index state: the light_* bits are indexed by light
    * number, the remaining bits by texture coordinate unit.
    */
   struct {
      unsigned char light_enabled:1;
      unsigned char light_eyepos3_is_zero:1;
      unsigned char light_spotcutoff_is_180:1;
      unsigned char light_attenuated:1;
      unsigned char texmat_enabled:1;
      unsigned char coord_replace:1;
      unsigned char texgen_enabled:1;
      /* TXG_x codes for the S, T, R and Q coordinates respectively. */
      unsigned char texgen_mode0:4;
      unsigned char texgen_mode1:4;
      unsigned char texgen_mode2:4;
      unsigned char texgen_mode3:4;
   } unit[NUM_UNITS];
};
86
87
88#define TXG_NONE           0
89#define TXG_OBJ_LINEAR     1
90#define TXG_EYE_LINEAR     2
91#define TXG_SPHERE_MAP     3
92#define TXG_REFLECTION_MAP 4
93#define TXG_NORMAL_MAP     5
94
95static GLuint translate_texgen( GLboolean enabled, GLenum mode )
96{
97   if (!enabled)
98      return TXG_NONE;
99
100   switch (mode) {
101   case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
102   case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
103   case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
104   case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
105   case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
106   default: return TXG_NONE;
107   }
108}
109
110#define FDM_EYE_RADIAL    0
111#define FDM_EYE_PLANE     1
112#define FDM_EYE_PLANE_ABS 2
113#define FDM_FROM_ARRAY    3
114
115static GLuint translate_fog_distance_mode(GLenum source, GLenum mode)
116{
117   if (source == GL_FRAGMENT_DEPTH_EXT) {
118      switch (mode) {
119      case GL_EYE_RADIAL_NV:
120         return FDM_EYE_RADIAL;
121      case GL_EYE_PLANE:
122         return FDM_EYE_PLANE;
123      default: /* shouldn't happen; fall through to a sensible default */
124      case GL_EYE_PLANE_ABSOLUTE_NV:
125         return FDM_EYE_PLANE_ABS;
126      }
127   } else {
128      return FDM_FROM_ARRAY;
129   }
130}
131
132static GLboolean check_active_shininess( struct gl_context *ctx,
133                                         const struct state_key *key,
134                                         GLuint side )
135{
136   GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
137
138   if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
139       (key->light_color_material_mask & (1 << attr)))
140      return GL_TRUE;
141
142   if (key->varying_vp_inputs & VERT_BIT_MAT(attr))
143      return GL_TRUE;
144
145   if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
146      return GL_TRUE;
147
148   return GL_FALSE;
149}
150
151
152static void make_state_key( struct gl_context *ctx, struct state_key *key )
153{
154   const struct gl_program *fp = ctx->FragmentProgram._Current;
155   GLbitfield mask;
156
157   memset(key, 0, sizeof(struct state_key));
158
159   /* This now relies on texenvprogram.c being active:
160    */
161   assert(fp);
162
163   key->need_eye_coords = ctx->_NeedEyeCoords;
164
165   key->fragprog_inputs_read = fp->info.inputs_read;
166   key->varying_vp_inputs = ctx->varying_vp_inputs;
167
168   if (ctx->RenderMode == GL_FEEDBACK) {
169      /* make sure the vertprog emits color and tex0 */
170      key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
171   }
172
173   if (ctx->Light.Enabled) {
174      key->light_global_enabled = 1;
175
176      if (ctx->Light.Model.LocalViewer)
177	 key->light_local_viewer = 1;
178
179      if (ctx->Light.Model.TwoSide)
180	 key->light_twoside = 1;
181
182      if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)
183         key->separate_specular = 1;
184
185      if (ctx->Light.ColorMaterialEnabled) {
186	 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
187      }
188
189      mask = ctx->Light._EnabledLights;
190      while (mask) {
191         const int i = u_bit_scan(&mask);
192         struct gl_light *light = &ctx->Light.Light[i];
193
194         key->unit[i].light_enabled = 1;
195
196         if (light->EyePosition[3] == 0.0F)
197            key->unit[i].light_eyepos3_is_zero = 1;
198
199         if (light->SpotCutoff == 180.0F)
200            key->unit[i].light_spotcutoff_is_180 = 1;
201
202         if (light->ConstantAttenuation != 1.0F ||
203             light->LinearAttenuation != 0.0F ||
204             light->QuadraticAttenuation != 0.0F)
205            key->unit[i].light_attenuated = 1;
206      }
207
208      if (check_active_shininess(ctx, key, 0)) {
209         key->material_shininess_is_zero = 0;
210      }
211      else if (key->light_twoside &&
212               check_active_shininess(ctx, key, 1)) {
213         key->material_shininess_is_zero = 0;
214      }
215      else {
216         key->material_shininess_is_zero = 1;
217      }
218   }
219
220   if (ctx->Transform.Normalize)
221      key->normalize = 1;
222
223   if (ctx->Transform.RescaleNormals)
224      key->rescale_normals = 1;
225
226   /* Only distinguish fog parameters if we actually need */
227   if (key->fragprog_inputs_read & VARYING_BIT_FOGC)
228      key->fog_distance_mode =
229         translate_fog_distance_mode(ctx->Fog.FogCoordinateSource,
230                                     ctx->Fog.FogDistanceMode);
231
232   if (ctx->Point._Attenuated)
233      key->point_attenuated = 1;
234
235   mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled
236      | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace;
237   while (mask) {
238      const int i = u_bit_scan(&mask);
239      struct gl_fixedfunc_texture_unit *texUnit =
240         &ctx->Texture.FixedFuncUnit[i];
241
242      if (ctx->Point.PointSprite)
243	 if (ctx->Point.CoordReplace & (1u << i))
244	    key->unit[i].coord_replace = 1;
245
246      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
247	 key->unit[i].texmat_enabled = 1;
248
249      if (texUnit->TexGenEnabled) {
250	 key->unit[i].texgen_enabled = 1;
251
252	 key->unit[i].texgen_mode0 =
253	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
254			      texUnit->GenS.Mode );
255	 key->unit[i].texgen_mode1 =
256	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
257			      texUnit->GenT.Mode );
258	 key->unit[i].texgen_mode2 =
259	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
260			      texUnit->GenR.Mode );
261	 key->unit[i].texgen_mode3 =
262	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
263			      texUnit->GenQ.Mode );
264      }
265   }
266}
267
268
269
/* Very useful debugging tool - produces annotated listing of
 * generated program with line/function references for each
 * instruction back into this file.
 *
 * When non-zero, debug_insn() prints every emitted instruction
 * together with the name of the emitting function and the source
 * line of the emit_op*() call.
 */
#define DISASSEM 0
275
276
/* Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit.
 *
 * NOTE: These are passed by value extensively in this file rather
 * than as usual by pointer reference.  If this disturbs you, try
 * remembering they are just 32bits in size.
 *
 * GCC is smart enough to deal with these dword-sized structures in
 * much the same way as if I had defined them as dwords and was using
 * macros to access and set the fields.  This is much nicer and easier
 * to evolve.
 */
struct ureg {
   GLuint file:4;    /* register file, a PROGRAM_x value */
   GLint idx:9;      /* relative addressing may be negative */
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
   GLuint negate:1;  /* emit_arg() expands this to NEGATE_XYZW / NEGATE_NONE */
   GLuint swz:12;    /* four packed swizzle selectors (see swizzle()) */
   GLuint pad:6;     /* unused; make_ureg() sets it to 0 */
};
297
298
/**
 * Working state used while generating one vertex program.
 */
struct tnl_program {
   const struct state_key *state;  /* state the program is generated for */
   struct gl_program *program;     /* program receiving instructions/params */
   GLuint max_inst;  /** number of instructions allocated for program */
   GLboolean mvp_with_dp4;  /* selects DP4 vs. MUL/MAD matrix transforms */

   GLuint temp_in_use;    /* bitmask of temporaries currently allocated */
   GLuint temp_reserved;  /* bitmask of temporaries release_temp() must keep */

   /* Lazily computed values; undefined until first requested through the
    * matching get_*() helper.
    */
   struct ureg eye_position;
   struct ureg eye_position_z;
   struct ureg eye_position_normalized;
   struct ureg transformed_normal;
   struct ureg identity;

   /* Material bitmasks computed by set_material_flags(). */
   GLuint materials;
   GLuint color_materials;
};
317
318
319static const struct ureg undef = {
320   PROGRAM_UNDEFINED,
321   0,
322   0,
323   0,
324   0
325};
326
327/* Local shorthand:
328 */
329#define X    SWIZZLE_X
330#define Y    SWIZZLE_Y
331#define Z    SWIZZLE_Z
332#define W    SWIZZLE_W
333
334
335/* Construct a ureg:
336 */
337static struct ureg make_ureg(GLuint file, GLint idx)
338{
339   struct ureg reg;
340   reg.file = file;
341   reg.idx = idx;
342   reg.negate = 0;
343   reg.swz = SWIZZLE_NOOP;
344   reg.pad = 0;
345   return reg;
346}
347
348
349static struct ureg negate( struct ureg reg )
350{
351   reg.negate ^= 1;
352   return reg;
353}
354
355
356static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
357{
358   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
359			   GET_SWZ(reg.swz, y),
360			   GET_SWZ(reg.swz, z),
361			   GET_SWZ(reg.swz, w));
362   return reg;
363}
364
365
366static struct ureg swizzle1( struct ureg reg, int x )
367{
368   return swizzle(reg, x, x, x, x);
369}
370
371
372static struct ureg get_temp( struct tnl_program *p )
373{
374   int bit = ffs( ~p->temp_in_use );
375   if (!bit) {
376      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
377      exit(1);
378   }
379
380   if ((GLuint) bit > p->program->arb.NumTemporaries)
381      p->program->arb.NumTemporaries = bit;
382
383   p->temp_in_use |= 1<<(bit-1);
384   return make_ureg(PROGRAM_TEMPORARY, bit-1);
385}
386
387
388static struct ureg reserve_temp( struct tnl_program *p )
389{
390   struct ureg temp = get_temp( p );
391   p->temp_reserved |= 1<<temp.idx;
392   return temp;
393}
394
395
396static void release_temp( struct tnl_program *p, struct ureg reg )
397{
398   if (reg.file == PROGRAM_TEMPORARY) {
399      p->temp_in_use &= ~(1<<reg.idx);
400      p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
401   }
402}
403
404static void release_temps( struct tnl_program *p )
405{
406   p->temp_in_use = p->temp_reserved;
407}
408
409
410static struct ureg register_param5(struct tnl_program *p,
411				   GLint s0,
412				   GLint s1,
413				   GLint s2,
414				   GLint s3,
415                                   GLint s4)
416{
417   gl_state_index16 tokens[STATE_LENGTH];
418   GLint idx;
419   tokens[0] = s0;
420   tokens[1] = s1;
421   tokens[2] = s2;
422   tokens[3] = s3;
423   tokens[4] = s4;
424   idx = _mesa_add_state_reference(p->program->Parameters, tokens );
425   return make_ureg(PROGRAM_STATE_VAR, idx);
426}
427
428
429#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
430#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
431#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
432#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
433
434
435
436/**
437 * \param input  one of VERT_ATTRIB_x tokens.
438 */
439static struct ureg register_input( struct tnl_program *p, GLuint input )
440{
441   assert(input < VERT_ATTRIB_MAX);
442
443   if (p->state->varying_vp_inputs & VERT_BIT(input)) {
444      p->program->info.inputs_read |= VERT_BIT(input);
445      return make_ureg(PROGRAM_INPUT, input);
446   }
447   else {
448      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
449   }
450}
451
452
453/**
454 * \param input  one of VARYING_SLOT_x tokens.
455 */
456static struct ureg register_output( struct tnl_program *p, GLuint output )
457{
458   p->program->info.outputs_written |= BITFIELD64_BIT(output);
459   return make_ureg(PROGRAM_OUTPUT, output);
460}
461
462
463static struct ureg register_const4f( struct tnl_program *p,
464			      GLfloat s0,
465			      GLfloat s1,
466			      GLfloat s2,
467			      GLfloat s3)
468{
469   gl_constant_value values[4];
470   GLint idx;
471   GLuint swizzle;
472   values[0].f = s0;
473   values[1].f = s1;
474   values[2].f = s2;
475   values[3].f = s3;
476   idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4,
477                                    &swizzle );
478   assert(swizzle == SWIZZLE_NOOP);
479   return make_ureg(PROGRAM_CONSTANT, idx);
480}
481
482#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
483#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
484#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
485#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
486
487static GLboolean is_undef( struct ureg reg )
488{
489   return reg.file == PROGRAM_UNDEFINED;
490}
491
492
493static struct ureg get_identity_param( struct tnl_program *p )
494{
495   if (is_undef(p->identity))
496      p->identity = register_const4f(p, 0,0,0,1);
497
498   return p->identity;
499}
500
501static void register_matrix_param5( struct tnl_program *p,
502				    GLint s0, /* modelview, projection, etc */
503				    GLint s1, /* texture matrix number */
504				    GLint s2, /* first row */
505				    GLint s3, /* last row */
506				    GLint s4, /* inverse, transpose, etc */
507				    struct ureg *matrix )
508{
509   GLint i;
510
511   /* This is a bit sad as the support is there to pull the whole
512    * matrix out in one go:
513    */
514   for (i = 0; i <= s3 - s2; i++)
515      matrix[i] = register_param5( p, s0, s1, i, i, s4 );
516}
517
518
519static void emit_arg( struct prog_src_register *src,
520		      struct ureg reg )
521{
522   src->File = reg.file;
523   src->Index = reg.idx;
524   src->Swizzle = reg.swz;
525   src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
526   src->RelAddr = 0;
527   /* Check that bitfield sizes aren't exceeded */
528   assert(src->Index == reg.idx);
529}
530
531
532static void emit_dst( struct prog_dst_register *dst,
533		      struct ureg reg, GLuint mask )
534{
535   dst->File = reg.file;
536   dst->Index = reg.idx;
537   /* allow zero as a shorthand for xyzw */
538   dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
539   /* Check that bitfield sizes aren't exceeded */
540   assert(dst->Index == reg.idx);
541}
542
543
544static void debug_insn( struct prog_instruction *inst, const char *fn,
545			GLuint line )
546{
547   if (DISASSEM) {
548      static const char *last_fn;
549
550      if (fn != last_fn) {
551	 last_fn = fn;
552	 printf("%s:\n", fn);
553      }
554
555      printf("%d:\t", line);
556      _mesa_print_instruction(inst);
557   }
558}
559
560
561static void emit_op3fn(struct tnl_program *p,
562                       enum prog_opcode op,
563		       struct ureg dest,
564		       GLuint mask,
565		       struct ureg src0,
566		       struct ureg src1,
567		       struct ureg src2,
568		       const char *fn,
569		       GLuint line)
570{
571   GLuint nr;
572   struct prog_instruction *inst;
573
574   assert(p->program->arb.NumInstructions <= p->max_inst);
575
576   if (p->program->arb.NumInstructions == p->max_inst) {
577      /* need to extend the program's instruction array */
578      struct prog_instruction *newInst;
579
580      /* double the size */
581      p->max_inst *= 2;
582
583      newInst =
584         rzalloc_array(p->program, struct prog_instruction, p->max_inst);
585      if (!newInst) {
586         _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
587         return;
588      }
589
590      _mesa_copy_instructions(newInst, p->program->arb.Instructions,
591                              p->program->arb.NumInstructions);
592
593      ralloc_free(p->program->arb.Instructions);
594
595      p->program->arb.Instructions = newInst;
596   }
597
598   nr = p->program->arb.NumInstructions++;
599
600   inst = &p->program->arb.Instructions[nr];
601   inst->Opcode = (enum prog_opcode) op;
602
603   emit_arg( &inst->SrcReg[0], src0 );
604   emit_arg( &inst->SrcReg[1], src1 );
605   emit_arg( &inst->SrcReg[2], src2 );
606
607   emit_dst( &inst->DstReg, dest, mask );
608
609   debug_insn(inst, fn, line);
610}
611
612
613#define emit_op3(p, op, dst, mask, src0, src1, src2) \
614   emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__)
615
616#define emit_op2(p, op, dst, mask, src0, src1) \
617    emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__)
618
619#define emit_op1(p, op, dst, mask, src0) \
620    emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__)
621
622
623static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
624{
625   if (reg.file == PROGRAM_TEMPORARY &&
626       !(p->temp_reserved & (1<<reg.idx)))
627      return reg;
628   else {
629      struct ureg temp = get_temp(p);
630      emit_op1(p, OPCODE_MOV, temp, 0, reg);
631      return temp;
632   }
633}
634
635
636/* Currently no tracking performed of input/output/register size or
637 * active elements.  Could be used to reduce these operations, as
638 * could the matrix type.
639 */
640static void emit_matrix_transform_vec4( struct tnl_program *p,
641					struct ureg dest,
642					const struct ureg *mat,
643					struct ureg src)
644{
645   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
646   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
647   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
648   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
649}
650
651
652/* This version is much easier to implement if writemasks are not
653 * supported natively on the target or (like SSE), the target doesn't
654 * have a clean/obvious dotproduct implementation.
655 */
656static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
657						  struct ureg dest,
658						  const struct ureg *mat,
659						  struct ureg src)
660{
661   struct ureg tmp;
662
663   if (dest.file != PROGRAM_TEMPORARY)
664      tmp = get_temp(p);
665   else
666      tmp = dest;
667
668   emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
669   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
670   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
671   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
672
673   if (dest.file != PROGRAM_TEMPORARY)
674      release_temp(p, tmp);
675}
676
677
678static void emit_matrix_transform_vec3( struct tnl_program *p,
679					struct ureg dest,
680					const struct ureg *mat,
681					struct ureg src)
682{
683   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
684   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
685   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
686}
687
688
689static void emit_normalize_vec3( struct tnl_program *p,
690				 struct ureg dest,
691				 struct ureg src )
692{
693   struct ureg tmp = get_temp(p);
694   emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
695   emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
696   emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
697   release_temp(p, tmp);
698}
699
700
701static void emit_passthrough( struct tnl_program *p,
702			      GLuint input,
703			      GLuint output )
704{
705   struct ureg out = register_output(p, output);
706   emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
707}
708
709
710static struct ureg get_eye_position( struct tnl_program *p )
711{
712   if (is_undef(p->eye_position)) {
713      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
714      struct ureg modelview[4];
715
716      p->eye_position = reserve_temp(p);
717
718      if (p->mvp_with_dp4) {
719	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
720                                 0, modelview );
721
722	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
723      }
724      else {
725	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
726				 STATE_MATRIX_TRANSPOSE, modelview );
727
728	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
729      }
730   }
731
732   return p->eye_position;
733}
734
735
736static struct ureg get_eye_position_z( struct tnl_program *p )
737{
738   if (!is_undef(p->eye_position))
739      return swizzle1(p->eye_position, Z);
740
741   if (is_undef(p->eye_position_z)) {
742      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
743      struct ureg modelview[4];
744
745      p->eye_position_z = reserve_temp(p);
746
747      register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
748                              0, modelview );
749
750      emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
751   }
752
753   return p->eye_position_z;
754}
755
756
757static struct ureg get_eye_position_normalized( struct tnl_program *p )
758{
759   if (is_undef(p->eye_position_normalized)) {
760      struct ureg eye = get_eye_position(p);
761      p->eye_position_normalized = reserve_temp(p);
762      emit_normalize_vec3(p, p->eye_position_normalized, eye);
763   }
764
765   return p->eye_position_normalized;
766}
767
768
769static struct ureg get_transformed_normal( struct tnl_program *p )
770{
771   if (is_undef(p->transformed_normal) &&
772       !p->state->need_eye_coords &&
773       !p->state->normalize &&
774       !(p->state->need_eye_coords == p->state->rescale_normals))
775   {
776      p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
777   }
778   else if (is_undef(p->transformed_normal))
779   {
780      struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
781      struct ureg mvinv[3];
782      struct ureg transformed_normal = reserve_temp(p);
783
784      if (p->state->need_eye_coords) {
785         register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
786                                 STATE_MATRIX_INVTRANS, mvinv );
787
788         /* Transform to eye space:
789          */
790         emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
791         normal = transformed_normal;
792      }
793
794      /* Normalize/Rescale:
795       */
796      if (p->state->normalize) {
797	 emit_normalize_vec3( p, transformed_normal, normal );
798         normal = transformed_normal;
799      }
800      else if (p->state->need_eye_coords == p->state->rescale_normals) {
801         /* This is already adjusted for eye/non-eye rendering:
802          */
803	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
804                                               STATE_NORMAL_SCALE);
805
806	 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
807         normal = transformed_normal;
808      }
809
810      assert(normal.file == PROGRAM_TEMPORARY);
811      p->transformed_normal = normal;
812   }
813
814   return p->transformed_normal;
815}
816
817
818static void build_hpos( struct tnl_program *p )
819{
820   struct ureg pos = register_input( p, VERT_ATTRIB_POS );
821   struct ureg hpos = register_output( p, VARYING_SLOT_POS );
822   struct ureg mvp[4];
823
824   if (p->mvp_with_dp4) {
825      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
826			      0, mvp );
827      emit_matrix_transform_vec4( p, hpos, mvp, pos );
828   }
829   else {
830      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
831			      STATE_MATRIX_TRANSPOSE, mvp );
832      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
833   }
834}
835
836
837static GLuint material_attrib( GLuint side, GLuint property )
838{
839   return (property - STATE_AMBIENT) * 2 + side;
840}
841
842
843/**
844 * Get a bitmask of which material values vary on a per-vertex basis.
845 */
846static void set_material_flags( struct tnl_program *p )
847{
848   p->color_materials = 0;
849   p->materials = 0;
850
851   if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
852      p->materials =
853	 p->color_materials = p->state->light_color_material_mask;
854   }
855
856   p->materials |= ((p->state->varying_vp_inputs & VERT_BIT_MAT_ALL)
857                    >> VERT_ATTRIB_MAT(0));
858}
859
860
861static struct ureg get_material( struct tnl_program *p, GLuint side,
862				 GLuint property )
863{
864   GLuint attrib = material_attrib(side, property);
865
866   if (p->color_materials & (1<<attrib))
867      return register_input(p, VERT_ATTRIB_COLOR0);
868   else if (p->materials & (1<<attrib)) {
869      /* Put material values in the GENERIC slots -- they are not used
870       * for anything in fixed function mode.
871       */
872      return register_input( p, VERT_ATTRIB_MAT(attrib) );
873   }
874   else
875      return register_param3( p, STATE_MATERIAL, side, property );
876}
877
878#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
879				   MAT_BIT_FRONT_AMBIENT | \
880				   MAT_BIT_FRONT_DIFFUSE) << (side))
881
882
883/**
884 * Either return a precalculated constant value or emit code to
885 * calculate these values dynamically in the case where material calls
886 * are present between begin/end pairs.
887 *
888 * Probably want to shift this to the program compilation phase - if
889 * we always emitted the calculation here, a smart compiler could
890 * detect that it was constant (given a certain set of inputs), and
891 * lift it out of the main loop.  That way the programs created here
892 * would be independent of the vertex_buffer details.
893 */
894static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
895{
896   if (p->materials & SCENE_COLOR_BITS(side)) {
897      struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
898      struct ureg material_emission = get_material(p, side, STATE_EMISSION);
899      struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
900      struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
901      struct ureg tmp = make_temp(p, material_diffuse);
902      emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
903	       material_ambient, material_emission);
904      return tmp;
905   }
906   else
907      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
908}
909
910
911static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
912				  GLuint side, GLuint property )
913{
914   GLuint attrib = material_attrib(side, property);
915   if (p->materials & (1<<attrib)) {
916      struct ureg light_value =
917	 register_param3(p, STATE_LIGHT, light, property);
918      struct ureg material_value = get_material(p, side, property);
919      struct ureg tmp = get_temp(p);
920      emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
921      return tmp;
922   }
923   else
924      return register_param4(p, STATE_LIGHTPROD, light, side, property);
925}
926
927
928static struct ureg calculate_light_attenuation( struct tnl_program *p,
929						GLuint i,
930						struct ureg VPpli,
931						struct ureg dist )
932{
933   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
934					     STATE_ATTENUATION);
935   struct ureg att = undef;
936
937   /* Calculate spot attenuation:
938    */
939   if (!p->state->unit[i].light_spotcutoff_is_180) {
940      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
941						  STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
942      struct ureg spot = get_temp(p);
943      struct ureg slt = get_temp(p);
944
945      att = get_temp(p);
946
947      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
948      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
949      emit_op1(p, OPCODE_ABS, spot, 0, spot);
950      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
951      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
952
953      release_temp(p, spot);
954      release_temp(p, slt);
955   }
956
957   /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62):
958    *
959    * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero)
960    */
961   if (p->state->unit[i].light_attenuated && !is_undef(dist)) {
962      if (is_undef(att))
963         att = get_temp(p);
964      /* 1/d,d,d,1/d */
965      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
966      /* 1,d,d*d,1/d */
967      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
968      /* 1/dist-atten */
969      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
970
971      if (!p->state->unit[i].light_spotcutoff_is_180) {
972	 /* dist-atten */
973	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
974	 /* spot-atten * dist-atten */
975	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
976      }
977      else {
978	 /* dist-atten */
979	 emit_op1(p, OPCODE_RCP, att, 0, dist);
980      }
981   }
982
983   return att;
984}
985
986
987/**
988 * Compute:
989 *   lit.y = MAX(0, dots.x)
990 *   lit.z = SLT(0, dots.x)
991 */
992static void emit_degenerate_lit( struct tnl_program *p,
993                                 struct ureg lit,
994                                 struct ureg dots )
995{
996   struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
997
998   /* Note that lit.x & lit.w will not be examined.  Note also that
999    * dots.xyzw == dots.xxxx.
1000    */
1001
1002   /* MAX lit, id, dots;
1003    */
1004   emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
1005
1006   /* result[2] = (in > 0 ? 1 : 0)
1007    * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
1008    */
1009   emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
1010}
1011
1012
/* Need to add some additional parameters to allow lighting in object
 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 * space lighting.
 */
1016 */
1017static void build_lighting( struct tnl_program *p )
1018{
1019   const GLboolean twoside = p->state->light_twoside;
1020   const GLboolean separate = p->state->separate_specular;
1021   GLuint nr_lights = 0, count = 0;
1022   struct ureg normal = get_transformed_normal(p);
1023   struct ureg lit = get_temp(p);
1024   struct ureg dots = get_temp(p);
1025   struct ureg _col0 = undef, _col1 = undef;
1026   struct ureg _bfc0 = undef, _bfc1 = undef;
1027   GLuint i;
1028
1029   /*
1030    * NOTE:
1031    * dots.x = dot(normal, VPpli)
1032    * dots.y = dot(normal, halfAngle)
1033    * dots.z = back.shininess
1034    * dots.w = front.shininess
1035    */
1036
1037   for (i = 0; i < MAX_LIGHTS; i++)
1038      if (p->state->unit[i].light_enabled)
1039	 nr_lights++;
1040
1041   set_material_flags(p);
1042
1043   {
1044      if (!p->state->material_shininess_is_zero) {
1045         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
1046         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
1047         release_temp(p, shininess);
1048      }
1049
1050      _col0 = make_temp(p, get_scenecolor(p, 0));
1051      if (separate)
1052	 _col1 = make_temp(p, get_identity_param(p));
1053      else
1054	 _col1 = _col0;
1055   }
1056
1057   if (twoside) {
1058      if (!p->state->material_shininess_is_zero) {
1059         /* Note that we negate the back-face specular exponent here.
1060          * The negation will be un-done later in the back-face code below.
1061          */
1062         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
1063         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
1064                  negate(swizzle1(shininess,X)));
1065         release_temp(p, shininess);
1066      }
1067
1068      _bfc0 = make_temp(p, get_scenecolor(p, 1));
1069      if (separate)
1070	 _bfc1 = make_temp(p, get_identity_param(p));
1071      else
1072	 _bfc1 = _bfc0;
1073   }
1074
1075   /* If no lights, still need to emit the scenecolor.
1076    */
1077   {
1078      struct ureg res0 = register_output( p, VARYING_SLOT_COL0 );
1079      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
1080   }
1081
1082   if (separate) {
1083      struct ureg res1 = register_output( p, VARYING_SLOT_COL1 );
1084      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
1085   }
1086
1087   if (twoside) {
1088      struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 );
1089      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
1090   }
1091
1092   if (twoside && separate) {
1093      struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 );
1094      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
1095   }
1096
1097   if (nr_lights == 0) {
1098      release_temps(p);
1099      return;
1100   }
1101
1102   for (i = 0; i < MAX_LIGHTS; i++) {
1103      if (p->state->unit[i].light_enabled) {
1104	 struct ureg half = undef;
1105	 struct ureg att = undef, VPpli = undef;
1106	 struct ureg dist = undef;
1107
1108	 count++;
1109         if (p->state->unit[i].light_eyepos3_is_zero) {
1110             VPpli = register_param3(p, STATE_INTERNAL,
1111                                     STATE_LIGHT_POSITION_NORMALIZED, i);
1112         } else {
1113            struct ureg Ppli = register_param3(p, STATE_INTERNAL,
1114                                               STATE_LIGHT_POSITION, i);
1115            struct ureg V = get_eye_position(p);
1116
1117            VPpli = get_temp(p);
1118            dist = get_temp(p);
1119
1120            /* Calculate VPpli vector
1121             */
1122            emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
1123
1124            /* Normalize VPpli.  The dist value also used in
1125             * attenuation below.
1126             */
1127            emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
1128            emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1129            emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1130         }
1131
1132         /* Calculate attenuation:
1133          */
1134         att = calculate_light_attenuation(p, i, VPpli, dist);
1135         release_temp(p, dist);
1136
1137	 /* Calculate viewer direction, or use infinite viewer:
1138	  */
1139         if (!p->state->material_shininess_is_zero) {
1140            if (p->state->light_local_viewer) {
1141               struct ureg eye_hat = get_eye_position_normalized(p);
1142               half = get_temp(p);
1143               emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1144               emit_normalize_vec3(p, half, half);
1145            } else if (p->state->unit[i].light_eyepos3_is_zero) {
1146               half = register_param3(p, STATE_INTERNAL,
1147                                      STATE_LIGHT_HALF_VECTOR, i);
1148            } else {
1149               struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1150               half = get_temp(p);
1151               emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1152               emit_normalize_vec3(p, half, half);
1153            }
1154	 }
1155
1156	 /* Calculate dot products:
1157	  */
1158         if (p->state->material_shininess_is_zero) {
1159            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
1160         }
1161         else {
1162            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1163            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1164         }
1165
1166	 /* Front face lighting:
1167	  */
1168	 {
1169	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1170	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1171	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1172	    struct ureg res0, res1;
1173	    GLuint mask0, mask1;
1174
1175	    if (count == nr_lights) {
1176	       if (separate) {
1177		  mask0 = WRITEMASK_XYZ;
1178		  mask1 = WRITEMASK_XYZ;
1179		  res0 = register_output( p, VARYING_SLOT_COL0 );
1180		  res1 = register_output( p, VARYING_SLOT_COL1 );
1181	       }
1182	       else {
1183		  mask0 = 0;
1184		  mask1 = WRITEMASK_XYZ;
1185		  res0 = _col0;
1186		  res1 = register_output( p, VARYING_SLOT_COL0 );
1187	       }
1188	    }
1189            else {
1190	       mask0 = 0;
1191	       mask1 = 0;
1192	       res0 = _col0;
1193	       res1 = _col1;
1194	    }
1195
1196	    if (!is_undef(att)) {
1197               /* light is attenuated by distance */
1198               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1199               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1200               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1201            }
1202            else if (!p->state->material_shininess_is_zero) {
1203               /* there's a non-zero specular term */
1204               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1205               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1206            }
1207            else {
1208               /* no attenutation, no specular */
1209               emit_degenerate_lit(p, lit, dots);
1210               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1211            }
1212
1213	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1214	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1215
1216	    release_temp(p, ambient);
1217	    release_temp(p, diffuse);
1218	    release_temp(p, specular);
1219	 }
1220
1221	 /* Back face lighting:
1222	  */
1223	 if (twoside) {
1224	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1225	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1226	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1227	    struct ureg res0, res1;
1228	    GLuint mask0, mask1;
1229
1230	    if (count == nr_lights) {
1231	       if (separate) {
1232		  mask0 = WRITEMASK_XYZ;
1233		  mask1 = WRITEMASK_XYZ;
1234		  res0 = register_output( p, VARYING_SLOT_BFC0 );
1235		  res1 = register_output( p, VARYING_SLOT_BFC1 );
1236	       }
1237	       else {
1238		  mask0 = 0;
1239		  mask1 = WRITEMASK_XYZ;
1240		  res0 = _bfc0;
1241		  res1 = register_output( p, VARYING_SLOT_BFC0 );
1242	       }
1243	    }
1244            else {
1245	       res0 = _bfc0;
1246	       res1 = _bfc1;
1247	       mask0 = 0;
1248	       mask1 = 0;
1249	    }
1250
1251            /* For the back face we need to negate the X and Y component
1252             * dot products.  dots.Z has the negated back-face specular
1253             * exponent.  We swizzle that into the W position.  This
1254             * negation makes the back-face specular term positive again.
1255             */
1256            dots = negate(swizzle(dots,X,Y,W,Z));
1257
1258	    if (!is_undef(att)) {
1259               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1260	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1261               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1262            }
1263            else if (!p->state->material_shininess_is_zero) {
1264               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1265               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
1266            }
1267            else {
1268               emit_degenerate_lit(p, lit, dots);
1269               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1270            }
1271
1272	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1273	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1274            /* restore dots to its original state for subsequent lights
1275             * by negating and swizzling again.
1276             */
1277            dots = negate(swizzle(dots,X,Y,W,Z));
1278
1279	    release_temp(p, ambient);
1280	    release_temp(p, diffuse);
1281	    release_temp(p, specular);
1282	 }
1283
1284	 release_temp(p, half);
1285	 release_temp(p, VPpli);
1286	 release_temp(p, att);
1287      }
1288   }
1289
1290   release_temps( p );
1291}
1292
1293
1294static void build_fog( struct tnl_program *p )
1295{
1296   struct ureg fog = register_output(p, VARYING_SLOT_FOGC);
1297   struct ureg input;
1298
1299   switch (p->state->fog_distance_mode) {
1300   case FDM_EYE_RADIAL: { /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */
1301      struct ureg tmp = get_temp(p);
1302      input = get_eye_position(p);
1303      emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, input, input);
1304      emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
1305      emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, tmp);
1306      break;
1307   }
1308   case FDM_EYE_PLANE: /* Z = Ze */
1309      input = get_eye_position_z(p);
1310      emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
1311      break;
1312   case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */
1313      input = get_eye_position_z(p);
1314      emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1315      break;
1316   case FDM_FROM_ARRAY:
1317      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1318      emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1319      break;
1320   default:
1321      assert(!"Bad fog mode in build_fog()");
1322      break;
1323   }
1324
1325   emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
1326}
1327
1328
1329static void build_reflect_texgen( struct tnl_program *p,
1330				  struct ureg dest,
1331				  GLuint writemask )
1332{
1333   struct ureg normal = get_transformed_normal(p);
1334   struct ureg eye_hat = get_eye_position_normalized(p);
1335   struct ureg tmp = get_temp(p);
1336
1337   /* n.u */
1338   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1339   /* 2n.u */
1340   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1341   /* (-2n.u)n + u */
1342   emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1343
1344   release_temp(p, tmp);
1345}
1346
1347
1348static void build_sphere_texgen( struct tnl_program *p,
1349				 struct ureg dest,
1350				 GLuint writemask )
1351{
1352   struct ureg normal = get_transformed_normal(p);
1353   struct ureg eye_hat = get_eye_position_normalized(p);
1354   struct ureg tmp = get_temp(p);
1355   struct ureg half = register_scalar_const(p, .5);
1356   struct ureg r = get_temp(p);
1357   struct ureg inv_m = get_temp(p);
1358   struct ureg id = get_identity_param(p);
1359
1360   /* Could share the above calculations, but it would be
1361    * a fairly odd state for someone to set (both sphere and
1362    * reflection active for different texture coordinate
1363    * components.  Of course - if two texture units enable
1364    * reflect and/or sphere, things start to tilt in favour
1365    * of seperating this out:
1366    */
1367
1368   /* n.u */
1369   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1370   /* 2n.u */
1371   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1372   /* (-2n.u)n + u */
1373   emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1374   /* r + 0,0,1 */
1375   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1376   /* rx^2 + ry^2 + (rz+1)^2 */
1377   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1378   /* 2/m */
1379   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1380   /* 1/m */
1381   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1382   /* r/m + 1/2 */
1383   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1384
1385   release_temp(p, tmp);
1386   release_temp(p, r);
1387   release_temp(p, inv_m);
1388}
1389
1390
1391static void build_texture_transform( struct tnl_program *p )
1392{
1393   GLuint i, j;
1394
1395   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
1396
1397      if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i)))
1398	 continue;
1399
1400      if (p->state->unit[i].coord_replace)
1401  	 continue;
1402
1403      if (p->state->unit[i].texgen_enabled ||
1404	  p->state->unit[i].texmat_enabled) {
1405
1406	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1407	 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i);
1408	 struct ureg out_texgen = undef;
1409
1410	 if (p->state->unit[i].texgen_enabled) {
1411	    GLuint copy_mask = 0;
1412	    GLuint sphere_mask = 0;
1413	    GLuint reflect_mask = 0;
1414	    GLuint normal_mask = 0;
1415	    GLuint modes[4];
1416
1417	    if (texmat_enabled)
1418	       out_texgen = get_temp(p);
1419	    else
1420	       out_texgen = out;
1421
1422	    modes[0] = p->state->unit[i].texgen_mode0;
1423	    modes[1] = p->state->unit[i].texgen_mode1;
1424	    modes[2] = p->state->unit[i].texgen_mode2;
1425	    modes[3] = p->state->unit[i].texgen_mode3;
1426
1427	    for (j = 0; j < 4; j++) {
1428	       switch (modes[j]) {
1429	       case TXG_OBJ_LINEAR: {
1430		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1431		  struct ureg plane =
1432		     register_param3(p, STATE_TEXGEN, i,
1433				     STATE_TEXGEN_OBJECT_S + j);
1434
1435		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1436			   obj, plane );
1437		  break;
1438	       }
1439	       case TXG_EYE_LINEAR: {
1440		  struct ureg eye = get_eye_position(p);
1441		  struct ureg plane =
1442		     register_param3(p, STATE_TEXGEN, i,
1443				     STATE_TEXGEN_EYE_S + j);
1444
1445		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1446			   eye, plane );
1447		  break;
1448	       }
1449	       case TXG_SPHERE_MAP:
1450		  sphere_mask |= WRITEMASK_X << j;
1451		  break;
1452	       case TXG_REFLECTION_MAP:
1453		  reflect_mask |= WRITEMASK_X << j;
1454		  break;
1455	       case TXG_NORMAL_MAP:
1456		  normal_mask |= WRITEMASK_X << j;
1457		  break;
1458	       case TXG_NONE:
1459		  copy_mask |= WRITEMASK_X << j;
1460	       }
1461	    }
1462
1463	    if (sphere_mask) {
1464	       build_sphere_texgen(p, out_texgen, sphere_mask);
1465	    }
1466
1467	    if (reflect_mask) {
1468	       build_reflect_texgen(p, out_texgen, reflect_mask);
1469	    }
1470
1471	    if (normal_mask) {
1472	       struct ureg normal = get_transformed_normal(p);
1473	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1474	    }
1475
1476	    if (copy_mask) {
1477	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1478	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1479	    }
1480	 }
1481
1482	 if (texmat_enabled) {
1483	    struct ureg texmat[4];
1484	    struct ureg in = (!is_undef(out_texgen) ?
1485			      out_texgen :
1486			      register_input(p, VERT_ATTRIB_TEX0+i));
1487	    if (p->mvp_with_dp4) {
1488	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1489				       0, texmat );
1490	       emit_matrix_transform_vec4( p, out, texmat, in );
1491	    }
1492	    else {
1493	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1494				       STATE_MATRIX_TRANSPOSE, texmat );
1495	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1496	    }
1497	 }
1498
1499	 release_temps(p);
1500      }
1501      else {
1502	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i);
1503      }
1504   }
1505}
1506
1507
1508/**
1509 * Point size attenuation computation.
1510 */
1511static void build_atten_pointsize( struct tnl_program *p )
1512{
1513   struct ureg eye = get_eye_position_z(p);
1514   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
1515   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1516   struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
1517   struct ureg ut = get_temp(p);
1518
1519   /* dist = |eyez| */
1520   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1521   /* p1 + dist * (p2 + dist * p3); */
1522   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1523		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1524   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1525		ut, swizzle1(state_attenuation, X));
1526
1527   /* 1 / sqrt(factor) */
1528   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1529
1530#if 0
1531   /* out = pointSize / sqrt(factor) */
1532   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1533#else
1534   /* this is a good place to clamp the point size since there's likely
1535    * no hardware registers to clamp point size at rasterization time.
1536    */
1537   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1538   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1539   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1540#endif
1541
1542   release_temp(p, ut);
1543}
1544
1545
1546/**
1547 * Pass-though per-vertex point size, from user's point size array.
1548 */
1549static void build_array_pointsize( struct tnl_program *p )
1550{
1551   struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
1552   struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
1553   emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
1554}
1555
1556
1557static void build_tnl_program( struct tnl_program *p )
1558{
1559   /* Emit the program, starting with the modelview, projection transforms:
1560    */
1561   build_hpos(p);
1562
1563   /* Lighting calculations:
1564    */
1565   if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) {
1566      if (p->state->light_global_enabled)
1567	 build_lighting(p);
1568      else {
1569	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0)
1570	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0);
1571
1572	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1)
1573	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1);
1574      }
1575   }
1576
1577   if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC)
1578      build_fog(p);
1579
1580   if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY)
1581      build_texture_transform(p);
1582
1583   if (p->state->point_attenuated)
1584      build_atten_pointsize(p);
1585   else if (p->state->varying_vp_inputs & VERT_BIT_POINT_SIZE)
1586      build_array_pointsize(p);
1587
1588   /* Finish up:
1589    */
1590   emit_op1(p, OPCODE_END, undef, 0, undef);
1591
1592   /* Disassemble:
1593    */
1594   if (DISASSEM) {
1595      printf ("\n");
1596   }
1597}
1598
1599
1600static void
1601create_new_program( const struct state_key *key,
1602                    struct gl_program *program,
1603                    GLboolean mvp_with_dp4,
1604                    GLuint max_temps)
1605{
1606   struct tnl_program p;
1607
1608   memset(&p, 0, sizeof(p));
1609   p.state = key;
1610   p.program = program;
1611   p.eye_position = undef;
1612   p.eye_position_z = undef;
1613   p.eye_position_normalized = undef;
1614   p.transformed_normal = undef;
1615   p.identity = undef;
1616   p.temp_in_use = 0;
1617   p.mvp_with_dp4 = mvp_with_dp4;
1618
1619   if (max_temps >= sizeof(int) * 8)
1620      p.temp_reserved = 0;
1621   else
1622      p.temp_reserved = ~((1<<max_temps)-1);
1623
1624   /* Start by allocating 32 instructions.
1625    * If we need more, we'll grow the instruction array as needed.
1626    */
1627   p.max_inst = 32;
1628   p.program->arb.Instructions =
1629      rzalloc_array(program, struct prog_instruction, p.max_inst);
1630   p.program->String = NULL;
1631   p.program->arb.NumInstructions =
1632   p.program->arb.NumTemporaries =
1633   p.program->arb.NumParameters =
1634   p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0;
1635   p.program->Parameters = _mesa_new_parameter_list();
1636   p.program->info.inputs_read = 0;
1637   p.program->info.outputs_written = 0;
1638
1639   build_tnl_program( &p );
1640}
1641
1642
1643/**
1644 * Return a vertex program which implements the current fixed-function
1645 * transform/lighting/texgen operations.
1646 */
1647struct gl_program *
1648_mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
1649{
1650   struct gl_program *prog;
1651   struct state_key key;
1652
1653   /* Grab all the relevant state and put it in a single structure:
1654    */
1655   make_state_key(ctx, &key);
1656
1657   /* Look for an already-prepared program for this state:
1658    */
1659   prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key,
1660                                     sizeof(key));
1661
1662   if (!prog) {
1663      /* OK, we'll have to build a new one */
1664      if (0)
1665         printf("Build new TNL program\n");
1666
1667      prog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0, true);
1668      if (!prog)
1669         return NULL;
1670
1671      create_new_program( &key, prog,
1672                          ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
1673                          ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps );
1674
1675      if (ctx->Driver.ProgramStringNotify)
1676         ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, prog);
1677
1678      _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key,
1679                                 sizeof(key), prog);
1680   }
1681
1682   return prog;
1683}
1684