1848b8605Smrg/*
2848b8605Smrg * Copyright (C) 2004  David Airlie   All Rights Reserved.
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5848b8605Smrg * copy of this software and associated documentation files (the "Software"),
6848b8605Smrg * to deal in the Software without restriction, including without limitation
7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
9848b8605Smrg * Software is furnished to do so, subject to the following conditions:
10848b8605Smrg *
11848b8605Smrg * The above copyright notice and this permission notice shall be included
12848b8605Smrg * in all copies or substantial portions of the Software.
13848b8605Smrg *
14848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17848b8605Smrg * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18848b8605Smrg * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19848b8605Smrg * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20848b8605Smrg */
21848b8605Smrg
22848b8605Smrg#include "main/glheader.h"
23848b8605Smrg#include "main/macros.h"
24848b8605Smrg#include "main/atifragshader.h"
25848b8605Smrg#include "main/samplerobj.h"
26848b8605Smrg#include "swrast/s_atifragshader.h"
27848b8605Smrg#include "swrast/s_context.h"
28848b8605Smrg
/* Row indices into atifs_machine::Inputs. */
#define ATI_FS_INPUT_PRIMARY 0      /* primary input color (COL0) */
#define ATI_FS_INPUT_SECONDARY 1    /* secondary input color (COL1) */
31848b8605Smrg
32848b8605Smrg/**
33848b8605Smrg * State for executing ATI fragment shader.
34848b8605Smrg */
35848b8605Smrgstruct atifs_machine
36848b8605Smrg{
37848b8605Smrg   GLfloat Registers[6][4];         /** six temporary registers */
38848b8605Smrg   GLfloat PrevPassRegisters[6][4];
39848b8605Smrg   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
40848b8605Smrg};
41848b8605Smrg
42848b8605Smrg
43848b8605Smrg
44848b8605Smrg/**
45848b8605Smrg * Fetch a texel.
46848b8605Smrg */
47848b8605Smrgstatic void
48848b8605Smrgfetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
49848b8605Smrg	    GLuint unit, GLfloat color[4])
50848b8605Smrg{
51848b8605Smrg   SWcontext *swrast = SWRAST_CONTEXT(ctx);
52848b8605Smrg
53848b8605Smrg   /* XXX use a float-valued TextureSample routine here!!! */
54848b8605Smrg   swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
55848b8605Smrg                               ctx->Texture.Unit[unit]._Current,
56848b8605Smrg			       1, (const GLfloat(*)[4]) texcoord,
57848b8605Smrg                               &lambda, (GLfloat (*)[4]) color);
58848b8605Smrg}
59848b8605Smrg
60848b8605Smrgstatic void
61848b8605Smrgapply_swizzle(GLfloat values[4], GLuint swizzle)
62848b8605Smrg{
63848b8605Smrg   GLfloat s, t, r, q;
64848b8605Smrg
65848b8605Smrg   s = values[0];
66848b8605Smrg   t = values[1];
67848b8605Smrg   r = values[2];
68848b8605Smrg   q = values[3];
69848b8605Smrg
70848b8605Smrg   switch (swizzle) {
71848b8605Smrg   case GL_SWIZZLE_STR_ATI:
72848b8605Smrg      values[0] = s;
73848b8605Smrg      values[1] = t;
74848b8605Smrg      values[2] = r;
75848b8605Smrg      break;
76848b8605Smrg   case GL_SWIZZLE_STQ_ATI:
77848b8605Smrg      values[0] = s;
78848b8605Smrg      values[1] = t;
79848b8605Smrg      values[2] = q;
80848b8605Smrg      break;
81848b8605Smrg   case GL_SWIZZLE_STR_DR_ATI:
82848b8605Smrg      values[0] = s / r;
83848b8605Smrg      values[1] = t / r;
84848b8605Smrg      values[2] = 1 / r;
85848b8605Smrg      break;
86848b8605Smrg   case GL_SWIZZLE_STQ_DQ_ATI:
87848b8605Smrg/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
88848b8605Smrg      if (q == 0.0F)
89848b8605Smrg         q = 0.000000001F;
90848b8605Smrg      values[0] = s / q;
91848b8605Smrg      values[1] = t / q;
92848b8605Smrg      values[2] = 1.0F / q;
93848b8605Smrg      break;
94848b8605Smrg   }
95848b8605Smrg   values[3] = 0.0;
96848b8605Smrg}
97848b8605Smrg
98848b8605Smrgstatic void
99848b8605Smrgapply_src_rep(GLint optype, GLuint rep, GLfloat * val)
100848b8605Smrg{
101848b8605Smrg   GLint i;
102848b8605Smrg   GLint start, end;
103848b8605Smrg   if (!rep)
104848b8605Smrg      return;
105848b8605Smrg
106848b8605Smrg   start = optype ? 3 : 0;
107848b8605Smrg   end = 4;
108848b8605Smrg
109848b8605Smrg   for (i = start; i < end; i++) {
110848b8605Smrg      switch (rep) {
111848b8605Smrg      case GL_RED:
112848b8605Smrg	 val[i] = val[0];
113848b8605Smrg	 break;
114848b8605Smrg      case GL_GREEN:
115848b8605Smrg	 val[i] = val[1];
116848b8605Smrg	 break;
117848b8605Smrg      case GL_BLUE:
118848b8605Smrg	 val[i] = val[2];
119848b8605Smrg	 break;
120848b8605Smrg      case GL_ALPHA:
121848b8605Smrg	 val[i] = val[3];
122848b8605Smrg	 break;
123848b8605Smrg      }
124848b8605Smrg   }
125848b8605Smrg}
126848b8605Smrg
127848b8605Smrgstatic void
128848b8605Smrgapply_src_mod(GLint optype, GLuint mod, GLfloat * val)
129848b8605Smrg{
130848b8605Smrg   GLint i;
131848b8605Smrg   GLint start, end;
132848b8605Smrg
133848b8605Smrg   if (!mod)
134848b8605Smrg      return;
135848b8605Smrg
136848b8605Smrg   start = optype ? 3 : 0;
137848b8605Smrg   end = 4;
138848b8605Smrg
139848b8605Smrg   for (i = start; i < end; i++) {
140848b8605Smrg      if (mod & GL_COMP_BIT_ATI)
141848b8605Smrg	 val[i] = 1 - val[i];
142848b8605Smrg
143848b8605Smrg      if (mod & GL_BIAS_BIT_ATI)
144848b8605Smrg	 val[i] = val[i] - 0.5F;
145848b8605Smrg
146848b8605Smrg      if (mod & GL_2X_BIT_ATI)
147848b8605Smrg	 val[i] = 2 * val[i];
148848b8605Smrg
149848b8605Smrg      if (mod & GL_NEGATE_BIT_ATI)
150848b8605Smrg	 val[i] = -val[i];
151848b8605Smrg   }
152848b8605Smrg}
153848b8605Smrg
154848b8605Smrgstatic void
155848b8605Smrgapply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
156848b8605Smrg{
157848b8605Smrg   GLint i;
158848b8605Smrg   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
159848b8605Smrg   GLint start, end;
160848b8605Smrg
161848b8605Smrg   mod &= ~GL_SATURATE_BIT_ATI;
162848b8605Smrg
163848b8605Smrg   start = optype ? 3 : 0;
164848b8605Smrg   end = optype ? 4 : 3;
165848b8605Smrg
166848b8605Smrg   for (i = start; i < end; i++) {
167848b8605Smrg      switch (mod) {
168848b8605Smrg      case GL_2X_BIT_ATI:
169848b8605Smrg	 val[i] = 2 * val[i];
170848b8605Smrg	 break;
171848b8605Smrg      case GL_4X_BIT_ATI:
172848b8605Smrg	 val[i] = 4 * val[i];
173848b8605Smrg	 break;
174848b8605Smrg      case GL_8X_BIT_ATI:
175848b8605Smrg	 val[i] = 8 * val[i];
176848b8605Smrg	 break;
177848b8605Smrg      case GL_HALF_BIT_ATI:
178848b8605Smrg	 val[i] = val[i] * 0.5F;
179848b8605Smrg	 break;
180848b8605Smrg      case GL_QUARTER_BIT_ATI:
181848b8605Smrg	 val[i] = val[i] * 0.25F;
182848b8605Smrg	 break;
183848b8605Smrg      case GL_EIGHTH_BIT_ATI:
184848b8605Smrg	 val[i] = val[i] * 0.125F;
185848b8605Smrg	 break;
186848b8605Smrg      }
187848b8605Smrg
188848b8605Smrg      if (has_sat) {
189848b8605Smrg	 if (val[i] < 0.0F)
190848b8605Smrg	    val[i] = 0.0F;
191848b8605Smrg	 else if (val[i] > 1.0F)
192848b8605Smrg	    val[i] = 1.0F;
193848b8605Smrg      }
194848b8605Smrg      else {
195848b8605Smrg	 if (val[i] < -8.0F)
196848b8605Smrg	    val[i] = -8.0F;
197848b8605Smrg	 else if (val[i] > 8.0F)
198848b8605Smrg	    val[i] = 8.0F;
199848b8605Smrg      }
200848b8605Smrg   }
201848b8605Smrg}
202848b8605Smrg
203848b8605Smrg
204848b8605Smrgstatic void
205848b8605Smrgwrite_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
206848b8605Smrg	       GLfloat * dst)
207848b8605Smrg{
208848b8605Smrg   GLint i;
209848b8605Smrg   apply_dst_mod(optype, mod, src);
210848b8605Smrg
211848b8605Smrg   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
212848b8605Smrg      if (mask) {
213848b8605Smrg	 if (mask & GL_RED_BIT_ATI)
214848b8605Smrg	    dst[0] = src[0];
215848b8605Smrg
216848b8605Smrg	 if (mask & GL_GREEN_BIT_ATI)
217848b8605Smrg	    dst[1] = src[1];
218848b8605Smrg
219848b8605Smrg	 if (mask & GL_BLUE_BIT_ATI)
220848b8605Smrg	    dst[2] = src[2];
221848b8605Smrg      }
222848b8605Smrg      else {
223848b8605Smrg	 for (i = 0; i < 3; i++)
224848b8605Smrg	    dst[i] = src[i];
225848b8605Smrg      }
226848b8605Smrg   }
227848b8605Smrg   else
228848b8605Smrg      dst[3] = src[3];
229848b8605Smrg}
230848b8605Smrg
231848b8605Smrgstatic void
232848b8605Smrgfinish_pass(struct atifs_machine *machine)
233848b8605Smrg{
234848b8605Smrg   GLint i;
235848b8605Smrg
236848b8605Smrg   for (i = 0; i < 6; i++) {
237848b8605Smrg      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
238848b8605Smrg   }
239848b8605Smrg}
240848b8605Smrg
241848b8605Smrg
242848b8605Smrgstatic void
243848b8605Smrghandle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
244848b8605Smrg	       const SWspan *span, GLuint column, GLuint idx)
245848b8605Smrg{
246848b8605Smrg   GLuint swizzle = texinst->swizzle;
247848b8605Smrg   GLuint pass_tex = texinst->src;
248848b8605Smrg
249848b8605Smrg   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
250848b8605Smrg      pass_tex -= GL_TEXTURE0_ARB;
251848b8605Smrg      COPY_4V(machine->Registers[idx],
252848b8605Smrg	      span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
253848b8605Smrg   }
254848b8605Smrg   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
255848b8605Smrg      pass_tex -= GL_REG_0_ATI;
256848b8605Smrg      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
257848b8605Smrg   }
258848b8605Smrg   apply_swizzle(machine->Registers[idx], swizzle);
259848b8605Smrg
260848b8605Smrg}
261848b8605Smrg
262848b8605Smrgstatic void
263848b8605Smrghandle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
264848b8605Smrg		 struct atifs_setupinst *texinst, const SWspan *span,
265848b8605Smrg		 GLuint column, GLuint idx)
266848b8605Smrg{
267848b8605Smrg/* sample from unit idx using texinst->src as coords */
268848b8605Smrg   GLuint swizzle = texinst->swizzle;
269848b8605Smrg   GLuint coord_source = texinst->src;
270848b8605Smrg   GLfloat tex_coords[4] = { 0 };
271848b8605Smrg
272848b8605Smrg   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
273848b8605Smrg      coord_source -= GL_TEXTURE0_ARB;
274848b8605Smrg      COPY_4V(tex_coords,
275848b8605Smrg              span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
276848b8605Smrg   }
277848b8605Smrg   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
278848b8605Smrg      coord_source -= GL_REG_0_ATI;
279848b8605Smrg      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
280848b8605Smrg   }
281848b8605Smrg   apply_swizzle(tex_coords, swizzle);
282848b8605Smrg   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
283848b8605Smrg}
284848b8605Smrg
/* Copy the 4-vector x into argument slot i of the given op type.
 * Relies on an array named `src` being in scope where the macro is expanded.
 */
#define SETUP_SRC_REG(optype, i, x) \
   do { COPY_4V(src[optype][i], x); } while (0)
289848b8605Smrg
290848b8605Smrg
291848b8605Smrg
292848b8605Smrg/**
293848b8605Smrg * Execute the given fragment shader.
294848b8605Smrg * NOTE: we do everything in single-precision floating point
295848b8605Smrg * \param ctx - rendering context
296848b8605Smrg * \param shader - the shader to execute
297848b8605Smrg * \param machine - virtual machine state
298848b8605Smrg * \param span - the SWspan we're operating on
299848b8605Smrg * \param column - which pixel [i] we're operating on in the span
300848b8605Smrg */
301848b8605Smrgstatic void
302848b8605Smrgexecute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
303848b8605Smrg	       struct atifs_machine *machine, const SWspan *span,
304848b8605Smrg               GLuint column)
305848b8605Smrg{
306848b8605Smrg   GLuint pc;
307848b8605Smrg   struct atifs_instruction *inst;
308848b8605Smrg   struct atifs_setupinst *texinst;
309848b8605Smrg   GLint optype;
310848b8605Smrg   GLuint i;
311848b8605Smrg   GLint j, pass;
312848b8605Smrg   GLint dstreg;
313848b8605Smrg   GLfloat src[2][3][4];
314848b8605Smrg   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
315848b8605Smrg   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
316848b8605Smrg   GLfloat dst[2][4], *dstp;
317848b8605Smrg
318848b8605Smrg   for (pass = 0; pass < shader->NumPasses; pass++) {
319848b8605Smrg      if (pass > 0)
320848b8605Smrg	 finish_pass(machine);
321848b8605Smrg      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
322848b8605Smrg	 texinst = &shader->SetupInst[pass][j];
323848b8605Smrg	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
324848b8605Smrg	    handle_pass_op(machine, texinst, span, column, j);
325848b8605Smrg	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
326848b8605Smrg	    handle_sample_op(ctx, machine, texinst, span, column, j);
327848b8605Smrg      }
328848b8605Smrg
329848b8605Smrg      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
330848b8605Smrg	 inst = &shader->Instructions[pass][pc];
331848b8605Smrg
332848b8605Smrg	 /* setup the source registers for color and alpha ops */
333848b8605Smrg	 for (optype = 0; optype < 2; optype++) {
334848b8605Smrg 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
335848b8605Smrg	       GLint index = inst->SrcReg[optype][i].Index;
336848b8605Smrg
337848b8605Smrg	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
338848b8605Smrg		  SETUP_SRC_REG(optype, i,
339848b8605Smrg				machine->Registers[index - GL_REG_0_ATI]);
340848b8605Smrg	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
341848b8605Smrg		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
342848b8605Smrg		     SETUP_SRC_REG(optype, i,
343848b8605Smrg				shader->Constants[index - GL_CON_0_ATI]);
344848b8605Smrg		  } else {
345848b8605Smrg		     SETUP_SRC_REG(optype, i,
346848b8605Smrg				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
347848b8605Smrg		  }
348848b8605Smrg	       }
349848b8605Smrg	       else if (index == GL_ONE)
350848b8605Smrg		  SETUP_SRC_REG(optype, i, ones);
351848b8605Smrg	       else if (index == GL_ZERO)
352848b8605Smrg		  SETUP_SRC_REG(optype, i, zeros);
353848b8605Smrg	       else if (index == GL_PRIMARY_COLOR_EXT)
354848b8605Smrg		  SETUP_SRC_REG(optype, i,
355848b8605Smrg				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
356848b8605Smrg	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
357848b8605Smrg		  SETUP_SRC_REG(optype, i,
358848b8605Smrg				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
359848b8605Smrg
360848b8605Smrg	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
361848b8605Smrg			     src[optype][i]);
362848b8605Smrg	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
363848b8605Smrg			     src[optype][i]);
364848b8605Smrg	    }
365848b8605Smrg	 }
366848b8605Smrg
367848b8605Smrg	 /* Execute the operations - color then alpha */
368848b8605Smrg	 for (optype = 0; optype < 2; optype++) {
369848b8605Smrg	    if (inst->Opcode[optype]) {
370848b8605Smrg	       switch (inst->Opcode[optype]) {
371848b8605Smrg	       case GL_ADD_ATI:
372848b8605Smrg		  if (!optype)
373848b8605Smrg		     for (i = 0; i < 3; i++) {
374848b8605Smrg			dst[optype][i] =
375848b8605Smrg			   src[optype][0][i] + src[optype][1][i];
376848b8605Smrg		     }
377848b8605Smrg		  else
378848b8605Smrg		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
379848b8605Smrg		  break;
380848b8605Smrg	       case GL_SUB_ATI:
381848b8605Smrg		  if (!optype)
382848b8605Smrg		     for (i = 0; i < 3; i++) {
383848b8605Smrg			dst[optype][i] =
384848b8605Smrg			   src[optype][0][i] - src[optype][1][i];
385848b8605Smrg		     }
386848b8605Smrg		  else
387848b8605Smrg		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
388848b8605Smrg		  break;
389848b8605Smrg	       case GL_MUL_ATI:
390848b8605Smrg		  if (!optype)
391848b8605Smrg		     for (i = 0; i < 3; i++) {
392848b8605Smrg			dst[optype][i] =
393848b8605Smrg			   src[optype][0][i] * src[optype][1][i];
394848b8605Smrg		     }
395848b8605Smrg		  else
396848b8605Smrg		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
397848b8605Smrg		  break;
398848b8605Smrg	       case GL_MAD_ATI:
399848b8605Smrg		  if (!optype)
400848b8605Smrg		     for (i = 0; i < 3; i++) {
401848b8605Smrg			dst[optype][i] =
402848b8605Smrg			   src[optype][0][i] * src[optype][1][i] +
403848b8605Smrg			   src[optype][2][i];
404848b8605Smrg		     }
405848b8605Smrg		  else
406848b8605Smrg		     dst[optype][3] =
407848b8605Smrg			src[optype][0][3] * src[optype][1][3] +
408848b8605Smrg			src[optype][2][3];
409848b8605Smrg		  break;
410848b8605Smrg	       case GL_LERP_ATI:
411848b8605Smrg		  if (!optype)
412848b8605Smrg		     for (i = 0; i < 3; i++) {
413848b8605Smrg			dst[optype][i] =
414848b8605Smrg			   src[optype][0][i] * src[optype][1][i] + (1 -
415848b8605Smrg								    src
416848b8605Smrg								    [optype]
417848b8605Smrg								    [0][i]) *
418848b8605Smrg			   src[optype][2][i];
419848b8605Smrg		     }
420848b8605Smrg		  else
421848b8605Smrg		     dst[optype][3] =
422848b8605Smrg			src[optype][0][3] * src[optype][1][3] + (1 -
423848b8605Smrg								 src[optype]
424848b8605Smrg								 [0][3]) *
425848b8605Smrg			src[optype][2][3];
426848b8605Smrg		  break;
427848b8605Smrg
428848b8605Smrg	       case GL_MOV_ATI:
429848b8605Smrg		  if (!optype)
430848b8605Smrg		     for (i = 0; i < 3; i++) {
431848b8605Smrg			dst[optype][i] = src[optype][0][i];
432848b8605Smrg		     }
433848b8605Smrg		  else
434848b8605Smrg		     dst[optype][3] = src[optype][0][3];
435848b8605Smrg		  break;
436848b8605Smrg	       case GL_CND_ATI:
437848b8605Smrg		  if (!optype) {
438848b8605Smrg		     for (i = 0; i < 3; i++) {
439848b8605Smrg			dst[optype][i] =
440848b8605Smrg			   (src[optype][2][i] >
441b8e80941Smrg			    0.5F) ? src[optype][0][i] : src[optype][1][i];
442848b8605Smrg		     }
443848b8605Smrg		  }
444848b8605Smrg		  else {
445848b8605Smrg		     dst[optype][3] =
446848b8605Smrg			(src[optype][2][3] >
447b8e80941Smrg			 0.5F) ? src[optype][0][3] : src[optype][1][3];
448848b8605Smrg		  }
449848b8605Smrg		  break;
450848b8605Smrg
451848b8605Smrg	       case GL_CND0_ATI:
452848b8605Smrg		  if (!optype)
453848b8605Smrg		     for (i = 0; i < 3; i++) {
454848b8605Smrg			dst[optype][i] =
455848b8605Smrg			   (src[optype][2][i] >=
456848b8605Smrg			    0) ? src[optype][0][i] : src[optype][1][i];
457848b8605Smrg		     }
458848b8605Smrg		  else {
459848b8605Smrg		     dst[optype][3] =
460848b8605Smrg			(src[optype][2][3] >=
461848b8605Smrg			 0) ? src[optype][0][3] : src[optype][1][3];
462848b8605Smrg		  }
463848b8605Smrg		  break;
464848b8605Smrg	       case GL_DOT2_ADD_ATI:
465848b8605Smrg		  {
466848b8605Smrg		     GLfloat result;
467848b8605Smrg
468848b8605Smrg		     /* DOT 2 always uses the source from the color op */
469848b8605Smrg		     /* could save recalculation of dot products for alpha inst */
470848b8605Smrg		     result = src[0][0][0] * src[0][1][0] +
471848b8605Smrg			src[0][0][1] * src[0][1][1] + src[0][2][2];
472848b8605Smrg		     if (!optype) {
473848b8605Smrg			for (i = 0; i < 3; i++) {
474848b8605Smrg			   dst[optype][i] = result;
475848b8605Smrg			}
476848b8605Smrg		     }
477848b8605Smrg		     else
478848b8605Smrg			dst[optype][3] = result;
479848b8605Smrg		  }
480848b8605Smrg		  break;
481848b8605Smrg	       case GL_DOT3_ATI:
482848b8605Smrg		  {
483848b8605Smrg		     GLfloat result;
484848b8605Smrg
485848b8605Smrg		     /* DOT 3 always uses the source from the color op */
486848b8605Smrg		     result = src[0][0][0] * src[0][1][0] +
487848b8605Smrg			src[0][0][1] * src[0][1][1] +
488848b8605Smrg			src[0][0][2] * src[0][1][2];
489848b8605Smrg
490848b8605Smrg		     if (!optype) {
491848b8605Smrg			for (i = 0; i < 3; i++) {
492848b8605Smrg			   dst[optype][i] = result;
493848b8605Smrg			}
494848b8605Smrg		     }
495848b8605Smrg		     else
496848b8605Smrg			dst[optype][3] = result;
497848b8605Smrg		  }
498848b8605Smrg		  break;
499848b8605Smrg	       case GL_DOT4_ATI:
500848b8605Smrg		  {
501848b8605Smrg		     GLfloat result;
502848b8605Smrg
503848b8605Smrg		     /* DOT 4 always uses the source from the color op */
504848b8605Smrg		     result = src[0][0][0] * src[0][1][0] +
505848b8605Smrg			src[0][0][1] * src[0][1][1] +
506848b8605Smrg			src[0][0][2] * src[0][1][2] +
507848b8605Smrg			src[0][0][3] * src[0][1][3];
508848b8605Smrg		     if (!optype) {
509848b8605Smrg			for (i = 0; i < 3; i++) {
510848b8605Smrg			   dst[optype][i] = result;
511848b8605Smrg			}
512848b8605Smrg		     }
513848b8605Smrg		     else
514848b8605Smrg			dst[optype][3] = result;
515848b8605Smrg		  }
516848b8605Smrg		  break;
517848b8605Smrg
518848b8605Smrg	       }
519848b8605Smrg	    }
520848b8605Smrg	 }
521848b8605Smrg
522848b8605Smrg	 /* write out the destination registers */
523848b8605Smrg	 for (optype = 0; optype < 2; optype++) {
524848b8605Smrg	    if (inst->Opcode[optype]) {
525848b8605Smrg	       dstreg = inst->DstReg[optype].Index;
526848b8605Smrg	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
527848b8605Smrg
528848b8605Smrg	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
529848b8605Smrg		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
530848b8605Smrg	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
531848b8605Smrg			      inst->DstReg[optype].dstMask, dst[optype],
532848b8605Smrg			      dstp);
533848b8605Smrg	       else
534848b8605Smrg		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
535848b8605Smrg	    }
536848b8605Smrg	 }
537848b8605Smrg      }
538848b8605Smrg   }
539848b8605Smrg}
540848b8605Smrg
541848b8605Smrg
542848b8605Smrg/**
543848b8605Smrg * Init fragment shader virtual machine state.
544848b8605Smrg */
545848b8605Smrgstatic void
546848b8605Smrginit_machine(struct gl_context * ctx, struct atifs_machine *machine,
547848b8605Smrg	     const struct ati_fragment_shader *shader,
548848b8605Smrg	     const SWspan *span, GLuint col)
549848b8605Smrg{
550848b8605Smrg   GLfloat (*inputs)[4] = machine->Inputs;
551848b8605Smrg   GLint i, j;
552848b8605Smrg
553848b8605Smrg   for (i = 0; i < 6; i++) {
554848b8605Smrg      for (j = 0; j < 4; j++)
555848b8605Smrg	 machine->Registers[i][j] = 0.0;
556848b8605Smrg   }
557848b8605Smrg
558848b8605Smrg   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
559848b8605Smrg   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
560848b8605Smrg}
561848b8605Smrg
562848b8605Smrg
563848b8605Smrg
564848b8605Smrg/**
565848b8605Smrg * Execute the current ATI shader program, operating on the given span.
566848b8605Smrg */
567848b8605Smrgvoid
568848b8605Smrg_swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
569848b8605Smrg{
570848b8605Smrg   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
571848b8605Smrg   struct atifs_machine machine;
572848b8605Smrg   GLuint i;
573848b8605Smrg
574848b8605Smrg   /* incoming colors should be floats */
575b8e80941Smrg   assert(span->array->ChanType == GL_FLOAT);
576848b8605Smrg
577848b8605Smrg   for (i = 0; i < span->end; i++) {
578848b8605Smrg      if (span->array->mask[i]) {
579848b8605Smrg	 init_machine(ctx, &machine, shader, span, i);
580848b8605Smrg
581848b8605Smrg	 execute_shader(ctx, shader, &machine, span, i);
582848b8605Smrg
583848b8605Smrg         /* store result color */
584848b8605Smrg	 {
585848b8605Smrg	    const GLfloat *colOut = machine.Registers[0];
586848b8605Smrg            /*fprintf(stderr,"outputs %f %f %f %f\n",
587848b8605Smrg              colOut[0], colOut[1], colOut[2], colOut[3]); */
588848b8605Smrg            COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
589848b8605Smrg	 }
590848b8605Smrg      }
591848b8605Smrg   }
592848b8605Smrg}
593