1/*
2 * Copyright (C) 2004  David Airlie   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22#include "main/glheader.h"
23#include "main/macros.h"
24#include "main/atifragshader.h"
25#include "main/samplerobj.h"
26#include "swrast/s_atifragshader.h"
27#include "swrast/s_context.h"
28
/** Slots of atifs_machine::Inputs holding the interpolated input colors. */
enum {
   ATI_FS_INPUT_PRIMARY = 0,
   ATI_FS_INPUT_SECONDARY = 1
};
31
32/**
33 * State for executing ATI fragment shader.
34 */
35struct atifs_machine
36{
37   GLfloat Registers[6][4];         /** six temporary registers */
38   GLfloat PrevPassRegisters[6][4];
39   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
40};
41
42
43
44/**
45 * Fetch a texel.
46 */
47static void
48fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
49	    GLuint unit, GLfloat color[4])
50{
51   SWcontext *swrast = SWRAST_CONTEXT(ctx);
52
53   /* XXX use a float-valued TextureSample routine here!!! */
54   swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
55                               ctx->Texture.Unit[unit]._Current,
56			       1, (const GLfloat(*)[4]) texcoord,
57                               &lambda, (GLfloat (*)[4]) color);
58}
59
60static void
61apply_swizzle(GLfloat values[4], GLuint swizzle)
62{
63   GLfloat s, t, r, q;
64
65   s = values[0];
66   t = values[1];
67   r = values[2];
68   q = values[3];
69
70   switch (swizzle) {
71   case GL_SWIZZLE_STR_ATI:
72      values[0] = s;
73      values[1] = t;
74      values[2] = r;
75      break;
76   case GL_SWIZZLE_STQ_ATI:
77      values[0] = s;
78      values[1] = t;
79      values[2] = q;
80      break;
81   case GL_SWIZZLE_STR_DR_ATI:
82      values[0] = s / r;
83      values[1] = t / r;
84      values[2] = 1 / r;
85      break;
86   case GL_SWIZZLE_STQ_DQ_ATI:
87/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
88      if (q == 0.0F)
89         q = 0.000000001F;
90      values[0] = s / q;
91      values[1] = t / q;
92      values[2] = 1.0F / q;
93      break;
94   }
95   values[3] = 0.0;
96}
97
98static void
99apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
100{
101   GLint i;
102   GLint start, end;
103   if (!rep)
104      return;
105
106   start = optype ? 3 : 0;
107   end = 4;
108
109   for (i = start; i < end; i++) {
110      switch (rep) {
111      case GL_RED:
112	 val[i] = val[0];
113	 break;
114      case GL_GREEN:
115	 val[i] = val[1];
116	 break;
117      case GL_BLUE:
118	 val[i] = val[2];
119	 break;
120      case GL_ALPHA:
121	 val[i] = val[3];
122	 break;
123      }
124   }
125}
126
127static void
128apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
129{
130   GLint i;
131   GLint start, end;
132
133   if (!mod)
134      return;
135
136   start = optype ? 3 : 0;
137   end = 4;
138
139   for (i = start; i < end; i++) {
140      if (mod & GL_COMP_BIT_ATI)
141	 val[i] = 1 - val[i];
142
143      if (mod & GL_BIAS_BIT_ATI)
144	 val[i] = val[i] - 0.5F;
145
146      if (mod & GL_2X_BIT_ATI)
147	 val[i] = 2 * val[i];
148
149      if (mod & GL_NEGATE_BIT_ATI)
150	 val[i] = -val[i];
151   }
152}
153
154static void
155apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
156{
157   GLint i;
158   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
159   GLint start, end;
160
161   mod &= ~GL_SATURATE_BIT_ATI;
162
163   start = optype ? 3 : 0;
164   end = optype ? 4 : 3;
165
166   for (i = start; i < end; i++) {
167      switch (mod) {
168      case GL_2X_BIT_ATI:
169	 val[i] = 2 * val[i];
170	 break;
171      case GL_4X_BIT_ATI:
172	 val[i] = 4 * val[i];
173	 break;
174      case GL_8X_BIT_ATI:
175	 val[i] = 8 * val[i];
176	 break;
177      case GL_HALF_BIT_ATI:
178	 val[i] = val[i] * 0.5F;
179	 break;
180      case GL_QUARTER_BIT_ATI:
181	 val[i] = val[i] * 0.25F;
182	 break;
183      case GL_EIGHTH_BIT_ATI:
184	 val[i] = val[i] * 0.125F;
185	 break;
186      }
187
188      if (has_sat) {
189	 if (val[i] < 0.0F)
190	    val[i] = 0.0F;
191	 else if (val[i] > 1.0F)
192	    val[i] = 1.0F;
193      }
194      else {
195	 if (val[i] < -8.0F)
196	    val[i] = -8.0F;
197	 else if (val[i] > 8.0F)
198	    val[i] = 8.0F;
199      }
200   }
201}
202
203
204static void
205write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
206	       GLfloat * dst)
207{
208   GLint i;
209   apply_dst_mod(optype, mod, src);
210
211   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
212      if (mask) {
213	 if (mask & GL_RED_BIT_ATI)
214	    dst[0] = src[0];
215
216	 if (mask & GL_GREEN_BIT_ATI)
217	    dst[1] = src[1];
218
219	 if (mask & GL_BLUE_BIT_ATI)
220	    dst[2] = src[2];
221      }
222      else {
223	 for (i = 0; i < 3; i++)
224	    dst[i] = src[i];
225      }
226   }
227   else
228      dst[3] = src[3];
229}
230
231static void
232finish_pass(struct atifs_machine *machine)
233{
234   GLint i;
235
236   for (i = 0; i < 6; i++) {
237      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
238   }
239}
240
241
242static void
243handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
244	       const SWspan *span, GLuint column, GLuint idx)
245{
246   GLuint swizzle = texinst->swizzle;
247   GLuint pass_tex = texinst->src;
248
249   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
250      pass_tex -= GL_TEXTURE0_ARB;
251      COPY_4V(machine->Registers[idx],
252	      span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
253   }
254   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
255      pass_tex -= GL_REG_0_ATI;
256      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
257   }
258   apply_swizzle(machine->Registers[idx], swizzle);
259
260}
261
262static void
263handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
264		 struct atifs_setupinst *texinst, const SWspan *span,
265		 GLuint column, GLuint idx)
266{
267/* sample from unit idx using texinst->src as coords */
268   GLuint swizzle = texinst->swizzle;
269   GLuint coord_source = texinst->src;
270   GLfloat tex_coords[4] = { 0 };
271
272   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
273      coord_source -= GL_TEXTURE0_ARB;
274      COPY_4V(tex_coords,
275              span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
276   }
277   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
278      coord_source -= GL_REG_0_ATI;
279      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
280   }
281   apply_swizzle(tex_coords, swizzle);
282   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
283}
284
/*
 * Copy the four-component vector x into source operand slot i of the
 * color (optype 0) or alpha (optype 1) op.
 * NOTE: expands to a reference to an array named "src" that must exist in
 * the scope where the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x)             \
   do {                                         \
      COPY_4V(src[(optype)][(i)], (x));         \
   } while (0)
289
290
291
292/**
293 * Execute the given fragment shader.
294 * NOTE: we do everything in single-precision floating point
295 * \param ctx - rendering context
296 * \param shader - the shader to execute
297 * \param machine - virtual machine state
298 * \param span - the SWspan we're operating on
299 * \param column - which pixel [i] we're operating on in the span
300 */
301static void
302execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
303	       struct atifs_machine *machine, const SWspan *span,
304               GLuint column)
305{
306   GLuint pc;
307   struct atifs_instruction *inst;
308   struct atifs_setupinst *texinst;
309   GLint optype;
310   GLuint i;
311   GLint j, pass;
312   GLint dstreg;
313   GLfloat src[2][3][4];
314   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
315   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
316   GLfloat dst[2][4], *dstp;
317
318   for (pass = 0; pass < shader->NumPasses; pass++) {
319      if (pass > 0)
320	 finish_pass(machine);
321      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
322	 texinst = &shader->SetupInst[pass][j];
323	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
324	    handle_pass_op(machine, texinst, span, column, j);
325	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
326	    handle_sample_op(ctx, machine, texinst, span, column, j);
327      }
328
329      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
330	 inst = &shader->Instructions[pass][pc];
331
332	 /* setup the source registers for color and alpha ops */
333	 for (optype = 0; optype < 2; optype++) {
334 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
335	       GLint index = inst->SrcReg[optype][i].Index;
336
337	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
338		  SETUP_SRC_REG(optype, i,
339				machine->Registers[index - GL_REG_0_ATI]);
340	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
341		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
342		     SETUP_SRC_REG(optype, i,
343				shader->Constants[index - GL_CON_0_ATI]);
344		  } else {
345		     SETUP_SRC_REG(optype, i,
346				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
347		  }
348	       }
349	       else if (index == GL_ONE)
350		  SETUP_SRC_REG(optype, i, ones);
351	       else if (index == GL_ZERO)
352		  SETUP_SRC_REG(optype, i, zeros);
353	       else if (index == GL_PRIMARY_COLOR_EXT)
354		  SETUP_SRC_REG(optype, i,
355				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
356	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
357		  SETUP_SRC_REG(optype, i,
358				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
359
360	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
361			     src[optype][i]);
362	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
363			     src[optype][i]);
364	    }
365	 }
366
367	 /* Execute the operations - color then alpha */
368	 for (optype = 0; optype < 2; optype++) {
369	    if (inst->Opcode[optype]) {
370	       switch (inst->Opcode[optype]) {
371	       case GL_ADD_ATI:
372		  if (!optype)
373		     for (i = 0; i < 3; i++) {
374			dst[optype][i] =
375			   src[optype][0][i] + src[optype][1][i];
376		     }
377		  else
378		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
379		  break;
380	       case GL_SUB_ATI:
381		  if (!optype)
382		     for (i = 0; i < 3; i++) {
383			dst[optype][i] =
384			   src[optype][0][i] - src[optype][1][i];
385		     }
386		  else
387		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
388		  break;
389	       case GL_MUL_ATI:
390		  if (!optype)
391		     for (i = 0; i < 3; i++) {
392			dst[optype][i] =
393			   src[optype][0][i] * src[optype][1][i];
394		     }
395		  else
396		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
397		  break;
398	       case GL_MAD_ATI:
399		  if (!optype)
400		     for (i = 0; i < 3; i++) {
401			dst[optype][i] =
402			   src[optype][0][i] * src[optype][1][i] +
403			   src[optype][2][i];
404		     }
405		  else
406		     dst[optype][3] =
407			src[optype][0][3] * src[optype][1][3] +
408			src[optype][2][3];
409		  break;
410	       case GL_LERP_ATI:
411		  if (!optype)
412		     for (i = 0; i < 3; i++) {
413			dst[optype][i] =
414			   src[optype][0][i] * src[optype][1][i] + (1 -
415								    src
416								    [optype]
417								    [0][i]) *
418			   src[optype][2][i];
419		     }
420		  else
421		     dst[optype][3] =
422			src[optype][0][3] * src[optype][1][3] + (1 -
423								 src[optype]
424								 [0][3]) *
425			src[optype][2][3];
426		  break;
427
428	       case GL_MOV_ATI:
429		  if (!optype)
430		     for (i = 0; i < 3; i++) {
431			dst[optype][i] = src[optype][0][i];
432		     }
433		  else
434		     dst[optype][3] = src[optype][0][3];
435		  break;
436	       case GL_CND_ATI:
437		  if (!optype) {
438		     for (i = 0; i < 3; i++) {
439			dst[optype][i] =
440			   (src[optype][2][i] >
441			    0.5F) ? src[optype][0][i] : src[optype][1][i];
442		     }
443		  }
444		  else {
445		     dst[optype][3] =
446			(src[optype][2][3] >
447			 0.5F) ? src[optype][0][3] : src[optype][1][3];
448		  }
449		  break;
450
451	       case GL_CND0_ATI:
452		  if (!optype)
453		     for (i = 0; i < 3; i++) {
454			dst[optype][i] =
455			   (src[optype][2][i] >=
456			    0) ? src[optype][0][i] : src[optype][1][i];
457		     }
458		  else {
459		     dst[optype][3] =
460			(src[optype][2][3] >=
461			 0) ? src[optype][0][3] : src[optype][1][3];
462		  }
463		  break;
464	       case GL_DOT2_ADD_ATI:
465		  {
466		     GLfloat result;
467
468		     /* DOT 2 always uses the source from the color op */
469		     /* could save recalculation of dot products for alpha inst */
470		     result = src[0][0][0] * src[0][1][0] +
471			src[0][0][1] * src[0][1][1] + src[0][2][2];
472		     if (!optype) {
473			for (i = 0; i < 3; i++) {
474			   dst[optype][i] = result;
475			}
476		     }
477		     else
478			dst[optype][3] = result;
479		  }
480		  break;
481	       case GL_DOT3_ATI:
482		  {
483		     GLfloat result;
484
485		     /* DOT 3 always uses the source from the color op */
486		     result = src[0][0][0] * src[0][1][0] +
487			src[0][0][1] * src[0][1][1] +
488			src[0][0][2] * src[0][1][2];
489
490		     if (!optype) {
491			for (i = 0; i < 3; i++) {
492			   dst[optype][i] = result;
493			}
494		     }
495		     else
496			dst[optype][3] = result;
497		  }
498		  break;
499	       case GL_DOT4_ATI:
500		  {
501		     GLfloat result;
502
503		     /* DOT 4 always uses the source from the color op */
504		     result = src[0][0][0] * src[0][1][0] +
505			src[0][0][1] * src[0][1][1] +
506			src[0][0][2] * src[0][1][2] +
507			src[0][0][3] * src[0][1][3];
508		     if (!optype) {
509			for (i = 0; i < 3; i++) {
510			   dst[optype][i] = result;
511			}
512		     }
513		     else
514			dst[optype][3] = result;
515		  }
516		  break;
517
518	       }
519	    }
520	 }
521
522	 /* write out the destination registers */
523	 for (optype = 0; optype < 2; optype++) {
524	    if (inst->Opcode[optype]) {
525	       dstreg = inst->DstReg[optype].Index;
526	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
527
528	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
529		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
530	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
531			      inst->DstReg[optype].dstMask, dst[optype],
532			      dstp);
533	       else
534		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
535	    }
536	 }
537      }
538   }
539}
540
541
542/**
543 * Init fragment shader virtual machine state.
544 */
545static void
546init_machine(struct gl_context * ctx, struct atifs_machine *machine,
547	     const struct ati_fragment_shader *shader,
548	     const SWspan *span, GLuint col)
549{
550   GLfloat (*inputs)[4] = machine->Inputs;
551   GLint i, j;
552
553   for (i = 0; i < 6; i++) {
554      for (j = 0; j < 4; j++)
555	 machine->Registers[i][j] = 0.0;
556   }
557
558   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
559   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
560}
561
562
563
564/**
565 * Execute the current ATI shader program, operating on the given span.
566 */
567void
568_swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
569{
570   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
571   struct atifs_machine machine;
572   GLuint i;
573
574   /* incoming colors should be floats */
575   assert(span->array->ChanType == GL_FLOAT);
576
577   for (i = 0; i < span->end; i++) {
578      if (span->array->mask[i]) {
579	 init_machine(ctx, &machine, shader, span, i);
580
581	 execute_shader(ctx, shader, &machine, span, i);
582
583         /* store result color */
584	 {
585	    const GLfloat *colOut = machine.Registers[0];
586            /*fprintf(stderr,"outputs %f %f %f %f\n",
587              colOut[0], colOut[1], colOut[2], colOut[3]); */
588            COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
589	 }
590      }
591   }
592}
593