1/**************************************************************************
2 *
3 * Copyright 2003 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "main/glheader.h"
29#include "main/macros.h"
30#include "main/enums.h"
31
32#include "program/prog_instruction.h"
33#include "program/prog_parameter.h"
34#include "program/program.h"
35#include "program/programopt.h"
36#include "program/prog_print.h"
37
38#include "tnl/tnl.h"
39#include "tnl/t_context.h"
40
41#include "intel_batchbuffer.h"
42
43#include "i915_reg.h"
44#include "i915_context.h"
45#include "i915_program.h"
46
47static const GLfloat sin_quad_constants[2][4] = {
48   {
49      2.0,
50      -1.0,
51      .5,
52      .75
53   },
54   {
55      4.0,
56      -4.0,
57      1.0 / (2.0 * M_PI),
58      .2225
59   }
60};
61
62static const GLfloat sin_constants[4] = { 1.0,
63   -1.0 / (3 * 2 * 1),
64   1.0 / (5 * 4 * 3 * 2 * 1),
65   -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
66};
67
68/* 1, -1/2!, 1/4!, -1/6! */
69static const GLfloat cos_constants[4] = { 1.0,
70   -1.0 / (2 * 1),
71   1.0 / (4 * 3 * 2 * 1),
72   -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
73};
74
/*
 * texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR}
 *
 * Bit 7 tags the kind of varying the entry refers to: TEXCOORD_TEX for an
 * index relative to VARYING_SLOT_TEX0, TEXCOORD_VAR for an index relative
 * to VARYING_SLOT_VAR0.
 */
#define TEXCOORD_TEX (0 << 7)
#define TEXCOORD_VAR (1 << 7)
78
79static unsigned
80get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
81{
82   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
83      if (p->texcoord_mapping[i] == texcoord)
84         return i;
85   }
86
87   /* blah */
88   return p->ctx->Const.MaxTextureCoordUnits - 1;
89}
90
91/**
92 * Retrieve a ureg for the given source register.  Will emit
93 * constants, apply swizzling and negation as needed.
94 */
95static GLuint
96src_vector(struct i915_fragment_program *p,
97           const struct prog_src_register *source,
98           const struct gl_program *program)
99{
100   GLuint src;
101   unsigned unit;
102
103   switch (source->File) {
104
105      /* Registers:
106       */
107   case PROGRAM_TEMPORARY:
108      if (source->Index >= I915_MAX_TEMPORARY) {
109         i915_program_error(p, "Exceeded max temporary reg: %d/%d",
110			    source->Index, I915_MAX_TEMPORARY);
111         return 0;
112      }
113      src = UREG(REG_TYPE_R, source->Index);
114      break;
115   case PROGRAM_INPUT:
116      switch (source->Index) {
117      case VARYING_SLOT_POS:
118         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
119         break;
120      case VARYING_SLOT_COL0:
121         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
122         break;
123      case VARYING_SLOT_COL1:
124         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
125         src = swizzle(src, X, Y, Z, ONE);
126         break;
127      case VARYING_SLOT_FOGC:
128         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
129         src = swizzle(src, W, ZERO, ZERO, ONE);
130         break;
131      case VARYING_SLOT_TEX0:
132      case VARYING_SLOT_TEX1:
133      case VARYING_SLOT_TEX2:
134      case VARYING_SLOT_TEX3:
135      case VARYING_SLOT_TEX4:
136      case VARYING_SLOT_TEX5:
137      case VARYING_SLOT_TEX6:
138      case VARYING_SLOT_TEX7:
139         unit = get_texcoord_mapping(p, (source->Index -
140                                         VARYING_SLOT_TEX0) | TEXCOORD_TEX);
141         src = i915_emit_decl(p, REG_TYPE_T,
142                              T_TEX0 + unit,
143                              D0_CHANNEL_ALL);
144	 break;
145
146      case VARYING_SLOT_VAR0:
147      case VARYING_SLOT_VAR0 + 1:
148      case VARYING_SLOT_VAR0 + 2:
149      case VARYING_SLOT_VAR0 + 3:
150      case VARYING_SLOT_VAR0 + 4:
151      case VARYING_SLOT_VAR0 + 5:
152      case VARYING_SLOT_VAR0 + 6:
153      case VARYING_SLOT_VAR0 + 7:
154         unit = get_texcoord_mapping(p, (source->Index -
155                                         VARYING_SLOT_VAR0) | TEXCOORD_VAR);
156         src = i915_emit_decl(p, REG_TYPE_T,
157                              T_TEX0 + unit,
158                              D0_CHANNEL_ALL);
159         break;
160
161      default:
162         i915_program_error(p, "Bad source->Index: %d", source->Index);
163         return 0;
164      }
165      break;
166
167   case PROGRAM_OUTPUT:
168      switch (source->Index) {
169      case FRAG_RESULT_COLOR:
170      case FRAG_RESULT_DATA0:
171	 src = UREG(REG_TYPE_OC, 0);
172	 break;
173      case FRAG_RESULT_DEPTH:
174	 src = UREG(REG_TYPE_OD, 0);
175	 break;
176      default:
177	 i915_program_error(p, "Bad source->Index: %d", source->Index);
178	 return 0;
179      }
180      break;
181
182      /* Various paramters and env values.  All emitted to
183       * hardware as program constants.
184       */
185   case PROGRAM_CONSTANT:
186   case PROGRAM_STATE_VAR:
187   case PROGRAM_UNIFORM: {
188      struct gl_program_parameter_list *params = program->Parameters;
189      unsigned offset = params->Parameters[source->Index].ValueOffset;
190      src = i915_emit_param4fv(p, &params->ParameterValues[offset].f);
191      break;
192   }
193   default:
194      i915_program_error(p, "Bad source->File: %d", source->File);
195      return 0;
196   }
197
198   src = swizzle(src,
199                 GET_SWZ(source->Swizzle, 0),
200                 GET_SWZ(source->Swizzle, 1),
201                 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
202
203   if (source->Negate)
204      src = negate(src,
205                   GET_BIT(source->Negate, 0),
206                   GET_BIT(source->Negate, 1),
207                   GET_BIT(source->Negate, 2),
208                   GET_BIT(source->Negate, 3));
209
210   return src;
211}
212
213
214static GLuint
215get_result_vector(struct i915_fragment_program *p,
216                  const struct prog_instruction *inst)
217{
218   switch (inst->DstReg.File) {
219   case PROGRAM_OUTPUT:
220      switch (inst->DstReg.Index) {
221      case FRAG_RESULT_COLOR:
222      case FRAG_RESULT_DATA0:
223         return UREG(REG_TYPE_OC, 0);
224      case FRAG_RESULT_DEPTH:
225         p->depth_written = 1;
226         return UREG(REG_TYPE_OD, 0);
227      default:
228         i915_program_error(p, "Bad inst->DstReg.Index: %d",
229			    inst->DstReg.Index);
230         return 0;
231      }
232   case PROGRAM_TEMPORARY:
233      return UREG(REG_TYPE_R, inst->DstReg.Index);
234   default:
235      i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
236      return 0;
237   }
238}
239
240static GLuint
241get_result_flags(const struct prog_instruction *inst)
242{
243   GLuint flags = 0;
244
245   if (inst->Saturate)
246      flags |= A0_DEST_SATURATE;
247   if (inst->DstReg.WriteMask & WRITEMASK_X)
248      flags |= A0_DEST_CHANNEL_X;
249   if (inst->DstReg.WriteMask & WRITEMASK_Y)
250      flags |= A0_DEST_CHANNEL_Y;
251   if (inst->DstReg.WriteMask & WRITEMASK_Z)
252      flags |= A0_DEST_CHANNEL_Z;
253   if (inst->DstReg.WriteMask & WRITEMASK_W)
254      flags |= A0_DEST_CHANNEL_W;
255
256   return flags;
257}
258
259static GLuint
260translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
261{
262   switch (bit) {
263   case TEXTURE_1D_INDEX:
264      return D0_SAMPLE_TYPE_2D;
265   case TEXTURE_2D_INDEX:
266      return D0_SAMPLE_TYPE_2D;
267   case TEXTURE_RECT_INDEX:
268      return D0_SAMPLE_TYPE_2D;
269   case TEXTURE_3D_INDEX:
270      return D0_SAMPLE_TYPE_VOLUME;
271   case TEXTURE_CUBE_INDEX:
272      return D0_SAMPLE_TYPE_CUBE;
273   default:
274      i915_program_error(p, "TexSrcBit: %d", bit);
275      return 0;
276   }
277}
278
/*
 * Emit texture instruction OP for `inst`: declare the sampler for the unit
 * found through SamplerUnits[inst->TexSrcUnit], translate SrcReg[0] as the
 * coordinate, and emit the texld with the instruction's destination and
 * write flags.  Expects `p` and `inst` to be in scope where it is expanded.
 */
#define EMIT_TEX(OP)                                                       \
do {                                                                       \
   GLuint sample_type = translate_tex_src_target(p, inst->TexSrcTarget);   \
   const struct gl_program *tex_prog = &p->FragProg;                       \
   GLuint hw_unit = tex_prog->SamplerUnits[inst->TexSrcUnit];              \
   GLuint sampler_reg = i915_emit_decl(p, REG_TYPE_S,                      \
                                       hw_unit, sample_type);              \
   GLuint coord_reg = src_vector(p, &inst->SrcReg[0], tex_prog);           \
                                                                           \
   /* Texel lookup */                                                      \
   i915_emit_texld(p, get_live_regs(p, inst),                              \
                   get_result_vector(p, inst),                             \
                   get_result_flags(inst),                                 \
                   sampler_reg,                                            \
                   coord_reg,                                              \
                   OP);                                                    \
} while (0)
296
/*
 * Emit arithmetic instruction OP for `inst`, translating the first N source
 * operands and passing 0 for the rest.  Expects `p`, `inst` and `program`
 * to be in scope where it is expanded.
 */
#define EMIT_ARITH(OP, N)                                                  \
do {                                                                       \
   i915_emit_arith(p,                                                      \
                   OP,                                                     \
                   get_result_vector(p, inst),                             \
                   get_result_flags(inst), 0,                              \
                   ((N) >= 1) ? src_vector(p, &inst->SrcReg[0], program) : 0, \
                   ((N) >= 2) ? src_vector(p, &inst->SrcReg[1], program) : 0, \
                   ((N) >= 3) ? src_vector(p, &inst->SrcReg[2], program) : 0); \
} while (0)

/* Fixed-arity shorthands. */
#define EMIT_1ARG_ARITH(OP) EMIT_ARITH(OP, 1)
#define EMIT_2ARG_ARITH(OP) EMIT_ARITH(OP, 2)
#define EMIT_3ARG_ARITH(OP) EMIT_ARITH(OP, 3)
311
312/*
313 * TODO: consider moving this into core
314 */
315static bool calc_live_regs( struct i915_fragment_program *p )
316{
317    const struct gl_program *program = &p->FragProg;
318    GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
319    uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
320    GLint i;
321
322    for (i = program->arb.NumInstructions - 1; i >= 0; i--) {
323        struct prog_instruction *inst = &program->arb.Instructions[i];
324        int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
325        int a;
326
327        /* Register is written to: unmark as live for this and preceeding ops */
328        if (inst->DstReg.File == PROGRAM_TEMPORARY) {
329	    if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
330	       return false;
331
332            live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
333            if (live_components[inst->DstReg.Index] == 0)
334                regsUsed &= ~(1 << inst->DstReg.Index);
335        }
336
337        for (a = 0; a < opArgs; a++) {
338            /* Register is read from: mark as live for this and preceeding ops */
339            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
340                unsigned c;
341
342		if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
343		   return false;
344
345                regsUsed |= 1 << inst->SrcReg[a].Index;
346
347                for (c = 0; c < 4; c++) {
348                    const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
349
350                    if (field <= SWIZZLE_W)
351                        live_components[inst->SrcReg[a].Index] |= (1U << field);
352                }
353            }
354        }
355
356        p->usedRegs[i] = regsUsed;
357    }
358
359    return true;
360}
361
362static GLuint get_live_regs( struct i915_fragment_program *p,
363                             const struct prog_instruction *inst )
364{
365    const struct gl_program *program = &p->FragProg;
366    GLuint nr = inst - program->arb.Instructions;
367
368    return p->usedRegs[nr];
369}
370
371
372/* Possible concerns:
373 *
374 * SIN, COS -- could use another taylor step?
375 * LIT      -- results seem a little different to sw mesa
376 * LOG      -- different to mesa on negative numbers, but this is conformant.
377 *
378 * Parse failures -- Mesa doesn't currently give a good indication
379 * internally whether a particular program string parsed or not.  This
380 * can lead to confusion -- hopefully we cope with it ok now.
381 *
382 */
383static void
384upload_program(struct i915_fragment_program *p)
385{
386   const struct gl_program *program = &p->FragProg;
387   const struct prog_instruction *inst = program->arb.Instructions;
388
389   if (INTEL_DEBUG & DEBUG_WM)
390      _mesa_print_program(program);
391
392   /* Is this a parse-failed program?  Ensure a valid program is
393    * loaded, as the flagging of an error isn't sufficient to stop
394    * this being uploaded to hardware.
395    */
396   if (inst[0].Opcode == OPCODE_END) {
397      GLuint tmp = i915_get_utemp(p);
398      i915_emit_arith(p,
399                      A0_MOV,
400                      UREG(REG_TYPE_OC, 0),
401                      A0_DEST_CHANNEL_ALL, 0,
402                      swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
403      return;
404   }
405
406   if (program->arb.NumInstructions > I915_MAX_INSN) {
407      i915_program_error(p, "Exceeded max instructions (%d out of %d)",
408                         program->arb.NumInstructions, I915_MAX_INSN);
409      return;
410   }
411
412   /* Not always needed:
413    */
414   if (!calc_live_regs(p)) {
415      i915_program_error(p, "Could not allocate registers");
416      return;
417   }
418
419   while (1) {
420      GLuint src0, src1, src2, flags;
421      GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
422
423      switch (inst->Opcode) {
424      case OPCODE_ABS:
425         src0 = src_vector(p, &inst->SrcReg[0], program);
426         i915_emit_arith(p,
427                         A0_MAX,
428                         get_result_vector(p, inst),
429                         get_result_flags(inst), 0,
430                         src0, negate(src0, 1, 1, 1, 1), 0);
431         break;
432
433      case OPCODE_ADD:
434         EMIT_2ARG_ARITH(A0_ADD);
435         break;
436
437      case OPCODE_CMP:
438         src0 = src_vector(p, &inst->SrcReg[0], program);
439         src1 = src_vector(p, &inst->SrcReg[1], program);
440         src2 = src_vector(p, &inst->SrcReg[2], program);
441         i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
442         break;
443
444      case OPCODE_COS:
445         src0 = src_vector(p, &inst->SrcReg[0], program);
446         tmp = i915_get_utemp(p);
447	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
448	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
449
450	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
451         i915_emit_arith(p,
452                         A0_MAD,
453                         tmp, A0_DEST_CHANNEL_X, 0,
454                         src0,
455			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
456			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
457
458         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
459
460	 i915_emit_arith(p,
461			 A0_MAD,
462			 tmp, A0_DEST_CHANNEL_X, 0,
463			 tmp,
464			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
465			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
466
467	 /* Compute COS with the same calculation used for SIN, but a
468	  * different source range has been mapped to [-1,1] this time.
469	  */
470
471	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
472	 i915_emit_arith(p,
473                         A0_MAX,
474			 tmp, A0_DEST_CHANNEL_Y, 0,
475			 swizzle(tmp, ZERO, X, ZERO, ZERO),
476			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
477			 0);
478
479	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
480	 i915_emit_arith(p,
481			 A0_MUL,
482			 tmp, A0_DEST_CHANNEL_Y, 0,
483			 swizzle(tmp, ZERO, X, ZERO, ZERO),
484			 tmp,
485			 0);
486
487	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
488         i915_emit_arith(p,
489                         A0_DP3,
490                         tmp, A0_DEST_CHANNEL_X, 0,
491			 tmp,
492                         swizzle(consts1, X, Y, ZERO, ZERO),
493			 0);
494
495	 /* tmp.x now contains a first approximation (y).  Now, weight it
496	  * against tmp.y**2 to get closer.
497	  */
498	 i915_emit_arith(p,
499                         A0_MAX,
500			 tmp, A0_DEST_CHANNEL_Y, 0,
501			 swizzle(tmp, ZERO, X, ZERO, ZERO),
502			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
503			 0);
504
505	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
506	 i915_emit_arith(p,
507			 A0_MAD,
508			 tmp, A0_DEST_CHANNEL_Y, 0,
509			 swizzle(tmp, ZERO, X, ZERO, ZERO),
510			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
511			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
512
513	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
514	 i915_emit_arith(p,
515			 A0_MAD,
516                         get_result_vector(p, inst),
517                         get_result_flags(inst), 0,
518			 swizzle(consts1, W, W, W, W),
519			 swizzle(tmp, Y, Y, Y, Y),
520			 swizzle(tmp, X, X, X, X));
521         break;
522
523      case OPCODE_DP2:
524         src0 = src_vector(p, &inst->SrcReg[0], program);
525         src1 = src_vector(p, &inst->SrcReg[1], program);
526	 i915_emit_arith(p,
527			 A0_DP3,
528                         get_result_vector(p, inst),
529                         get_result_flags(inst), 0,
530			 swizzle(src0, X, Y, ZERO, ZERO),
531			 swizzle(src1, X, Y, ZERO, ZERO),
532			 0);
533         break;
534
535      case OPCODE_DP3:
536         EMIT_2ARG_ARITH(A0_DP3);
537         break;
538
539      case OPCODE_DP4:
540         EMIT_2ARG_ARITH(A0_DP4);
541         break;
542
543      case OPCODE_DPH:
544         src0 = src_vector(p, &inst->SrcReg[0], program);
545         src1 = src_vector(p, &inst->SrcReg[1], program);
546
547         i915_emit_arith(p,
548                         A0_DP4,
549                         get_result_vector(p, inst),
550                         get_result_flags(inst), 0,
551                         swizzle(src0, X, Y, Z, ONE), src1, 0);
552         break;
553
554      case OPCODE_DST:
555         src0 = src_vector(p, &inst->SrcReg[0], program);
556         src1 = src_vector(p, &inst->SrcReg[1], program);
557
558         /* result[0] = 1    * 1;
559          * result[1] = a[1] * b[1];
560          * result[2] = a[2] * 1;
561          * result[3] = 1    * b[3];
562          */
563         i915_emit_arith(p,
564                         A0_MUL,
565                         get_result_vector(p, inst),
566                         get_result_flags(inst), 0,
567                         swizzle(src0, ONE, Y, Z, ONE),
568                         swizzle(src1, ONE, Y, ONE, W), 0);
569         break;
570
571      case OPCODE_EX2:
572         src0 = src_vector(p, &inst->SrcReg[0], program);
573
574         i915_emit_arith(p,
575                         A0_EXP,
576                         get_result_vector(p, inst),
577                         get_result_flags(inst), 0,
578                         swizzle(src0, X, X, X, X), 0, 0);
579         break;
580
581      case OPCODE_FLR:
582         EMIT_1ARG_ARITH(A0_FLR);
583         break;
584
585      case OPCODE_TRUNC:
586	 EMIT_1ARG_ARITH(A0_TRC);
587	 break;
588
589      case OPCODE_FRC:
590         EMIT_1ARG_ARITH(A0_FRC);
591         break;
592
593      case OPCODE_KIL:
594         src0 = src_vector(p, &inst->SrcReg[0], program);
595         tmp = i915_get_utemp(p);
596
597         i915_emit_texld(p, get_live_regs(p, inst),
598                         tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
599                         0, src0, T0_TEXKILL);
600         break;
601
602      case OPCODE_LG2:
603         src0 = src_vector(p, &inst->SrcReg[0], program);
604
605         i915_emit_arith(p,
606                         A0_LOG,
607                         get_result_vector(p, inst),
608                         get_result_flags(inst), 0,
609                         swizzle(src0, X, X, X, X), 0, 0);
610         break;
611
612      case OPCODE_LIT:
613         src0 = src_vector(p, &inst->SrcReg[0], program);
614         tmp = i915_get_utemp(p);
615
616         /* tmp = max( a.xyzw, a.00zw )
617          * XXX: Clamp tmp.w to -128..128
618          * tmp.y = log(tmp.y)
619          * tmp.y = tmp.w * tmp.y
620          * tmp.y = exp(tmp.y)
621          * result = cmp (a.11-x1, a.1x01, a.1xy1 )
622          */
623         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
624                         src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
625
626         i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
627                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
628
629         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
630                         swizzle(tmp, ZERO, Y, ZERO, ZERO),
631                         swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
632
633         i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
634                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
635
636         i915_emit_arith(p, A0_CMP,
637                         get_result_vector(p, inst),
638                         get_result_flags(inst), 0,
639                         negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
640                         swizzle(tmp, ONE, X, ZERO, ONE),
641                         swizzle(tmp, ONE, X, Y, ONE));
642
643         break;
644
645      case OPCODE_LRP:
646         src0 = src_vector(p, &inst->SrcReg[0], program);
647         src1 = src_vector(p, &inst->SrcReg[1], program);
648         src2 = src_vector(p, &inst->SrcReg[2], program);
649         flags = get_result_flags(inst);
650         tmp = i915_get_utemp(p);
651
652         /* b*a + c*(1-a)
653          *
654          * b*a + c - ca
655          *
656          * tmp = b*a + c,
657          * result = (-c)*a + tmp
658          */
659         i915_emit_arith(p, A0_MAD, tmp,
660                         flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
661
662         i915_emit_arith(p, A0_MAD,
663                         get_result_vector(p, inst),
664                         flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
665         break;
666
667      case OPCODE_MAD:
668         EMIT_3ARG_ARITH(A0_MAD);
669         break;
670
671      case OPCODE_MAX:
672         EMIT_2ARG_ARITH(A0_MAX);
673         break;
674
675      case OPCODE_MIN:
676         EMIT_2ARG_ARITH(A0_MIN);
677         break;
678
679      case OPCODE_MOV:
680         EMIT_1ARG_ARITH(A0_MOV);
681         break;
682
683      case OPCODE_MUL:
684         EMIT_2ARG_ARITH(A0_MUL);
685         break;
686
687      case OPCODE_POW:
688         src0 = src_vector(p, &inst->SrcReg[0], program);
689         src1 = src_vector(p, &inst->SrcReg[1], program);
690         tmp = i915_get_utemp(p);
691         flags = get_result_flags(inst);
692
693         /* XXX: masking on intermediate values, here and elsewhere.
694          */
695         i915_emit_arith(p,
696                         A0_LOG,
697                         tmp, A0_DEST_CHANNEL_X, 0,
698                         swizzle(src0, X, X, X, X), 0, 0);
699
700         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
701
702
703         i915_emit_arith(p,
704                         A0_EXP,
705                         get_result_vector(p, inst),
706                         flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
707
708         break;
709
710      case OPCODE_RCP:
711         src0 = src_vector(p, &inst->SrcReg[0], program);
712
713         i915_emit_arith(p,
714                         A0_RCP,
715                         get_result_vector(p, inst),
716                         get_result_flags(inst), 0,
717                         swizzle(src0, X, X, X, X), 0, 0);
718         break;
719
720      case OPCODE_RSQ:
721
722         src0 = src_vector(p, &inst->SrcReg[0], program);
723
724         i915_emit_arith(p,
725                         A0_RSQ,
726                         get_result_vector(p, inst),
727                         get_result_flags(inst), 0,
728                         swizzle(src0, X, X, X, X), 0, 0);
729         break;
730
731      case OPCODE_SCS:
732         src0 = src_vector(p, &inst->SrcReg[0], program);
733         tmp = i915_get_utemp(p);
734
735         /*
736          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
737          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
738          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
739          * scs.x = DP4 t1, sin_constants
740          * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
741          * scs.y = DP4 t1, cos_constants
742          */
743         i915_emit_arith(p,
744                         A0_MUL,
745                         tmp, A0_DEST_CHANNEL_XY, 0,
746                         swizzle(src0, X, X, ONE, ONE),
747                         swizzle(src0, X, ONE, ONE, ONE), 0);
748
749         i915_emit_arith(p,
750                         A0_MUL,
751                         tmp, A0_DEST_CHANNEL_ALL, 0,
752                         swizzle(tmp, X, Y, X, Y),
753                         swizzle(tmp, X, X, ONE, ONE), 0);
754
755         if (inst->DstReg.WriteMask & WRITEMASK_Y) {
756            GLuint tmp1;
757
758            if (inst->DstReg.WriteMask & WRITEMASK_X)
759               tmp1 = i915_get_utemp(p);
760            else
761               tmp1 = tmp;
762
763            i915_emit_arith(p,
764                            A0_MUL,
765                            tmp1, A0_DEST_CHANNEL_ALL, 0,
766                            swizzle(tmp, X, Y, Y, W),
767                            swizzle(tmp, X, Z, ONE, ONE), 0);
768
769            i915_emit_arith(p,
770                            A0_DP4,
771                            get_result_vector(p, inst),
772                            A0_DEST_CHANNEL_Y, 0,
773                            swizzle(tmp1, W, Z, Y, X),
774                            i915_emit_const4fv(p, sin_constants), 0);
775         }
776
777         if (inst->DstReg.WriteMask & WRITEMASK_X) {
778            i915_emit_arith(p,
779                            A0_MUL,
780                            tmp, A0_DEST_CHANNEL_XYZ, 0,
781                            swizzle(tmp, X, X, Z, ONE),
782                            swizzle(tmp, Z, ONE, ONE, ONE), 0);
783
784            i915_emit_arith(p,
785                            A0_DP4,
786                            get_result_vector(p, inst),
787                            A0_DEST_CHANNEL_X, 0,
788                            swizzle(tmp, ONE, Z, Y, X),
789                            i915_emit_const4fv(p, cos_constants), 0);
790         }
791         break;
792
793      case OPCODE_SIN:
794         src0 = src_vector(p, &inst->SrcReg[0], program);
795         tmp = i915_get_utemp(p);
796	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
797	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
798
799	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
800         i915_emit_arith(p,
801                         A0_MAD,
802                         tmp, A0_DEST_CHANNEL_X, 0,
803                         src0,
804			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
805			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
806
807         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
808
809	 i915_emit_arith(p,
810			 A0_MAD,
811			 tmp, A0_DEST_CHANNEL_X, 0,
812			 tmp,
813			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
814			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
815
816	 /* Compute sin using a quadratic and quartic.  It gives continuity
817	  * that repeating the Taylor series lacks every 2*pi, and has
818	  * reduced error.
819	  *
820	  * The idea was described at:
821	  * http://www.devmaster.net/forums/showthread.php?t=5784
822	  */
823
824	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
825	 i915_emit_arith(p,
826                         A0_MAX,
827			 tmp, A0_DEST_CHANNEL_Y, 0,
828			 swizzle(tmp, ZERO, X, ZERO, ZERO),
829			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
830			 0);
831
832	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
833	 i915_emit_arith(p,
834			 A0_MUL,
835			 tmp, A0_DEST_CHANNEL_Y, 0,
836			 swizzle(tmp, ZERO, X, ZERO, ZERO),
837			 tmp,
838			 0);
839
840	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
841         i915_emit_arith(p,
842                         A0_DP3,
843                         tmp, A0_DEST_CHANNEL_X, 0,
844			 tmp,
845                         swizzle(consts1, X, Y, ZERO, ZERO),
846			 0);
847
848	 /* tmp.x now contains a first approximation (y).  Now, weight it
849	  * against tmp.y**2 to get closer.
850	  */
851	 i915_emit_arith(p,
852                         A0_MAX,
853			 tmp, A0_DEST_CHANNEL_Y, 0,
854			 swizzle(tmp, ZERO, X, ZERO, ZERO),
855			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
856			 0);
857
858	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
859	 i915_emit_arith(p,
860			 A0_MAD,
861			 tmp, A0_DEST_CHANNEL_Y, 0,
862			 swizzle(tmp, ZERO, X, ZERO, ZERO),
863			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
864			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
865
866	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
867	 i915_emit_arith(p,
868			 A0_MAD,
869                         get_result_vector(p, inst),
870                         get_result_flags(inst), 0,
871			 swizzle(consts1, W, W, W, W),
872			 swizzle(tmp, Y, Y, Y, Y),
873			 swizzle(tmp, X, X, X, X));
874
875         break;
876
877      case OPCODE_SGE:
878	 EMIT_2ARG_ARITH(A0_SGE);
879	 break;
880
881      case OPCODE_SLT:
882         EMIT_2ARG_ARITH(A0_SLT);
883         break;
884
885      case OPCODE_SSG:
886	 dst = get_result_vector(p, inst);
887	 flags = get_result_flags(inst);
888         src0 = src_vector(p, &inst->SrcReg[0], program);
889	 tmp = i915_get_utemp(p);
890
891	 /* tmp = (src < 0.0) */
892	 i915_emit_arith(p,
893			 A0_SLT,
894			 tmp,
895			 flags, 0,
896			 src0,
897			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
898			 0);
899
900	 /* dst = (0.0 < src) */
901	 i915_emit_arith(p,
902			 A0_SLT,
903			 dst,
904			 flags, 0,
905			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
906			 src0,
907			 0);
908
909	 /* dst = (src > 0.0) - (src < 0.0) */
910	 i915_emit_arith(p,
911			 A0_ADD,
912			 dst,
913			 flags, 0,
914			 dst,
915			 negate(tmp, 1, 1, 1, 1),
916			 0);
917
918         break;
919
920      case OPCODE_SUB:
921         src0 = src_vector(p, &inst->SrcReg[0], program);
922         src1 = src_vector(p, &inst->SrcReg[1], program);
923
924         i915_emit_arith(p,
925                         A0_ADD,
926                         get_result_vector(p, inst),
927                         get_result_flags(inst), 0,
928                         src0, negate(src1, 1, 1, 1, 1), 0);
929         break;
930
931      case OPCODE_SWZ:
932         EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
933         break;
934
935      case OPCODE_TEX:
936         EMIT_TEX(T0_TEXLD);
937         break;
938
939      case OPCODE_TXB:
940         EMIT_TEX(T0_TEXLDB);
941         break;
942
943      case OPCODE_TXP:
944         EMIT_TEX(T0_TEXLDP);
945         break;
946
947      case OPCODE_XPD:
948         /* Cross product:
949          *      result.x = src0.y * src1.z - src0.z * src1.y;
950          *      result.y = src0.z * src1.x - src0.x * src1.z;
951          *      result.z = src0.x * src1.y - src0.y * src1.x;
952          *      result.w = undef;
953          */
954         src0 = src_vector(p, &inst->SrcReg[0], program);
955         src1 = src_vector(p, &inst->SrcReg[1], program);
956         tmp = i915_get_utemp(p);
957
958         i915_emit_arith(p,
959                         A0_MUL,
960                         tmp, A0_DEST_CHANNEL_ALL, 0,
961                         swizzle(src0, Z, X, Y, ONE),
962                         swizzle(src1, Y, Z, X, ONE), 0);
963
964         i915_emit_arith(p,
965                         A0_MAD,
966                         get_result_vector(p, inst),
967                         get_result_flags(inst), 0,
968                         swizzle(src0, Y, Z, X, ONE),
969                         swizzle(src1, Z, X, Y, ONE),
970                         negate(tmp, 1, 1, 1, 0));
971         break;
972
973      case OPCODE_END:
974         return;
975
976      case OPCODE_BGNLOOP:
977      case OPCODE_BGNSUB:
978      case OPCODE_BRK:
979      case OPCODE_CAL:
980      case OPCODE_CONT:
981      case OPCODE_DDX:
982      case OPCODE_DDY:
983      case OPCODE_ELSE:
984      case OPCODE_ENDIF:
985      case OPCODE_ENDLOOP:
986      case OPCODE_ENDSUB:
987      case OPCODE_IF:
988      case OPCODE_RET:
989	 p->error = 1;
990	 i915_program_error(p, "Unsupported opcode: %s",
991			    _mesa_opcode_string(inst->Opcode));
992	 return;
993
994      case OPCODE_EXP:
995      case OPCODE_LOG:
996	 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
997	  * prog_instruction.h, but apparently GLSL doesn't ever emit them.
998	  * Instead, it translates to EX2 or LG2.
999	  */
1000      case OPCODE_TXD:
1001      case OPCODE_TXL:
1002	 /* These opcodes are claimed by GLSL in prog_instruction.h, but
1003	  * only NV_vp/fp appears to emit them.
1004	  */
1005      default:
1006         i915_program_error(p, "bad opcode: %s",
1007			    _mesa_opcode_string(inst->Opcode));
1008         return;
1009      }
1010
1011      inst++;
1012      i915_release_utemps(p);
1013   }
1014}
1015
1016/* Rather than trying to intercept and jiggle depth writes during
1017 * emit, just move the value into its correct position at the end of
1018 * the program:
1019 */
1020static void
1021fixup_depth_write(struct i915_fragment_program *p)
1022{
1023   if (p->depth_written) {
1024      GLuint depth = UREG(REG_TYPE_OD, 0);
1025
1026      i915_emit_arith(p,
1027                      A0_MOV,
1028                      depth, A0_DEST_CHANNEL_W, 0,
1029                      swizzle(depth, X, Y, Z, Z), 0, 0);
1030   }
1031}
1032
1033static void
1034check_texcoord_mapping(struct i915_fragment_program *p)
1035{
1036   GLbitfield64 inputs = p->FragProg.info.inputs_read;
1037   unsigned unit = 0;
1038
1039   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1040      if (inputs & VARYING_BIT_TEX(i)) {
1041         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1042            unit++;
1043            break;
1044         }
1045         p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
1046      }
1047      if (inputs & VARYING_BIT_VAR(i)) {
1048         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1049            unit++;
1050            break;
1051         }
1052         p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
1053      }
1054   }
1055
1056   if (unit > p->ctx->Const.MaxTextureCoordUnits)
1057      i915_program_error(p, "Too many texcoord units");
1058}
1059
1060static void
1061check_wpos(struct i915_fragment_program *p)
1062{
1063   GLbitfield64 inputs = p->FragProg.info.inputs_read;
1064   GLint i;
1065   unsigned unit = 0;
1066
1067   p->wpos_tex = I915_WPOS_TEX_INVALID;
1068
1069   if ((inputs & VARYING_BIT_POS) == 0)
1070      return;
1071
1072   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1073      unit += !!(inputs & VARYING_BIT_TEX(i));
1074      unit += !!(inputs & VARYING_BIT_VAR(i));
1075   }
1076
1077   if (unit < p->ctx->Const.MaxTextureCoordUnits)
1078      p->wpos_tex = unit;
1079   else
1080      i915_program_error(p, "No free texcoord for wpos value");
1081}
1082
1083
1084static void
1085translate_program(struct i915_fragment_program *p)
1086{
1087   struct i915_context *i915 = I915_CONTEXT(p->ctx);
1088
1089   if (INTEL_DEBUG & DEBUG_WM) {
1090      printf("fp:\n");
1091      _mesa_print_program(&p->FragProg);
1092      printf("\n");
1093   }
1094
1095   i915_init_program(i915, p);
1096   check_texcoord_mapping(p);
1097   check_wpos(p);
1098   upload_program(p);
1099   fixup_depth_write(p);
1100   i915_fini_program(p);
1101
1102   p->translated = 1;
1103}
1104
1105
1106static void
1107track_params(struct i915_fragment_program *p)
1108{
1109   GLint i;
1110
1111   if (p->nr_params)
1112      _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters);
1113
1114   for (i = 0; i < p->nr_params; i++) {
1115      GLint reg = p->param[i].reg;
1116      COPY_4V(p->constant[reg], p->param[i].values);
1117   }
1118
1119   p->params_uptodate = 1;
1120   p->on_hardware = 0;          /* overkill */
1121}
1122
1123static struct gl_program *
1124i915NewProgram(struct gl_context * ctx, gl_shader_stage stage, GLuint id,
1125               bool is_arb_asm)
1126{
1127   switch (stage) {
1128   case MESA_SHADER_VERTEX: {
1129      struct gl_program *prog = rzalloc(NULL, struct gl_program);
1130      return _mesa_init_gl_program(prog, stage, id, is_arb_asm);
1131   }
1132
1133   case MESA_SHADER_FRAGMENT:{
1134         struct i915_fragment_program *prog =
1135            rzalloc(NULL, struct i915_fragment_program);
1136         if (prog) {
1137            i915_init_program(I915_CONTEXT(ctx), prog);
1138
1139            return _mesa_init_gl_program(&prog->FragProg, stage, id,
1140                                         is_arb_asm);
1141         }
1142         else
1143            return NULL;
1144      }
1145
1146   default:
1147      /* Just fallback:
1148       */
1149      return _mesa_new_program(ctx, stage, id, is_arb_asm);
1150   }
1151}
1152
1153static void
1154i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
1155{
1156   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1157      struct i915_context *i915 = I915_CONTEXT(ctx);
1158      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1159
1160      if (i915->current_program == p)
1161         i915->current_program = 0;
1162   }
1163
1164   _mesa_delete_program(ctx, prog);
1165}
1166
1167
1168static GLboolean
1169i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
1170{
1171   if (target == GL_FRAGMENT_PROGRAM_ARB) {
1172      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1173
1174      if (!p->translated)
1175         translate_program(p);
1176
1177      return !p->error;
1178   }
1179   else
1180      return true;
1181}
1182
1183static GLboolean
1184i915ProgramStringNotify(struct gl_context * ctx,
1185                        GLenum target, struct gl_program *prog)
1186{
1187   if (target == GL_FRAGMENT_PROGRAM_ARB) {
1188      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1189      p->translated = 0;
1190   }
1191
1192   (void) _tnl_program_string(ctx, target, prog);
1193
1194   /* XXX check if program is legal, within limits */
1195   return true;
1196}
1197
1198static void
1199i915SamplerUniformChange(struct gl_context *ctx,
1200                         GLenum target, struct gl_program *prog)
1201{
1202   i915ProgramStringNotify(ctx, target, prog);
1203}
1204
1205void
1206i915_update_program(struct gl_context *ctx)
1207{
1208   struct intel_context *intel = intel_context(ctx);
1209   struct i915_context *i915 = i915_context(&intel->ctx);
1210   struct i915_fragment_program *fp =
1211      (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1212
1213   if (i915->current_program != fp) {
1214      if (i915->current_program) {
1215         i915->current_program->on_hardware = 0;
1216         i915->current_program->params_uptodate = 0;
1217      }
1218
1219      i915->current_program = fp;
1220   }
1221
1222   if (!fp->translated)
1223      translate_program(fp);
1224
1225   FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
1226}
1227
/* Rebuild the vertex layout required by the currently bound fragment
 * program and make sure the program and its constants are uploaded.
 *
 * Steps, in order:
 *   - translate the program if it has not been translated yet;
 *   - emit one vertex attribute per input the program reads (position
 *     always; point size, colors, fog, texcoords and generic varyings on
 *     demand, plus a pad for the window position when wpos_tex is valid);
 *   - accumulate the matching LIS2 / LIS3 / LIS4 values;
 *   - if any of the three differs from the cached context state, flag a
 *     context upload and reinstall the TNL attribute list;
 *   - refresh tracked parameters and upload the program when stale.
 *
 * NOTE(review): EMIT_ATTR and EMIT_PAD are macros defined outside this
 * view.  `s4` and `offset` are never assigned directly below, yet both are
 * read later, so the macros evidently update them (and presumably
 * intel->vertex_attrs / vertex_attr_count) through the local names.
 * Confirm against the macro definitions before renaming any local.
 */
void
i915ValidateFragmentProgram(struct i915_context *i915)
{
   struct gl_context *ctx = &i915->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;

   struct i915_fragment_program *p =
      (struct i915_fragment_program *) ctx->FragmentProgram._Current;

   const GLbitfield64 inputsRead = p->FragProg.info.inputs_read;
   /* Start from the cached LIS4 with the vertex-format bits cleared. */
   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
   GLuint s2 = S2_TEXCOORD_NONE;
   GLuint s3 = 0;
   int i, offset = 0;

   /* Important: the position attribute is sourced from VB->NdcPtr.
    * NOTE(review): presumably so that the EMIT_4F_VIEWPORT format below
    * operates on NDC coordinates -- confirm.
    */
   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;

   if (!p->translated)
      translate_program(p);

   /* Reset the layout bookkeeping before re-emitting every attribute. */
   intel->vertex_attr_count = 0;
   intel->wpos_offset = 0;
   intel->coloroffset = 0;
   intel->specoffset = 0;

   /* Always emit W to get consistent perspective
    * correct interpolation of primary/secondary colors.
    */
   EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);

   /* Handle gl_PointSize builtin var here */
   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);

   if (inputsRead & VARYING_BIT_COL0) {
      /* Record where the color lands; offset / 4 presumably converts a
       * byte offset to dwords -- confirm against EMIT_ATTR.
       */
      intel->coloroffset = offset / 4;
      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
   }

   if (inputsRead & VARYING_BIT_COL1) {
       intel->specoffset = offset / 4;
       EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
   }

   if ((inputsRead & VARYING_BIT_FOGC)) {
      EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
   }

   /* Texcoords, generic varyings and (optionally) the wpos pad.  For each
    * input read, the slot comes from get_texcoord_mapping() and the
    * format bits for that slot are replaced in s2.
    */
   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
      if (inputsRead & VARYING_BIT_TEX(i)) {
         int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      if (inputsRead & VARYING_BIT_VAR(i)) {
         int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
         int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      if (i == p->wpos_tex) {
	 int wpos_size = 4 * sizeof(float);
         /* If WPOS is required, duplicate the XYZ position data in an
          * unused texture coordinate:
          */
         /* NOTE(review): wpos_size is a byte count (4 * sizeof(float)),
          * whereas the SZ_TO_HW() calls above are given the attribute's
          * `size` field.  Verify that SZ_TO_HW() still yields the intended
          * format for this value.
          */
         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
         s3 |= S3_TEXCOORD_PERSPECTIVE_DISABLE(i);

         /* Remember where the pad starts within the vertex. */
         intel->wpos_offset = offset;
         EMIT_PAD(wpos_size);
      }
   }

   /* Only touch hardware state and the TNL layout when the computed
    * values differ from what is cached in the context.
    */
   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
       s3 != i915->state.Ctx[I915_CTXREG_LIS3] ||
       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
      I915_STATECHANGE(i915, I915_UPLOAD_CTX);

      /* Must do this *after* statechange, so as not to affect
       * buffered vertices reliant on the old state:
       */
      intel->vertex_size = _tnl_install_attrs(&intel->ctx,
                                              intel->vertex_attrs,
                                              intel->vertex_attr_count,
                                              intel->ViewportMatrix.m, 0);

      /* The current and start offsets are expected to match here; the
       * start offset is then rounded up to a multiple of the new
       * vertex size.
       */
      assert(intel->prim.current_offset == intel->prim.start_offset);
      intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
      intel->prim.current_offset = intel->prim.start_offset;

      /* Divide by four; presumably bytes to dwords -- confirm. */
      intel->vertex_size >>= 2;

      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
      i915->state.Ctx[I915_CTXREG_LIS3] = s3;
      i915->state.Ctx[I915_CTXREG_LIS4] = s4;

      assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
   }

   /* Refresh tracked constants first; track_params() also clears
    * on_hardware, which triggers the upload just below.
    */
   if (!p->params_uptodate)
      track_params(p);

   if (!p->on_hardware)
      i915_upload_program(i915, p);

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("i915:\n");
      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
   }
}
1350
1351void
1352i915InitFragProgFuncs(struct dd_function_table *functions)
1353{
1354   functions->NewProgram = i915NewProgram;
1355   functions->DeleteProgram = i915DeleteProgram;
1356   functions->IsProgramNative = i915IsProgramNative;
1357   functions->ProgramStringNotify = i915ProgramStringNotify;
1358   functions->SamplerUniformChange = i915SamplerUniformChange;
1359}
1360