1/*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "tgsi/tgsi_transform.h"
28#include "tgsi/tgsi_scan.h"
29#include "tgsi/tgsi_dump.h"
30
31#include "util/u_debug.h"
32#include "util/u_math.h"
33
34#include "tgsi_lowering.h"
35
36struct tgsi_lowering_context {
37   struct tgsi_transform_context base;
38   const struct tgsi_lowering_config *config;
39   struct tgsi_shader_info *info;
40   unsigned two_side_colors;
41   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
43   int face_idx;
44   unsigned numtmp;
45   struct {
46      struct tgsi_full_src_register src;
47      struct tgsi_full_dst_register dst;
48   } tmp[2];
49#define A 0
50#define B 1
51   struct tgsi_full_src_register imm;
52   int emitted_decls;
53   unsigned saturate;
54};
55
/* Recover the lowering context from a pointer to its embedded (first
 * member) transform context. */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
61
62/*
63 * Utility helpers:
64 */
65
66static void
67reg_dst(struct tgsi_full_dst_register *dst,
68	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69{
70   *dst = *orig_dst;
71   dst->Register.WriteMask &= wrmask;
72   assert(dst->Register.WriteMask);
73}
74
75static inline void
76get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77{
78   swiz[0] = src->SwizzleX;
79   swiz[1] = src->SwizzleY;
80   swiz[2] = src->SwizzleZ;
81   swiz[3] = src->SwizzleW;
82}
83
84static void
85reg_src(struct tgsi_full_src_register *src,
86	const struct tgsi_full_src_register *orig_src,
87	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88{
89   unsigned swiz[4];
90   get_swiz(swiz, &orig_src->Register);
91   *src = *orig_src;
92   src->Register.SwizzleX = swiz[sx];
93   src->Register.SwizzleY = swiz[sy];
94   src->Register.SwizzleZ = swiz[sz];
95   src->Register.SwizzleW = swiz[sw];
96}
97
/* '_' may be used as a SWIZ() argument for a component whose value is
 * never read; it simply maps to X. */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */

/* Expands to four comma-separated TGSI_SWIZZLE_* constants. */
#define SWIZ(c0, c1, c2, c3)                     \
   TGSI_SWIZZLE_ ## c0, TGSI_SWIZZLE_ ## c1,     \
   TGSI_SWIZZLE_ ## c2, TGSI_SWIZZLE_ ## c3
101
/*
 * Example of the aliasing fixup pattern that aliases() is used for, shown
 * for a COS/SIN style expansion:
 *
 * if (dst.x aliases src.x) {
 *   MOV tmpA.x, src.x
 *   src = tmpA
 * }
 * COS dst.x, src.x
 * SIN dst.y, src.x
 * MOV dst.zw, imm{0.0, 1.0}
 */
111static bool
112aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113	const struct tgsi_full_src_register *src, unsigned src_mask)
114{
115   if ((dst->Register.File == src->Register.File) &&
116       (dst->Register.Index == src->Register.Index)) {
117      unsigned i, actual_mask = 0;
118      unsigned swiz[4];
119      get_swiz(swiz, &src->Register);
120      for (i = 0; i < 4; i++)
121         if (src_mask & (1 << i))
122            actual_mask |= (1 << swiz[i]);
123      if (actual_mask & dst_mask)
124         return true;
125   }
126   return false;
127}
128
129static void
130create_mov(struct tgsi_transform_context *tctx,
131           const struct tgsi_full_dst_register *dst,
132           const struct tgsi_full_src_register *src,
133           unsigned mask, unsigned saturate)
134{
135   struct tgsi_full_instruction new_inst;
136
137   new_inst = tgsi_default_full_instruction();
138   new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139   new_inst.Instruction.Saturate = saturate;
140   new_inst.Instruction.NumDstRegs = 1;
141   reg_dst(&new_inst.Dst[0], dst, mask);
142   new_inst.Instruction.NumSrcRegs = 1;
143   reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144   tctx->emit_instruction(tctx, &new_inst);
145}
146
/* Token accounting used to size the output of a lowering.  The estimate is
 * deliberately pessimistic: a removed instruction is assumed to be as small
 * as possible (no ADDR[] or anything else that adds tokens per operand),
 * while an inserted instruction is assumed to use the larger operand form.
 *
 * OINST(nargs) - tokens freed by a removed (old) instruction:
 *    1 for the instruction, 1 for dst, 1 per source
 *
 * NINST(nargs) - tokens needed by an inserted (new) instruction:
 *    1 for the instruction, 2 for dst, 2 per source
 */

#define OINST(nargs)  (2 + (nargs))
#define NINST(nargs)  (3 + 2 * (nargs))
165
166/*
167 * Lowering Translators:
168 */
169
170/* DST - Distance Vector
171 *   dst.x = 1.0
172 *   dst.y = src0.y \times src1.y
173 *   dst.z = src0.z
174 *   dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ;       if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 *   MOV tmpA.yz, src0.yz
181 *   src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 *   MOV tmpB.yw, src1.yw
185 *   src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193		NINST(1) + NINST(1) - OINST(2))
194#define DST_TMP  2
195static void
196transform_dst(struct tgsi_transform_context *tctx,
197              struct tgsi_full_instruction *inst)
198{
199   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
201   struct tgsi_full_src_register *src0 = &inst->Src[0];
202   struct tgsi_full_src_register *src1 = &inst->Src[1];
203   struct tgsi_full_instruction new_inst;
204
205   if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206      create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207      src0 = &ctx->tmp[A].src;
208   }
209
210   if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211      create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212      src1 = &ctx->tmp[B].src;
213   }
214
215   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216      /* MUL dst.y, src0.y, src1.y */
217      new_inst = tgsi_default_full_instruction();
218      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219      new_inst.Instruction.NumDstRegs = 1;
220      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221      new_inst.Instruction.NumSrcRegs = 2;
222      reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223      reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224      tctx->emit_instruction(tctx, &new_inst);
225   }
226
227   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228      /* MOV dst.z, src0.z */
229      new_inst = tgsi_default_full_instruction();
230      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231      new_inst.Instruction.NumDstRegs = 1;
232      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233      new_inst.Instruction.NumSrcRegs = 1;
234      reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235      tctx->emit_instruction(tctx, &new_inst);
236   }
237
238   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239      /* MOV dst.w, src1.w */
240      new_inst = tgsi_default_full_instruction();
241      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242      new_inst.Instruction.NumDstRegs = 1;
243      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244      new_inst.Instruction.NumSrcRegs = 1;
245      reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246      tctx->emit_instruction(tctx, &new_inst);
247   }
248
249   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250      /* MOV dst.x, imm{1.0} */
251      new_inst = tgsi_default_full_instruction();
252      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253      new_inst.Instruction.NumDstRegs = 1;
254      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255      new_inst.Instruction.NumSrcRegs = 1;
256      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257      tctx->emit_instruction(tctx, &new_inst);
258   }
259}
260
261/* LRP - Linear Interpolate
262 *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
263 *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
264 *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
265 *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
266 *
267 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
268 * can then become: src0 \times src1 - (src0 \times src2 - src2)
269 *
270 * ; needs: 1 tmp
271 * MAD tmpA, src0, src2, -src2
272 * MAD dst, src0, src1, -tmpA
273 */
274#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
275#define LRP_TMP  1
276static void
277transform_lrp(struct tgsi_transform_context *tctx,
278              struct tgsi_full_instruction *inst)
279{
280   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
281   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
282   struct tgsi_full_src_register *src0 = &inst->Src[0];
283   struct tgsi_full_src_register *src1 = &inst->Src[1];
284   struct tgsi_full_src_register *src2 = &inst->Src[2];
285   struct tgsi_full_instruction new_inst;
286
287   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
288      /* MAD tmpA, src0, src2, -src2 */
289      new_inst = tgsi_default_full_instruction();
290      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
291      new_inst.Instruction.NumDstRegs = 1;
292      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
293      new_inst.Instruction.NumSrcRegs = 3;
294      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
295      reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
296      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
297      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
298      tctx->emit_instruction(tctx, &new_inst);
299
300      /* MAD dst, src0, src1, -tmpA */
301      new_inst = tgsi_default_full_instruction();
302      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
303      new_inst.Instruction.NumDstRegs = 1;
304      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
305      new_inst.Instruction.NumSrcRegs = 3;
306      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
307      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
308      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
309      new_inst.Src[2].Register.Negate = true;
310      tctx->emit_instruction(tctx, &new_inst);
311   }
312}
313
314/* FRC - Fraction
315 *  dst.x = src.x - \lfloor src.x\rfloor
316 *  dst.y = src.y - \lfloor src.y\rfloor
317 *  dst.z = src.z - \lfloor src.z\rfloor
318 *  dst.w = src.w - \lfloor src.w\rfloor
319 *
320 * ; needs: 1 tmp
321 * FLR tmpA, src
322 * SUB dst, src, tmpA
323 */
324#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
325#define FRC_TMP  1
326static void
327transform_frc(struct tgsi_transform_context *tctx,
328              struct tgsi_full_instruction *inst)
329{
330   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
331   struct tgsi_full_dst_register *dst = &inst->Dst[0];
332   struct tgsi_full_src_register *src = &inst->Src[0];
333   struct tgsi_full_instruction new_inst;
334
335   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
336      /* FLR tmpA, src */
337      new_inst = tgsi_default_full_instruction();
338      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
339      new_inst.Instruction.NumDstRegs = 1;
340      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
341      new_inst.Instruction.NumSrcRegs = 1;
342      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
343      tctx->emit_instruction(tctx, &new_inst);
344
345      /* SUB dst, src, tmpA */
346      new_inst = tgsi_default_full_instruction();
347      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
348      new_inst.Instruction.NumDstRegs = 1;
349      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
350      new_inst.Instruction.NumSrcRegs = 2;
351      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
352      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
353      new_inst.Src[1].Register.Negate = 1;
354      tctx->emit_instruction(tctx, &new_inst);
355   }
356}
357
358/* POW - Power
359 *  dst.x = src0.x^{src1.x}
360 *  dst.y = src0.x^{src1.x}
361 *  dst.z = src0.x^{src1.x}
362 *  dst.w = src0.x^{src1.x}
363 *
364 * ; needs: 1 tmp
365 * LG2 tmpA.x, src0.x
366 * MUL tmpA.x, src1.x, tmpA.x
367 * EX2 dst, tmpA.x
368 */
369#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
370#define POW_TMP  1
371static void
372transform_pow(struct tgsi_transform_context *tctx,
373              struct tgsi_full_instruction *inst)
374{
375   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
376   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
377   struct tgsi_full_src_register *src0 = &inst->Src[0];
378   struct tgsi_full_src_register *src1 = &inst->Src[1];
379   struct tgsi_full_instruction new_inst;
380
381   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
382      /* LG2 tmpA.x, src0.x */
383      new_inst = tgsi_default_full_instruction();
384      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
385      new_inst.Instruction.NumDstRegs = 1;
386      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
387      new_inst.Instruction.NumSrcRegs = 1;
388      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
389      tctx->emit_instruction(tctx, &new_inst);
390
391      /* MUL tmpA.x, src1.x, tmpA.x */
392      new_inst = tgsi_default_full_instruction();
393      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
394      new_inst.Instruction.NumDstRegs = 1;
395      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
396      new_inst.Instruction.NumSrcRegs = 2;
397      reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
398      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
399      tctx->emit_instruction(tctx, &new_inst);
400
401      /* EX2 dst, tmpA.x */
402      new_inst = tgsi_default_full_instruction();
403      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
404      new_inst.Instruction.NumDstRegs = 1;
405      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
406      new_inst.Instruction.NumSrcRegs = 1;
407      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
408      tctx->emit_instruction(tctx, &new_inst);
409   }
410}
411
412/* LIT - Light Coefficients
413 *  dst.x = 1.0
414 *  dst.y = max(src.x, 0.0)
415 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
416 *  dst.w = 1.0
417 *
418 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
419 * MAX tmpA.xy, src.xy, imm{0.0}
420 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
421 * LG2 tmpA.y, tmpA.y
422 * MUL tmpA.y, tmpA.z, tmpA.y
423 * EX2 tmpA.y, tmpA.y
424 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
425 * MOV dst.yz, tmpA.xy
426 * MOV dst.xw, imm{1.0}
427 */
428#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
429		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
430#define LIT_TMP  1
431static void
432transform_lit(struct tgsi_transform_context *tctx,
433              struct tgsi_full_instruction *inst)
434{
435   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
436   struct tgsi_full_dst_register *dst = &inst->Dst[0];
437   struct tgsi_full_src_register *src = &inst->Src[0];
438   struct tgsi_full_instruction new_inst;
439
440   if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
441      /* MAX tmpA.xy, src.xy, imm{0.0} */
442      new_inst = tgsi_default_full_instruction();
443      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
444      new_inst.Instruction.NumDstRegs = 1;
445      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
446      new_inst.Instruction.NumSrcRegs = 2;
447      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
448      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
449      tctx->emit_instruction(tctx, &new_inst);
450
451      /* MIN tmpA.z, src.w, imm{128.0} */
452      new_inst = tgsi_default_full_instruction();
453      new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
454      new_inst.Instruction.NumDstRegs = 1;
455      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
456      new_inst.Instruction.NumSrcRegs = 2;
457      reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
458      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
459      tctx->emit_instruction(tctx, &new_inst);
460
461      /* MAX tmpA.z, tmpA.z, -imm{128.0} */
462      new_inst = tgsi_default_full_instruction();
463      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
464      new_inst.Instruction.NumDstRegs = 1;
465      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
466      new_inst.Instruction.NumSrcRegs = 2;
467      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
468      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
469      new_inst.Src[1].Register.Negate = true;
470      tctx->emit_instruction(tctx, &new_inst);
471
472      /* LG2 tmpA.y, tmpA.y */
473      new_inst = tgsi_default_full_instruction();
474      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
475      new_inst.Instruction.NumDstRegs = 1;
476      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
477      new_inst.Instruction.NumSrcRegs = 1;
478      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
479      tctx->emit_instruction(tctx, &new_inst);
480
481      /* MUL tmpA.y, tmpA.z, tmpA.y */
482      new_inst = tgsi_default_full_instruction();
483      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
484      new_inst.Instruction.NumDstRegs = 1;
485      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
486      new_inst.Instruction.NumSrcRegs = 2;
487      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
488      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
489      tctx->emit_instruction(tctx, &new_inst);
490
491      /* EX2 tmpA.y, tmpA.y */
492      new_inst = tgsi_default_full_instruction();
493      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
494      new_inst.Instruction.NumDstRegs = 1;
495      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
496      new_inst.Instruction.NumSrcRegs = 1;
497      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
498      tctx->emit_instruction(tctx, &new_inst);
499
500      /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
501      new_inst = tgsi_default_full_instruction();
502      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
503      new_inst.Instruction.NumDstRegs = 1;
504      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
505      new_inst.Instruction.NumSrcRegs = 3;
506      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
507      new_inst.Src[0].Register.Negate = true;
508      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
509      reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
510      tctx->emit_instruction(tctx, &new_inst);
511
512      /* MOV dst.yz, tmpA.xy */
513      new_inst = tgsi_default_full_instruction();
514      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
515      new_inst.Instruction.NumDstRegs = 1;
516      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
517      new_inst.Instruction.NumSrcRegs = 1;
518      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
519      tctx->emit_instruction(tctx, &new_inst);
520   }
521
522   if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
523      /* MOV dst.xw, imm{1.0} */
524      new_inst = tgsi_default_full_instruction();
525      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
526      new_inst.Instruction.NumDstRegs = 1;
527      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
528      new_inst.Instruction.NumSrcRegs = 1;
529      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
530      tctx->emit_instruction(tctx, &new_inst);
531   }
532}
533
534/* EXP - Approximate Exponential Base 2
535 *  dst.x = 2^{\lfloor src.x\rfloor}
536 *  dst.y = src.x - \lfloor src.x\rfloor
537 *  dst.z = 2^{src.x}
538 *  dst.w = 1.0
539 *
540 * ; needs: 1 tmp, imm{1.0}
541 * if (lowering FLR) {
542 *   FRC tmpA.x, src.x
543 *   SUB tmpA.x, src.x, tmpA.x
544 * } else {
545 *   FLR tmpA.x, src.x
546 * }
547 * EX2 tmpA.y, src.x
548 * SUB dst.y, src.x, tmpA.x
549 * EX2 dst.x, tmpA.x
550 * MOV dst.z, tmpA.y
551 * MOV dst.w, imm{1.0}
552 */
553#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
554		NINST(1)+ NINST(1) - OINST(1))
555#define EXP_TMP  1
556static void
557transform_exp(struct tgsi_transform_context *tctx,
558              struct tgsi_full_instruction *inst)
559{
560   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
561   struct tgsi_full_dst_register *dst = &inst->Dst[0];
562   struct tgsi_full_src_register *src = &inst->Src[0];
563   struct tgsi_full_instruction new_inst;
564
565   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
566      if (ctx->config->lower_FLR) {
567         /* FRC tmpA.x, src.x */
568         new_inst = tgsi_default_full_instruction();
569         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
570         new_inst.Instruction.NumDstRegs = 1;
571         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
572         new_inst.Instruction.NumSrcRegs = 1;
573         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
574         tctx->emit_instruction(tctx, &new_inst);
575
576         /* SUB tmpA.x, src.x, tmpA.x */
577         new_inst = tgsi_default_full_instruction();
578         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
579         new_inst.Instruction.NumDstRegs = 1;
580         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
581         new_inst.Instruction.NumSrcRegs = 2;
582         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
583         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
584         new_inst.Src[1].Register.Negate = 1;
585         tctx->emit_instruction(tctx, &new_inst);
586     } else {
587         /* FLR tmpA.x, src.x */
588         new_inst = tgsi_default_full_instruction();
589         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
590         new_inst.Instruction.NumDstRegs = 1;
591         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
592         new_inst.Instruction.NumSrcRegs = 1;
593         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
594         tctx->emit_instruction(tctx, &new_inst);
595      }
596   }
597
598   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
599      /* EX2 tmpA.y, src.x */
600      new_inst = tgsi_default_full_instruction();
601      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
602      new_inst.Instruction.NumDstRegs = 1;
603      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
604      new_inst.Instruction.NumSrcRegs = 1;
605      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
606      tctx->emit_instruction(tctx, &new_inst);
607   }
608
609   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
610      /* SUB dst.y, src.x, tmpA.x */
611      new_inst = tgsi_default_full_instruction();
612      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
613      new_inst.Instruction.NumDstRegs = 1;
614      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
615      new_inst.Instruction.NumSrcRegs = 2;
616      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
617      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
618      new_inst.Src[1].Register.Negate = 1;
619      tctx->emit_instruction(tctx, &new_inst);
620   }
621
622   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
623      /* EX2 dst.x, tmpA.x */
624      new_inst = tgsi_default_full_instruction();
625      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
626      new_inst.Instruction.NumDstRegs = 1;
627      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
628      new_inst.Instruction.NumSrcRegs = 1;
629      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
630      tctx->emit_instruction(tctx, &new_inst);
631   }
632
633   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
634      /* MOV dst.z, tmpA.y */
635      new_inst = tgsi_default_full_instruction();
636      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
637      new_inst.Instruction.NumDstRegs = 1;
638      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
639      new_inst.Instruction.NumSrcRegs = 1;
640      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
641      tctx->emit_instruction(tctx, &new_inst);
642   }
643
644   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
645      /* MOV dst.w, imm{1.0} */
646      new_inst = tgsi_default_full_instruction();
647      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
648      new_inst.Instruction.NumDstRegs = 1;
649      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
650      new_inst.Instruction.NumSrcRegs = 1;
651      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
652      tctx->emit_instruction(tctx, &new_inst);
653   }
654}
655
656/* LOG - Approximate Logarithm Base 2
657 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
658 *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
659 *  dst.z = \log_2{|src.x|}
660 *  dst.w = 1.0
661 *
662 * ; needs: 1 tmp, imm{1.0}
663 * LG2 tmpA.x, |src.x|
664 * if (lowering FLR) {
665 *   FRC tmpA.y, tmpA.x
666 *   SUB tmpA.y, tmpA.x, tmpA.y
667 * } else {
668 *   FLR tmpA.y, tmpA.x
669 * }
670 * EX2 tmpA.z, tmpA.y
671 * RCP tmpA.z, tmpA.z
672 * MUL dst.y, |src.x|, tmpA.z
673 * MOV dst.xz, tmpA.yx
674 * MOV dst.w, imm{1.0}
675 */
676#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
677		NINST(2) + NINST(1) + NINST(1) - OINST(1))
678#define LOG_TMP  1
679static void
680transform_log(struct tgsi_transform_context *tctx,
681              struct tgsi_full_instruction *inst)
682{
683   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
684   struct tgsi_full_dst_register *dst = &inst->Dst[0];
685   struct tgsi_full_src_register *src = &inst->Src[0];
686   struct tgsi_full_instruction new_inst;
687
688   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
689      /* LG2 tmpA.x, |src.x| */
690      new_inst = tgsi_default_full_instruction();
691      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
692      new_inst.Instruction.NumDstRegs = 1;
693      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
694      new_inst.Instruction.NumSrcRegs = 1;
695      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
696      new_inst.Src[0].Register.Absolute = true;
697      tctx->emit_instruction(tctx, &new_inst);
698   }
699
700   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
701      if (ctx->config->lower_FLR) {
702         /* FRC tmpA.y, tmpA.x */
703         new_inst = tgsi_default_full_instruction();
704         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
705         new_inst.Instruction.NumDstRegs = 1;
706         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
707         new_inst.Instruction.NumSrcRegs = 1;
708         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
709         tctx->emit_instruction(tctx, &new_inst);
710
711         /* SUB tmpA.y, tmpA.x, tmpA.y */
712         new_inst = tgsi_default_full_instruction();
713         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
714         new_inst.Instruction.NumDstRegs = 1;
715         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
716         new_inst.Instruction.NumSrcRegs = 2;
717         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
718         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
719         new_inst.Src[1].Register.Negate = 1;
720         tctx->emit_instruction(tctx, &new_inst);
721      } else {
722         /* FLR tmpA.y, tmpA.x */
723         new_inst = tgsi_default_full_instruction();
724         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
725         new_inst.Instruction.NumDstRegs = 1;
726         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
727         new_inst.Instruction.NumSrcRegs = 1;
728         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
729         tctx->emit_instruction(tctx, &new_inst);
730      }
731   }
732
733   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
734      /* EX2 tmpA.z, tmpA.y */
735      new_inst = tgsi_default_full_instruction();
736      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
737      new_inst.Instruction.NumDstRegs = 1;
738      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
739      new_inst.Instruction.NumSrcRegs = 1;
740      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
741      tctx->emit_instruction(tctx, &new_inst);
742
743      /* RCP tmpA.z, tmpA.z */
744      new_inst = tgsi_default_full_instruction();
745      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
746      new_inst.Instruction.NumDstRegs = 1;
747      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
748      new_inst.Instruction.NumSrcRegs = 1;
749      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
750      tctx->emit_instruction(tctx, &new_inst);
751
752      /* MUL dst.y, |src.x|, tmpA.z */
753      new_inst = tgsi_default_full_instruction();
754      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
755      new_inst.Instruction.NumDstRegs = 1;
756      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
757      new_inst.Instruction.NumSrcRegs = 2;
758      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
759      new_inst.Src[0].Register.Absolute = true;
760      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
761      tctx->emit_instruction(tctx, &new_inst);
762   }
763
764   if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
765      /* MOV dst.xz, tmpA.yx */
766      new_inst = tgsi_default_full_instruction();
767      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
768      new_inst.Instruction.NumDstRegs = 1;
769      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
770      new_inst.Instruction.NumSrcRegs = 1;
771      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
772      tctx->emit_instruction(tctx, &new_inst);
773   }
774
775   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
776      /* MOV dst.w, imm{1.0} */
777      new_inst = tgsi_default_full_instruction();
778      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
779      new_inst.Instruction.NumDstRegs = 1;
780      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
781      new_inst.Instruction.NumSrcRegs = 1;
782      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
783      tctx->emit_instruction(tctx, &new_inst);
784   }
785}
786
787/* DP4 - 4-component Dot Product
788 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
789 *
790 * DP3 - 3-component Dot Product
791 *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
792 *
793 * DP2 - 2-component Dot Product
794 *   dst = src0.x \times src1.x + src0.y \times src1.y
795 *
 * NOTE: these are translated into a sequence of MUL/MAD(/ADD) scalar
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DP2 using vector instructions.
801 *
802 * ; needs: 1 tmp
803 * MUL tmpA.x, src0.x, src1.x
804 * MAD tmpA.x, src0.y, src1.y, tmpA.x
805 * if (DP3 || DP4) {
806 *   MAD tmpA.x, src0.z, src1.z, tmpA.x
807 *   if (DP4) {
808 *     MAD tmpA.x, src0.w, src1.w, tmpA.x
809 *   }
810 * }
811 * ; fixup last instruction to replicate into dst
812 */
813#define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
814#define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
815#define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
816#define DOTP_TMP  1
817static void
818transform_dotp(struct tgsi_transform_context *tctx,
819               struct tgsi_full_instruction *inst)
820{
821   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
822   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
823   struct tgsi_full_src_register *src0 = &inst->Src[0];
824   struct tgsi_full_src_register *src1 = &inst->Src[1];
825   struct tgsi_full_instruction new_inst;
826   enum tgsi_opcode opcode = inst->Instruction.Opcode;
827
828   /* NOTE: any potential last instruction must replicate src on all
829    * components (since it could be re-written to write to final dst)
830    */
831
832   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
833      /* MUL tmpA.x, src0.x, src1.x */
834      new_inst = tgsi_default_full_instruction();
835      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
836      new_inst.Instruction.NumDstRegs = 1;
837      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
838      new_inst.Instruction.NumSrcRegs = 2;
839      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
840      reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
841      tctx->emit_instruction(tctx, &new_inst);
842
843      /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
844      new_inst = tgsi_default_full_instruction();
845      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
846      new_inst.Instruction.NumDstRegs = 1;
847      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
848      new_inst.Instruction.NumSrcRegs = 3;
849      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
850      reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
851      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
852
853      if ((opcode == TGSI_OPCODE_DP3) ||
854          (opcode == TGSI_OPCODE_DP4)) {
855         tctx->emit_instruction(tctx, &new_inst);
856
857         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
858         new_inst = tgsi_default_full_instruction();
859         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
860         new_inst.Instruction.NumDstRegs = 1;
861         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
862         new_inst.Instruction.NumSrcRegs = 3;
863         reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
864         reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
865         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
866
867         if (opcode == TGSI_OPCODE_DP4) {
868            tctx->emit_instruction(tctx, &new_inst);
869
870            /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
871            new_inst = tgsi_default_full_instruction();
872            new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
873            new_inst.Instruction.NumDstRegs = 1;
874            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
875            new_inst.Instruction.NumSrcRegs = 3;
876            reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
877            reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
878            reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
879         }
880      }
881
882      /* fixup last instruction to write to dst: */
883      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
884
885      tctx->emit_instruction(tctx, &new_inst);
886   }
887}
888
889/* FLR - floor, CEIL - ceil
890 * ; needs: 1 tmp
891 * if (CEIL) {
892 *   FRC tmpA, -src
893 *   ADD dst, src, tmpA
894 * } else {
895 *   FRC tmpA, src
896 *   SUB dst, src, tmpA
897 * }
898 */
899#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
900#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
901#define FLR_TMP 1
902#define CEIL_TMP 1
903static void
904transform_flr_ceil(struct tgsi_transform_context *tctx,
905                   struct tgsi_full_instruction *inst)
906{
907   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
908   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
909   struct tgsi_full_src_register *src0 = &inst->Src[0];
910   struct tgsi_full_instruction new_inst;
911   enum tgsi_opcode opcode = inst->Instruction.Opcode;
912
913   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
914      /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
915      new_inst = tgsi_default_full_instruction();
916      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
917      new_inst.Instruction.NumDstRegs = 1;
918      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
919      new_inst.Instruction.NumSrcRegs = 1;
920      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
921
922      if (opcode == TGSI_OPCODE_CEIL)
923         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
924      tctx->emit_instruction(tctx, &new_inst);
925
926      /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
927      new_inst = tgsi_default_full_instruction();
928      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
929      new_inst.Instruction.NumDstRegs = 1;
930      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
931      new_inst.Instruction.NumSrcRegs = 2;
932      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
933      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
934      if (opcode == TGSI_OPCODE_FLR)
935         new_inst.Src[1].Register.Negate = 1;
936      tctx->emit_instruction(tctx, &new_inst);
937   }
938}
939
940/* TRUNC - truncate off fractional part
941 *  dst.x = trunc(src.x)
942 *  dst.y = trunc(src.y)
943 *  dst.z = trunc(src.z)
944 *  dst.w = trunc(src.w)
945 *
946 * ; needs: 1 tmp
947 * if (lower FLR) {
948 *   FRC tmpA, |src|
949 *   SUB tmpA, |src|, tmpA
950 * } else {
951 *   FLR tmpA, |src|
952 * }
953 * CMP dst, src, -tmpA, tmpA
954 */
955#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
956#define TRUNC_TMP 1
957static void
958transform_trunc(struct tgsi_transform_context *tctx,
959                struct tgsi_full_instruction *inst)
960{
961   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
962   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
963   struct tgsi_full_src_register *src0 = &inst->Src[0];
964   struct tgsi_full_instruction new_inst;
965
966   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
967      if (ctx->config->lower_FLR) {
968         new_inst = tgsi_default_full_instruction();
969         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
970         new_inst.Instruction.NumDstRegs = 1;
971         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
972         new_inst.Instruction.NumSrcRegs = 1;
973         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
974         new_inst.Src[0].Register.Absolute = true;
975         new_inst.Src[0].Register.Negate = false;
976         tctx->emit_instruction(tctx, &new_inst);
977
978         new_inst = tgsi_default_full_instruction();
979         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
980         new_inst.Instruction.NumDstRegs = 1;
981         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
982         new_inst.Instruction.NumSrcRegs = 2;
983         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
984         new_inst.Src[0].Register.Absolute = true;
985         new_inst.Src[0].Register.Negate = false;
986         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
987         new_inst.Src[1].Register.Negate = 1;
988         tctx->emit_instruction(tctx, &new_inst);
989      } else {
990         new_inst = tgsi_default_full_instruction();
991         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
992         new_inst.Instruction.NumDstRegs = 1;
993         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
994         new_inst.Instruction.NumSrcRegs = 1;
995         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
996         new_inst.Src[0].Register.Absolute = true;
997         new_inst.Src[0].Register.Negate = false;
998         tctx->emit_instruction(tctx, &new_inst);
999      }
1000
1001      new_inst = tgsi_default_full_instruction();
1002      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1003      new_inst.Instruction.NumDstRegs = 1;
1004      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005      new_inst.Instruction.NumSrcRegs = 3;
1006      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1007      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1008      new_inst.Src[1].Register.Negate = true;
1009      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1010      tctx->emit_instruction(tctx, &new_inst);
1011   }
1012}
1013
1014/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1015 * in the case of TXP, the clamping must happen *after* projection, so
1016 * we need to lower TXP to TEX.
1017 *
1018 *   MOV tmpA, src0
1019 *   if (opc == TXP) {
1020 *     ; do perspective division manually before clamping:
1021 *     RCP tmpB, tmpA.w
1022 *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1023 *     opc = TEX;
1024 *   }
1025 *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1026 *   <opc> dst, tmpA, ...
1027 */
1028#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1029#define SAMP_TMP  2
1030static int
1031transform_samp(struct tgsi_transform_context *tctx,
1032               struct tgsi_full_instruction *inst)
1033{
1034   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1035   struct tgsi_full_src_register *coord = &inst->Src[0];
1036   struct tgsi_full_src_register *samp;
1037   struct tgsi_full_instruction new_inst;
1038   /* mask is clamped coords, pmask is all coords (for projection): */
1039   unsigned mask = 0, pmask = 0, smask;
1040   unsigned tex = inst->Texture.Texture;
1041   enum tgsi_opcode opcode = inst->Instruction.Opcode;
1042   bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1043		   (ctx->config->lower_TXP & (1 << tex));
1044
1045   if (opcode == TGSI_OPCODE_TXB2) {
1046      samp = &inst->Src[2];
1047   } else {
1048      samp = &inst->Src[1];
1049   }
1050
1051   /* convert sampler # to bitmask to test: */
1052   smask = 1 << samp->Register.Index;
1053
1054   /* check if we actually need to lower this one: */
1055   if (!(ctx->saturate & smask) && !lower_txp)
1056      return -1;
1057
1058   /* figure out which coordinates need saturating:
1059    *   - RECT textures should not get saturated
1060    *   - array index coords should not get saturated
1061    */
1062   switch (tex) {
1063   case TGSI_TEXTURE_3D:
1064   case TGSI_TEXTURE_CUBE:
1065   case TGSI_TEXTURE_CUBE_ARRAY:
1066   case TGSI_TEXTURE_SHADOWCUBE:
1067   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1068      if (ctx->config->saturate_r & smask)
1069         mask |= TGSI_WRITEMASK_Z;
1070      pmask |= TGSI_WRITEMASK_Z;
1071      /* fallthrough */
1072
1073   case TGSI_TEXTURE_2D:
1074   case TGSI_TEXTURE_2D_ARRAY:
1075   case TGSI_TEXTURE_SHADOW2D:
1076   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1077   case TGSI_TEXTURE_2D_MSAA:
1078   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1079      if (ctx->config->saturate_t & smask)
1080         mask |= TGSI_WRITEMASK_Y;
1081      pmask |= TGSI_WRITEMASK_Y;
1082      /* fallthrough */
1083
1084   case TGSI_TEXTURE_1D:
1085   case TGSI_TEXTURE_1D_ARRAY:
1086   case TGSI_TEXTURE_SHADOW1D:
1087   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1088      if (ctx->config->saturate_s & smask)
1089         mask |= TGSI_WRITEMASK_X;
1090      pmask |= TGSI_WRITEMASK_X;
1091      break;
1092
1093   case TGSI_TEXTURE_RECT:
1094   case TGSI_TEXTURE_SHADOWRECT:
1095      /* we don't saturate, but in case of lower_txp we
1096       * still need to do the perspective divide:
1097       */
1098       pmask = TGSI_WRITEMASK_XY;
1099       break;
1100   }
1101
1102   /* sanity check.. driver could be asking to saturate a non-
1103    * existent coordinate component:
1104    */
1105   if (!mask && !lower_txp)
1106      return -1;
1107
1108   /* MOV tmpA, src0 */
1109   create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1110
1111   /* This is a bit sad.. we need to clamp *after* the coords
1112    * are projected, which means lowering TXP to TEX and doing
1113    * the projection ourself.  But since I haven't figured out
1114    * how to make the lowering code deliver an electric shock
1115    * to anyone using GL_CLAMP, we must do this instead:
1116    */
1117   if (opcode == TGSI_OPCODE_TXP) {
1118      /* RCP tmpB.x tmpA.w */
1119      new_inst = tgsi_default_full_instruction();
1120      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1121      new_inst.Instruction.NumDstRegs = 1;
1122      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1123      new_inst.Instruction.NumSrcRegs = 1;
1124      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1125      tctx->emit_instruction(tctx, &new_inst);
1126
1127      /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1128      new_inst = tgsi_default_full_instruction();
1129      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1130      new_inst.Instruction.NumDstRegs = 1;
1131      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1132      new_inst.Instruction.NumSrcRegs = 2;
1133      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1134      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1135      tctx->emit_instruction(tctx, &new_inst);
1136
1137      opcode = TGSI_OPCODE_TEX;
1138   }
1139
1140   /* MOV_SAT tmpA.<mask>, tmpA */
1141   if (mask) {
1142      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1143   }
1144
1145   /* modify the texture samp instruction to take fixed up coord: */
1146   new_inst = *inst;
1147   new_inst.Instruction.Opcode = opcode;
1148   new_inst.Src[0] = ctx->tmp[A].src;
1149   tctx->emit_instruction(tctx, &new_inst);
1150
1151   return 0;
1152}
1153
1154/* Two-sided color emulation:
1155 * For each COLOR input, create a corresponding BCOLOR input, plus
1156 * CMP instruction to select front or back color based on FACE
1157 */
1158#define TWOSIDE_GROW(n)  (                      \
1159      2 +         /* FACE */                    \
1160      ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161      ((n) * 1) + /* TEMP[] */                  \
1162      ((n) * NINST(3))   /* CMP instr */        \
1163      )
1164
1165static void
1166emit_twoside(struct tgsi_transform_context *tctx)
1167{
1168   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1169   struct tgsi_shader_info *info = ctx->info;
1170   struct tgsi_full_declaration decl;
1171   struct tgsi_full_instruction new_inst;
1172   unsigned inbase, tmpbase;
1173   unsigned i;
1174
1175   inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1176   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1177
1178   /* additional inputs for BCOLOR's */
1179   for (i = 0; i < ctx->two_side_colors; i++) {
1180      unsigned in_idx = ctx->two_side_idx[i];
1181      decl = tgsi_default_full_declaration();
1182      decl.Declaration.File = TGSI_FILE_INPUT;
1183      decl.Declaration.Semantic = true;
1184      decl.Range.First = decl.Range.Last = inbase + i;
1185      decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1186      decl.Semantic.Index = info->input_semantic_index[in_idx];
1187      decl.Declaration.Interpolate = true;
1188      decl.Interp.Interpolate = info->input_interpolate[in_idx];
1189      decl.Interp.Location = info->input_interpolate_loc[in_idx];
1190      decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1191      tctx->emit_declaration(tctx, &decl);
1192   }
1193
1194   /* additional input for FACE */
1195   if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196      decl = tgsi_default_full_declaration();
1197      decl.Declaration.File = TGSI_FILE_INPUT;
1198      decl.Declaration.Semantic = true;
1199      decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200      decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201      decl.Semantic.Index = 0;
1202      tctx->emit_declaration(tctx, &decl);
1203
1204      ctx->face_idx = decl.Range.First;
1205   }
1206
1207   /* additional temps for COLOR/BCOLOR selection: */
1208   for (i = 0; i < ctx->two_side_colors; i++) {
1209      decl = tgsi_default_full_declaration();
1210      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211      decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212      tctx->emit_declaration(tctx, &decl);
1213   }
1214
1215   /* and finally additional instructions to select COLOR/BCOLOR: */
1216   for (i = 0; i < ctx->two_side_colors; i++) {
1217      new_inst = tgsi_default_full_instruction();
1218      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219
1220      new_inst.Instruction.NumDstRegs = 1;
1221      new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222      new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223      new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224
1225      new_inst.Instruction.NumSrcRegs = 3;
1226      new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227      new_inst.Src[0].Register.Index = ctx->face_idx;
1228      new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229      new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230      new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231      new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232      new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233      new_inst.Src[1].Register.Index = inbase + i;
1234      new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235      new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236      new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237      new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238      new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239      new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240      new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241      new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242      new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243      new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244
1245      tctx->emit_instruction(tctx, &new_inst);
1246   }
1247}
1248
1249static void
1250emit_decls(struct tgsi_transform_context *tctx)
1251{
1252   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253   struct tgsi_shader_info *info = ctx->info;
1254   struct tgsi_full_declaration decl;
1255   struct tgsi_full_immediate immed;
1256   unsigned tmpbase;
1257   unsigned i;
1258
1259   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260
1261   ctx->color_base = tmpbase + ctx->numtmp;
1262
1263   /* declare immediate: */
1264   immed = tgsi_default_full_immediate();
1265   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266   immed.u[0].Float = 0.0;
1267   immed.u[1].Float = 1.0;
1268   immed.u[2].Float = 128.0;
1269   immed.u[3].Float = 0.0;
1270   tctx->emit_immediate(tctx, &immed);
1271
1272   ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273   ctx->imm.Register.Index = info->immediate_count;
1274   ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275   ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276   ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277   ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278
1279   /* declare temp regs: */
1280   for (i = 0; i < ctx->numtmp; i++) {
1281      decl = tgsi_default_full_declaration();
1282      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283      decl.Range.First = decl.Range.Last = tmpbase + i;
1284      tctx->emit_declaration(tctx, &decl);
1285
1286      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287      ctx->tmp[i].src.Register.Index = tmpbase + i;
1288      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292
1293      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294      ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296   }
1297
1298   if (ctx->two_side_colors)
1299      emit_twoside(tctx);
1300}
1301
1302static void
1303rename_color_inputs(struct tgsi_lowering_context *ctx,
1304                    struct tgsi_full_instruction *inst)
1305{
1306   unsigned i, j;
1307   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308      struct tgsi_src_register *src = &inst->Src[i].Register;
1309      if (src->File == TGSI_FILE_INPUT) {
1310         for (j = 0; j < ctx->two_side_colors; j++) {
1311	    if (src->Index == (int)ctx->two_side_idx[j]) {
1312               src->File = TGSI_FILE_TEMPORARY;
1313               src->Index = ctx->color_base + j;
1314               break;
1315            }
1316         }
1317      }
1318   }
1319
1320}
1321
1322static void
1323transform_instr(struct tgsi_transform_context *tctx,
1324		struct tgsi_full_instruction *inst)
1325{
1326   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327
1328   if (!ctx->emitted_decls) {
1329      emit_decls(tctx);
1330      ctx->emitted_decls = 1;
1331   }
1332
1333   /* if emulating two-sided-color, we need to re-write some
1334    * src registers:
1335    */
1336   if (ctx->two_side_colors)
1337      rename_color_inputs(ctx, inst);
1338
1339   switch (inst->Instruction.Opcode) {
1340   case TGSI_OPCODE_DST:
1341      if (!ctx->config->lower_DST)
1342         goto skip;
1343      transform_dst(tctx, inst);
1344      break;
1345   case TGSI_OPCODE_LRP:
1346      if (!ctx->config->lower_LRP)
1347         goto skip;
1348      transform_lrp(tctx, inst);
1349      break;
1350   case TGSI_OPCODE_FRC:
1351      if (!ctx->config->lower_FRC)
1352         goto skip;
1353      transform_frc(tctx, inst);
1354      break;
1355   case TGSI_OPCODE_POW:
1356      if (!ctx->config->lower_POW)
1357         goto skip;
1358      transform_pow(tctx, inst);
1359      break;
1360   case TGSI_OPCODE_LIT:
1361      if (!ctx->config->lower_LIT)
1362         goto skip;
1363      transform_lit(tctx, inst);
1364      break;
1365   case TGSI_OPCODE_EXP:
1366      if (!ctx->config->lower_EXP)
1367         goto skip;
1368      transform_exp(tctx, inst);
1369      break;
1370   case TGSI_OPCODE_LOG:
1371      if (!ctx->config->lower_LOG)
1372         goto skip;
1373      transform_log(tctx, inst);
1374      break;
1375   case TGSI_OPCODE_DP4:
1376      if (!ctx->config->lower_DP4)
1377         goto skip;
1378      transform_dotp(tctx, inst);
1379      break;
1380   case TGSI_OPCODE_DP3:
1381      if (!ctx->config->lower_DP3)
1382         goto skip;
1383      transform_dotp(tctx, inst);
1384      break;
1385   case TGSI_OPCODE_DP2:
1386      if (!ctx->config->lower_DP2)
1387         goto skip;
1388      transform_dotp(tctx, inst);
1389      break;
1390   case TGSI_OPCODE_FLR:
1391      if (!ctx->config->lower_FLR)
1392         goto skip;
1393      transform_flr_ceil(tctx, inst);
1394      break;
1395   case TGSI_OPCODE_CEIL:
1396      if (!ctx->config->lower_CEIL)
1397         goto skip;
1398      transform_flr_ceil(tctx, inst);
1399      break;
1400   case TGSI_OPCODE_TRUNC:
1401      if (!ctx->config->lower_TRUNC)
1402         goto skip;
1403      transform_trunc(tctx, inst);
1404      break;
1405   case TGSI_OPCODE_TEX:
1406   case TGSI_OPCODE_TXP:
1407   case TGSI_OPCODE_TXB:
1408   case TGSI_OPCODE_TXB2:
1409   case TGSI_OPCODE_TXL:
1410      if (transform_samp(tctx, inst))
1411         goto skip;
1412      break;
1413   default:
1414   skip:
1415      tctx->emit_instruction(tctx, inst);
1416      break;
1417   }
1418}
1419
1420/* returns NULL if no lowering required, else returns the new
1421 * tokens (which caller is required to free()).  In either case
1422 * returns the current info.
1423 */
1424const struct tgsi_token *
1425tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426                        const struct tgsi_token *tokens,
1427                        struct tgsi_shader_info *info)
1428{
1429   struct tgsi_lowering_context ctx;
1430   struct tgsi_token *newtoks;
1431   int newlen, numtmp;
1432
1433   /* sanity check in case limit is ever increased: */
1434   STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435
1436   /* sanity check the lowering */
1437   assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438   assert(!(config->lower_FRC && config->lower_TRUNC));
1439
1440   memset(&ctx, 0, sizeof(ctx));
1441   ctx.base.transform_instruction = transform_instr;
1442   ctx.info = info;
1443   ctx.config = config;
1444
1445   tgsi_scan_shader(tokens, info);
1446
1447   /* if we are adding fragment shader support to emulate two-sided
1448    * color, then figure out the number of additional inputs we need
1449    * to create for BCOLOR's..
1450    */
1451   if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452       config->color_two_side) {
1453      int i;
1454      ctx.face_idx = -1;
1455      for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456         if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457            ctx.two_side_idx[ctx.two_side_colors++] = i;
1458         if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459            ctx.face_idx = i;
1460      }
1461   }
1462
1463   ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464
1465#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466   /* if there are no instructions to lower, then we are done: */
1467   if (!(OPCS(DST) ||
1468         OPCS(LRP) ||
1469         OPCS(FRC) ||
1470         OPCS(POW) ||
1471         OPCS(LIT) ||
1472         OPCS(EXP) ||
1473         OPCS(LOG) ||
1474         OPCS(DP4) ||
1475         OPCS(DP3) ||
1476         OPCS(DP2) ||
1477         OPCS(FLR) ||
1478         OPCS(CEIL) ||
1479         OPCS(TRUNC) ||
1480         OPCS(TXP) ||
1481         ctx.two_side_colors ||
1482         ctx.saturate))
1483      return NULL;
1484
1485#if 0  /* debug */
1486   _debug_printf("BEFORE:");
1487   tgsi_dump(tokens, 0);
1488#endif
1489
1490   numtmp = 0;
1491   newlen = tgsi_num_tokens(tokens);
1492   if (OPCS(DST)) {
1493      newlen += DST_GROW * OPCS(DST);
1494      numtmp = MAX2(numtmp, DST_TMP);
1495   }
1496   if (OPCS(LRP)) {
1497      newlen += LRP_GROW * OPCS(LRP);
1498      numtmp = MAX2(numtmp, LRP_TMP);
1499   }
1500   if (OPCS(FRC)) {
1501      newlen += FRC_GROW * OPCS(FRC);
1502      numtmp = MAX2(numtmp, FRC_TMP);
1503   }
1504   if (OPCS(POW)) {
1505      newlen += POW_GROW * OPCS(POW);
1506      numtmp = MAX2(numtmp, POW_TMP);
1507   }
1508   if (OPCS(LIT)) {
1509      newlen += LIT_GROW * OPCS(LIT);
1510      numtmp = MAX2(numtmp, LIT_TMP);
1511   }
1512   if (OPCS(EXP)) {
1513      newlen += EXP_GROW * OPCS(EXP);
1514      numtmp = MAX2(numtmp, EXP_TMP);
1515   }
1516   if (OPCS(LOG)) {
1517      newlen += LOG_GROW * OPCS(LOG);
1518      numtmp = MAX2(numtmp, LOG_TMP);
1519   }
1520   if (OPCS(DP4)) {
1521      newlen += DP4_GROW * OPCS(DP4);
1522      numtmp = MAX2(numtmp, DOTP_TMP);
1523   }
1524   if (OPCS(DP3)) {
1525      newlen += DP3_GROW * OPCS(DP3);
1526      numtmp = MAX2(numtmp, DOTP_TMP);
1527   }
1528   if (OPCS(DP2)) {
1529      newlen += DP2_GROW * OPCS(DP2);
1530      numtmp = MAX2(numtmp, DOTP_TMP);
1531   }
1532   if (OPCS(FLR)) {
1533      newlen += FLR_GROW * OPCS(FLR);
1534      numtmp = MAX2(numtmp, FLR_TMP);
1535   }
1536   if (OPCS(CEIL)) {
1537      newlen += CEIL_GROW * OPCS(CEIL);
1538      numtmp = MAX2(numtmp, CEIL_TMP);
1539   }
1540   if (OPCS(TRUNC)) {
1541      newlen += TRUNC_GROW * OPCS(TRUNC);
1542      numtmp = MAX2(numtmp, TRUNC_TMP);
1543   }
1544   if (ctx.saturate || config->lower_TXP) {
1545      int n = 0;
1546
1547      if (ctx.saturate) {
1548         n = info->opcode_count[TGSI_OPCODE_TEX] +
1549            info->opcode_count[TGSI_OPCODE_TXP] +
1550            info->opcode_count[TGSI_OPCODE_TXB] +
1551            info->opcode_count[TGSI_OPCODE_TXB2] +
1552            info->opcode_count[TGSI_OPCODE_TXL];
1553      } else if (config->lower_TXP) {
1554          n = info->opcode_count[TGSI_OPCODE_TXP];
1555      }
1556
1557      newlen += SAMP_GROW * n;
1558      numtmp = MAX2(numtmp, SAMP_TMP);
1559   }
1560
1561   /* specifically don't include two_side_colors temps in the count: */
1562   ctx.numtmp = numtmp;
1563
1564   if (ctx.two_side_colors) {
1565      newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566      /* note: we permanently consume temp regs, re-writing references
1567       * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568       * instruction that selects which varying to use):
1569       */
1570      numtmp += ctx.two_side_colors;
1571   }
1572
1573   newlen += 2 * numtmp;
1574   newlen += 5;        /* immediate */
1575
1576   newtoks = tgsi_alloc_tokens(newlen);
1577   if (!newtoks)
1578      return NULL;
1579
1580   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1581
1582   tgsi_scan_shader(newtoks, info);
1583
1584#if 0  /* debug */
1585   _debug_printf("AFTER:");
1586   tgsi_dump(newtoks, 0);
1587#endif
1588
1589   return newtoks;
1590}
1591