1b8e80941Smrg/*
2b8e80941Smrg * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Rob Clark <robclark@freedesktop.org>
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg#include "tgsi/tgsi_transform.h"
28b8e80941Smrg#include "tgsi/tgsi_scan.h"
29b8e80941Smrg#include "tgsi/tgsi_dump.h"
30b8e80941Smrg
31b8e80941Smrg#include "util/u_debug.h"
32b8e80941Smrg#include "util/u_math.h"
33b8e80941Smrg
34b8e80941Smrg#include "tgsi_lowering.h"
35b8e80941Smrg
36b8e80941Smrgstruct tgsi_lowering_context {
37b8e80941Smrg   struct tgsi_transform_context base;
38b8e80941Smrg   const struct tgsi_lowering_config *config;
39b8e80941Smrg   struct tgsi_shader_info *info;
40b8e80941Smrg   unsigned two_side_colors;
41b8e80941Smrg   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42b8e80941Smrg   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
43b8e80941Smrg   int face_idx;
44b8e80941Smrg   unsigned numtmp;
45b8e80941Smrg   struct {
46b8e80941Smrg      struct tgsi_full_src_register src;
47b8e80941Smrg      struct tgsi_full_dst_register dst;
48b8e80941Smrg   } tmp[2];
49b8e80941Smrg#define A 0
50b8e80941Smrg#define B 1
51b8e80941Smrg   struct tgsi_full_src_register imm;
52b8e80941Smrg   int emitted_decls;
53b8e80941Smrg   unsigned saturate;
54b8e80941Smrg};
55b8e80941Smrg
/*
 * Downcast a generic transform context to the lowering context that
 * embeds it as its first member.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lowering =
      (struct tgsi_lowering_context *)tctx;

   return lowering;
}
61b8e80941Smrg
62b8e80941Smrg/*
63b8e80941Smrg * Utility helpers:
64b8e80941Smrg */
65b8e80941Smrg
66b8e80941Smrgstatic void
67b8e80941Smrgreg_dst(struct tgsi_full_dst_register *dst,
68b8e80941Smrg	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69b8e80941Smrg{
70b8e80941Smrg   *dst = *orig_dst;
71b8e80941Smrg   dst->Register.WriteMask &= wrmask;
72b8e80941Smrg   assert(dst->Register.WriteMask);
73b8e80941Smrg}
74b8e80941Smrg
75b8e80941Smrgstatic inline void
76b8e80941Smrgget_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77b8e80941Smrg{
78b8e80941Smrg   swiz[0] = src->SwizzleX;
79b8e80941Smrg   swiz[1] = src->SwizzleY;
80b8e80941Smrg   swiz[2] = src->SwizzleZ;
81b8e80941Smrg   swiz[3] = src->SwizzleW;
82b8e80941Smrg}
83b8e80941Smrg
84b8e80941Smrgstatic void
85b8e80941Smrgreg_src(struct tgsi_full_src_register *src,
86b8e80941Smrg	const struct tgsi_full_src_register *orig_src,
87b8e80941Smrg	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88b8e80941Smrg{
89b8e80941Smrg   unsigned swiz[4];
90b8e80941Smrg   get_swiz(swiz, &orig_src->Register);
91b8e80941Smrg   *src = *orig_src;
92b8e80941Smrg   src->Register.SwizzleX = swiz[sx];
93b8e80941Smrg   src->Register.SwizzleY = swiz[sy];
94b8e80941Smrg   src->Register.SwizzleZ = swiz[sz];
95b8e80941Smrg   src->Register.SwizzleW = swiz[sw];
96b8e80941Smrg}
97b8e80941Smrg
98b8e80941Smrg#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
99b8e80941Smrg#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
100b8e80941Smrg      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101b8e80941Smrg
102b8e80941Smrg/*
103b8e80941Smrg * if (dst.x aliases src.x) {
104b8e80941Smrg *   MOV tmpA.x, src.x
105b8e80941Smrg *   src = tmpA
106b8e80941Smrg * }
107b8e80941Smrg * COS dst.x, src.x
108b8e80941Smrg * SIN dst.y, src.x
109b8e80941Smrg * MOV dst.zw, imm{0.0, 1.0}
110b8e80941Smrg */
111b8e80941Smrgstatic bool
112b8e80941Smrgaliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113b8e80941Smrg	const struct tgsi_full_src_register *src, unsigned src_mask)
114b8e80941Smrg{
115b8e80941Smrg   if ((dst->Register.File == src->Register.File) &&
116b8e80941Smrg       (dst->Register.Index == src->Register.Index)) {
117b8e80941Smrg      unsigned i, actual_mask = 0;
118b8e80941Smrg      unsigned swiz[4];
119b8e80941Smrg      get_swiz(swiz, &src->Register);
120b8e80941Smrg      for (i = 0; i < 4; i++)
121b8e80941Smrg         if (src_mask & (1 << i))
122b8e80941Smrg            actual_mask |= (1 << swiz[i]);
123b8e80941Smrg      if (actual_mask & dst_mask)
124b8e80941Smrg         return true;
125b8e80941Smrg   }
126b8e80941Smrg   return false;
127b8e80941Smrg}
128b8e80941Smrg
129b8e80941Smrgstatic void
130b8e80941Smrgcreate_mov(struct tgsi_transform_context *tctx,
131b8e80941Smrg           const struct tgsi_full_dst_register *dst,
132b8e80941Smrg           const struct tgsi_full_src_register *src,
133b8e80941Smrg           unsigned mask, unsigned saturate)
134b8e80941Smrg{
135b8e80941Smrg   struct tgsi_full_instruction new_inst;
136b8e80941Smrg
137b8e80941Smrg   new_inst = tgsi_default_full_instruction();
138b8e80941Smrg   new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139b8e80941Smrg   new_inst.Instruction.Saturate = saturate;
140b8e80941Smrg   new_inst.Instruction.NumDstRegs = 1;
141b8e80941Smrg   reg_dst(&new_inst.Dst[0], dst, mask);
142b8e80941Smrg   new_inst.Instruction.NumSrcRegs = 1;
143b8e80941Smrg   reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144b8e80941Smrg   tctx->emit_instruction(tctx, &new_inst);
145b8e80941Smrg}
146b8e80941Smrg
147b8e80941Smrg/* to help calculate # of tgsi tokens for a lowering.. we assume
148b8e80941Smrg * the worst case, ie. removed instructions don't have ADDR[] or
149b8e80941Smrg * anything which increases the # of tokens per src/dst and the
150b8e80941Smrg * inserted instructions do.
151b8e80941Smrg *
152b8e80941Smrg * OINST() - old instruction
153b8e80941Smrg *    1         : instruction itself
154b8e80941Smrg *    1         : dst
155b8e80941Smrg *    1 * nargs : srcN
156b8e80941Smrg *
157b8e80941Smrg * NINST() - new instruction
158b8e80941Smrg *    1         : instruction itself
159b8e80941Smrg *    2         : dst
160b8e80941Smrg *    2 * nargs : srcN
161b8e80941Smrg */
162b8e80941Smrg
163b8e80941Smrg#define OINST(nargs)  (1 + 1 + 1 * (nargs))
164b8e80941Smrg#define NINST(nargs)  (1 + 2 + 2 * (nargs))
165b8e80941Smrg
166b8e80941Smrg/*
167b8e80941Smrg * Lowering Translators:
168b8e80941Smrg */
169b8e80941Smrg
170b8e80941Smrg/* DST - Distance Vector
171b8e80941Smrg *   dst.x = 1.0
172b8e80941Smrg *   dst.y = src0.y \times src1.y
173b8e80941Smrg *   dst.z = src0.z
174b8e80941Smrg *   dst.w = src1.w
175b8e80941Smrg *
176b8e80941Smrg * ; note: could be more clever and use just a single temp
177b8e80941Smrg * ;       if I was clever enough to re-write the swizzles.
178b8e80941Smrg * ; needs: 2 tmp, imm{1.0}
179b8e80941Smrg * if (dst.y aliases src0.z) {
180b8e80941Smrg *   MOV tmpA.yz, src0.yz
181b8e80941Smrg *   src0 = tmpA
182b8e80941Smrg * }
183b8e80941Smrg * if (dst.yz aliases src1.w) {
184b8e80941Smrg *   MOV tmpB.yw, src1.yw
185b8e80941Smrg *   src1 = tmpB
186b8e80941Smrg * }
187b8e80941Smrg * MUL dst.y, src0.y, src1.y
188b8e80941Smrg * MOV dst.z, src0.z
189b8e80941Smrg * MOV dst.w, src1.w
190b8e80941Smrg * MOV dst.x, imm{1.0}
191b8e80941Smrg */
192b8e80941Smrg#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193b8e80941Smrg		NINST(1) + NINST(1) - OINST(2))
194b8e80941Smrg#define DST_TMP  2
195b8e80941Smrgstatic void
196b8e80941Smrgtransform_dst(struct tgsi_transform_context *tctx,
197b8e80941Smrg              struct tgsi_full_instruction *inst)
198b8e80941Smrg{
199b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
201b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
202b8e80941Smrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
203b8e80941Smrg   struct tgsi_full_instruction new_inst;
204b8e80941Smrg
205b8e80941Smrg   if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206b8e80941Smrg      create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207b8e80941Smrg      src0 = &ctx->tmp[A].src;
208b8e80941Smrg   }
209b8e80941Smrg
210b8e80941Smrg   if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211b8e80941Smrg      create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212b8e80941Smrg      src1 = &ctx->tmp[B].src;
213b8e80941Smrg   }
214b8e80941Smrg
215b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216b8e80941Smrg      /* MUL dst.y, src0.y, src1.y */
217b8e80941Smrg      new_inst = tgsi_default_full_instruction();
218b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
220b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
222b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223b8e80941Smrg      reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
225b8e80941Smrg   }
226b8e80941Smrg
227b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228b8e80941Smrg      /* MOV dst.z, src0.z */
229b8e80941Smrg      new_inst = tgsi_default_full_instruction();
230b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
232b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
234b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
236b8e80941Smrg   }
237b8e80941Smrg
238b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239b8e80941Smrg      /* MOV dst.w, src1.w */
240b8e80941Smrg      new_inst = tgsi_default_full_instruction();
241b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
243b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
245b8e80941Smrg      reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
247b8e80941Smrg   }
248b8e80941Smrg
249b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250b8e80941Smrg      /* MOV dst.x, imm{1.0} */
251b8e80941Smrg      new_inst = tgsi_default_full_instruction();
252b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
254b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
256b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
258b8e80941Smrg   }
259b8e80941Smrg}
260b8e80941Smrg
261b8e80941Smrg/* LRP - Linear Interpolate
262b8e80941Smrg *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
263b8e80941Smrg *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
264b8e80941Smrg *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
265b8e80941Smrg *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
266b8e80941Smrg *
267b8e80941Smrg * This becomes: src0 \times src1 + src2 - src0 \times src2, which
268b8e80941Smrg * can then become: src0 \times src1 - (src0 \times src2 - src2)
269b8e80941Smrg *
270b8e80941Smrg * ; needs: 1 tmp
271b8e80941Smrg * MAD tmpA, src0, src2, -src2
272b8e80941Smrg * MAD dst, src0, src1, -tmpA
273b8e80941Smrg */
274b8e80941Smrg#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
275b8e80941Smrg#define LRP_TMP  1
276b8e80941Smrgstatic void
277b8e80941Smrgtransform_lrp(struct tgsi_transform_context *tctx,
278b8e80941Smrg              struct tgsi_full_instruction *inst)
279b8e80941Smrg{
280b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
281b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
282b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
283b8e80941Smrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
284b8e80941Smrg   struct tgsi_full_src_register *src2 = &inst->Src[2];
285b8e80941Smrg   struct tgsi_full_instruction new_inst;
286b8e80941Smrg
287b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
288b8e80941Smrg      /* MAD tmpA, src0, src2, -src2 */
289b8e80941Smrg      new_inst = tgsi_default_full_instruction();
290b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
291b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
292b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
293b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
294b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
295b8e80941Smrg      reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
296b8e80941Smrg      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
297b8e80941Smrg      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
298b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
299b8e80941Smrg
300b8e80941Smrg      /* MAD dst, src0, src1, -tmpA */
301b8e80941Smrg      new_inst = tgsi_default_full_instruction();
302b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
303b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
304b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
305b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
306b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
307b8e80941Smrg      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
308b8e80941Smrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
309b8e80941Smrg      new_inst.Src[2].Register.Negate = true;
310b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
311b8e80941Smrg   }
312b8e80941Smrg}
313b8e80941Smrg
314b8e80941Smrg/* FRC - Fraction
315b8e80941Smrg *  dst.x = src.x - \lfloor src.x\rfloor
316b8e80941Smrg *  dst.y = src.y - \lfloor src.y\rfloor
317b8e80941Smrg *  dst.z = src.z - \lfloor src.z\rfloor
318b8e80941Smrg *  dst.w = src.w - \lfloor src.w\rfloor
319b8e80941Smrg *
320b8e80941Smrg * ; needs: 1 tmp
321b8e80941Smrg * FLR tmpA, src
322b8e80941Smrg * SUB dst, src, tmpA
323b8e80941Smrg */
324b8e80941Smrg#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
325b8e80941Smrg#define FRC_TMP  1
326b8e80941Smrgstatic void
327b8e80941Smrgtransform_frc(struct tgsi_transform_context *tctx,
328b8e80941Smrg              struct tgsi_full_instruction *inst)
329b8e80941Smrg{
330b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
331b8e80941Smrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
332b8e80941Smrg   struct tgsi_full_src_register *src = &inst->Src[0];
333b8e80941Smrg   struct tgsi_full_instruction new_inst;
334b8e80941Smrg
335b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
336b8e80941Smrg      /* FLR tmpA, src */
337b8e80941Smrg      new_inst = tgsi_default_full_instruction();
338b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
339b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
340b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
341b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
342b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
343b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
344b8e80941Smrg
345b8e80941Smrg      /* SUB dst, src, tmpA */
346b8e80941Smrg      new_inst = tgsi_default_full_instruction();
347b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
348b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
349b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
350b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
351b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
352b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
353b8e80941Smrg      new_inst.Src[1].Register.Negate = 1;
354b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
355b8e80941Smrg   }
356b8e80941Smrg}
357b8e80941Smrg
358b8e80941Smrg/* POW - Power
359b8e80941Smrg *  dst.x = src0.x^{src1.x}
360b8e80941Smrg *  dst.y = src0.x^{src1.x}
361b8e80941Smrg *  dst.z = src0.x^{src1.x}
362b8e80941Smrg *  dst.w = src0.x^{src1.x}
363b8e80941Smrg *
364b8e80941Smrg * ; needs: 1 tmp
365b8e80941Smrg * LG2 tmpA.x, src0.x
366b8e80941Smrg * MUL tmpA.x, src1.x, tmpA.x
367b8e80941Smrg * EX2 dst, tmpA.x
368b8e80941Smrg */
369b8e80941Smrg#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
370b8e80941Smrg#define POW_TMP  1
371b8e80941Smrgstatic void
372b8e80941Smrgtransform_pow(struct tgsi_transform_context *tctx,
373b8e80941Smrg              struct tgsi_full_instruction *inst)
374b8e80941Smrg{
375b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
376b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
377b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
378b8e80941Smrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
379b8e80941Smrg   struct tgsi_full_instruction new_inst;
380b8e80941Smrg
381b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
382b8e80941Smrg      /* LG2 tmpA.x, src0.x */
383b8e80941Smrg      new_inst = tgsi_default_full_instruction();
384b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
385b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
386b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
387b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
388b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
389b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
390b8e80941Smrg
391b8e80941Smrg      /* MUL tmpA.x, src1.x, tmpA.x */
392b8e80941Smrg      new_inst = tgsi_default_full_instruction();
393b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
394b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
395b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
396b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
397b8e80941Smrg      reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
398b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
399b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
400b8e80941Smrg
401b8e80941Smrg      /* EX2 dst, tmpA.x */
402b8e80941Smrg      new_inst = tgsi_default_full_instruction();
403b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
404b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
405b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
406b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
407b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
408b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
409b8e80941Smrg   }
410b8e80941Smrg}
411b8e80941Smrg
412b8e80941Smrg/* LIT - Light Coefficients
413b8e80941Smrg *  dst.x = 1.0
414b8e80941Smrg *  dst.y = max(src.x, 0.0)
415b8e80941Smrg *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
416b8e80941Smrg *  dst.w = 1.0
417b8e80941Smrg *
418b8e80941Smrg * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
419b8e80941Smrg * MAX tmpA.xy, src.xy, imm{0.0}
420b8e80941Smrg * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
421b8e80941Smrg * LG2 tmpA.y, tmpA.y
422b8e80941Smrg * MUL tmpA.y, tmpA.z, tmpA.y
423b8e80941Smrg * EX2 tmpA.y, tmpA.y
424b8e80941Smrg * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
425b8e80941Smrg * MOV dst.yz, tmpA.xy
426b8e80941Smrg * MOV dst.xw, imm{1.0}
427b8e80941Smrg */
428b8e80941Smrg#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
429b8e80941Smrg		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
430b8e80941Smrg#define LIT_TMP  1
431b8e80941Smrgstatic void
432b8e80941Smrgtransform_lit(struct tgsi_transform_context *tctx,
433b8e80941Smrg              struct tgsi_full_instruction *inst)
434b8e80941Smrg{
435b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
436b8e80941Smrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
437b8e80941Smrg   struct tgsi_full_src_register *src = &inst->Src[0];
438b8e80941Smrg   struct tgsi_full_instruction new_inst;
439b8e80941Smrg
440b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
441b8e80941Smrg      /* MAX tmpA.xy, src.xy, imm{0.0} */
442b8e80941Smrg      new_inst = tgsi_default_full_instruction();
443b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
444b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
445b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
446b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
447b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
448b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
449b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
450b8e80941Smrg
451b8e80941Smrg      /* MIN tmpA.z, src.w, imm{128.0} */
452b8e80941Smrg      new_inst = tgsi_default_full_instruction();
453b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
454b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
455b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
456b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
457b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
458b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
459b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
460b8e80941Smrg
461b8e80941Smrg      /* MAX tmpA.z, tmpA.z, -imm{128.0} */
462b8e80941Smrg      new_inst = tgsi_default_full_instruction();
463b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
464b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
465b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
466b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
467b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
468b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
469b8e80941Smrg      new_inst.Src[1].Register.Negate = true;
470b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
471b8e80941Smrg
472b8e80941Smrg      /* LG2 tmpA.y, tmpA.y */
473b8e80941Smrg      new_inst = tgsi_default_full_instruction();
474b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
475b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
476b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
477b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
478b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
479b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
480b8e80941Smrg
481b8e80941Smrg      /* MUL tmpA.y, tmpA.z, tmpA.y */
482b8e80941Smrg      new_inst = tgsi_default_full_instruction();
483b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
484b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
485b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
486b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
487b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
488b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
489b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
490b8e80941Smrg
491b8e80941Smrg      /* EX2 tmpA.y, tmpA.y */
492b8e80941Smrg      new_inst = tgsi_default_full_instruction();
493b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
494b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
495b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
496b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
497b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
498b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
499b8e80941Smrg
500b8e80941Smrg      /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
501b8e80941Smrg      new_inst = tgsi_default_full_instruction();
502b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
503b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
504b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
505b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
506b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
507b8e80941Smrg      new_inst.Src[0].Register.Negate = true;
508b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
509b8e80941Smrg      reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
510b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
511b8e80941Smrg
512b8e80941Smrg      /* MOV dst.yz, tmpA.xy */
513b8e80941Smrg      new_inst = tgsi_default_full_instruction();
514b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
515b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
516b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
517b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
518b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
519b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
520b8e80941Smrg   }
521b8e80941Smrg
522b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
523b8e80941Smrg      /* MOV dst.xw, imm{1.0} */
524b8e80941Smrg      new_inst = tgsi_default_full_instruction();
525b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
526b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
527b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
528b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
529b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
530b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
531b8e80941Smrg   }
532b8e80941Smrg}
533b8e80941Smrg
534b8e80941Smrg/* EXP - Approximate Exponential Base 2
535b8e80941Smrg *  dst.x = 2^{\lfloor src.x\rfloor}
536b8e80941Smrg *  dst.y = src.x - \lfloor src.x\rfloor
537b8e80941Smrg *  dst.z = 2^{src.x}
538b8e80941Smrg *  dst.w = 1.0
539b8e80941Smrg *
540b8e80941Smrg * ; needs: 1 tmp, imm{1.0}
541b8e80941Smrg * if (lowering FLR) {
542b8e80941Smrg *   FRC tmpA.x, src.x
543b8e80941Smrg *   SUB tmpA.x, src.x, tmpA.x
544b8e80941Smrg * } else {
545b8e80941Smrg *   FLR tmpA.x, src.x
546b8e80941Smrg * }
547b8e80941Smrg * EX2 tmpA.y, src.x
548b8e80941Smrg * SUB dst.y, src.x, tmpA.x
549b8e80941Smrg * EX2 dst.x, tmpA.x
550b8e80941Smrg * MOV dst.z, tmpA.y
551b8e80941Smrg * MOV dst.w, imm{1.0}
552b8e80941Smrg */
553b8e80941Smrg#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
554b8e80941Smrg		NINST(1)+ NINST(1) - OINST(1))
555b8e80941Smrg#define EXP_TMP  1
556b8e80941Smrgstatic void
557b8e80941Smrgtransform_exp(struct tgsi_transform_context *tctx,
558b8e80941Smrg              struct tgsi_full_instruction *inst)
559b8e80941Smrg{
560b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
561b8e80941Smrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
562b8e80941Smrg   struct tgsi_full_src_register *src = &inst->Src[0];
563b8e80941Smrg   struct tgsi_full_instruction new_inst;
564b8e80941Smrg
565b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
566b8e80941Smrg      if (ctx->config->lower_FLR) {
567b8e80941Smrg         /* FRC tmpA.x, src.x */
568b8e80941Smrg         new_inst = tgsi_default_full_instruction();
569b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
570b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
571b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
572b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
573b8e80941Smrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
574b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
575b8e80941Smrg
576b8e80941Smrg         /* SUB tmpA.x, src.x, tmpA.x */
577b8e80941Smrg         new_inst = tgsi_default_full_instruction();
578b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
579b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
580b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
581b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 2;
582b8e80941Smrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
583b8e80941Smrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
584b8e80941Smrg         new_inst.Src[1].Register.Negate = 1;
585b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
586b8e80941Smrg     } else {
587b8e80941Smrg         /* FLR tmpA.x, src.x */
588b8e80941Smrg         new_inst = tgsi_default_full_instruction();
589b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
590b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
591b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
592b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
593b8e80941Smrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
594b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
595b8e80941Smrg      }
596b8e80941Smrg   }
597b8e80941Smrg
598b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
599b8e80941Smrg      /* EX2 tmpA.y, src.x */
600b8e80941Smrg      new_inst = tgsi_default_full_instruction();
601b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
602b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
603b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
604b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
605b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
606b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
607b8e80941Smrg   }
608b8e80941Smrg
609b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
610b8e80941Smrg      /* SUB dst.y, src.x, tmpA.x */
611b8e80941Smrg      new_inst = tgsi_default_full_instruction();
612b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
613b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
614b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
615b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
616b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
617b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
618b8e80941Smrg      new_inst.Src[1].Register.Negate = 1;
619b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
620b8e80941Smrg   }
621b8e80941Smrg
622b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
623b8e80941Smrg      /* EX2 dst.x, tmpA.x */
624b8e80941Smrg      new_inst = tgsi_default_full_instruction();
625b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
626b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
627b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
628b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
629b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
630b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
631b8e80941Smrg   }
632b8e80941Smrg
633b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
634b8e80941Smrg      /* MOV dst.z, tmpA.y */
635b8e80941Smrg      new_inst = tgsi_default_full_instruction();
636b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
637b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
638b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
639b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
640b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
641b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
642b8e80941Smrg   }
643b8e80941Smrg
644b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
645b8e80941Smrg      /* MOV dst.w, imm{1.0} */
646b8e80941Smrg      new_inst = tgsi_default_full_instruction();
647b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
648b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
649b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
650b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
651b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
652b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
653b8e80941Smrg   }
654b8e80941Smrg}
655b8e80941Smrg
656b8e80941Smrg/* LOG - Approximate Logarithm Base 2
657b8e80941Smrg *  dst.x = \lfloor\log_2{|src.x|}\rfloor
658b8e80941Smrg *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
659b8e80941Smrg *  dst.z = \log_2{|src.x|}
660b8e80941Smrg *  dst.w = 1.0
661b8e80941Smrg *
662b8e80941Smrg * ; needs: 1 tmp, imm{1.0}
663b8e80941Smrg * LG2 tmpA.x, |src.x|
664b8e80941Smrg * if (lowering FLR) {
665b8e80941Smrg *   FRC tmpA.y, tmpA.x
666b8e80941Smrg *   SUB tmpA.y, tmpA.x, tmpA.y
667b8e80941Smrg * } else {
668b8e80941Smrg *   FLR tmpA.y, tmpA.x
669b8e80941Smrg * }
670b8e80941Smrg * EX2 tmpA.z, tmpA.y
671b8e80941Smrg * RCP tmpA.z, tmpA.z
672b8e80941Smrg * MUL dst.y, |src.x|, tmpA.z
673b8e80941Smrg * MOV dst.xz, tmpA.yx
674b8e80941Smrg * MOV dst.w, imm{1.0}
675b8e80941Smrg */
676b8e80941Smrg#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
677b8e80941Smrg		NINST(2) + NINST(1) + NINST(1) - OINST(1))
678b8e80941Smrg#define LOG_TMP  1
679b8e80941Smrgstatic void
680b8e80941Smrgtransform_log(struct tgsi_transform_context *tctx,
681b8e80941Smrg              struct tgsi_full_instruction *inst)
682b8e80941Smrg{
683b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
684b8e80941Smrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
685b8e80941Smrg   struct tgsi_full_src_register *src = &inst->Src[0];
686b8e80941Smrg   struct tgsi_full_instruction new_inst;
687b8e80941Smrg
688b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
689b8e80941Smrg      /* LG2 tmpA.x, |src.x| */
690b8e80941Smrg      new_inst = tgsi_default_full_instruction();
691b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
692b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
693b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
694b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
695b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
696b8e80941Smrg      new_inst.Src[0].Register.Absolute = true;
697b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
698b8e80941Smrg   }
699b8e80941Smrg
700b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
701b8e80941Smrg      if (ctx->config->lower_FLR) {
702b8e80941Smrg         /* FRC tmpA.y, tmpA.x */
703b8e80941Smrg         new_inst = tgsi_default_full_instruction();
704b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
705b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
706b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
707b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
708b8e80941Smrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
709b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
710b8e80941Smrg
711b8e80941Smrg         /* SUB tmpA.y, tmpA.x, tmpA.y */
712b8e80941Smrg         new_inst = tgsi_default_full_instruction();
713b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
714b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
715b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
716b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 2;
717b8e80941Smrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
718b8e80941Smrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
719b8e80941Smrg         new_inst.Src[1].Register.Negate = 1;
720b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
721b8e80941Smrg      } else {
722b8e80941Smrg         /* FLR tmpA.y, tmpA.x */
723b8e80941Smrg         new_inst = tgsi_default_full_instruction();
724b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
725b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
726b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
727b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
728b8e80941Smrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
729b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
730b8e80941Smrg      }
731b8e80941Smrg   }
732b8e80941Smrg
733b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
734b8e80941Smrg      /* EX2 tmpA.z, tmpA.y */
735b8e80941Smrg      new_inst = tgsi_default_full_instruction();
736b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
737b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
738b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
739b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
740b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
741b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
742b8e80941Smrg
743b8e80941Smrg      /* RCP tmpA.z, tmpA.z */
744b8e80941Smrg      new_inst = tgsi_default_full_instruction();
745b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
746b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
747b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
748b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
749b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
750b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
751b8e80941Smrg
752b8e80941Smrg      /* MUL dst.y, |src.x|, tmpA.z */
753b8e80941Smrg      new_inst = tgsi_default_full_instruction();
754b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
755b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
756b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
757b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
758b8e80941Smrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
759b8e80941Smrg      new_inst.Src[0].Register.Absolute = true;
760b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
761b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
762b8e80941Smrg   }
763b8e80941Smrg
764b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
765b8e80941Smrg      /* MOV dst.xz, tmpA.yx */
766b8e80941Smrg      new_inst = tgsi_default_full_instruction();
767b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
768b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
769b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
770b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
771b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
772b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
773b8e80941Smrg   }
774b8e80941Smrg
775b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
776b8e80941Smrg      /* MOV dst.w, imm{1.0} */
777b8e80941Smrg      new_inst = tgsi_default_full_instruction();
778b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
779b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
780b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
781b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
782b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
783b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
784b8e80941Smrg   }
785b8e80941Smrg}
786b8e80941Smrg
787b8e80941Smrg/* DP4 - 4-component Dot Product
788b8e80941Smrg *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
789b8e80941Smrg *
790b8e80941Smrg * DP3 - 3-component Dot Product
791b8e80941Smrg *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
792b8e80941Smrg *
793b8e80941Smrg * DP2 - 2-component Dot Product
794b8e80941Smrg *   dst = src0.x \times src1.x + src0.y \times src1.y
795b8e80941Smrg *
796b8e80941Smrg * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
797b8e80941Smrg * operations, which is what you'd prefer for a ISA that is natively
798b8e80941Smrg * scalar.  Probably a native vector ISA would at least already have
799b8e80941Smrg * DP4/DP3 instructions, but perhaps there is room for an alternative
800b8e80941Smrg * translation for DP2 using vector instructions.
801b8e80941Smrg *
802b8e80941Smrg * ; needs: 1 tmp
803b8e80941Smrg * MUL tmpA.x, src0.x, src1.x
804b8e80941Smrg * MAD tmpA.x, src0.y, src1.y, tmpA.x
805b8e80941Smrg * if (DP3 || DP4) {
806b8e80941Smrg *   MAD tmpA.x, src0.z, src1.z, tmpA.x
807b8e80941Smrg *   if (DP4) {
808b8e80941Smrg *     MAD tmpA.x, src0.w, src1.w, tmpA.x
809b8e80941Smrg *   }
810b8e80941Smrg * }
811b8e80941Smrg * ; fixup last instruction to replicate into dst
812b8e80941Smrg */
813b8e80941Smrg#define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
814b8e80941Smrg#define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
815b8e80941Smrg#define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
816b8e80941Smrg#define DOTP_TMP  1
817b8e80941Smrgstatic void
818b8e80941Smrgtransform_dotp(struct tgsi_transform_context *tctx,
819b8e80941Smrg               struct tgsi_full_instruction *inst)
820b8e80941Smrg{
821b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
822b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
823b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
824b8e80941Smrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
825b8e80941Smrg   struct tgsi_full_instruction new_inst;
826b8e80941Smrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
827b8e80941Smrg
828b8e80941Smrg   /* NOTE: any potential last instruction must replicate src on all
829b8e80941Smrg    * components (since it could be re-written to write to final dst)
830b8e80941Smrg    */
831b8e80941Smrg
832b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
833b8e80941Smrg      /* MUL tmpA.x, src0.x, src1.x */
834b8e80941Smrg      new_inst = tgsi_default_full_instruction();
835b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
836b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
837b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
838b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
839b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
840b8e80941Smrg      reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
841b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
842b8e80941Smrg
843b8e80941Smrg      /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
844b8e80941Smrg      new_inst = tgsi_default_full_instruction();
845b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
846b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
847b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
848b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
849b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
850b8e80941Smrg      reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
851b8e80941Smrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
852b8e80941Smrg
853b8e80941Smrg      if ((opcode == TGSI_OPCODE_DP3) ||
854b8e80941Smrg          (opcode == TGSI_OPCODE_DP4)) {
855b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
856b8e80941Smrg
857b8e80941Smrg         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
858b8e80941Smrg         new_inst = tgsi_default_full_instruction();
859b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
860b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
861b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
862b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 3;
863b8e80941Smrg         reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
864b8e80941Smrg         reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
865b8e80941Smrg         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
866b8e80941Smrg
867b8e80941Smrg         if (opcode == TGSI_OPCODE_DP4) {
868b8e80941Smrg            tctx->emit_instruction(tctx, &new_inst);
869b8e80941Smrg
870b8e80941Smrg            /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
871b8e80941Smrg            new_inst = tgsi_default_full_instruction();
872b8e80941Smrg            new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
873b8e80941Smrg            new_inst.Instruction.NumDstRegs = 1;
874b8e80941Smrg            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
875b8e80941Smrg            new_inst.Instruction.NumSrcRegs = 3;
876b8e80941Smrg            reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
877b8e80941Smrg            reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
878b8e80941Smrg            reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
879b8e80941Smrg         }
880b8e80941Smrg      }
881b8e80941Smrg
882b8e80941Smrg      /* fixup last instruction to write to dst: */
883b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
884b8e80941Smrg
885b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
886b8e80941Smrg   }
887b8e80941Smrg}
888b8e80941Smrg
889b8e80941Smrg/* FLR - floor, CEIL - ceil
890b8e80941Smrg * ; needs: 1 tmp
891b8e80941Smrg * if (CEIL) {
892b8e80941Smrg *   FRC tmpA, -src
893b8e80941Smrg *   ADD dst, src, tmpA
894b8e80941Smrg * } else {
895b8e80941Smrg *   FRC tmpA, src
896b8e80941Smrg *   SUB dst, src, tmpA
897b8e80941Smrg * }
898b8e80941Smrg */
899b8e80941Smrg#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
900b8e80941Smrg#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
901b8e80941Smrg#define FLR_TMP 1
902b8e80941Smrg#define CEIL_TMP 1
903b8e80941Smrgstatic void
904b8e80941Smrgtransform_flr_ceil(struct tgsi_transform_context *tctx,
905b8e80941Smrg                   struct tgsi_full_instruction *inst)
906b8e80941Smrg{
907b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
908b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
909b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
910b8e80941Smrg   struct tgsi_full_instruction new_inst;
911b8e80941Smrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
912b8e80941Smrg
913b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
914b8e80941Smrg      /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
915b8e80941Smrg      new_inst = tgsi_default_full_instruction();
916b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
917b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
918b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
919b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
920b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
921b8e80941Smrg
922b8e80941Smrg      if (opcode == TGSI_OPCODE_CEIL)
923b8e80941Smrg         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
924b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
925b8e80941Smrg
926b8e80941Smrg      /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
927b8e80941Smrg      new_inst = tgsi_default_full_instruction();
928b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
929b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
930b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
931b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
932b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
933b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
934b8e80941Smrg      if (opcode == TGSI_OPCODE_FLR)
935b8e80941Smrg         new_inst.Src[1].Register.Negate = 1;
936b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
937b8e80941Smrg   }
938b8e80941Smrg}
939b8e80941Smrg
940b8e80941Smrg/* TRUNC - truncate off fractional part
941b8e80941Smrg *  dst.x = trunc(src.x)
942b8e80941Smrg *  dst.y = trunc(src.y)
943b8e80941Smrg *  dst.z = trunc(src.z)
944b8e80941Smrg *  dst.w = trunc(src.w)
945b8e80941Smrg *
946b8e80941Smrg * ; needs: 1 tmp
947b8e80941Smrg * if (lower FLR) {
948b8e80941Smrg *   FRC tmpA, |src|
949b8e80941Smrg *   SUB tmpA, |src|, tmpA
950b8e80941Smrg * } else {
951b8e80941Smrg *   FLR tmpA, |src|
952b8e80941Smrg * }
953b8e80941Smrg * CMP dst, src, -tmpA, tmpA
954b8e80941Smrg */
955b8e80941Smrg#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
956b8e80941Smrg#define TRUNC_TMP 1
957b8e80941Smrgstatic void
958b8e80941Smrgtransform_trunc(struct tgsi_transform_context *tctx,
959b8e80941Smrg                struct tgsi_full_instruction *inst)
960b8e80941Smrg{
961b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
962b8e80941Smrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
963b8e80941Smrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
964b8e80941Smrg   struct tgsi_full_instruction new_inst;
965b8e80941Smrg
966b8e80941Smrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
967b8e80941Smrg      if (ctx->config->lower_FLR) {
968b8e80941Smrg         new_inst = tgsi_default_full_instruction();
969b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
970b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
971b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
972b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
973b8e80941Smrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
974b8e80941Smrg         new_inst.Src[0].Register.Absolute = true;
975b8e80941Smrg         new_inst.Src[0].Register.Negate = false;
976b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
977b8e80941Smrg
978b8e80941Smrg         new_inst = tgsi_default_full_instruction();
979b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
980b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
981b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
982b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 2;
983b8e80941Smrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
984b8e80941Smrg         new_inst.Src[0].Register.Absolute = true;
985b8e80941Smrg         new_inst.Src[0].Register.Negate = false;
986b8e80941Smrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
987b8e80941Smrg         new_inst.Src[1].Register.Negate = 1;
988b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
989b8e80941Smrg      } else {
990b8e80941Smrg         new_inst = tgsi_default_full_instruction();
991b8e80941Smrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
992b8e80941Smrg         new_inst.Instruction.NumDstRegs = 1;
993b8e80941Smrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
994b8e80941Smrg         new_inst.Instruction.NumSrcRegs = 1;
995b8e80941Smrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
996b8e80941Smrg         new_inst.Src[0].Register.Absolute = true;
997b8e80941Smrg         new_inst.Src[0].Register.Negate = false;
998b8e80941Smrg         tctx->emit_instruction(tctx, &new_inst);
999b8e80941Smrg      }
1000b8e80941Smrg
1001b8e80941Smrg      new_inst = tgsi_default_full_instruction();
1002b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1003b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
1004b8e80941Smrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
1006b8e80941Smrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1007b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1008b8e80941Smrg      new_inst.Src[1].Register.Negate = true;
1009b8e80941Smrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1010b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
1011b8e80941Smrg   }
1012b8e80941Smrg}
1013b8e80941Smrg
1014b8e80941Smrg/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1015b8e80941Smrg * in the case of TXP, the clamping must happen *after* projection, so
1016b8e80941Smrg * we need to lower TXP to TEX.
1017b8e80941Smrg *
1018b8e80941Smrg *   MOV tmpA, src0
1019b8e80941Smrg *   if (opc == TXP) {
1020b8e80941Smrg *     ; do perspective division manually before clamping:
1021b8e80941Smrg *     RCP tmpB, tmpA.w
1022b8e80941Smrg *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1023b8e80941Smrg *     opc = TEX;
1024b8e80941Smrg *   }
1025b8e80941Smrg *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1026b8e80941Smrg *   <opc> dst, tmpA, ...
1027b8e80941Smrg */
1028b8e80941Smrg#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1029b8e80941Smrg#define SAMP_TMP  2
1030b8e80941Smrgstatic int
1031b8e80941Smrgtransform_samp(struct tgsi_transform_context *tctx,
1032b8e80941Smrg               struct tgsi_full_instruction *inst)
1033b8e80941Smrg{
1034b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1035b8e80941Smrg   struct tgsi_full_src_register *coord = &inst->Src[0];
1036b8e80941Smrg   struct tgsi_full_src_register *samp;
1037b8e80941Smrg   struct tgsi_full_instruction new_inst;
1038b8e80941Smrg   /* mask is clamped coords, pmask is all coords (for projection): */
1039b8e80941Smrg   unsigned mask = 0, pmask = 0, smask;
1040b8e80941Smrg   unsigned tex = inst->Texture.Texture;
1041b8e80941Smrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
1042b8e80941Smrg   bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1043b8e80941Smrg		   (ctx->config->lower_TXP & (1 << tex));
1044b8e80941Smrg
1045b8e80941Smrg   if (opcode == TGSI_OPCODE_TXB2) {
1046b8e80941Smrg      samp = &inst->Src[2];
1047b8e80941Smrg   } else {
1048b8e80941Smrg      samp = &inst->Src[1];
1049b8e80941Smrg   }
1050b8e80941Smrg
1051b8e80941Smrg   /* convert sampler # to bitmask to test: */
1052b8e80941Smrg   smask = 1 << samp->Register.Index;
1053b8e80941Smrg
1054b8e80941Smrg   /* check if we actually need to lower this one: */
1055b8e80941Smrg   if (!(ctx->saturate & smask) && !lower_txp)
1056b8e80941Smrg      return -1;
1057b8e80941Smrg
1058b8e80941Smrg   /* figure out which coordinates need saturating:
1059b8e80941Smrg    *   - RECT textures should not get saturated
1060b8e80941Smrg    *   - array index coords should not get saturated
1061b8e80941Smrg    */
1062b8e80941Smrg   switch (tex) {
1063b8e80941Smrg   case TGSI_TEXTURE_3D:
1064b8e80941Smrg   case TGSI_TEXTURE_CUBE:
1065b8e80941Smrg   case TGSI_TEXTURE_CUBE_ARRAY:
1066b8e80941Smrg   case TGSI_TEXTURE_SHADOWCUBE:
1067b8e80941Smrg   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1068b8e80941Smrg      if (ctx->config->saturate_r & smask)
1069b8e80941Smrg         mask |= TGSI_WRITEMASK_Z;
1070b8e80941Smrg      pmask |= TGSI_WRITEMASK_Z;
1071b8e80941Smrg      /* fallthrough */
1072b8e80941Smrg
1073b8e80941Smrg   case TGSI_TEXTURE_2D:
1074b8e80941Smrg   case TGSI_TEXTURE_2D_ARRAY:
1075b8e80941Smrg   case TGSI_TEXTURE_SHADOW2D:
1076b8e80941Smrg   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1077b8e80941Smrg   case TGSI_TEXTURE_2D_MSAA:
1078b8e80941Smrg   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1079b8e80941Smrg      if (ctx->config->saturate_t & smask)
1080b8e80941Smrg         mask |= TGSI_WRITEMASK_Y;
1081b8e80941Smrg      pmask |= TGSI_WRITEMASK_Y;
1082b8e80941Smrg      /* fallthrough */
1083b8e80941Smrg
1084b8e80941Smrg   case TGSI_TEXTURE_1D:
1085b8e80941Smrg   case TGSI_TEXTURE_1D_ARRAY:
1086b8e80941Smrg   case TGSI_TEXTURE_SHADOW1D:
1087b8e80941Smrg   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1088b8e80941Smrg      if (ctx->config->saturate_s & smask)
1089b8e80941Smrg         mask |= TGSI_WRITEMASK_X;
1090b8e80941Smrg      pmask |= TGSI_WRITEMASK_X;
1091b8e80941Smrg      break;
1092b8e80941Smrg
1093b8e80941Smrg   case TGSI_TEXTURE_RECT:
1094b8e80941Smrg   case TGSI_TEXTURE_SHADOWRECT:
1095b8e80941Smrg      /* we don't saturate, but in case of lower_txp we
1096b8e80941Smrg       * still need to do the perspective divide:
1097b8e80941Smrg       */
1098b8e80941Smrg       pmask = TGSI_WRITEMASK_XY;
1099b8e80941Smrg       break;
1100b8e80941Smrg   }
1101b8e80941Smrg
1102b8e80941Smrg   /* sanity check.. driver could be asking to saturate a non-
1103b8e80941Smrg    * existent coordinate component:
1104b8e80941Smrg    */
1105b8e80941Smrg   if (!mask && !lower_txp)
1106b8e80941Smrg      return -1;
1107b8e80941Smrg
1108b8e80941Smrg   /* MOV tmpA, src0 */
1109b8e80941Smrg   create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1110b8e80941Smrg
1111b8e80941Smrg   /* This is a bit sad.. we need to clamp *after* the coords
1112b8e80941Smrg    * are projected, which means lowering TXP to TEX and doing
1113b8e80941Smrg    * the projection ourself.  But since I haven't figured out
1114b8e80941Smrg    * how to make the lowering code deliver an electric shock
1115b8e80941Smrg    * to anyone using GL_CLAMP, we must do this instead:
1116b8e80941Smrg    */
1117b8e80941Smrg   if (opcode == TGSI_OPCODE_TXP) {
1118b8e80941Smrg      /* RCP tmpB.x tmpA.w */
1119b8e80941Smrg      new_inst = tgsi_default_full_instruction();
1120b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1121b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
1122b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1123b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 1;
1124b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1125b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
1126b8e80941Smrg
1127b8e80941Smrg      /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1128b8e80941Smrg      new_inst = tgsi_default_full_instruction();
1129b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1130b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
1131b8e80941Smrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1132b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 2;
1133b8e80941Smrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1134b8e80941Smrg      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1135b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
1136b8e80941Smrg
1137b8e80941Smrg      opcode = TGSI_OPCODE_TEX;
1138b8e80941Smrg   }
1139b8e80941Smrg
1140b8e80941Smrg   /* MOV_SAT tmpA.<mask>, tmpA */
1141b8e80941Smrg   if (mask) {
1142b8e80941Smrg      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1143b8e80941Smrg   }
1144b8e80941Smrg
1145b8e80941Smrg   /* modify the texture samp instruction to take fixed up coord: */
1146b8e80941Smrg   new_inst = *inst;
1147b8e80941Smrg   new_inst.Instruction.Opcode = opcode;
1148b8e80941Smrg   new_inst.Src[0] = ctx->tmp[A].src;
1149b8e80941Smrg   tctx->emit_instruction(tctx, &new_inst);
1150b8e80941Smrg
1151b8e80941Smrg   return 0;
1152b8e80941Smrg}
1153b8e80941Smrg
1154b8e80941Smrg/* Two-sided color emulation:
1155b8e80941Smrg * For each COLOR input, create a corresponding BCOLOR input, plus
1156b8e80941Smrg * CMP instruction to select front or back color based on FACE
1157b8e80941Smrg */
1158b8e80941Smrg#define TWOSIDE_GROW(n)  (                      \
1159b8e80941Smrg      2 +         /* FACE */                    \
1160b8e80941Smrg      ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161b8e80941Smrg      ((n) * 1) + /* TEMP[] */                  \
1162b8e80941Smrg      ((n) * NINST(3))   /* CMP instr */        \
1163b8e80941Smrg      )
1164b8e80941Smrg
1165b8e80941Smrgstatic void
1166b8e80941Smrgemit_twoside(struct tgsi_transform_context *tctx)
1167b8e80941Smrg{
1168b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1169b8e80941Smrg   struct tgsi_shader_info *info = ctx->info;
1170b8e80941Smrg   struct tgsi_full_declaration decl;
1171b8e80941Smrg   struct tgsi_full_instruction new_inst;
1172b8e80941Smrg   unsigned inbase, tmpbase;
1173b8e80941Smrg   unsigned i;
1174b8e80941Smrg
1175b8e80941Smrg   inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1176b8e80941Smrg   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1177b8e80941Smrg
1178b8e80941Smrg   /* additional inputs for BCOLOR's */
1179b8e80941Smrg   for (i = 0; i < ctx->two_side_colors; i++) {
1180b8e80941Smrg      unsigned in_idx = ctx->two_side_idx[i];
1181b8e80941Smrg      decl = tgsi_default_full_declaration();
1182b8e80941Smrg      decl.Declaration.File = TGSI_FILE_INPUT;
1183b8e80941Smrg      decl.Declaration.Semantic = true;
1184b8e80941Smrg      decl.Range.First = decl.Range.Last = inbase + i;
1185b8e80941Smrg      decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1186b8e80941Smrg      decl.Semantic.Index = info->input_semantic_index[in_idx];
1187b8e80941Smrg      decl.Declaration.Interpolate = true;
1188b8e80941Smrg      decl.Interp.Interpolate = info->input_interpolate[in_idx];
1189b8e80941Smrg      decl.Interp.Location = info->input_interpolate_loc[in_idx];
1190b8e80941Smrg      decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1191b8e80941Smrg      tctx->emit_declaration(tctx, &decl);
1192b8e80941Smrg   }
1193b8e80941Smrg
1194b8e80941Smrg   /* additional input for FACE */
1195b8e80941Smrg   if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196b8e80941Smrg      decl = tgsi_default_full_declaration();
1197b8e80941Smrg      decl.Declaration.File = TGSI_FILE_INPUT;
1198b8e80941Smrg      decl.Declaration.Semantic = true;
1199b8e80941Smrg      decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200b8e80941Smrg      decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201b8e80941Smrg      decl.Semantic.Index = 0;
1202b8e80941Smrg      tctx->emit_declaration(tctx, &decl);
1203b8e80941Smrg
1204b8e80941Smrg      ctx->face_idx = decl.Range.First;
1205b8e80941Smrg   }
1206b8e80941Smrg
1207b8e80941Smrg   /* additional temps for COLOR/BCOLOR selection: */
1208b8e80941Smrg   for (i = 0; i < ctx->two_side_colors; i++) {
1209b8e80941Smrg      decl = tgsi_default_full_declaration();
1210b8e80941Smrg      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211b8e80941Smrg      decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212b8e80941Smrg      tctx->emit_declaration(tctx, &decl);
1213b8e80941Smrg   }
1214b8e80941Smrg
1215b8e80941Smrg   /* and finally additional instructions to select COLOR/BCOLOR: */
1216b8e80941Smrg   for (i = 0; i < ctx->two_side_colors; i++) {
1217b8e80941Smrg      new_inst = tgsi_default_full_instruction();
1218b8e80941Smrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219b8e80941Smrg
1220b8e80941Smrg      new_inst.Instruction.NumDstRegs = 1;
1221b8e80941Smrg      new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222b8e80941Smrg      new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223b8e80941Smrg      new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224b8e80941Smrg
1225b8e80941Smrg      new_inst.Instruction.NumSrcRegs = 3;
1226b8e80941Smrg      new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227b8e80941Smrg      new_inst.Src[0].Register.Index = ctx->face_idx;
1228b8e80941Smrg      new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229b8e80941Smrg      new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230b8e80941Smrg      new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231b8e80941Smrg      new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232b8e80941Smrg      new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233b8e80941Smrg      new_inst.Src[1].Register.Index = inbase + i;
1234b8e80941Smrg      new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235b8e80941Smrg      new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236b8e80941Smrg      new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237b8e80941Smrg      new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238b8e80941Smrg      new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239b8e80941Smrg      new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240b8e80941Smrg      new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241b8e80941Smrg      new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242b8e80941Smrg      new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243b8e80941Smrg      new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244b8e80941Smrg
1245b8e80941Smrg      tctx->emit_instruction(tctx, &new_inst);
1246b8e80941Smrg   }
1247b8e80941Smrg}
1248b8e80941Smrg
1249b8e80941Smrgstatic void
1250b8e80941Smrgemit_decls(struct tgsi_transform_context *tctx)
1251b8e80941Smrg{
1252b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253b8e80941Smrg   struct tgsi_shader_info *info = ctx->info;
1254b8e80941Smrg   struct tgsi_full_declaration decl;
1255b8e80941Smrg   struct tgsi_full_immediate immed;
1256b8e80941Smrg   unsigned tmpbase;
1257b8e80941Smrg   unsigned i;
1258b8e80941Smrg
1259b8e80941Smrg   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260b8e80941Smrg
1261b8e80941Smrg   ctx->color_base = tmpbase + ctx->numtmp;
1262b8e80941Smrg
1263b8e80941Smrg   /* declare immediate: */
1264b8e80941Smrg   immed = tgsi_default_full_immediate();
1265b8e80941Smrg   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266b8e80941Smrg   immed.u[0].Float = 0.0;
1267b8e80941Smrg   immed.u[1].Float = 1.0;
1268b8e80941Smrg   immed.u[2].Float = 128.0;
1269b8e80941Smrg   immed.u[3].Float = 0.0;
1270b8e80941Smrg   tctx->emit_immediate(tctx, &immed);
1271b8e80941Smrg
1272b8e80941Smrg   ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273b8e80941Smrg   ctx->imm.Register.Index = info->immediate_count;
1274b8e80941Smrg   ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275b8e80941Smrg   ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276b8e80941Smrg   ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277b8e80941Smrg   ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278b8e80941Smrg
1279b8e80941Smrg   /* declare temp regs: */
1280b8e80941Smrg   for (i = 0; i < ctx->numtmp; i++) {
1281b8e80941Smrg      decl = tgsi_default_full_declaration();
1282b8e80941Smrg      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283b8e80941Smrg      decl.Range.First = decl.Range.Last = tmpbase + i;
1284b8e80941Smrg      tctx->emit_declaration(tctx, &decl);
1285b8e80941Smrg
1286b8e80941Smrg      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287b8e80941Smrg      ctx->tmp[i].src.Register.Index = tmpbase + i;
1288b8e80941Smrg      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289b8e80941Smrg      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290b8e80941Smrg      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291b8e80941Smrg      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292b8e80941Smrg
1293b8e80941Smrg      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294b8e80941Smrg      ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295b8e80941Smrg      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296b8e80941Smrg   }
1297b8e80941Smrg
1298b8e80941Smrg   if (ctx->two_side_colors)
1299b8e80941Smrg      emit_twoside(tctx);
1300b8e80941Smrg}
1301b8e80941Smrg
1302b8e80941Smrgstatic void
1303b8e80941Smrgrename_color_inputs(struct tgsi_lowering_context *ctx,
1304b8e80941Smrg                    struct tgsi_full_instruction *inst)
1305b8e80941Smrg{
1306b8e80941Smrg   unsigned i, j;
1307b8e80941Smrg   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308b8e80941Smrg      struct tgsi_src_register *src = &inst->Src[i].Register;
1309b8e80941Smrg      if (src->File == TGSI_FILE_INPUT) {
1310b8e80941Smrg         for (j = 0; j < ctx->two_side_colors; j++) {
1311b8e80941Smrg	    if (src->Index == (int)ctx->two_side_idx[j]) {
1312b8e80941Smrg               src->File = TGSI_FILE_TEMPORARY;
1313b8e80941Smrg               src->Index = ctx->color_base + j;
1314b8e80941Smrg               break;
1315b8e80941Smrg            }
1316b8e80941Smrg         }
1317b8e80941Smrg      }
1318b8e80941Smrg   }
1319b8e80941Smrg
1320b8e80941Smrg}
1321b8e80941Smrg
1322b8e80941Smrgstatic void
1323b8e80941Smrgtransform_instr(struct tgsi_transform_context *tctx,
1324b8e80941Smrg		struct tgsi_full_instruction *inst)
1325b8e80941Smrg{
1326b8e80941Smrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327b8e80941Smrg
1328b8e80941Smrg   if (!ctx->emitted_decls) {
1329b8e80941Smrg      emit_decls(tctx);
1330b8e80941Smrg      ctx->emitted_decls = 1;
1331b8e80941Smrg   }
1332b8e80941Smrg
1333b8e80941Smrg   /* if emulating two-sided-color, we need to re-write some
1334b8e80941Smrg    * src registers:
1335b8e80941Smrg    */
1336b8e80941Smrg   if (ctx->two_side_colors)
1337b8e80941Smrg      rename_color_inputs(ctx, inst);
1338b8e80941Smrg
1339b8e80941Smrg   switch (inst->Instruction.Opcode) {
1340b8e80941Smrg   case TGSI_OPCODE_DST:
1341b8e80941Smrg      if (!ctx->config->lower_DST)
1342b8e80941Smrg         goto skip;
1343b8e80941Smrg      transform_dst(tctx, inst);
1344b8e80941Smrg      break;
1345b8e80941Smrg   case TGSI_OPCODE_LRP:
1346b8e80941Smrg      if (!ctx->config->lower_LRP)
1347b8e80941Smrg         goto skip;
1348b8e80941Smrg      transform_lrp(tctx, inst);
1349b8e80941Smrg      break;
1350b8e80941Smrg   case TGSI_OPCODE_FRC:
1351b8e80941Smrg      if (!ctx->config->lower_FRC)
1352b8e80941Smrg         goto skip;
1353b8e80941Smrg      transform_frc(tctx, inst);
1354b8e80941Smrg      break;
1355b8e80941Smrg   case TGSI_OPCODE_POW:
1356b8e80941Smrg      if (!ctx->config->lower_POW)
1357b8e80941Smrg         goto skip;
1358b8e80941Smrg      transform_pow(tctx, inst);
1359b8e80941Smrg      break;
1360b8e80941Smrg   case TGSI_OPCODE_LIT:
1361b8e80941Smrg      if (!ctx->config->lower_LIT)
1362b8e80941Smrg         goto skip;
1363b8e80941Smrg      transform_lit(tctx, inst);
1364b8e80941Smrg      break;
1365b8e80941Smrg   case TGSI_OPCODE_EXP:
1366b8e80941Smrg      if (!ctx->config->lower_EXP)
1367b8e80941Smrg         goto skip;
1368b8e80941Smrg      transform_exp(tctx, inst);
1369b8e80941Smrg      break;
1370b8e80941Smrg   case TGSI_OPCODE_LOG:
1371b8e80941Smrg      if (!ctx->config->lower_LOG)
1372b8e80941Smrg         goto skip;
1373b8e80941Smrg      transform_log(tctx, inst);
1374b8e80941Smrg      break;
1375b8e80941Smrg   case TGSI_OPCODE_DP4:
1376b8e80941Smrg      if (!ctx->config->lower_DP4)
1377b8e80941Smrg         goto skip;
1378b8e80941Smrg      transform_dotp(tctx, inst);
1379b8e80941Smrg      break;
1380b8e80941Smrg   case TGSI_OPCODE_DP3:
1381b8e80941Smrg      if (!ctx->config->lower_DP3)
1382b8e80941Smrg         goto skip;
1383b8e80941Smrg      transform_dotp(tctx, inst);
1384b8e80941Smrg      break;
1385b8e80941Smrg   case TGSI_OPCODE_DP2:
1386b8e80941Smrg      if (!ctx->config->lower_DP2)
1387b8e80941Smrg         goto skip;
1388b8e80941Smrg      transform_dotp(tctx, inst);
1389b8e80941Smrg      break;
1390b8e80941Smrg   case TGSI_OPCODE_FLR:
1391b8e80941Smrg      if (!ctx->config->lower_FLR)
1392b8e80941Smrg         goto skip;
1393b8e80941Smrg      transform_flr_ceil(tctx, inst);
1394b8e80941Smrg      break;
1395b8e80941Smrg   case TGSI_OPCODE_CEIL:
1396b8e80941Smrg      if (!ctx->config->lower_CEIL)
1397b8e80941Smrg         goto skip;
1398b8e80941Smrg      transform_flr_ceil(tctx, inst);
1399b8e80941Smrg      break;
1400b8e80941Smrg   case TGSI_OPCODE_TRUNC:
1401b8e80941Smrg      if (!ctx->config->lower_TRUNC)
1402b8e80941Smrg         goto skip;
1403b8e80941Smrg      transform_trunc(tctx, inst);
1404b8e80941Smrg      break;
1405b8e80941Smrg   case TGSI_OPCODE_TEX:
1406b8e80941Smrg   case TGSI_OPCODE_TXP:
1407b8e80941Smrg   case TGSI_OPCODE_TXB:
1408b8e80941Smrg   case TGSI_OPCODE_TXB2:
1409b8e80941Smrg   case TGSI_OPCODE_TXL:
1410b8e80941Smrg      if (transform_samp(tctx, inst))
1411b8e80941Smrg         goto skip;
1412b8e80941Smrg      break;
1413b8e80941Smrg   default:
1414b8e80941Smrg   skip:
1415b8e80941Smrg      tctx->emit_instruction(tctx, inst);
1416b8e80941Smrg      break;
1417b8e80941Smrg   }
1418b8e80941Smrg}
1419b8e80941Smrg
1420b8e80941Smrg/* returns NULL if no lowering required, else returns the new
1421b8e80941Smrg * tokens (which caller is required to free()).  In either case
1422b8e80941Smrg * returns the current info.
1423b8e80941Smrg */
1424b8e80941Smrgconst struct tgsi_token *
1425b8e80941Smrgtgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426b8e80941Smrg                        const struct tgsi_token *tokens,
1427b8e80941Smrg                        struct tgsi_shader_info *info)
1428b8e80941Smrg{
1429b8e80941Smrg   struct tgsi_lowering_context ctx;
1430b8e80941Smrg   struct tgsi_token *newtoks;
1431b8e80941Smrg   int newlen, numtmp;
1432b8e80941Smrg
1433b8e80941Smrg   /* sanity check in case limit is ever increased: */
1434b8e80941Smrg   STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435b8e80941Smrg
1436b8e80941Smrg   /* sanity check the lowering */
1437b8e80941Smrg   assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438b8e80941Smrg   assert(!(config->lower_FRC && config->lower_TRUNC));
1439b8e80941Smrg
1440b8e80941Smrg   memset(&ctx, 0, sizeof(ctx));
1441b8e80941Smrg   ctx.base.transform_instruction = transform_instr;
1442b8e80941Smrg   ctx.info = info;
1443b8e80941Smrg   ctx.config = config;
1444b8e80941Smrg
1445b8e80941Smrg   tgsi_scan_shader(tokens, info);
1446b8e80941Smrg
1447b8e80941Smrg   /* if we are adding fragment shader support to emulate two-sided
1448b8e80941Smrg    * color, then figure out the number of additional inputs we need
1449b8e80941Smrg    * to create for BCOLOR's..
1450b8e80941Smrg    */
1451b8e80941Smrg   if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452b8e80941Smrg       config->color_two_side) {
1453b8e80941Smrg      int i;
1454b8e80941Smrg      ctx.face_idx = -1;
1455b8e80941Smrg      for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456b8e80941Smrg         if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457b8e80941Smrg            ctx.two_side_idx[ctx.two_side_colors++] = i;
1458b8e80941Smrg         if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459b8e80941Smrg            ctx.face_idx = i;
1460b8e80941Smrg      }
1461b8e80941Smrg   }
1462b8e80941Smrg
1463b8e80941Smrg   ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464b8e80941Smrg
1465b8e80941Smrg#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466b8e80941Smrg   /* if there are no instructions to lower, then we are done: */
1467b8e80941Smrg   if (!(OPCS(DST) ||
1468b8e80941Smrg         OPCS(LRP) ||
1469b8e80941Smrg         OPCS(FRC) ||
1470b8e80941Smrg         OPCS(POW) ||
1471b8e80941Smrg         OPCS(LIT) ||
1472b8e80941Smrg         OPCS(EXP) ||
1473b8e80941Smrg         OPCS(LOG) ||
1474b8e80941Smrg         OPCS(DP4) ||
1475b8e80941Smrg         OPCS(DP3) ||
1476b8e80941Smrg         OPCS(DP2) ||
1477b8e80941Smrg         OPCS(FLR) ||
1478b8e80941Smrg         OPCS(CEIL) ||
1479b8e80941Smrg         OPCS(TRUNC) ||
1480b8e80941Smrg         OPCS(TXP) ||
1481b8e80941Smrg         ctx.two_side_colors ||
1482b8e80941Smrg         ctx.saturate))
1483b8e80941Smrg      return NULL;
1484b8e80941Smrg
1485b8e80941Smrg#if 0  /* debug */
1486b8e80941Smrg   _debug_printf("BEFORE:");
1487b8e80941Smrg   tgsi_dump(tokens, 0);
1488b8e80941Smrg#endif
1489b8e80941Smrg
1490b8e80941Smrg   numtmp = 0;
1491b8e80941Smrg   newlen = tgsi_num_tokens(tokens);
1492b8e80941Smrg   if (OPCS(DST)) {
1493b8e80941Smrg      newlen += DST_GROW * OPCS(DST);
1494b8e80941Smrg      numtmp = MAX2(numtmp, DST_TMP);
1495b8e80941Smrg   }
1496b8e80941Smrg   if (OPCS(LRP)) {
1497b8e80941Smrg      newlen += LRP_GROW * OPCS(LRP);
1498b8e80941Smrg      numtmp = MAX2(numtmp, LRP_TMP);
1499b8e80941Smrg   }
1500b8e80941Smrg   if (OPCS(FRC)) {
1501b8e80941Smrg      newlen += FRC_GROW * OPCS(FRC);
1502b8e80941Smrg      numtmp = MAX2(numtmp, FRC_TMP);
1503b8e80941Smrg   }
1504b8e80941Smrg   if (OPCS(POW)) {
1505b8e80941Smrg      newlen += POW_GROW * OPCS(POW);
1506b8e80941Smrg      numtmp = MAX2(numtmp, POW_TMP);
1507b8e80941Smrg   }
1508b8e80941Smrg   if (OPCS(LIT)) {
1509b8e80941Smrg      newlen += LIT_GROW * OPCS(LIT);
1510b8e80941Smrg      numtmp = MAX2(numtmp, LIT_TMP);
1511b8e80941Smrg   }
1512b8e80941Smrg   if (OPCS(EXP)) {
1513b8e80941Smrg      newlen += EXP_GROW * OPCS(EXP);
1514b8e80941Smrg      numtmp = MAX2(numtmp, EXP_TMP);
1515b8e80941Smrg   }
1516b8e80941Smrg   if (OPCS(LOG)) {
1517b8e80941Smrg      newlen += LOG_GROW * OPCS(LOG);
1518b8e80941Smrg      numtmp = MAX2(numtmp, LOG_TMP);
1519b8e80941Smrg   }
1520b8e80941Smrg   if (OPCS(DP4)) {
1521b8e80941Smrg      newlen += DP4_GROW * OPCS(DP4);
1522b8e80941Smrg      numtmp = MAX2(numtmp, DOTP_TMP);
1523b8e80941Smrg   }
1524b8e80941Smrg   if (OPCS(DP3)) {
1525b8e80941Smrg      newlen += DP3_GROW * OPCS(DP3);
1526b8e80941Smrg      numtmp = MAX2(numtmp, DOTP_TMP);
1527b8e80941Smrg   }
1528b8e80941Smrg   if (OPCS(DP2)) {
1529b8e80941Smrg      newlen += DP2_GROW * OPCS(DP2);
1530b8e80941Smrg      numtmp = MAX2(numtmp, DOTP_TMP);
1531b8e80941Smrg   }
1532b8e80941Smrg   if (OPCS(FLR)) {
1533b8e80941Smrg      newlen += FLR_GROW * OPCS(FLR);
1534b8e80941Smrg      numtmp = MAX2(numtmp, FLR_TMP);
1535b8e80941Smrg   }
1536b8e80941Smrg   if (OPCS(CEIL)) {
1537b8e80941Smrg      newlen += CEIL_GROW * OPCS(CEIL);
1538b8e80941Smrg      numtmp = MAX2(numtmp, CEIL_TMP);
1539b8e80941Smrg   }
1540b8e80941Smrg   if (OPCS(TRUNC)) {
1541b8e80941Smrg      newlen += TRUNC_GROW * OPCS(TRUNC);
1542b8e80941Smrg      numtmp = MAX2(numtmp, TRUNC_TMP);
1543b8e80941Smrg   }
1544b8e80941Smrg   if (ctx.saturate || config->lower_TXP) {
1545b8e80941Smrg      int n = 0;
1546b8e80941Smrg
1547b8e80941Smrg      if (ctx.saturate) {
1548b8e80941Smrg         n = info->opcode_count[TGSI_OPCODE_TEX] +
1549b8e80941Smrg            info->opcode_count[TGSI_OPCODE_TXP] +
1550b8e80941Smrg            info->opcode_count[TGSI_OPCODE_TXB] +
1551b8e80941Smrg            info->opcode_count[TGSI_OPCODE_TXB2] +
1552b8e80941Smrg            info->opcode_count[TGSI_OPCODE_TXL];
1553b8e80941Smrg      } else if (config->lower_TXP) {
1554b8e80941Smrg          n = info->opcode_count[TGSI_OPCODE_TXP];
1555b8e80941Smrg      }
1556b8e80941Smrg
1557b8e80941Smrg      newlen += SAMP_GROW * n;
1558b8e80941Smrg      numtmp = MAX2(numtmp, SAMP_TMP);
1559b8e80941Smrg   }
1560b8e80941Smrg
1561b8e80941Smrg   /* specifically don't include two_side_colors temps in the count: */
1562b8e80941Smrg   ctx.numtmp = numtmp;
1563b8e80941Smrg
1564b8e80941Smrg   if (ctx.two_side_colors) {
1565b8e80941Smrg      newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566b8e80941Smrg      /* note: we permanently consume temp regs, re-writing references
1567b8e80941Smrg       * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568b8e80941Smrg       * instruction that selects which varying to use):
1569b8e80941Smrg       */
1570b8e80941Smrg      numtmp += ctx.two_side_colors;
1571b8e80941Smrg   }
1572b8e80941Smrg
1573b8e80941Smrg   newlen += 2 * numtmp;
1574b8e80941Smrg   newlen += 5;        /* immediate */
1575b8e80941Smrg
1576b8e80941Smrg   newtoks = tgsi_alloc_tokens(newlen);
1577b8e80941Smrg   if (!newtoks)
1578b8e80941Smrg      return NULL;
1579b8e80941Smrg
1580b8e80941Smrg   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1581b8e80941Smrg
1582b8e80941Smrg   tgsi_scan_shader(newtoks, info);
1583b8e80941Smrg
1584b8e80941Smrg#if 0  /* debug */
1585b8e80941Smrg   _debug_printf("AFTER:");
1586b8e80941Smrg   tgsi_dump(newtoks, 0);
1587b8e80941Smrg#endif
1588b8e80941Smrg
1589b8e80941Smrg   return newtoks;
1590b8e80941Smrg}
1591