101e04c3fSmrg/*
201e04c3fSmrg * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2001e04c3fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2101e04c3fSmrg * SOFTWARE.
2201e04c3fSmrg *
2301e04c3fSmrg * Authors:
2401e04c3fSmrg *    Rob Clark <robclark@freedesktop.org>
2501e04c3fSmrg */
2601e04c3fSmrg
2701e04c3fSmrg#include "tgsi/tgsi_transform.h"
2801e04c3fSmrg#include "tgsi/tgsi_scan.h"
2901e04c3fSmrg#include "tgsi/tgsi_dump.h"
3001e04c3fSmrg
317ec681f3Smrg#include "util/compiler.h"
3201e04c3fSmrg#include "util/u_debug.h"
3301e04c3fSmrg#include "util/u_math.h"
3401e04c3fSmrg
3501e04c3fSmrg#include "tgsi_lowering.h"
3601e04c3fSmrg
3701e04c3fSmrgstruct tgsi_lowering_context {
3801e04c3fSmrg   struct tgsi_transform_context base;
3901e04c3fSmrg   const struct tgsi_lowering_config *config;
4001e04c3fSmrg   struct tgsi_shader_info *info;
4101e04c3fSmrg   unsigned two_side_colors;
4201e04c3fSmrg   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
4301e04c3fSmrg   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
4401e04c3fSmrg   int face_idx;
4501e04c3fSmrg   unsigned numtmp;
4601e04c3fSmrg   struct {
4701e04c3fSmrg      struct tgsi_full_src_register src;
4801e04c3fSmrg      struct tgsi_full_dst_register dst;
4901e04c3fSmrg   } tmp[2];
5001e04c3fSmrg#define A 0
5101e04c3fSmrg#define B 1
5201e04c3fSmrg   struct tgsi_full_src_register imm;
5301e04c3fSmrg   int emitted_decls;
5401e04c3fSmrg   unsigned saturate;
5501e04c3fSmrg};
5601e04c3fSmrg
/* Downcast a generic transform context to the lowering context that
 * contains it (the transform context is the struct's first member).
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lowering =
      (struct tgsi_lowering_context *)tctx;

   return lowering;
}
6201e04c3fSmrg
6301e04c3fSmrg/*
6401e04c3fSmrg * Utility helpers:
6501e04c3fSmrg */
6601e04c3fSmrg
6701e04c3fSmrgstatic void
6801e04c3fSmrgreg_dst(struct tgsi_full_dst_register *dst,
6901e04c3fSmrg	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
7001e04c3fSmrg{
7101e04c3fSmrg   *dst = *orig_dst;
7201e04c3fSmrg   dst->Register.WriteMask &= wrmask;
7301e04c3fSmrg   assert(dst->Register.WriteMask);
7401e04c3fSmrg}
7501e04c3fSmrg
7601e04c3fSmrgstatic inline void
7701e04c3fSmrgget_swiz(unsigned *swiz, const struct tgsi_src_register *src)
7801e04c3fSmrg{
7901e04c3fSmrg   swiz[0] = src->SwizzleX;
8001e04c3fSmrg   swiz[1] = src->SwizzleY;
8101e04c3fSmrg   swiz[2] = src->SwizzleZ;
8201e04c3fSmrg   swiz[3] = src->SwizzleW;
8301e04c3fSmrg}
8401e04c3fSmrg
8501e04c3fSmrgstatic void
8601e04c3fSmrgreg_src(struct tgsi_full_src_register *src,
8701e04c3fSmrg	const struct tgsi_full_src_register *orig_src,
8801e04c3fSmrg	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
8901e04c3fSmrg{
9001e04c3fSmrg   unsigned swiz[4];
9101e04c3fSmrg   get_swiz(swiz, &orig_src->Register);
9201e04c3fSmrg   *src = *orig_src;
9301e04c3fSmrg   src->Register.SwizzleX = swiz[sx];
9401e04c3fSmrg   src->Register.SwizzleY = swiz[sy];
9501e04c3fSmrg   src->Register.SwizzleZ = swiz[sz];
9601e04c3fSmrg   src->Register.SwizzleW = swiz[sw];
9701e04c3fSmrg}
9801e04c3fSmrg
/* SWIZ(x,y,z,w) expands to the four TGSI_SWIZZLE_* arguments expected by
 * reg_src().  Write '_' for a channel whose selector is irrelevant; it
 * maps to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(cx, cy, cz, cw)   \
   TGSI_SWIZZLE_ ## cx,        \
   TGSI_SWIZZLE_ ## cy,        \
   TGSI_SWIZZLE_ ## cz,        \
   TGSI_SWIZZLE_ ## cw
10201e04c3fSmrg
10301e04c3fSmrg/*
10401e04c3fSmrg * if (dst.x aliases src.x) {
10501e04c3fSmrg *   MOV tmpA.x, src.x
10601e04c3fSmrg *   src = tmpA
10701e04c3fSmrg * }
10801e04c3fSmrg * COS dst.x, src.x
10901e04c3fSmrg * SIN dst.y, src.x
11001e04c3fSmrg * MOV dst.zw, imm{0.0, 1.0}
11101e04c3fSmrg */
11201e04c3fSmrgstatic bool
11301e04c3fSmrgaliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
11401e04c3fSmrg	const struct tgsi_full_src_register *src, unsigned src_mask)
11501e04c3fSmrg{
11601e04c3fSmrg   if ((dst->Register.File == src->Register.File) &&
11701e04c3fSmrg       (dst->Register.Index == src->Register.Index)) {
11801e04c3fSmrg      unsigned i, actual_mask = 0;
11901e04c3fSmrg      unsigned swiz[4];
12001e04c3fSmrg      get_swiz(swiz, &src->Register);
12101e04c3fSmrg      for (i = 0; i < 4; i++)
12201e04c3fSmrg         if (src_mask & (1 << i))
12301e04c3fSmrg            actual_mask |= (1 << swiz[i]);
12401e04c3fSmrg      if (actual_mask & dst_mask)
12501e04c3fSmrg         return true;
12601e04c3fSmrg   }
12701e04c3fSmrg   return false;
12801e04c3fSmrg}
12901e04c3fSmrg
13001e04c3fSmrgstatic void
13101e04c3fSmrgcreate_mov(struct tgsi_transform_context *tctx,
13201e04c3fSmrg           const struct tgsi_full_dst_register *dst,
13301e04c3fSmrg           const struct tgsi_full_src_register *src,
13401e04c3fSmrg           unsigned mask, unsigned saturate)
13501e04c3fSmrg{
13601e04c3fSmrg   struct tgsi_full_instruction new_inst;
13701e04c3fSmrg
13801e04c3fSmrg   new_inst = tgsi_default_full_instruction();
13901e04c3fSmrg   new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
14001e04c3fSmrg   new_inst.Instruction.Saturate = saturate;
14101e04c3fSmrg   new_inst.Instruction.NumDstRegs = 1;
14201e04c3fSmrg   reg_dst(&new_inst.Dst[0], dst, mask);
14301e04c3fSmrg   new_inst.Instruction.NumSrcRegs = 1;
14401e04c3fSmrg   reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
14501e04c3fSmrg   tctx->emit_instruction(tctx, &new_inst);
14601e04c3fSmrg}
14701e04c3fSmrg
/* Token accounting used by the *_GROW macros to bound how many tgsi
 * tokens a lowering adds.  The worst case is assumed: the instruction
 * being removed has no ADDR[] or anything else that adds tokens to a
 * src/dst, whereas every inserted instruction does.
 *
 * OINST(nargs) - tokens of an old (removed) instruction:
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST(nargs) - tokens of a new (inserted) instruction:
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs)  (1 /* insn */ + 1 /* dst */ + (nargs) /* srcs */)
#define NINST(nargs)  (1 /* insn */ + 2 /* dst */ + 2 * (nargs) /* srcs */)
16601e04c3fSmrg
16701e04c3fSmrg/*
16801e04c3fSmrg * Lowering Translators:
16901e04c3fSmrg */
17001e04c3fSmrg
17101e04c3fSmrg/* DST - Distance Vector
17201e04c3fSmrg *   dst.x = 1.0
17301e04c3fSmrg *   dst.y = src0.y \times src1.y
17401e04c3fSmrg *   dst.z = src0.z
17501e04c3fSmrg *   dst.w = src1.w
17601e04c3fSmrg *
17701e04c3fSmrg * ; note: could be more clever and use just a single temp
17801e04c3fSmrg * ;       if I was clever enough to re-write the swizzles.
17901e04c3fSmrg * ; needs: 2 tmp, imm{1.0}
18001e04c3fSmrg * if (dst.y aliases src0.z) {
18101e04c3fSmrg *   MOV tmpA.yz, src0.yz
18201e04c3fSmrg *   src0 = tmpA
18301e04c3fSmrg * }
18401e04c3fSmrg * if (dst.yz aliases src1.w) {
18501e04c3fSmrg *   MOV tmpB.yw, src1.yw
18601e04c3fSmrg *   src1 = tmpB
18701e04c3fSmrg * }
18801e04c3fSmrg * MUL dst.y, src0.y, src1.y
18901e04c3fSmrg * MOV dst.z, src0.z
19001e04c3fSmrg * MOV dst.w, src1.w
19101e04c3fSmrg * MOV dst.x, imm{1.0}
19201e04c3fSmrg */
19301e04c3fSmrg#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
19401e04c3fSmrg		NINST(1) + NINST(1) - OINST(2))
19501e04c3fSmrg#define DST_TMP  2
19601e04c3fSmrgstatic void
19701e04c3fSmrgtransform_dst(struct tgsi_transform_context *tctx,
19801e04c3fSmrg              struct tgsi_full_instruction *inst)
19901e04c3fSmrg{
20001e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
20101e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
20201e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
20301e04c3fSmrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
20401e04c3fSmrg   struct tgsi_full_instruction new_inst;
20501e04c3fSmrg
20601e04c3fSmrg   if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
20701e04c3fSmrg      create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
20801e04c3fSmrg      src0 = &ctx->tmp[A].src;
20901e04c3fSmrg   }
21001e04c3fSmrg
21101e04c3fSmrg   if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
21201e04c3fSmrg      create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
21301e04c3fSmrg      src1 = &ctx->tmp[B].src;
21401e04c3fSmrg   }
21501e04c3fSmrg
21601e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
21701e04c3fSmrg      /* MUL dst.y, src0.y, src1.y */
21801e04c3fSmrg      new_inst = tgsi_default_full_instruction();
21901e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
22001e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
22101e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
22201e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
22301e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
22401e04c3fSmrg      reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
22501e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
22601e04c3fSmrg   }
22701e04c3fSmrg
22801e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
22901e04c3fSmrg      /* MOV dst.z, src0.z */
23001e04c3fSmrg      new_inst = tgsi_default_full_instruction();
23101e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
23201e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
23301e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
23401e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
23501e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
23601e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
23701e04c3fSmrg   }
23801e04c3fSmrg
23901e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
24001e04c3fSmrg      /* MOV dst.w, src1.w */
24101e04c3fSmrg      new_inst = tgsi_default_full_instruction();
24201e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
24301e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
24401e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
24501e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
24601e04c3fSmrg      reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
24701e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
24801e04c3fSmrg   }
24901e04c3fSmrg
25001e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
25101e04c3fSmrg      /* MOV dst.x, imm{1.0} */
25201e04c3fSmrg      new_inst = tgsi_default_full_instruction();
25301e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
25401e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
25501e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
25601e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
25701e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
25801e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
25901e04c3fSmrg   }
26001e04c3fSmrg}
26101e04c3fSmrg
26201e04c3fSmrg/* LRP - Linear Interpolate
26301e04c3fSmrg *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
26401e04c3fSmrg *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
26501e04c3fSmrg *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
26601e04c3fSmrg *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
26701e04c3fSmrg *
26801e04c3fSmrg * This becomes: src0 \times src1 + src2 - src0 \times src2, which
26901e04c3fSmrg * can then become: src0 \times src1 - (src0 \times src2 - src2)
27001e04c3fSmrg *
27101e04c3fSmrg * ; needs: 1 tmp
27201e04c3fSmrg * MAD tmpA, src0, src2, -src2
27301e04c3fSmrg * MAD dst, src0, src1, -tmpA
27401e04c3fSmrg */
27501e04c3fSmrg#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
27601e04c3fSmrg#define LRP_TMP  1
27701e04c3fSmrgstatic void
27801e04c3fSmrgtransform_lrp(struct tgsi_transform_context *tctx,
27901e04c3fSmrg              struct tgsi_full_instruction *inst)
28001e04c3fSmrg{
28101e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
28201e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
28301e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
28401e04c3fSmrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
28501e04c3fSmrg   struct tgsi_full_src_register *src2 = &inst->Src[2];
28601e04c3fSmrg   struct tgsi_full_instruction new_inst;
28701e04c3fSmrg
28801e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
28901e04c3fSmrg      /* MAD tmpA, src0, src2, -src2 */
29001e04c3fSmrg      new_inst = tgsi_default_full_instruction();
29101e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
29201e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
29301e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
29401e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
29501e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
29601e04c3fSmrg      reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
29701e04c3fSmrg      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
29801e04c3fSmrg      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
29901e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
30001e04c3fSmrg
30101e04c3fSmrg      /* MAD dst, src0, src1, -tmpA */
30201e04c3fSmrg      new_inst = tgsi_default_full_instruction();
30301e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
30401e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
30501e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
30601e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
30701e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
30801e04c3fSmrg      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
30901e04c3fSmrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
31001e04c3fSmrg      new_inst.Src[2].Register.Negate = true;
31101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
31201e04c3fSmrg   }
31301e04c3fSmrg}
31401e04c3fSmrg
31501e04c3fSmrg/* FRC - Fraction
31601e04c3fSmrg *  dst.x = src.x - \lfloor src.x\rfloor
31701e04c3fSmrg *  dst.y = src.y - \lfloor src.y\rfloor
31801e04c3fSmrg *  dst.z = src.z - \lfloor src.z\rfloor
31901e04c3fSmrg *  dst.w = src.w - \lfloor src.w\rfloor
32001e04c3fSmrg *
32101e04c3fSmrg * ; needs: 1 tmp
32201e04c3fSmrg * FLR tmpA, src
32301e04c3fSmrg * SUB dst, src, tmpA
32401e04c3fSmrg */
32501e04c3fSmrg#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
32601e04c3fSmrg#define FRC_TMP  1
32701e04c3fSmrgstatic void
32801e04c3fSmrgtransform_frc(struct tgsi_transform_context *tctx,
32901e04c3fSmrg              struct tgsi_full_instruction *inst)
33001e04c3fSmrg{
33101e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
33201e04c3fSmrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
33301e04c3fSmrg   struct tgsi_full_src_register *src = &inst->Src[0];
33401e04c3fSmrg   struct tgsi_full_instruction new_inst;
33501e04c3fSmrg
33601e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
33701e04c3fSmrg      /* FLR tmpA, src */
33801e04c3fSmrg      new_inst = tgsi_default_full_instruction();
33901e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
34001e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
34101e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
34201e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
34301e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
34401e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
34501e04c3fSmrg
34601e04c3fSmrg      /* SUB dst, src, tmpA */
34701e04c3fSmrg      new_inst = tgsi_default_full_instruction();
34801e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
34901e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
35001e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
35101e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
35201e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
35301e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
35401e04c3fSmrg      new_inst.Src[1].Register.Negate = 1;
35501e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
35601e04c3fSmrg   }
35701e04c3fSmrg}
35801e04c3fSmrg
35901e04c3fSmrg/* POW - Power
36001e04c3fSmrg *  dst.x = src0.x^{src1.x}
36101e04c3fSmrg *  dst.y = src0.x^{src1.x}
36201e04c3fSmrg *  dst.z = src0.x^{src1.x}
36301e04c3fSmrg *  dst.w = src0.x^{src1.x}
36401e04c3fSmrg *
36501e04c3fSmrg * ; needs: 1 tmp
36601e04c3fSmrg * LG2 tmpA.x, src0.x
36701e04c3fSmrg * MUL tmpA.x, src1.x, tmpA.x
36801e04c3fSmrg * EX2 dst, tmpA.x
36901e04c3fSmrg */
37001e04c3fSmrg#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
37101e04c3fSmrg#define POW_TMP  1
37201e04c3fSmrgstatic void
37301e04c3fSmrgtransform_pow(struct tgsi_transform_context *tctx,
37401e04c3fSmrg              struct tgsi_full_instruction *inst)
37501e04c3fSmrg{
37601e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
37701e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
37801e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
37901e04c3fSmrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
38001e04c3fSmrg   struct tgsi_full_instruction new_inst;
38101e04c3fSmrg
38201e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
38301e04c3fSmrg      /* LG2 tmpA.x, src0.x */
38401e04c3fSmrg      new_inst = tgsi_default_full_instruction();
38501e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
38601e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
38701e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
38801e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
38901e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
39001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
39101e04c3fSmrg
39201e04c3fSmrg      /* MUL tmpA.x, src1.x, tmpA.x */
39301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
39401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
39501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
39601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
39701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
39801e04c3fSmrg      reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
39901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
40001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
40101e04c3fSmrg
40201e04c3fSmrg      /* EX2 dst, tmpA.x */
40301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
40401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
40501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
40601e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
40701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
40801e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
40901e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
41001e04c3fSmrg   }
41101e04c3fSmrg}
41201e04c3fSmrg
41301e04c3fSmrg/* LIT - Light Coefficients
41401e04c3fSmrg *  dst.x = 1.0
41501e04c3fSmrg *  dst.y = max(src.x, 0.0)
41601e04c3fSmrg *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
41701e04c3fSmrg *  dst.w = 1.0
41801e04c3fSmrg *
41901e04c3fSmrg * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
42001e04c3fSmrg * MAX tmpA.xy, src.xy, imm{0.0}
42101e04c3fSmrg * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
42201e04c3fSmrg * LG2 tmpA.y, tmpA.y
42301e04c3fSmrg * MUL tmpA.y, tmpA.z, tmpA.y
42401e04c3fSmrg * EX2 tmpA.y, tmpA.y
42501e04c3fSmrg * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
42601e04c3fSmrg * MOV dst.yz, tmpA.xy
42701e04c3fSmrg * MOV dst.xw, imm{1.0}
42801e04c3fSmrg */
42901e04c3fSmrg#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
43001e04c3fSmrg		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
43101e04c3fSmrg#define LIT_TMP  1
43201e04c3fSmrgstatic void
43301e04c3fSmrgtransform_lit(struct tgsi_transform_context *tctx,
43401e04c3fSmrg              struct tgsi_full_instruction *inst)
43501e04c3fSmrg{
43601e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
43701e04c3fSmrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
43801e04c3fSmrg   struct tgsi_full_src_register *src = &inst->Src[0];
43901e04c3fSmrg   struct tgsi_full_instruction new_inst;
44001e04c3fSmrg
44101e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
44201e04c3fSmrg      /* MAX tmpA.xy, src.xy, imm{0.0} */
44301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
44401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
44501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
44601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
44701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
44801e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
44901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
45001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
45101e04c3fSmrg
45201e04c3fSmrg      /* MIN tmpA.z, src.w, imm{128.0} */
45301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
45401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
45501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
45601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
45701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
45801e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
45901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
46001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
46101e04c3fSmrg
46201e04c3fSmrg      /* MAX tmpA.z, tmpA.z, -imm{128.0} */
46301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
46401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
46501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
46601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
46701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
46801e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
46901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
47001e04c3fSmrg      new_inst.Src[1].Register.Negate = true;
47101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
47201e04c3fSmrg
47301e04c3fSmrg      /* LG2 tmpA.y, tmpA.y */
47401e04c3fSmrg      new_inst = tgsi_default_full_instruction();
47501e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
47601e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
47701e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
47801e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
47901e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
48001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
48101e04c3fSmrg
48201e04c3fSmrg      /* MUL tmpA.y, tmpA.z, tmpA.y */
48301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
48401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
48501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
48601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
48701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
48801e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
48901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
49001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
49101e04c3fSmrg
49201e04c3fSmrg      /* EX2 tmpA.y, tmpA.y */
49301e04c3fSmrg      new_inst = tgsi_default_full_instruction();
49401e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
49501e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
49601e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
49701e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
49801e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
49901e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
50001e04c3fSmrg
50101e04c3fSmrg      /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
50201e04c3fSmrg      new_inst = tgsi_default_full_instruction();
50301e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
50401e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
50501e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
50601e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
50701e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
50801e04c3fSmrg      new_inst.Src[0].Register.Negate = true;
50901e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
51001e04c3fSmrg      reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
51101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
51201e04c3fSmrg
51301e04c3fSmrg      /* MOV dst.yz, tmpA.xy */
51401e04c3fSmrg      new_inst = tgsi_default_full_instruction();
51501e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
51601e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
51701e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
51801e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
51901e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
52001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
52101e04c3fSmrg   }
52201e04c3fSmrg
52301e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
52401e04c3fSmrg      /* MOV dst.xw, imm{1.0} */
52501e04c3fSmrg      new_inst = tgsi_default_full_instruction();
52601e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
52701e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
52801e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
52901e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
53001e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
53101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
53201e04c3fSmrg   }
53301e04c3fSmrg}
53401e04c3fSmrg
53501e04c3fSmrg/* EXP - Approximate Exponential Base 2
53601e04c3fSmrg *  dst.x = 2^{\lfloor src.x\rfloor}
53701e04c3fSmrg *  dst.y = src.x - \lfloor src.x\rfloor
53801e04c3fSmrg *  dst.z = 2^{src.x}
53901e04c3fSmrg *  dst.w = 1.0
54001e04c3fSmrg *
54101e04c3fSmrg * ; needs: 1 tmp, imm{1.0}
54201e04c3fSmrg * if (lowering FLR) {
54301e04c3fSmrg *   FRC tmpA.x, src.x
54401e04c3fSmrg *   SUB tmpA.x, src.x, tmpA.x
54501e04c3fSmrg * } else {
54601e04c3fSmrg *   FLR tmpA.x, src.x
54701e04c3fSmrg * }
54801e04c3fSmrg * EX2 tmpA.y, src.x
54901e04c3fSmrg * SUB dst.y, src.x, tmpA.x
55001e04c3fSmrg * EX2 dst.x, tmpA.x
55101e04c3fSmrg * MOV dst.z, tmpA.y
55201e04c3fSmrg * MOV dst.w, imm{1.0}
55301e04c3fSmrg */
55401e04c3fSmrg#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
55501e04c3fSmrg		NINST(1)+ NINST(1) - OINST(1))
55601e04c3fSmrg#define EXP_TMP  1
55701e04c3fSmrgstatic void
55801e04c3fSmrgtransform_exp(struct tgsi_transform_context *tctx,
55901e04c3fSmrg              struct tgsi_full_instruction *inst)
56001e04c3fSmrg{
56101e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
56201e04c3fSmrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
56301e04c3fSmrg   struct tgsi_full_src_register *src = &inst->Src[0];
56401e04c3fSmrg   struct tgsi_full_instruction new_inst;
56501e04c3fSmrg
56601e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
56701e04c3fSmrg      if (ctx->config->lower_FLR) {
56801e04c3fSmrg         /* FRC tmpA.x, src.x */
56901e04c3fSmrg         new_inst = tgsi_default_full_instruction();
57001e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
57101e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
57201e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
57301e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
57401e04c3fSmrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
57501e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
57601e04c3fSmrg
57701e04c3fSmrg         /* SUB tmpA.x, src.x, tmpA.x */
57801e04c3fSmrg         new_inst = tgsi_default_full_instruction();
57901e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
58001e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
58101e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
58201e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 2;
58301e04c3fSmrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
58401e04c3fSmrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
58501e04c3fSmrg         new_inst.Src[1].Register.Negate = 1;
58601e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
58701e04c3fSmrg     } else {
58801e04c3fSmrg         /* FLR tmpA.x, src.x */
58901e04c3fSmrg         new_inst = tgsi_default_full_instruction();
59001e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
59101e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
59201e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
59301e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
59401e04c3fSmrg         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
59501e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
59601e04c3fSmrg      }
59701e04c3fSmrg   }
59801e04c3fSmrg
59901e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
60001e04c3fSmrg      /* EX2 tmpA.y, src.x */
60101e04c3fSmrg      new_inst = tgsi_default_full_instruction();
60201e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
60301e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
60401e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
60501e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
60601e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
60701e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
60801e04c3fSmrg   }
60901e04c3fSmrg
61001e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
61101e04c3fSmrg      /* SUB dst.y, src.x, tmpA.x */
61201e04c3fSmrg      new_inst = tgsi_default_full_instruction();
61301e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
61401e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
61501e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
61601e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
61701e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
61801e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
61901e04c3fSmrg      new_inst.Src[1].Register.Negate = 1;
62001e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
62101e04c3fSmrg   }
62201e04c3fSmrg
62301e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
62401e04c3fSmrg      /* EX2 dst.x, tmpA.x */
62501e04c3fSmrg      new_inst = tgsi_default_full_instruction();
62601e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
62701e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
62801e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
62901e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
63001e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
63101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
63201e04c3fSmrg   }
63301e04c3fSmrg
63401e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
63501e04c3fSmrg      /* MOV dst.z, tmpA.y */
63601e04c3fSmrg      new_inst = tgsi_default_full_instruction();
63701e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
63801e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
63901e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
64001e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
64101e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
64201e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
64301e04c3fSmrg   }
64401e04c3fSmrg
64501e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
64601e04c3fSmrg      /* MOV dst.w, imm{1.0} */
64701e04c3fSmrg      new_inst = tgsi_default_full_instruction();
64801e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
64901e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
65001e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
65101e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
65201e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
65301e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
65401e04c3fSmrg   }
65501e04c3fSmrg}
65601e04c3fSmrg
65701e04c3fSmrg/* LOG - Approximate Logarithm Base 2
65801e04c3fSmrg *  dst.x = \lfloor\log_2{|src.x|}\rfloor
65901e04c3fSmrg *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
66001e04c3fSmrg *  dst.z = \log_2{|src.x|}
66101e04c3fSmrg *  dst.w = 1.0
66201e04c3fSmrg *
66301e04c3fSmrg * ; needs: 1 tmp, imm{1.0}
66401e04c3fSmrg * LG2 tmpA.x, |src.x|
66501e04c3fSmrg * if (lowering FLR) {
66601e04c3fSmrg *   FRC tmpA.y, tmpA.x
66701e04c3fSmrg *   SUB tmpA.y, tmpA.x, tmpA.y
66801e04c3fSmrg * } else {
66901e04c3fSmrg *   FLR tmpA.y, tmpA.x
67001e04c3fSmrg * }
67101e04c3fSmrg * EX2 tmpA.z, tmpA.y
67201e04c3fSmrg * RCP tmpA.z, tmpA.z
67301e04c3fSmrg * MUL dst.y, |src.x|, tmpA.z
67401e04c3fSmrg * MOV dst.xz, tmpA.yx
67501e04c3fSmrg * MOV dst.w, imm{1.0}
67601e04c3fSmrg */
67701e04c3fSmrg#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
67801e04c3fSmrg		NINST(2) + NINST(1) + NINST(1) - OINST(1))
67901e04c3fSmrg#define LOG_TMP  1
68001e04c3fSmrgstatic void
68101e04c3fSmrgtransform_log(struct tgsi_transform_context *tctx,
68201e04c3fSmrg              struct tgsi_full_instruction *inst)
68301e04c3fSmrg{
68401e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
68501e04c3fSmrg   struct tgsi_full_dst_register *dst = &inst->Dst[0];
68601e04c3fSmrg   struct tgsi_full_src_register *src = &inst->Src[0];
68701e04c3fSmrg   struct tgsi_full_instruction new_inst;
68801e04c3fSmrg
68901e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
69001e04c3fSmrg      /* LG2 tmpA.x, |src.x| */
69101e04c3fSmrg      new_inst = tgsi_default_full_instruction();
69201e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
69301e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
69401e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
69501e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
69601e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
69701e04c3fSmrg      new_inst.Src[0].Register.Absolute = true;
69801e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
69901e04c3fSmrg   }
70001e04c3fSmrg
70101e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
70201e04c3fSmrg      if (ctx->config->lower_FLR) {
70301e04c3fSmrg         /* FRC tmpA.y, tmpA.x */
70401e04c3fSmrg         new_inst = tgsi_default_full_instruction();
70501e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
70601e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
70701e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
70801e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
70901e04c3fSmrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
71001e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
71101e04c3fSmrg
71201e04c3fSmrg         /* SUB tmpA.y, tmpA.x, tmpA.y */
71301e04c3fSmrg         new_inst = tgsi_default_full_instruction();
71401e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
71501e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
71601e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
71701e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 2;
71801e04c3fSmrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
71901e04c3fSmrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
72001e04c3fSmrg         new_inst.Src[1].Register.Negate = 1;
72101e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
72201e04c3fSmrg      } else {
72301e04c3fSmrg         /* FLR tmpA.y, tmpA.x */
72401e04c3fSmrg         new_inst = tgsi_default_full_instruction();
72501e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
72601e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
72701e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
72801e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
72901e04c3fSmrg         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
73001e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
73101e04c3fSmrg      }
73201e04c3fSmrg   }
73301e04c3fSmrg
73401e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
73501e04c3fSmrg      /* EX2 tmpA.z, tmpA.y */
73601e04c3fSmrg      new_inst = tgsi_default_full_instruction();
73701e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
73801e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
73901e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
74001e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
74101e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
74201e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
74301e04c3fSmrg
74401e04c3fSmrg      /* RCP tmpA.z, tmpA.z */
74501e04c3fSmrg      new_inst = tgsi_default_full_instruction();
74601e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
74701e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
74801e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
74901e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
75001e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
75101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
75201e04c3fSmrg
75301e04c3fSmrg      /* MUL dst.y, |src.x|, tmpA.z */
75401e04c3fSmrg      new_inst = tgsi_default_full_instruction();
75501e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
75601e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
75701e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
75801e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
75901e04c3fSmrg      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
76001e04c3fSmrg      new_inst.Src[0].Register.Absolute = true;
76101e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
76201e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
76301e04c3fSmrg   }
76401e04c3fSmrg
76501e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
76601e04c3fSmrg      /* MOV dst.xz, tmpA.yx */
76701e04c3fSmrg      new_inst = tgsi_default_full_instruction();
76801e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
76901e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
77001e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
77101e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
77201e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
77301e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
77401e04c3fSmrg   }
77501e04c3fSmrg
77601e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
77701e04c3fSmrg      /* MOV dst.w, imm{1.0} */
77801e04c3fSmrg      new_inst = tgsi_default_full_instruction();
77901e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
78001e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
78101e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
78201e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
78301e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
78401e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
78501e04c3fSmrg   }
78601e04c3fSmrg}
78701e04c3fSmrg
78801e04c3fSmrg/* DP4 - 4-component Dot Product
78901e04c3fSmrg *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
79001e04c3fSmrg *
79101e04c3fSmrg * DP3 - 3-component Dot Product
79201e04c3fSmrg *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
79301e04c3fSmrg *
79401e04c3fSmrg * DP2 - 2-component Dot Product
79501e04c3fSmrg *   dst = src0.x \times src1.x + src0.y \times src1.y
79601e04c3fSmrg *
79701e04c3fSmrg * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
79801e04c3fSmrg * operations, which is what you'd prefer for a ISA that is natively
79901e04c3fSmrg * scalar.  Probably a native vector ISA would at least already have
80001e04c3fSmrg * DP4/DP3 instructions, but perhaps there is room for an alternative
80101e04c3fSmrg * translation for DP2 using vector instructions.
80201e04c3fSmrg *
80301e04c3fSmrg * ; needs: 1 tmp
80401e04c3fSmrg * MUL tmpA.x, src0.x, src1.x
80501e04c3fSmrg * MAD tmpA.x, src0.y, src1.y, tmpA.x
80601e04c3fSmrg * if (DP3 || DP4) {
80701e04c3fSmrg *   MAD tmpA.x, src0.z, src1.z, tmpA.x
80801e04c3fSmrg *   if (DP4) {
80901e04c3fSmrg *     MAD tmpA.x, src0.w, src1.w, tmpA.x
81001e04c3fSmrg *   }
81101e04c3fSmrg * }
81201e04c3fSmrg * ; fixup last instruction to replicate into dst
81301e04c3fSmrg */
81401e04c3fSmrg#define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
81501e04c3fSmrg#define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
81601e04c3fSmrg#define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
81701e04c3fSmrg#define DOTP_TMP  1
81801e04c3fSmrgstatic void
81901e04c3fSmrgtransform_dotp(struct tgsi_transform_context *tctx,
82001e04c3fSmrg               struct tgsi_full_instruction *inst)
82101e04c3fSmrg{
82201e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
82301e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
82401e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
82501e04c3fSmrg   struct tgsi_full_src_register *src1 = &inst->Src[1];
82601e04c3fSmrg   struct tgsi_full_instruction new_inst;
82701e04c3fSmrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
82801e04c3fSmrg
82901e04c3fSmrg   /* NOTE: any potential last instruction must replicate src on all
83001e04c3fSmrg    * components (since it could be re-written to write to final dst)
83101e04c3fSmrg    */
83201e04c3fSmrg
83301e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
83401e04c3fSmrg      /* MUL tmpA.x, src0.x, src1.x */
83501e04c3fSmrg      new_inst = tgsi_default_full_instruction();
83601e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
83701e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
83801e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
83901e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
84001e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
84101e04c3fSmrg      reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
84201e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
84301e04c3fSmrg
84401e04c3fSmrg      /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
84501e04c3fSmrg      new_inst = tgsi_default_full_instruction();
84601e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
84701e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
84801e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
84901e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
85001e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
85101e04c3fSmrg      reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
85201e04c3fSmrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
85301e04c3fSmrg
85401e04c3fSmrg      if ((opcode == TGSI_OPCODE_DP3) ||
85501e04c3fSmrg          (opcode == TGSI_OPCODE_DP4)) {
85601e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
85701e04c3fSmrg
85801e04c3fSmrg         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
85901e04c3fSmrg         new_inst = tgsi_default_full_instruction();
86001e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
86101e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
86201e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
86301e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 3;
86401e04c3fSmrg         reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
86501e04c3fSmrg         reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
86601e04c3fSmrg         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
86701e04c3fSmrg
86801e04c3fSmrg         if (opcode == TGSI_OPCODE_DP4) {
86901e04c3fSmrg            tctx->emit_instruction(tctx, &new_inst);
87001e04c3fSmrg
87101e04c3fSmrg            /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
87201e04c3fSmrg            new_inst = tgsi_default_full_instruction();
87301e04c3fSmrg            new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
87401e04c3fSmrg            new_inst.Instruction.NumDstRegs = 1;
87501e04c3fSmrg            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
87601e04c3fSmrg            new_inst.Instruction.NumSrcRegs = 3;
87701e04c3fSmrg            reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
87801e04c3fSmrg            reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
87901e04c3fSmrg            reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
88001e04c3fSmrg         }
88101e04c3fSmrg      }
88201e04c3fSmrg
88301e04c3fSmrg      /* fixup last instruction to write to dst: */
88401e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
88501e04c3fSmrg
88601e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
88701e04c3fSmrg   }
88801e04c3fSmrg}
88901e04c3fSmrg
89001e04c3fSmrg/* FLR - floor, CEIL - ceil
89101e04c3fSmrg * ; needs: 1 tmp
89201e04c3fSmrg * if (CEIL) {
89301e04c3fSmrg *   FRC tmpA, -src
89401e04c3fSmrg *   ADD dst, src, tmpA
89501e04c3fSmrg * } else {
89601e04c3fSmrg *   FRC tmpA, src
89701e04c3fSmrg *   SUB dst, src, tmpA
89801e04c3fSmrg * }
89901e04c3fSmrg */
90001e04c3fSmrg#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
90101e04c3fSmrg#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
90201e04c3fSmrg#define FLR_TMP 1
90301e04c3fSmrg#define CEIL_TMP 1
90401e04c3fSmrgstatic void
90501e04c3fSmrgtransform_flr_ceil(struct tgsi_transform_context *tctx,
90601e04c3fSmrg                   struct tgsi_full_instruction *inst)
90701e04c3fSmrg{
90801e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
90901e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
91001e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
91101e04c3fSmrg   struct tgsi_full_instruction new_inst;
91201e04c3fSmrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
91301e04c3fSmrg
91401e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
91501e04c3fSmrg      /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
91601e04c3fSmrg      new_inst = tgsi_default_full_instruction();
91701e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
91801e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
91901e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
92001e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
92101e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
92201e04c3fSmrg
92301e04c3fSmrg      if (opcode == TGSI_OPCODE_CEIL)
92401e04c3fSmrg         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
92501e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
92601e04c3fSmrg
92701e04c3fSmrg      /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
92801e04c3fSmrg      new_inst = tgsi_default_full_instruction();
92901e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
93001e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
93101e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
93201e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
93301e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
93401e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
93501e04c3fSmrg      if (opcode == TGSI_OPCODE_FLR)
93601e04c3fSmrg         new_inst.Src[1].Register.Negate = 1;
93701e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
93801e04c3fSmrg   }
93901e04c3fSmrg}
94001e04c3fSmrg
94101e04c3fSmrg/* TRUNC - truncate off fractional part
94201e04c3fSmrg *  dst.x = trunc(src.x)
94301e04c3fSmrg *  dst.y = trunc(src.y)
94401e04c3fSmrg *  dst.z = trunc(src.z)
94501e04c3fSmrg *  dst.w = trunc(src.w)
94601e04c3fSmrg *
94701e04c3fSmrg * ; needs: 1 tmp
94801e04c3fSmrg * if (lower FLR) {
94901e04c3fSmrg *   FRC tmpA, |src|
95001e04c3fSmrg *   SUB tmpA, |src|, tmpA
95101e04c3fSmrg * } else {
95201e04c3fSmrg *   FLR tmpA, |src|
95301e04c3fSmrg * }
95401e04c3fSmrg * CMP dst, src, -tmpA, tmpA
95501e04c3fSmrg */
95601e04c3fSmrg#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
95701e04c3fSmrg#define TRUNC_TMP 1
95801e04c3fSmrgstatic void
95901e04c3fSmrgtransform_trunc(struct tgsi_transform_context *tctx,
96001e04c3fSmrg                struct tgsi_full_instruction *inst)
96101e04c3fSmrg{
96201e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
96301e04c3fSmrg   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
96401e04c3fSmrg   struct tgsi_full_src_register *src0 = &inst->Src[0];
96501e04c3fSmrg   struct tgsi_full_instruction new_inst;
96601e04c3fSmrg
96701e04c3fSmrg   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
96801e04c3fSmrg      if (ctx->config->lower_FLR) {
96901e04c3fSmrg         new_inst = tgsi_default_full_instruction();
97001e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
97101e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
97201e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
97301e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
97401e04c3fSmrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
97501e04c3fSmrg         new_inst.Src[0].Register.Absolute = true;
97601e04c3fSmrg         new_inst.Src[0].Register.Negate = false;
97701e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
97801e04c3fSmrg
97901e04c3fSmrg         new_inst = tgsi_default_full_instruction();
98001e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
98101e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
98201e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
98301e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 2;
98401e04c3fSmrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
98501e04c3fSmrg         new_inst.Src[0].Register.Absolute = true;
98601e04c3fSmrg         new_inst.Src[0].Register.Negate = false;
98701e04c3fSmrg         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
98801e04c3fSmrg         new_inst.Src[1].Register.Negate = 1;
98901e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
99001e04c3fSmrg      } else {
99101e04c3fSmrg         new_inst = tgsi_default_full_instruction();
99201e04c3fSmrg         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
99301e04c3fSmrg         new_inst.Instruction.NumDstRegs = 1;
99401e04c3fSmrg         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
99501e04c3fSmrg         new_inst.Instruction.NumSrcRegs = 1;
99601e04c3fSmrg         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
99701e04c3fSmrg         new_inst.Src[0].Register.Absolute = true;
99801e04c3fSmrg         new_inst.Src[0].Register.Negate = false;
99901e04c3fSmrg         tctx->emit_instruction(tctx, &new_inst);
100001e04c3fSmrg      }
100101e04c3fSmrg
100201e04c3fSmrg      new_inst = tgsi_default_full_instruction();
100301e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
100401e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
100501e04c3fSmrg      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
100601e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
100701e04c3fSmrg      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
100801e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
100901e04c3fSmrg      new_inst.Src[1].Register.Negate = true;
101001e04c3fSmrg      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
101101e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
101201e04c3fSmrg   }
101301e04c3fSmrg}
101401e04c3fSmrg
101501e04c3fSmrg/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
101601e04c3fSmrg * in the case of TXP, the clamping must happen *after* projection, so
101701e04c3fSmrg * we need to lower TXP to TEX.
101801e04c3fSmrg *
101901e04c3fSmrg *   MOV tmpA, src0
102001e04c3fSmrg *   if (opc == TXP) {
102101e04c3fSmrg *     ; do perspective division manually before clamping:
102201e04c3fSmrg *     RCP tmpB, tmpA.w
102301e04c3fSmrg *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
102401e04c3fSmrg *     opc = TEX;
102501e04c3fSmrg *   }
102601e04c3fSmrg *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
102701e04c3fSmrg *   <opc> dst, tmpA, ...
102801e04c3fSmrg */
102901e04c3fSmrg#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
103001e04c3fSmrg#define SAMP_TMP  2
103101e04c3fSmrgstatic int
103201e04c3fSmrgtransform_samp(struct tgsi_transform_context *tctx,
103301e04c3fSmrg               struct tgsi_full_instruction *inst)
103401e04c3fSmrg{
103501e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
103601e04c3fSmrg   struct tgsi_full_src_register *coord = &inst->Src[0];
103701e04c3fSmrg   struct tgsi_full_src_register *samp;
103801e04c3fSmrg   struct tgsi_full_instruction new_inst;
103901e04c3fSmrg   /* mask is clamped coords, pmask is all coords (for projection): */
104001e04c3fSmrg   unsigned mask = 0, pmask = 0, smask;
104101e04c3fSmrg   unsigned tex = inst->Texture.Texture;
104201e04c3fSmrg   enum tgsi_opcode opcode = inst->Instruction.Opcode;
104301e04c3fSmrg   bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
104401e04c3fSmrg		   (ctx->config->lower_TXP & (1 << tex));
104501e04c3fSmrg
104601e04c3fSmrg   if (opcode == TGSI_OPCODE_TXB2) {
104701e04c3fSmrg      samp = &inst->Src[2];
104801e04c3fSmrg   } else {
104901e04c3fSmrg      samp = &inst->Src[1];
105001e04c3fSmrg   }
105101e04c3fSmrg
105201e04c3fSmrg   /* convert sampler # to bitmask to test: */
105301e04c3fSmrg   smask = 1 << samp->Register.Index;
105401e04c3fSmrg
105501e04c3fSmrg   /* check if we actually need to lower this one: */
105601e04c3fSmrg   if (!(ctx->saturate & smask) && !lower_txp)
105701e04c3fSmrg      return -1;
105801e04c3fSmrg
105901e04c3fSmrg   /* figure out which coordinates need saturating:
106001e04c3fSmrg    *   - RECT textures should not get saturated
106101e04c3fSmrg    *   - array index coords should not get saturated
106201e04c3fSmrg    */
106301e04c3fSmrg   switch (tex) {
106401e04c3fSmrg   case TGSI_TEXTURE_3D:
106501e04c3fSmrg   case TGSI_TEXTURE_CUBE:
106601e04c3fSmrg   case TGSI_TEXTURE_CUBE_ARRAY:
106701e04c3fSmrg   case TGSI_TEXTURE_SHADOWCUBE:
106801e04c3fSmrg   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
106901e04c3fSmrg      if (ctx->config->saturate_r & smask)
107001e04c3fSmrg         mask |= TGSI_WRITEMASK_Z;
107101e04c3fSmrg      pmask |= TGSI_WRITEMASK_Z;
10727ec681f3Smrg      FALLTHROUGH;
107301e04c3fSmrg
107401e04c3fSmrg   case TGSI_TEXTURE_2D:
107501e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY:
107601e04c3fSmrg   case TGSI_TEXTURE_SHADOW2D:
107701e04c3fSmrg   case TGSI_TEXTURE_SHADOW2D_ARRAY:
107801e04c3fSmrg   case TGSI_TEXTURE_2D_MSAA:
107901e04c3fSmrg   case TGSI_TEXTURE_2D_ARRAY_MSAA:
108001e04c3fSmrg      if (ctx->config->saturate_t & smask)
108101e04c3fSmrg         mask |= TGSI_WRITEMASK_Y;
108201e04c3fSmrg      pmask |= TGSI_WRITEMASK_Y;
10837ec681f3Smrg      FALLTHROUGH;
108401e04c3fSmrg
108501e04c3fSmrg   case TGSI_TEXTURE_1D:
108601e04c3fSmrg   case TGSI_TEXTURE_1D_ARRAY:
108701e04c3fSmrg   case TGSI_TEXTURE_SHADOW1D:
108801e04c3fSmrg   case TGSI_TEXTURE_SHADOW1D_ARRAY:
108901e04c3fSmrg      if (ctx->config->saturate_s & smask)
109001e04c3fSmrg         mask |= TGSI_WRITEMASK_X;
109101e04c3fSmrg      pmask |= TGSI_WRITEMASK_X;
109201e04c3fSmrg      break;
109301e04c3fSmrg
109401e04c3fSmrg   case TGSI_TEXTURE_RECT:
109501e04c3fSmrg   case TGSI_TEXTURE_SHADOWRECT:
109601e04c3fSmrg      /* we don't saturate, but in case of lower_txp we
109701e04c3fSmrg       * still need to do the perspective divide:
109801e04c3fSmrg       */
109901e04c3fSmrg       pmask = TGSI_WRITEMASK_XY;
110001e04c3fSmrg       break;
110101e04c3fSmrg   }
110201e04c3fSmrg
110301e04c3fSmrg   /* sanity check.. driver could be asking to saturate a non-
110401e04c3fSmrg    * existent coordinate component:
110501e04c3fSmrg    */
110601e04c3fSmrg   if (!mask && !lower_txp)
110701e04c3fSmrg      return -1;
110801e04c3fSmrg
110901e04c3fSmrg   /* MOV tmpA, src0 */
111001e04c3fSmrg   create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
111101e04c3fSmrg
111201e04c3fSmrg   /* This is a bit sad.. we need to clamp *after* the coords
111301e04c3fSmrg    * are projected, which means lowering TXP to TEX and doing
111401e04c3fSmrg    * the projection ourself.  But since I haven't figured out
111501e04c3fSmrg    * how to make the lowering code deliver an electric shock
111601e04c3fSmrg    * to anyone using GL_CLAMP, we must do this instead:
111701e04c3fSmrg    */
111801e04c3fSmrg   if (opcode == TGSI_OPCODE_TXP) {
111901e04c3fSmrg      /* RCP tmpB.x tmpA.w */
112001e04c3fSmrg      new_inst = tgsi_default_full_instruction();
112101e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
112201e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
112301e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
112401e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 1;
112501e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
112601e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
112701e04c3fSmrg
112801e04c3fSmrg      /* MUL tmpA.mask, tmpA, tmpB.xxxx */
112901e04c3fSmrg      new_inst = tgsi_default_full_instruction();
113001e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
113101e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
113201e04c3fSmrg      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
113301e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 2;
113401e04c3fSmrg      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
113501e04c3fSmrg      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
113601e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
113701e04c3fSmrg
113801e04c3fSmrg      opcode = TGSI_OPCODE_TEX;
113901e04c3fSmrg   }
114001e04c3fSmrg
114101e04c3fSmrg   /* MOV_SAT tmpA.<mask>, tmpA */
114201e04c3fSmrg   if (mask) {
114301e04c3fSmrg      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
114401e04c3fSmrg   }
114501e04c3fSmrg
114601e04c3fSmrg   /* modify the texture samp instruction to take fixed up coord: */
114701e04c3fSmrg   new_inst = *inst;
114801e04c3fSmrg   new_inst.Instruction.Opcode = opcode;
114901e04c3fSmrg   new_inst.Src[0] = ctx->tmp[A].src;
115001e04c3fSmrg   tctx->emit_instruction(tctx, &new_inst);
115101e04c3fSmrg
115201e04c3fSmrg   return 0;
115301e04c3fSmrg}
115401e04c3fSmrg
115501e04c3fSmrg/* Two-sided color emulation:
115601e04c3fSmrg * For each COLOR input, create a corresponding BCOLOR input, plus
115701e04c3fSmrg * CMP instruction to select front or back color based on FACE
115801e04c3fSmrg */
115901e04c3fSmrg#define TWOSIDE_GROW(n)  (                      \
116001e04c3fSmrg      2 +         /* FACE */                    \
116101e04c3fSmrg      ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
116201e04c3fSmrg      ((n) * 1) + /* TEMP[] */                  \
116301e04c3fSmrg      ((n) * NINST(3))   /* CMP instr */        \
116401e04c3fSmrg      )
116501e04c3fSmrg
116601e04c3fSmrgstatic void
116701e04c3fSmrgemit_twoside(struct tgsi_transform_context *tctx)
116801e04c3fSmrg{
116901e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
117001e04c3fSmrg   struct tgsi_shader_info *info = ctx->info;
117101e04c3fSmrg   struct tgsi_full_declaration decl;
117201e04c3fSmrg   struct tgsi_full_instruction new_inst;
117301e04c3fSmrg   unsigned inbase, tmpbase;
117401e04c3fSmrg   unsigned i;
117501e04c3fSmrg
117601e04c3fSmrg   inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
117701e04c3fSmrg   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
117801e04c3fSmrg
117901e04c3fSmrg   /* additional inputs for BCOLOR's */
118001e04c3fSmrg   for (i = 0; i < ctx->two_side_colors; i++) {
118101e04c3fSmrg      unsigned in_idx = ctx->two_side_idx[i];
118201e04c3fSmrg      decl = tgsi_default_full_declaration();
118301e04c3fSmrg      decl.Declaration.File = TGSI_FILE_INPUT;
118401e04c3fSmrg      decl.Declaration.Semantic = true;
118501e04c3fSmrg      decl.Range.First = decl.Range.Last = inbase + i;
118601e04c3fSmrg      decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
118701e04c3fSmrg      decl.Semantic.Index = info->input_semantic_index[in_idx];
118801e04c3fSmrg      decl.Declaration.Interpolate = true;
118901e04c3fSmrg      decl.Interp.Interpolate = info->input_interpolate[in_idx];
119001e04c3fSmrg      decl.Interp.Location = info->input_interpolate_loc[in_idx];
119101e04c3fSmrg      tctx->emit_declaration(tctx, &decl);
119201e04c3fSmrg   }
119301e04c3fSmrg
119401e04c3fSmrg   /* additional input for FACE */
119501e04c3fSmrg   if (ctx->two_side_colors && (ctx->face_idx == -1)) {
119601e04c3fSmrg      decl = tgsi_default_full_declaration();
119701e04c3fSmrg      decl.Declaration.File = TGSI_FILE_INPUT;
119801e04c3fSmrg      decl.Declaration.Semantic = true;
119901e04c3fSmrg      decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
120001e04c3fSmrg      decl.Semantic.Name = TGSI_SEMANTIC_FACE;
120101e04c3fSmrg      decl.Semantic.Index = 0;
120201e04c3fSmrg      tctx->emit_declaration(tctx, &decl);
120301e04c3fSmrg
120401e04c3fSmrg      ctx->face_idx = decl.Range.First;
120501e04c3fSmrg   }
120601e04c3fSmrg
120701e04c3fSmrg   /* additional temps for COLOR/BCOLOR selection: */
120801e04c3fSmrg   for (i = 0; i < ctx->two_side_colors; i++) {
120901e04c3fSmrg      decl = tgsi_default_full_declaration();
121001e04c3fSmrg      decl.Declaration.File = TGSI_FILE_TEMPORARY;
121101e04c3fSmrg      decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
121201e04c3fSmrg      tctx->emit_declaration(tctx, &decl);
121301e04c3fSmrg   }
121401e04c3fSmrg
121501e04c3fSmrg   /* and finally additional instructions to select COLOR/BCOLOR: */
121601e04c3fSmrg   for (i = 0; i < ctx->two_side_colors; i++) {
121701e04c3fSmrg      new_inst = tgsi_default_full_instruction();
121801e04c3fSmrg      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
121901e04c3fSmrg
122001e04c3fSmrg      new_inst.Instruction.NumDstRegs = 1;
122101e04c3fSmrg      new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
122201e04c3fSmrg      new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
122301e04c3fSmrg      new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
122401e04c3fSmrg
122501e04c3fSmrg      new_inst.Instruction.NumSrcRegs = 3;
122601e04c3fSmrg      new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
122701e04c3fSmrg      new_inst.Src[0].Register.Index = ctx->face_idx;
122801e04c3fSmrg      new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
122901e04c3fSmrg      new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
123001e04c3fSmrg      new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
123101e04c3fSmrg      new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
123201e04c3fSmrg      new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
123301e04c3fSmrg      new_inst.Src[1].Register.Index = inbase + i;
123401e04c3fSmrg      new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
123501e04c3fSmrg      new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
123601e04c3fSmrg      new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
123701e04c3fSmrg      new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
123801e04c3fSmrg      new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
123901e04c3fSmrg      new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
124001e04c3fSmrg      new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
124101e04c3fSmrg      new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
124201e04c3fSmrg      new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
124301e04c3fSmrg      new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
124401e04c3fSmrg
124501e04c3fSmrg      tctx->emit_instruction(tctx, &new_inst);
124601e04c3fSmrg   }
124701e04c3fSmrg}
124801e04c3fSmrg
124901e04c3fSmrgstatic void
125001e04c3fSmrgemit_decls(struct tgsi_transform_context *tctx)
125101e04c3fSmrg{
125201e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
125301e04c3fSmrg   struct tgsi_shader_info *info = ctx->info;
125401e04c3fSmrg   struct tgsi_full_declaration decl;
125501e04c3fSmrg   struct tgsi_full_immediate immed;
125601e04c3fSmrg   unsigned tmpbase;
125701e04c3fSmrg   unsigned i;
125801e04c3fSmrg
125901e04c3fSmrg   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
126001e04c3fSmrg
126101e04c3fSmrg   ctx->color_base = tmpbase + ctx->numtmp;
126201e04c3fSmrg
126301e04c3fSmrg   /* declare immediate: */
126401e04c3fSmrg   immed = tgsi_default_full_immediate();
126501e04c3fSmrg   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
126601e04c3fSmrg   immed.u[0].Float = 0.0;
126701e04c3fSmrg   immed.u[1].Float = 1.0;
126801e04c3fSmrg   immed.u[2].Float = 128.0;
126901e04c3fSmrg   immed.u[3].Float = 0.0;
127001e04c3fSmrg   tctx->emit_immediate(tctx, &immed);
127101e04c3fSmrg
127201e04c3fSmrg   ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
127301e04c3fSmrg   ctx->imm.Register.Index = info->immediate_count;
127401e04c3fSmrg   ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
127501e04c3fSmrg   ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
127601e04c3fSmrg   ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
127701e04c3fSmrg   ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
127801e04c3fSmrg
127901e04c3fSmrg   /* declare temp regs: */
128001e04c3fSmrg   for (i = 0; i < ctx->numtmp; i++) {
128101e04c3fSmrg      decl = tgsi_default_full_declaration();
128201e04c3fSmrg      decl.Declaration.File = TGSI_FILE_TEMPORARY;
128301e04c3fSmrg      decl.Range.First = decl.Range.Last = tmpbase + i;
128401e04c3fSmrg      tctx->emit_declaration(tctx, &decl);
128501e04c3fSmrg
128601e04c3fSmrg      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
128701e04c3fSmrg      ctx->tmp[i].src.Register.Index = tmpbase + i;
128801e04c3fSmrg      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
128901e04c3fSmrg      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
129001e04c3fSmrg      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
129101e04c3fSmrg      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
129201e04c3fSmrg
129301e04c3fSmrg      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
129401e04c3fSmrg      ctx->tmp[i].dst.Register.Index = tmpbase + i;
129501e04c3fSmrg      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
129601e04c3fSmrg   }
129701e04c3fSmrg
129801e04c3fSmrg   if (ctx->two_side_colors)
129901e04c3fSmrg      emit_twoside(tctx);
130001e04c3fSmrg}
130101e04c3fSmrg
130201e04c3fSmrgstatic void
130301e04c3fSmrgrename_color_inputs(struct tgsi_lowering_context *ctx,
130401e04c3fSmrg                    struct tgsi_full_instruction *inst)
130501e04c3fSmrg{
130601e04c3fSmrg   unsigned i, j;
130701e04c3fSmrg   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
130801e04c3fSmrg      struct tgsi_src_register *src = &inst->Src[i].Register;
130901e04c3fSmrg      if (src->File == TGSI_FILE_INPUT) {
131001e04c3fSmrg         for (j = 0; j < ctx->two_side_colors; j++) {
131101e04c3fSmrg	    if (src->Index == (int)ctx->two_side_idx[j]) {
131201e04c3fSmrg               src->File = TGSI_FILE_TEMPORARY;
131301e04c3fSmrg               src->Index = ctx->color_base + j;
131401e04c3fSmrg               break;
131501e04c3fSmrg            }
131601e04c3fSmrg         }
131701e04c3fSmrg      }
131801e04c3fSmrg   }
131901e04c3fSmrg
132001e04c3fSmrg}
132101e04c3fSmrg
132201e04c3fSmrgstatic void
132301e04c3fSmrgtransform_instr(struct tgsi_transform_context *tctx,
132401e04c3fSmrg		struct tgsi_full_instruction *inst)
132501e04c3fSmrg{
132601e04c3fSmrg   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
132701e04c3fSmrg
132801e04c3fSmrg   if (!ctx->emitted_decls) {
132901e04c3fSmrg      emit_decls(tctx);
133001e04c3fSmrg      ctx->emitted_decls = 1;
133101e04c3fSmrg   }
133201e04c3fSmrg
133301e04c3fSmrg   /* if emulating two-sided-color, we need to re-write some
133401e04c3fSmrg    * src registers:
133501e04c3fSmrg    */
133601e04c3fSmrg   if (ctx->two_side_colors)
133701e04c3fSmrg      rename_color_inputs(ctx, inst);
133801e04c3fSmrg
133901e04c3fSmrg   switch (inst->Instruction.Opcode) {
134001e04c3fSmrg   case TGSI_OPCODE_DST:
134101e04c3fSmrg      if (!ctx->config->lower_DST)
134201e04c3fSmrg         goto skip;
134301e04c3fSmrg      transform_dst(tctx, inst);
134401e04c3fSmrg      break;
134501e04c3fSmrg   case TGSI_OPCODE_LRP:
134601e04c3fSmrg      if (!ctx->config->lower_LRP)
134701e04c3fSmrg         goto skip;
134801e04c3fSmrg      transform_lrp(tctx, inst);
134901e04c3fSmrg      break;
135001e04c3fSmrg   case TGSI_OPCODE_FRC:
135101e04c3fSmrg      if (!ctx->config->lower_FRC)
135201e04c3fSmrg         goto skip;
135301e04c3fSmrg      transform_frc(tctx, inst);
135401e04c3fSmrg      break;
135501e04c3fSmrg   case TGSI_OPCODE_POW:
135601e04c3fSmrg      if (!ctx->config->lower_POW)
135701e04c3fSmrg         goto skip;
135801e04c3fSmrg      transform_pow(tctx, inst);
135901e04c3fSmrg      break;
136001e04c3fSmrg   case TGSI_OPCODE_LIT:
136101e04c3fSmrg      if (!ctx->config->lower_LIT)
136201e04c3fSmrg         goto skip;
136301e04c3fSmrg      transform_lit(tctx, inst);
136401e04c3fSmrg      break;
136501e04c3fSmrg   case TGSI_OPCODE_EXP:
136601e04c3fSmrg      if (!ctx->config->lower_EXP)
136701e04c3fSmrg         goto skip;
136801e04c3fSmrg      transform_exp(tctx, inst);
136901e04c3fSmrg      break;
137001e04c3fSmrg   case TGSI_OPCODE_LOG:
137101e04c3fSmrg      if (!ctx->config->lower_LOG)
137201e04c3fSmrg         goto skip;
137301e04c3fSmrg      transform_log(tctx, inst);
137401e04c3fSmrg      break;
137501e04c3fSmrg   case TGSI_OPCODE_DP4:
137601e04c3fSmrg      if (!ctx->config->lower_DP4)
137701e04c3fSmrg         goto skip;
137801e04c3fSmrg      transform_dotp(tctx, inst);
137901e04c3fSmrg      break;
138001e04c3fSmrg   case TGSI_OPCODE_DP3:
138101e04c3fSmrg      if (!ctx->config->lower_DP3)
138201e04c3fSmrg         goto skip;
138301e04c3fSmrg      transform_dotp(tctx, inst);
138401e04c3fSmrg      break;
138501e04c3fSmrg   case TGSI_OPCODE_DP2:
138601e04c3fSmrg      if (!ctx->config->lower_DP2)
138701e04c3fSmrg         goto skip;
138801e04c3fSmrg      transform_dotp(tctx, inst);
138901e04c3fSmrg      break;
139001e04c3fSmrg   case TGSI_OPCODE_FLR:
139101e04c3fSmrg      if (!ctx->config->lower_FLR)
139201e04c3fSmrg         goto skip;
139301e04c3fSmrg      transform_flr_ceil(tctx, inst);
139401e04c3fSmrg      break;
139501e04c3fSmrg   case TGSI_OPCODE_CEIL:
139601e04c3fSmrg      if (!ctx->config->lower_CEIL)
139701e04c3fSmrg         goto skip;
139801e04c3fSmrg      transform_flr_ceil(tctx, inst);
139901e04c3fSmrg      break;
140001e04c3fSmrg   case TGSI_OPCODE_TRUNC:
140101e04c3fSmrg      if (!ctx->config->lower_TRUNC)
140201e04c3fSmrg         goto skip;
140301e04c3fSmrg      transform_trunc(tctx, inst);
140401e04c3fSmrg      break;
140501e04c3fSmrg   case TGSI_OPCODE_TEX:
140601e04c3fSmrg   case TGSI_OPCODE_TXP:
140701e04c3fSmrg   case TGSI_OPCODE_TXB:
140801e04c3fSmrg   case TGSI_OPCODE_TXB2:
140901e04c3fSmrg   case TGSI_OPCODE_TXL:
141001e04c3fSmrg      if (transform_samp(tctx, inst))
141101e04c3fSmrg         goto skip;
141201e04c3fSmrg      break;
141301e04c3fSmrg   default:
141401e04c3fSmrg   skip:
141501e04c3fSmrg      tctx->emit_instruction(tctx, inst);
141601e04c3fSmrg      break;
141701e04c3fSmrg   }
141801e04c3fSmrg}
141901e04c3fSmrg
142001e04c3fSmrg/* returns NULL if no lowering required, else returns the new
142101e04c3fSmrg * tokens (which caller is required to free()).  In either case
142201e04c3fSmrg * returns the current info.
142301e04c3fSmrg */
142401e04c3fSmrgconst struct tgsi_token *
142501e04c3fSmrgtgsi_transform_lowering(const struct tgsi_lowering_config *config,
142601e04c3fSmrg                        const struct tgsi_token *tokens,
142701e04c3fSmrg                        struct tgsi_shader_info *info)
142801e04c3fSmrg{
142901e04c3fSmrg   struct tgsi_lowering_context ctx;
143001e04c3fSmrg   struct tgsi_token *newtoks;
143101e04c3fSmrg   int newlen, numtmp;
143201e04c3fSmrg
143301e04c3fSmrg   /* sanity check in case limit is ever increased: */
143401e04c3fSmrg   STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
143501e04c3fSmrg
143601e04c3fSmrg   /* sanity check the lowering */
143701e04c3fSmrg   assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
143801e04c3fSmrg   assert(!(config->lower_FRC && config->lower_TRUNC));
143901e04c3fSmrg
144001e04c3fSmrg   memset(&ctx, 0, sizeof(ctx));
144101e04c3fSmrg   ctx.base.transform_instruction = transform_instr;
144201e04c3fSmrg   ctx.info = info;
144301e04c3fSmrg   ctx.config = config;
144401e04c3fSmrg
144501e04c3fSmrg   tgsi_scan_shader(tokens, info);
144601e04c3fSmrg
144701e04c3fSmrg   /* if we are adding fragment shader support to emulate two-sided
144801e04c3fSmrg    * color, then figure out the number of additional inputs we need
144901e04c3fSmrg    * to create for BCOLOR's..
145001e04c3fSmrg    */
145101e04c3fSmrg   if ((info->processor == PIPE_SHADER_FRAGMENT) &&
145201e04c3fSmrg       config->color_two_side) {
145301e04c3fSmrg      int i;
145401e04c3fSmrg      ctx.face_idx = -1;
145501e04c3fSmrg      for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
145601e04c3fSmrg         if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
145701e04c3fSmrg            ctx.two_side_idx[ctx.two_side_colors++] = i;
145801e04c3fSmrg         if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
145901e04c3fSmrg            ctx.face_idx = i;
146001e04c3fSmrg      }
146101e04c3fSmrg   }
146201e04c3fSmrg
146301e04c3fSmrg   ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
146401e04c3fSmrg
146501e04c3fSmrg#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
146601e04c3fSmrg   /* if there are no instructions to lower, then we are done: */
146701e04c3fSmrg   if (!(OPCS(DST) ||
146801e04c3fSmrg         OPCS(LRP) ||
146901e04c3fSmrg         OPCS(FRC) ||
147001e04c3fSmrg         OPCS(POW) ||
147101e04c3fSmrg         OPCS(LIT) ||
147201e04c3fSmrg         OPCS(EXP) ||
147301e04c3fSmrg         OPCS(LOG) ||
147401e04c3fSmrg         OPCS(DP4) ||
147501e04c3fSmrg         OPCS(DP3) ||
147601e04c3fSmrg         OPCS(DP2) ||
147701e04c3fSmrg         OPCS(FLR) ||
147801e04c3fSmrg         OPCS(CEIL) ||
147901e04c3fSmrg         OPCS(TRUNC) ||
148001e04c3fSmrg         OPCS(TXP) ||
148101e04c3fSmrg         ctx.two_side_colors ||
148201e04c3fSmrg         ctx.saturate))
148301e04c3fSmrg      return NULL;
148401e04c3fSmrg
148501e04c3fSmrg#if 0  /* debug */
148601e04c3fSmrg   _debug_printf("BEFORE:");
148701e04c3fSmrg   tgsi_dump(tokens, 0);
148801e04c3fSmrg#endif
148901e04c3fSmrg
149001e04c3fSmrg   numtmp = 0;
149101e04c3fSmrg   newlen = tgsi_num_tokens(tokens);
149201e04c3fSmrg   if (OPCS(DST)) {
149301e04c3fSmrg      newlen += DST_GROW * OPCS(DST);
149401e04c3fSmrg      numtmp = MAX2(numtmp, DST_TMP);
149501e04c3fSmrg   }
149601e04c3fSmrg   if (OPCS(LRP)) {
149701e04c3fSmrg      newlen += LRP_GROW * OPCS(LRP);
149801e04c3fSmrg      numtmp = MAX2(numtmp, LRP_TMP);
149901e04c3fSmrg   }
150001e04c3fSmrg   if (OPCS(FRC)) {
150101e04c3fSmrg      newlen += FRC_GROW * OPCS(FRC);
150201e04c3fSmrg      numtmp = MAX2(numtmp, FRC_TMP);
150301e04c3fSmrg   }
150401e04c3fSmrg   if (OPCS(POW)) {
150501e04c3fSmrg      newlen += POW_GROW * OPCS(POW);
150601e04c3fSmrg      numtmp = MAX2(numtmp, POW_TMP);
150701e04c3fSmrg   }
150801e04c3fSmrg   if (OPCS(LIT)) {
150901e04c3fSmrg      newlen += LIT_GROW * OPCS(LIT);
151001e04c3fSmrg      numtmp = MAX2(numtmp, LIT_TMP);
151101e04c3fSmrg   }
151201e04c3fSmrg   if (OPCS(EXP)) {
151301e04c3fSmrg      newlen += EXP_GROW * OPCS(EXP);
151401e04c3fSmrg      numtmp = MAX2(numtmp, EXP_TMP);
151501e04c3fSmrg   }
151601e04c3fSmrg   if (OPCS(LOG)) {
151701e04c3fSmrg      newlen += LOG_GROW * OPCS(LOG);
151801e04c3fSmrg      numtmp = MAX2(numtmp, LOG_TMP);
151901e04c3fSmrg   }
152001e04c3fSmrg   if (OPCS(DP4)) {
152101e04c3fSmrg      newlen += DP4_GROW * OPCS(DP4);
152201e04c3fSmrg      numtmp = MAX2(numtmp, DOTP_TMP);
152301e04c3fSmrg   }
152401e04c3fSmrg   if (OPCS(DP3)) {
152501e04c3fSmrg      newlen += DP3_GROW * OPCS(DP3);
152601e04c3fSmrg      numtmp = MAX2(numtmp, DOTP_TMP);
152701e04c3fSmrg   }
152801e04c3fSmrg   if (OPCS(DP2)) {
152901e04c3fSmrg      newlen += DP2_GROW * OPCS(DP2);
153001e04c3fSmrg      numtmp = MAX2(numtmp, DOTP_TMP);
153101e04c3fSmrg   }
153201e04c3fSmrg   if (OPCS(FLR)) {
153301e04c3fSmrg      newlen += FLR_GROW * OPCS(FLR);
153401e04c3fSmrg      numtmp = MAX2(numtmp, FLR_TMP);
153501e04c3fSmrg   }
153601e04c3fSmrg   if (OPCS(CEIL)) {
153701e04c3fSmrg      newlen += CEIL_GROW * OPCS(CEIL);
153801e04c3fSmrg      numtmp = MAX2(numtmp, CEIL_TMP);
153901e04c3fSmrg   }
154001e04c3fSmrg   if (OPCS(TRUNC)) {
154101e04c3fSmrg      newlen += TRUNC_GROW * OPCS(TRUNC);
154201e04c3fSmrg      numtmp = MAX2(numtmp, TRUNC_TMP);
154301e04c3fSmrg   }
154401e04c3fSmrg   if (ctx.saturate || config->lower_TXP) {
154501e04c3fSmrg      int n = 0;
154601e04c3fSmrg
154701e04c3fSmrg      if (ctx.saturate) {
154801e04c3fSmrg         n = info->opcode_count[TGSI_OPCODE_TEX] +
154901e04c3fSmrg            info->opcode_count[TGSI_OPCODE_TXP] +
155001e04c3fSmrg            info->opcode_count[TGSI_OPCODE_TXB] +
155101e04c3fSmrg            info->opcode_count[TGSI_OPCODE_TXB2] +
155201e04c3fSmrg            info->opcode_count[TGSI_OPCODE_TXL];
155301e04c3fSmrg      } else if (config->lower_TXP) {
155401e04c3fSmrg          n = info->opcode_count[TGSI_OPCODE_TXP];
155501e04c3fSmrg      }
155601e04c3fSmrg
155701e04c3fSmrg      newlen += SAMP_GROW * n;
155801e04c3fSmrg      numtmp = MAX2(numtmp, SAMP_TMP);
155901e04c3fSmrg   }
156001e04c3fSmrg
156101e04c3fSmrg   /* specifically don't include two_side_colors temps in the count: */
156201e04c3fSmrg   ctx.numtmp = numtmp;
156301e04c3fSmrg
156401e04c3fSmrg   if (ctx.two_side_colors) {
156501e04c3fSmrg      newlen += TWOSIDE_GROW(ctx.two_side_colors);
156601e04c3fSmrg      /* note: we permanently consume temp regs, re-writing references
156701e04c3fSmrg       * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
156801e04c3fSmrg       * instruction that selects which varying to use):
156901e04c3fSmrg       */
157001e04c3fSmrg      numtmp += ctx.two_side_colors;
157101e04c3fSmrg   }
157201e04c3fSmrg
157301e04c3fSmrg   newlen += 2 * numtmp;
157401e04c3fSmrg   newlen += 5;        /* immediate */
157501e04c3fSmrg
157601e04c3fSmrg   newtoks = tgsi_alloc_tokens(newlen);
157701e04c3fSmrg   if (!newtoks)
157801e04c3fSmrg      return NULL;
157901e04c3fSmrg
158001e04c3fSmrg   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
158101e04c3fSmrg
158201e04c3fSmrg   tgsi_scan_shader(newtoks, info);
158301e04c3fSmrg
158401e04c3fSmrg#if 0  /* debug */
158501e04c3fSmrg   _debug_printf("AFTER:");
158601e04c3fSmrg   tgsi_dump(newtoks, 0);
158701e04c3fSmrg#endif
158801e04c3fSmrg
158901e04c3fSmrg   return newtoks;
159001e04c3fSmrg}
1591