101e04c3fSmrg/* 201e04c3fSmrg * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2001e04c3fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2101e04c3fSmrg * SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Rob Clark <robclark@freedesktop.org> 2501e04c3fSmrg */ 2601e04c3fSmrg 2701e04c3fSmrg#include "tgsi/tgsi_transform.h" 2801e04c3fSmrg#include "tgsi/tgsi_scan.h" 2901e04c3fSmrg#include "tgsi/tgsi_dump.h" 3001e04c3fSmrg 317ec681f3Smrg#include "util/compiler.h" 3201e04c3fSmrg#include "util/u_debug.h" 3301e04c3fSmrg#include "util/u_math.h" 3401e04c3fSmrg 3501e04c3fSmrg#include "tgsi_lowering.h" 3601e04c3fSmrg 3701e04c3fSmrgstruct tgsi_lowering_context { 3801e04c3fSmrg struct tgsi_transform_context base; 3901e04c3fSmrg const struct tgsi_lowering_config *config; 4001e04c3fSmrg struct tgsi_shader_info *info; 4101e04c3fSmrg unsigned two_side_colors; 4201e04c3fSmrg unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 4301e04c3fSmrg unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 4401e04c3fSmrg int face_idx; 4501e04c3fSmrg unsigned numtmp; 4601e04c3fSmrg struct { 4701e04c3fSmrg struct tgsi_full_src_register src; 4801e04c3fSmrg struct tgsi_full_dst_register dst; 4901e04c3fSmrg } tmp[2]; 5001e04c3fSmrg#define A 0 5101e04c3fSmrg#define B 1 5201e04c3fSmrg struct tgsi_full_src_register imm; 5301e04c3fSmrg int emitted_decls; 5401e04c3fSmrg unsigned saturate; 5501e04c3fSmrg}; 5601e04c3fSmrg 5701e04c3fSmrgstatic inline struct tgsi_lowering_context * 5801e04c3fSmrgtgsi_lowering_context(struct tgsi_transform_context *tctx) 5901e04c3fSmrg{ 6001e04c3fSmrg return (struct tgsi_lowering_context *)tctx; 6101e04c3fSmrg} 6201e04c3fSmrg 6301e04c3fSmrg/* 6401e04c3fSmrg * Utility helpers: 6501e04c3fSmrg */ 6601e04c3fSmrg 6701e04c3fSmrgstatic void 6801e04c3fSmrgreg_dst(struct tgsi_full_dst_register *dst, 6901e04c3fSmrg const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 7001e04c3fSmrg{ 7101e04c3fSmrg *dst = *orig_dst; 7201e04c3fSmrg dst->Register.WriteMask &= wrmask; 7301e04c3fSmrg assert(dst->Register.WriteMask); 7401e04c3fSmrg} 7501e04c3fSmrg 7601e04c3fSmrgstatic inline void 7701e04c3fSmrgget_swiz(unsigned *swiz, const struct tgsi_src_register *src) 7801e04c3fSmrg{ 7901e04c3fSmrg swiz[0] = src->SwizzleX; 8001e04c3fSmrg swiz[1] = src->SwizzleY; 8101e04c3fSmrg swiz[2] = src->SwizzleZ; 8201e04c3fSmrg swiz[3] = src->SwizzleW; 8301e04c3fSmrg} 8401e04c3fSmrg 8501e04c3fSmrgstatic void 8601e04c3fSmrgreg_src(struct tgsi_full_src_register *src, 8701e04c3fSmrg const struct tgsi_full_src_register *orig_src, 8801e04c3fSmrg unsigned sx, unsigned sy, unsigned sz, unsigned sw) 8901e04c3fSmrg{ 9001e04c3fSmrg unsigned swiz[4]; 9101e04c3fSmrg get_swiz(swiz, &orig_src->Register); 9201e04c3fSmrg *src = *orig_src; 9301e04c3fSmrg src->Register.SwizzleX = swiz[sx]; 9401e04c3fSmrg src->Register.SwizzleY = swiz[sy]; 9501e04c3fSmrg src->Register.SwizzleZ = swiz[sz]; 9601e04c3fSmrg src->Register.SwizzleW = swiz[sw]; 9701e04c3fSmrg} 9801e04c3fSmrg 9901e04c3fSmrg#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 10001e04c3fSmrg#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 10101e04c3fSmrg TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 10201e04c3fSmrg 10301e04c3fSmrg/* 10401e04c3fSmrg * if (dst.x aliases src.x) { 10501e04c3fSmrg * MOV tmpA.x, src.x 10601e04c3fSmrg * src = tmpA 10701e04c3fSmrg * } 10801e04c3fSmrg * COS dst.x, src.x 10901e04c3fSmrg * SIN dst.y, src.x 11001e04c3fSmrg * MOV dst.zw, imm{0.0, 1.0} 11101e04c3fSmrg */ 11201e04c3fSmrgstatic bool 11301e04c3fSmrgaliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 11401e04c3fSmrg const struct tgsi_full_src_register *src, unsigned src_mask) 11501e04c3fSmrg{ 11601e04c3fSmrg if ((dst->Register.File == src->Register.File) && 11701e04c3fSmrg (dst->Register.Index == src->Register.Index)) { 11801e04c3fSmrg unsigned i, actual_mask = 0; 11901e04c3fSmrg unsigned swiz[4]; 12001e04c3fSmrg get_swiz(swiz, &src->Register); 12101e04c3fSmrg for (i = 0; i < 4; i++) 12201e04c3fSmrg if (src_mask & (1 << i)) 12301e04c3fSmrg actual_mask |= (1 << swiz[i]); 12401e04c3fSmrg if (actual_mask & dst_mask) 12501e04c3fSmrg return true; 12601e04c3fSmrg } 12701e04c3fSmrg return false; 12801e04c3fSmrg} 12901e04c3fSmrg 13001e04c3fSmrgstatic void 13101e04c3fSmrgcreate_mov(struct tgsi_transform_context *tctx, 13201e04c3fSmrg const struct tgsi_full_dst_register *dst, 13301e04c3fSmrg const struct tgsi_full_src_register *src, 13401e04c3fSmrg unsigned mask, unsigned saturate) 13501e04c3fSmrg{ 13601e04c3fSmrg struct tgsi_full_instruction new_inst; 13701e04c3fSmrg 13801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 13901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 14001e04c3fSmrg new_inst.Instruction.Saturate = saturate; 14101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 14201e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, mask); 14301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 14401e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 14501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 14601e04c3fSmrg} 14701e04c3fSmrg 14801e04c3fSmrg/* to help calculate # of tgsi tokens for a lowering.. we assume 14901e04c3fSmrg * the worst case, ie. removed instructions don't have ADDR[] or 15001e04c3fSmrg * anything which increases the # of tokens per src/dst and the 15101e04c3fSmrg * inserted instructions do. 15201e04c3fSmrg * 15301e04c3fSmrg * OINST() - old instruction 15401e04c3fSmrg * 1 : instruction itself 15501e04c3fSmrg * 1 : dst 15601e04c3fSmrg * 1 * nargs : srcN 15701e04c3fSmrg * 15801e04c3fSmrg * NINST() - new instruction 15901e04c3fSmrg * 1 : instruction itself 16001e04c3fSmrg * 2 : dst 16101e04c3fSmrg * 2 * nargs : srcN 16201e04c3fSmrg */ 16301e04c3fSmrg 16401e04c3fSmrg#define OINST(nargs) (1 + 1 + 1 * (nargs)) 16501e04c3fSmrg#define NINST(nargs) (1 + 2 + 2 * (nargs)) 16601e04c3fSmrg 16701e04c3fSmrg/* 16801e04c3fSmrg * Lowering Translators: 16901e04c3fSmrg */ 17001e04c3fSmrg 17101e04c3fSmrg/* DST - Distance Vector 17201e04c3fSmrg * dst.x = 1.0 17301e04c3fSmrg * dst.y = src0.y \times src1.y 17401e04c3fSmrg * dst.z = src0.z 17501e04c3fSmrg * dst.w = src1.w 17601e04c3fSmrg * 17701e04c3fSmrg * ; note: could be more clever and use just a single temp 17801e04c3fSmrg * ; if I was clever enough to re-write the swizzles. 17901e04c3fSmrg * ; needs: 2 tmp, imm{1.0} 18001e04c3fSmrg * if (dst.y aliases src0.z) { 18101e04c3fSmrg * MOV tmpA.yz, src0.yz 18201e04c3fSmrg * src0 = tmpA 18301e04c3fSmrg * } 18401e04c3fSmrg * if (dst.yz aliases src1.w) { 18501e04c3fSmrg * MOV tmpB.yw, src1.yw 18601e04c3fSmrg * src1 = tmpB 18701e04c3fSmrg * } 18801e04c3fSmrg * MUL dst.y, src0.y, src1.y 18901e04c3fSmrg * MOV dst.z, src0.z 19001e04c3fSmrg * MOV dst.w, src1.w 19101e04c3fSmrg * MOV dst.x, imm{1.0} 19201e04c3fSmrg */ 19301e04c3fSmrg#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 19401e04c3fSmrg NINST(1) + NINST(1) - OINST(2)) 19501e04c3fSmrg#define DST_TMP 2 19601e04c3fSmrgstatic void 19701e04c3fSmrgtransform_dst(struct tgsi_transform_context *tctx, 19801e04c3fSmrg struct tgsi_full_instruction *inst) 19901e04c3fSmrg{ 20001e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 20101e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 20201e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 20301e04c3fSmrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 20401e04c3fSmrg struct tgsi_full_instruction new_inst; 20501e04c3fSmrg 20601e04c3fSmrg if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 20701e04c3fSmrg create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 20801e04c3fSmrg src0 = &ctx->tmp[A].src; 20901e04c3fSmrg } 21001e04c3fSmrg 21101e04c3fSmrg if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 21201e04c3fSmrg create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 21301e04c3fSmrg src1 = &ctx->tmp[B].src; 21401e04c3fSmrg } 21501e04c3fSmrg 21601e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 21701e04c3fSmrg /* MUL dst.y, src0.y, src1.y */ 21801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 21901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 22001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 22101e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 22201e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 22301e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 22401e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 22501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 22601e04c3fSmrg } 22701e04c3fSmrg 22801e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 22901e04c3fSmrg /* MOV dst.z, src0.z */ 23001e04c3fSmrg new_inst = tgsi_default_full_instruction(); 23101e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 23201e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 23301e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 23401e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 23501e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 23601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 23701e04c3fSmrg } 23801e04c3fSmrg 23901e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 24001e04c3fSmrg /* MOV dst.w, src1.w */ 24101e04c3fSmrg new_inst = tgsi_default_full_instruction(); 24201e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 24301e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 24401e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 24501e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 24601e04c3fSmrg reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 24701e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 24801e04c3fSmrg } 24901e04c3fSmrg 25001e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 25101e04c3fSmrg /* MOV dst.x, imm{1.0} */ 25201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 25301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 25401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 25501e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 25601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 25701e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 25801e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 25901e04c3fSmrg } 26001e04c3fSmrg} 26101e04c3fSmrg 26201e04c3fSmrg/* LRP - Linear Interpolate 26301e04c3fSmrg * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 26401e04c3fSmrg * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 26501e04c3fSmrg * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 26601e04c3fSmrg * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 26701e04c3fSmrg * 26801e04c3fSmrg * This becomes: src0 \times src1 + src2 - src0 \times src2, which 26901e04c3fSmrg * can then become: src0 \times src1 - (src0 \times src2 - src2) 27001e04c3fSmrg * 27101e04c3fSmrg * ; needs: 1 tmp 27201e04c3fSmrg * MAD tmpA, src0, src2, -src2 27301e04c3fSmrg * MAD dst, src0, src1, -tmpA 27401e04c3fSmrg */ 27501e04c3fSmrg#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 27601e04c3fSmrg#define LRP_TMP 1 27701e04c3fSmrgstatic void 27801e04c3fSmrgtransform_lrp(struct tgsi_transform_context *tctx, 27901e04c3fSmrg struct tgsi_full_instruction *inst) 28001e04c3fSmrg{ 28101e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 28201e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 28301e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 28401e04c3fSmrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 28501e04c3fSmrg struct tgsi_full_src_register *src2 = &inst->Src[2]; 28601e04c3fSmrg struct tgsi_full_instruction new_inst; 28701e04c3fSmrg 28801e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 28901e04c3fSmrg /* MAD tmpA, src0, src2, -src2 */ 29001e04c3fSmrg new_inst = tgsi_default_full_instruction(); 29101e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 29201e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 29301e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 29401e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 29501e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 29601e04c3fSmrg reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 29701e04c3fSmrg reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 29801e04c3fSmrg new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 29901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 30001e04c3fSmrg 30101e04c3fSmrg /* MAD dst, src0, src1, -tmpA */ 30201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 30301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 30401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 30501e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 30601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 30701e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 30801e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 30901e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 31001e04c3fSmrg new_inst.Src[2].Register.Negate = true; 31101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 31201e04c3fSmrg } 31301e04c3fSmrg} 31401e04c3fSmrg 31501e04c3fSmrg/* FRC - Fraction 31601e04c3fSmrg * dst.x = src.x - \lfloor src.x\rfloor 31701e04c3fSmrg * dst.y = src.y - \lfloor src.y\rfloor 31801e04c3fSmrg * dst.z = src.z - \lfloor src.z\rfloor 31901e04c3fSmrg * dst.w = src.w - \lfloor src.w\rfloor 32001e04c3fSmrg * 32101e04c3fSmrg * ; needs: 1 tmp 32201e04c3fSmrg * FLR tmpA, src 32301e04c3fSmrg * SUB dst, src, tmpA 32401e04c3fSmrg */ 32501e04c3fSmrg#define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 32601e04c3fSmrg#define FRC_TMP 1 32701e04c3fSmrgstatic void 32801e04c3fSmrgtransform_frc(struct tgsi_transform_context *tctx, 32901e04c3fSmrg struct tgsi_full_instruction *inst) 33001e04c3fSmrg{ 33101e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 33201e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 33301e04c3fSmrg struct tgsi_full_src_register *src = &inst->Src[0]; 33401e04c3fSmrg struct tgsi_full_instruction new_inst; 33501e04c3fSmrg 33601e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 33701e04c3fSmrg /* FLR tmpA, src */ 33801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 33901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 34001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 34101e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 34201e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 34301e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 34401e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 34501e04c3fSmrg 34601e04c3fSmrg /* SUB dst, src, tmpA */ 34701e04c3fSmrg new_inst = tgsi_default_full_instruction(); 34801e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 34901e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 35001e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 35101e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 35201e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 35301e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 35401e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 35501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 35601e04c3fSmrg } 35701e04c3fSmrg} 35801e04c3fSmrg 35901e04c3fSmrg/* POW - Power 36001e04c3fSmrg * dst.x = src0.x^{src1.x} 36101e04c3fSmrg * dst.y = src0.x^{src1.x} 36201e04c3fSmrg * dst.z = src0.x^{src1.x} 36301e04c3fSmrg * dst.w = src0.x^{src1.x} 36401e04c3fSmrg * 36501e04c3fSmrg * ; needs: 1 tmp 36601e04c3fSmrg * LG2 tmpA.x, src0.x 36701e04c3fSmrg * MUL tmpA.x, src1.x, tmpA.x 36801e04c3fSmrg * EX2 dst, tmpA.x 36901e04c3fSmrg */ 37001e04c3fSmrg#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 37101e04c3fSmrg#define POW_TMP 1 37201e04c3fSmrgstatic void 37301e04c3fSmrgtransform_pow(struct tgsi_transform_context *tctx, 37401e04c3fSmrg struct tgsi_full_instruction *inst) 37501e04c3fSmrg{ 37601e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 37701e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 37801e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 37901e04c3fSmrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 38001e04c3fSmrg struct tgsi_full_instruction new_inst; 38101e04c3fSmrg 38201e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 38301e04c3fSmrg /* LG2 tmpA.x, src0.x */ 38401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 38501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 38601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 38701e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 38801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 38901e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 39001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 39101e04c3fSmrg 39201e04c3fSmrg /* MUL tmpA.x, src1.x, tmpA.x */ 39301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 39401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 39501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 39601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 39701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 39801e04c3fSmrg reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 39901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 40001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 40101e04c3fSmrg 40201e04c3fSmrg /* EX2 dst, tmpA.x */ 40301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 40401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 40501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 40601e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 40701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 40801e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 40901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 41001e04c3fSmrg } 41101e04c3fSmrg} 41201e04c3fSmrg 41301e04c3fSmrg/* LIT - Light Coefficients 41401e04c3fSmrg * dst.x = 1.0 41501e04c3fSmrg * dst.y = max(src.x, 0.0) 41601e04c3fSmrg * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 41701e04c3fSmrg * dst.w = 1.0 41801e04c3fSmrg * 41901e04c3fSmrg * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 42001e04c3fSmrg * MAX tmpA.xy, src.xy, imm{0.0} 42101e04c3fSmrg * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 42201e04c3fSmrg * LG2 tmpA.y, tmpA.y 42301e04c3fSmrg * MUL tmpA.y, tmpA.z, tmpA.y 42401e04c3fSmrg * EX2 tmpA.y, tmpA.y 42501e04c3fSmrg * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 42601e04c3fSmrg * MOV dst.yz, tmpA.xy 42701e04c3fSmrg * MOV dst.xw, imm{1.0} 42801e04c3fSmrg */ 42901e04c3fSmrg#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 43001e04c3fSmrg NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 43101e04c3fSmrg#define LIT_TMP 1 43201e04c3fSmrgstatic void 43301e04c3fSmrgtransform_lit(struct tgsi_transform_context *tctx, 43401e04c3fSmrg struct tgsi_full_instruction *inst) 43501e04c3fSmrg{ 43601e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 43701e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 43801e04c3fSmrg struct tgsi_full_src_register *src = &inst->Src[0]; 43901e04c3fSmrg struct tgsi_full_instruction new_inst; 44001e04c3fSmrg 44101e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 44201e04c3fSmrg /* MAX tmpA.xy, src.xy, imm{0.0} */ 44301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 44401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 44501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 44601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 44701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 44801e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 44901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 45001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 45101e04c3fSmrg 45201e04c3fSmrg /* MIN tmpA.z, src.w, imm{128.0} */ 45301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 45401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; 45501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 45601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 45701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 45801e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 45901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 46001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 46101e04c3fSmrg 46201e04c3fSmrg /* MAX tmpA.z, tmpA.z, -imm{128.0} */ 46301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 46401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 46501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 46601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 46701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 46801e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); 46901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 47001e04c3fSmrg new_inst.Src[1].Register.Negate = true; 47101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 47201e04c3fSmrg 47301e04c3fSmrg /* LG2 tmpA.y, tmpA.y */ 47401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 47501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 47601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 47701e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 47801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 47901e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 48001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 48101e04c3fSmrg 48201e04c3fSmrg /* MUL tmpA.y, tmpA.z, tmpA.y */ 48301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 48401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 48501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 48601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 48701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 48801e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 48901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 49001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 49101e04c3fSmrg 49201e04c3fSmrg /* EX2 tmpA.y, tmpA.y */ 49301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 49401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 49501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 49601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 49701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 49801e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 49901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 50001e04c3fSmrg 50101e04c3fSmrg /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 50201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 50301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 50401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 50501e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 50601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 50701e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 50801e04c3fSmrg new_inst.Src[0].Register.Negate = true; 50901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 51001e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 51101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 51201e04c3fSmrg 51301e04c3fSmrg /* MOV dst.yz, tmpA.xy */ 51401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 51501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 51601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 51701e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 51801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 51901e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 52001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 52101e04c3fSmrg } 52201e04c3fSmrg 52301e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 52401e04c3fSmrg /* MOV dst.xw, imm{1.0} */ 52501e04c3fSmrg new_inst = tgsi_default_full_instruction(); 52601e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 52701e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 52801e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 52901e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 53001e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 53101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 53201e04c3fSmrg } 53301e04c3fSmrg} 53401e04c3fSmrg 53501e04c3fSmrg/* EXP - Approximate Exponential Base 2 53601e04c3fSmrg * dst.x = 2^{\lfloor src.x\rfloor} 53701e04c3fSmrg * dst.y = src.x - \lfloor src.x\rfloor 53801e04c3fSmrg * dst.z = 2^{src.x} 53901e04c3fSmrg * dst.w = 1.0 54001e04c3fSmrg * 54101e04c3fSmrg * ; needs: 1 tmp, imm{1.0} 54201e04c3fSmrg * if (lowering FLR) { 54301e04c3fSmrg * FRC tmpA.x, src.x 54401e04c3fSmrg * SUB tmpA.x, src.x, tmpA.x 54501e04c3fSmrg * } else { 54601e04c3fSmrg * FLR tmpA.x, src.x 54701e04c3fSmrg * } 54801e04c3fSmrg * EX2 tmpA.y, src.x 54901e04c3fSmrg * SUB dst.y, src.x, tmpA.x 55001e04c3fSmrg * EX2 dst.x, tmpA.x 55101e04c3fSmrg * MOV dst.z, tmpA.y 55201e04c3fSmrg * MOV dst.w, imm{1.0} 55301e04c3fSmrg */ 55401e04c3fSmrg#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ 55501e04c3fSmrg NINST(1)+ NINST(1) - OINST(1)) 55601e04c3fSmrg#define EXP_TMP 1 55701e04c3fSmrgstatic void 55801e04c3fSmrgtransform_exp(struct tgsi_transform_context *tctx, 55901e04c3fSmrg struct tgsi_full_instruction *inst) 56001e04c3fSmrg{ 56101e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 56201e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 56301e04c3fSmrg struct tgsi_full_src_register *src = &inst->Src[0]; 56401e04c3fSmrg struct tgsi_full_instruction new_inst; 56501e04c3fSmrg 56601e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 56701e04c3fSmrg if (ctx->config->lower_FLR) { 56801e04c3fSmrg /* FRC tmpA.x, src.x */ 56901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 57001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 57101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 57201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 57301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 57401e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 57501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 57601e04c3fSmrg 57701e04c3fSmrg /* SUB tmpA.x, src.x, tmpA.x */ 57801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 57901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 58001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 58101e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 58201e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 58301e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 58401e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 58501e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 58601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 58701e04c3fSmrg } else { 58801e04c3fSmrg /* FLR tmpA.x, src.x */ 58901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 59001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 59101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 59201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 59301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 59401e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 59501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 59601e04c3fSmrg } 59701e04c3fSmrg } 59801e04c3fSmrg 59901e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 60001e04c3fSmrg /* EX2 tmpA.y, src.x */ 60101e04c3fSmrg new_inst = tgsi_default_full_instruction(); 60201e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 60301e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 60401e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 60501e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 60601e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 60701e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 60801e04c3fSmrg } 60901e04c3fSmrg 61001e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 61101e04c3fSmrg /* SUB dst.y, src.x, tmpA.x */ 61201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 61301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 61401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 61501e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 61601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 61701e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 61801e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 61901e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 62001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 62101e04c3fSmrg } 62201e04c3fSmrg 62301e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 62401e04c3fSmrg /* EX2 dst.x, tmpA.x */ 62501e04c3fSmrg new_inst = tgsi_default_full_instruction(); 62601e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 62701e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 62801e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 62901e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 63001e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 63101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 63201e04c3fSmrg } 63301e04c3fSmrg 63401e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 63501e04c3fSmrg /* MOV dst.z, tmpA.y */ 63601e04c3fSmrg new_inst = tgsi_default_full_instruction(); 63701e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 63801e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 63901e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 64001e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 64101e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 64201e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 64301e04c3fSmrg } 64401e04c3fSmrg 64501e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 64601e04c3fSmrg /* MOV dst.w, imm{1.0} */ 64701e04c3fSmrg new_inst = tgsi_default_full_instruction(); 64801e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 64901e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 65001e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 65101e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 65201e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 65301e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 65401e04c3fSmrg } 65501e04c3fSmrg} 65601e04c3fSmrg 65701e04c3fSmrg/* LOG - Approximate Logarithm Base 2 65801e04c3fSmrg * dst.x = \lfloor\log_2{|src.x|}\rfloor 65901e04c3fSmrg * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 66001e04c3fSmrg * dst.z = \log_2{|src.x|} 66101e04c3fSmrg * dst.w = 1.0 66201e04c3fSmrg * 66301e04c3fSmrg * ; needs: 1 tmp, imm{1.0} 66401e04c3fSmrg * LG2 tmpA.x, |src.x| 66501e04c3fSmrg * if (lowering FLR) { 66601e04c3fSmrg * FRC tmpA.y, tmpA.x 66701e04c3fSmrg * SUB tmpA.y, tmpA.x, tmpA.y 66801e04c3fSmrg * } else { 66901e04c3fSmrg * FLR tmpA.y, tmpA.x 67001e04c3fSmrg * } 67101e04c3fSmrg * EX2 tmpA.z, tmpA.y 67201e04c3fSmrg * RCP tmpA.z, tmpA.z 67301e04c3fSmrg * MUL dst.y, |src.x|, tmpA.z 67401e04c3fSmrg * MOV dst.xz, tmpA.yx 67501e04c3fSmrg * MOV dst.w, imm{1.0} 67601e04c3fSmrg */ 67701e04c3fSmrg#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 67801e04c3fSmrg NINST(2) + NINST(1) + NINST(1) - OINST(1)) 67901e04c3fSmrg#define LOG_TMP 1 68001e04c3fSmrgstatic void 68101e04c3fSmrgtransform_log(struct tgsi_transform_context *tctx, 68201e04c3fSmrg struct tgsi_full_instruction *inst) 68301e04c3fSmrg{ 68401e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 68501e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 68601e04c3fSmrg struct tgsi_full_src_register *src = &inst->Src[0]; 68701e04c3fSmrg struct tgsi_full_instruction new_inst; 68801e04c3fSmrg 68901e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 69001e04c3fSmrg /* LG2 tmpA.x, |src.x| */ 69101e04c3fSmrg new_inst = tgsi_default_full_instruction(); 69201e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 69301e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 69401e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 69501e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 69601e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 69701e04c3fSmrg new_inst.Src[0].Register.Absolute = true; 69801e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 69901e04c3fSmrg } 70001e04c3fSmrg 70101e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 70201e04c3fSmrg if (ctx->config->lower_FLR) { 70301e04c3fSmrg /* FRC tmpA.y, tmpA.x */ 70401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 70501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 70601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 70701e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 70801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 70901e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 71001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 71101e04c3fSmrg 71201e04c3fSmrg /* SUB tmpA.y, tmpA.x, tmpA.y */ 71301e04c3fSmrg new_inst = tgsi_default_full_instruction(); 71401e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 71501e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 71601e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 71701e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 71801e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 71901e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 72001e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 72101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 72201e04c3fSmrg } else { 72301e04c3fSmrg /* FLR tmpA.y, tmpA.x */ 72401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 72501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 72601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 72701e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 72801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 72901e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 73001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 73101e04c3fSmrg } 73201e04c3fSmrg } 73301e04c3fSmrg 73401e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 73501e04c3fSmrg /* EX2 tmpA.z, tmpA.y */ 73601e04c3fSmrg new_inst = tgsi_default_full_instruction(); 73701e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 73801e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 73901e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 74001e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 74101e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 74201e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 74301e04c3fSmrg 74401e04c3fSmrg /* RCP tmpA.z, tmpA.z */ 74501e04c3fSmrg new_inst = tgsi_default_full_instruction(); 74601e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 74701e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 74801e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 74901e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 75001e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 75101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 75201e04c3fSmrg 75301e04c3fSmrg /* MUL dst.y, |src.x|, tmpA.z */ 75401e04c3fSmrg new_inst = tgsi_default_full_instruction(); 75501e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 75601e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 75701e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 75801e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 75901e04c3fSmrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 76001e04c3fSmrg new_inst.Src[0].Register.Absolute = true; 76101e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 76201e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 76301e04c3fSmrg } 76401e04c3fSmrg 76501e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 76601e04c3fSmrg /* MOV dst.xz, tmpA.yx */ 76701e04c3fSmrg new_inst = tgsi_default_full_instruction(); 76801e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 76901e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 77001e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 77101e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 77201e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 77301e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 77401e04c3fSmrg } 77501e04c3fSmrg 77601e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 77701e04c3fSmrg /* MOV dst.w, imm{1.0} */ 77801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 77901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 78001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 78101e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 78201e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 78301e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 78401e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 78501e04c3fSmrg } 78601e04c3fSmrg} 78701e04c3fSmrg 78801e04c3fSmrg/* DP4 - 4-component Dot Product 78901e04c3fSmrg * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 79001e04c3fSmrg * 79101e04c3fSmrg * DP3 - 3-component Dot Product 79201e04c3fSmrg * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 79301e04c3fSmrg * 79401e04c3fSmrg * DP2 - 2-component Dot Product 79501e04c3fSmrg * dst = src0.x \times src1.x + src0.y \times src1.y 79601e04c3fSmrg * 79701e04c3fSmrg * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 79801e04c3fSmrg * operations, which is what you'd prefer for a ISA that is natively 79901e04c3fSmrg * scalar. Probably a native vector ISA would at least already have 80001e04c3fSmrg * DP4/DP3 instructions, but perhaps there is room for an alternative 80101e04c3fSmrg * translation for DP2 using vector instructions. 80201e04c3fSmrg * 80301e04c3fSmrg * ; needs: 1 tmp 80401e04c3fSmrg * MUL tmpA.x, src0.x, src1.x 80501e04c3fSmrg * MAD tmpA.x, src0.y, src1.y, tmpA.x 80601e04c3fSmrg * if (DP3 || DP4) { 80701e04c3fSmrg * MAD tmpA.x, src0.z, src1.z, tmpA.x 80801e04c3fSmrg * if (DP4) { 80901e04c3fSmrg * MAD tmpA.x, src0.w, src1.w, tmpA.x 81001e04c3fSmrg * } 81101e04c3fSmrg * } 81201e04c3fSmrg * ; fixup last instruction to replicate into dst 81301e04c3fSmrg */ 81401e04c3fSmrg#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 81501e04c3fSmrg#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 81601e04c3fSmrg#define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 81701e04c3fSmrg#define DOTP_TMP 1 81801e04c3fSmrgstatic void 81901e04c3fSmrgtransform_dotp(struct tgsi_transform_context *tctx, 82001e04c3fSmrg struct tgsi_full_instruction *inst) 82101e04c3fSmrg{ 82201e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 82301e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 82401e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 82501e04c3fSmrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 82601e04c3fSmrg struct tgsi_full_instruction new_inst; 82701e04c3fSmrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 82801e04c3fSmrg 82901e04c3fSmrg /* NOTE: any potential last instruction must replicate src on all 83001e04c3fSmrg * components (since it could be re-written to write to final dst) 83101e04c3fSmrg */ 83201e04c3fSmrg 83301e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 83401e04c3fSmrg /* MUL tmpA.x, src0.x, src1.x */ 83501e04c3fSmrg new_inst = tgsi_default_full_instruction(); 83601e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 83701e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 83801e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 83901e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 84001e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 84101e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 84201e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 84301e04c3fSmrg 84401e04c3fSmrg /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 84501e04c3fSmrg new_inst = tgsi_default_full_instruction(); 84601e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 84701e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 84801e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 84901e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 85001e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 85101e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); 85201e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 85301e04c3fSmrg 85401e04c3fSmrg if ((opcode == TGSI_OPCODE_DP3) || 85501e04c3fSmrg (opcode == TGSI_OPCODE_DP4)) { 85601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 85701e04c3fSmrg 85801e04c3fSmrg /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 85901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 86001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 86101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 86201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 86301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 86401e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 86501e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 86601e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 86701e04c3fSmrg 86801e04c3fSmrg if (opcode == TGSI_OPCODE_DP4) { 86901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 87001e04c3fSmrg 87101e04c3fSmrg /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 87201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 87301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 87401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 87501e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 87601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 87701e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 87801e04c3fSmrg reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 87901e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 88001e04c3fSmrg } 88101e04c3fSmrg } 88201e04c3fSmrg 88301e04c3fSmrg /* fixup last instruction to write to dst: */ 88401e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 88501e04c3fSmrg 88601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 88701e04c3fSmrg } 88801e04c3fSmrg} 88901e04c3fSmrg 89001e04c3fSmrg/* FLR - floor, CEIL - ceil 89101e04c3fSmrg * ; needs: 1 tmp 89201e04c3fSmrg * if (CEIL) { 89301e04c3fSmrg * FRC tmpA, -src 89401e04c3fSmrg * ADD dst, src, tmpA 89501e04c3fSmrg * } else { 89601e04c3fSmrg * FRC tmpA, src 89701e04c3fSmrg * SUB dst, src, tmpA 89801e04c3fSmrg * } 89901e04c3fSmrg */ 90001e04c3fSmrg#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 90101e04c3fSmrg#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 90201e04c3fSmrg#define FLR_TMP 1 90301e04c3fSmrg#define CEIL_TMP 1 90401e04c3fSmrgstatic void 90501e04c3fSmrgtransform_flr_ceil(struct tgsi_transform_context *tctx, 90601e04c3fSmrg struct tgsi_full_instruction *inst) 90701e04c3fSmrg{ 90801e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 90901e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 91001e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 91101e04c3fSmrg struct tgsi_full_instruction new_inst; 91201e04c3fSmrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 91301e04c3fSmrg 91401e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 91501e04c3fSmrg /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 91601e04c3fSmrg new_inst = tgsi_default_full_instruction(); 91701e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 91801e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 91901e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 92001e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 92101e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 92201e04c3fSmrg 92301e04c3fSmrg if (opcode == TGSI_OPCODE_CEIL) 92401e04c3fSmrg new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 92501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 92601e04c3fSmrg 92701e04c3fSmrg /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 92801e04c3fSmrg new_inst = tgsi_default_full_instruction(); 92901e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 93001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 93101e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 93201e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 93301e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 93401e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 93501e04c3fSmrg if (opcode == TGSI_OPCODE_FLR) 93601e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 93701e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 93801e04c3fSmrg } 93901e04c3fSmrg} 94001e04c3fSmrg 94101e04c3fSmrg/* TRUNC - truncate off fractional part 94201e04c3fSmrg * dst.x = trunc(src.x) 94301e04c3fSmrg * dst.y = trunc(src.y) 94401e04c3fSmrg * dst.z = trunc(src.z) 94501e04c3fSmrg * dst.w = trunc(src.w) 94601e04c3fSmrg * 94701e04c3fSmrg * ; needs: 1 tmp 94801e04c3fSmrg * if (lower FLR) { 94901e04c3fSmrg * FRC tmpA, |src| 95001e04c3fSmrg * SUB tmpA, |src|, tmpA 95101e04c3fSmrg * } else { 95201e04c3fSmrg * FLR tmpA, |src| 95301e04c3fSmrg * } 95401e04c3fSmrg * CMP dst, src, -tmpA, tmpA 95501e04c3fSmrg */ 95601e04c3fSmrg#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 95701e04c3fSmrg#define TRUNC_TMP 1 95801e04c3fSmrgstatic void 95901e04c3fSmrgtransform_trunc(struct tgsi_transform_context *tctx, 96001e04c3fSmrg struct tgsi_full_instruction *inst) 96101e04c3fSmrg{ 96201e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 96301e04c3fSmrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 96401e04c3fSmrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 96501e04c3fSmrg struct tgsi_full_instruction new_inst; 96601e04c3fSmrg 96701e04c3fSmrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 96801e04c3fSmrg if (ctx->config->lower_FLR) { 96901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 97001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 97101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 97201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 97301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 97401e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 97501e04c3fSmrg new_inst.Src[0].Register.Absolute = true; 97601e04c3fSmrg new_inst.Src[0].Register.Negate = false; 97701e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 97801e04c3fSmrg 97901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 98001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 98101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 98201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 98301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 98401e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 98501e04c3fSmrg new_inst.Src[0].Register.Absolute = true; 98601e04c3fSmrg new_inst.Src[0].Register.Negate = false; 98701e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 98801e04c3fSmrg new_inst.Src[1].Register.Negate = 1; 98901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 99001e04c3fSmrg } else { 99101e04c3fSmrg new_inst = tgsi_default_full_instruction(); 99201e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 99301e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 99401e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 99501e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 99601e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 99701e04c3fSmrg new_inst.Src[0].Register.Absolute = true; 99801e04c3fSmrg new_inst.Src[0].Register.Negate = false; 99901e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 100001e04c3fSmrg } 100101e04c3fSmrg 100201e04c3fSmrg new_inst = tgsi_default_full_instruction(); 100301e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 100401e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 100501e04c3fSmrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 100601e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 100701e04c3fSmrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 100801e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 100901e04c3fSmrg new_inst.Src[1].Register.Negate = true; 101001e04c3fSmrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 101101e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 101201e04c3fSmrg } 101301e04c3fSmrg} 101401e04c3fSmrg 101501e04c3fSmrg/* Inserts a MOV_SAT for the needed components of tex coord. Note that 101601e04c3fSmrg * in the case of TXP, the clamping must happen *after* projection, so 101701e04c3fSmrg * we need to lower TXP to TEX. 101801e04c3fSmrg * 101901e04c3fSmrg * MOV tmpA, src0 102001e04c3fSmrg * if (opc == TXP) { 102101e04c3fSmrg * ; do perspective division manually before clamping: 102201e04c3fSmrg * RCP tmpB, tmpA.w 102301e04c3fSmrg * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 102401e04c3fSmrg * opc = TEX; 102501e04c3fSmrg * } 102601e04c3fSmrg * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 102701e04c3fSmrg * <opc> dst, tmpA, ... 102801e04c3fSmrg */ 102901e04c3fSmrg#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 103001e04c3fSmrg#define SAMP_TMP 2 103101e04c3fSmrgstatic int 103201e04c3fSmrgtransform_samp(struct tgsi_transform_context *tctx, 103301e04c3fSmrg struct tgsi_full_instruction *inst) 103401e04c3fSmrg{ 103501e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 103601e04c3fSmrg struct tgsi_full_src_register *coord = &inst->Src[0]; 103701e04c3fSmrg struct tgsi_full_src_register *samp; 103801e04c3fSmrg struct tgsi_full_instruction new_inst; 103901e04c3fSmrg /* mask is clamped coords, pmask is all coords (for projection): */ 104001e04c3fSmrg unsigned mask = 0, pmask = 0, smask; 104101e04c3fSmrg unsigned tex = inst->Texture.Texture; 104201e04c3fSmrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 104301e04c3fSmrg bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 104401e04c3fSmrg (ctx->config->lower_TXP & (1 << tex)); 104501e04c3fSmrg 104601e04c3fSmrg if (opcode == TGSI_OPCODE_TXB2) { 104701e04c3fSmrg samp = &inst->Src[2]; 104801e04c3fSmrg } else { 104901e04c3fSmrg samp = &inst->Src[1]; 105001e04c3fSmrg } 105101e04c3fSmrg 105201e04c3fSmrg /* convert sampler # to bitmask to test: */ 105301e04c3fSmrg smask = 1 << samp->Register.Index; 105401e04c3fSmrg 105501e04c3fSmrg /* check if we actually need to lower this one: */ 105601e04c3fSmrg if (!(ctx->saturate & smask) && !lower_txp) 105701e04c3fSmrg return -1; 105801e04c3fSmrg 105901e04c3fSmrg /* figure out which coordinates need saturating: 106001e04c3fSmrg * - RECT textures should not get saturated 106101e04c3fSmrg * - array index coords should not get saturated 106201e04c3fSmrg */ 106301e04c3fSmrg switch (tex) { 106401e04c3fSmrg case TGSI_TEXTURE_3D: 106501e04c3fSmrg case TGSI_TEXTURE_CUBE: 106601e04c3fSmrg case TGSI_TEXTURE_CUBE_ARRAY: 106701e04c3fSmrg case TGSI_TEXTURE_SHADOWCUBE: 106801e04c3fSmrg case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 106901e04c3fSmrg if (ctx->config->saturate_r & smask) 107001e04c3fSmrg mask |= TGSI_WRITEMASK_Z; 107101e04c3fSmrg pmask |= TGSI_WRITEMASK_Z; 10727ec681f3Smrg FALLTHROUGH; 107301e04c3fSmrg 107401e04c3fSmrg case TGSI_TEXTURE_2D: 107501e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY: 107601e04c3fSmrg case TGSI_TEXTURE_SHADOW2D: 107701e04c3fSmrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 107801e04c3fSmrg case TGSI_TEXTURE_2D_MSAA: 107901e04c3fSmrg case TGSI_TEXTURE_2D_ARRAY_MSAA: 108001e04c3fSmrg if (ctx->config->saturate_t & smask) 108101e04c3fSmrg mask |= TGSI_WRITEMASK_Y; 108201e04c3fSmrg pmask |= TGSI_WRITEMASK_Y; 10837ec681f3Smrg FALLTHROUGH; 108401e04c3fSmrg 108501e04c3fSmrg case TGSI_TEXTURE_1D: 108601e04c3fSmrg case TGSI_TEXTURE_1D_ARRAY: 108701e04c3fSmrg case TGSI_TEXTURE_SHADOW1D: 108801e04c3fSmrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 108901e04c3fSmrg if (ctx->config->saturate_s & smask) 109001e04c3fSmrg mask |= TGSI_WRITEMASK_X; 109101e04c3fSmrg pmask |= TGSI_WRITEMASK_X; 109201e04c3fSmrg break; 109301e04c3fSmrg 109401e04c3fSmrg case TGSI_TEXTURE_RECT: 109501e04c3fSmrg case TGSI_TEXTURE_SHADOWRECT: 109601e04c3fSmrg /* we don't saturate, but in case of lower_txp we 109701e04c3fSmrg * still need to do the perspective divide: 109801e04c3fSmrg */ 109901e04c3fSmrg pmask = TGSI_WRITEMASK_XY; 110001e04c3fSmrg break; 110101e04c3fSmrg } 110201e04c3fSmrg 110301e04c3fSmrg /* sanity check.. driver could be asking to saturate a non- 110401e04c3fSmrg * existent coordinate component: 110501e04c3fSmrg */ 110601e04c3fSmrg if (!mask && !lower_txp) 110701e04c3fSmrg return -1; 110801e04c3fSmrg 110901e04c3fSmrg /* MOV tmpA, src0 */ 111001e04c3fSmrg create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 111101e04c3fSmrg 111201e04c3fSmrg /* This is a bit sad.. we need to clamp *after* the coords 111301e04c3fSmrg * are projected, which means lowering TXP to TEX and doing 111401e04c3fSmrg * the projection ourself. But since I haven't figured out 111501e04c3fSmrg * how to make the lowering code deliver an electric shock 111601e04c3fSmrg * to anyone using GL_CLAMP, we must do this instead: 111701e04c3fSmrg */ 111801e04c3fSmrg if (opcode == TGSI_OPCODE_TXP) { 111901e04c3fSmrg /* RCP tmpB.x tmpA.w */ 112001e04c3fSmrg new_inst = tgsi_default_full_instruction(); 112101e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 112201e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 112301e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 112401e04c3fSmrg new_inst.Instruction.NumSrcRegs = 1; 112501e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 112601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 112701e04c3fSmrg 112801e04c3fSmrg /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 112901e04c3fSmrg new_inst = tgsi_default_full_instruction(); 113001e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 113101e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 113201e04c3fSmrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 113301e04c3fSmrg new_inst.Instruction.NumSrcRegs = 2; 113401e04c3fSmrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 113501e04c3fSmrg reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 113601e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 113701e04c3fSmrg 113801e04c3fSmrg opcode = TGSI_OPCODE_TEX; 113901e04c3fSmrg } 114001e04c3fSmrg 114101e04c3fSmrg /* MOV_SAT tmpA.<mask>, tmpA */ 114201e04c3fSmrg if (mask) { 114301e04c3fSmrg create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 114401e04c3fSmrg } 114501e04c3fSmrg 114601e04c3fSmrg /* modify the texture samp instruction to take fixed up coord: */ 114701e04c3fSmrg new_inst = *inst; 114801e04c3fSmrg new_inst.Instruction.Opcode = opcode; 114901e04c3fSmrg new_inst.Src[0] = ctx->tmp[A].src; 115001e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 115101e04c3fSmrg 115201e04c3fSmrg return 0; 115301e04c3fSmrg} 115401e04c3fSmrg 115501e04c3fSmrg/* Two-sided color emulation: 115601e04c3fSmrg * For each COLOR input, create a corresponding BCOLOR input, plus 115701e04c3fSmrg * CMP instruction to select front or back color based on FACE 115801e04c3fSmrg */ 115901e04c3fSmrg#define TWOSIDE_GROW(n) ( \ 116001e04c3fSmrg 2 + /* FACE */ \ 116101e04c3fSmrg ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 116201e04c3fSmrg ((n) * 1) + /* TEMP[] */ \ 116301e04c3fSmrg ((n) * NINST(3)) /* CMP instr */ \ 116401e04c3fSmrg ) 116501e04c3fSmrg 116601e04c3fSmrgstatic void 116701e04c3fSmrgemit_twoside(struct tgsi_transform_context *tctx) 116801e04c3fSmrg{ 116901e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 117001e04c3fSmrg struct tgsi_shader_info *info = ctx->info; 117101e04c3fSmrg struct tgsi_full_declaration decl; 117201e04c3fSmrg struct tgsi_full_instruction new_inst; 117301e04c3fSmrg unsigned inbase, tmpbase; 117401e04c3fSmrg unsigned i; 117501e04c3fSmrg 117601e04c3fSmrg inbase = info->file_max[TGSI_FILE_INPUT] + 1; 117701e04c3fSmrg tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 117801e04c3fSmrg 117901e04c3fSmrg /* additional inputs for BCOLOR's */ 118001e04c3fSmrg for (i = 0; i < ctx->two_side_colors; i++) { 118101e04c3fSmrg unsigned in_idx = ctx->two_side_idx[i]; 118201e04c3fSmrg decl = tgsi_default_full_declaration(); 118301e04c3fSmrg decl.Declaration.File = TGSI_FILE_INPUT; 118401e04c3fSmrg decl.Declaration.Semantic = true; 118501e04c3fSmrg decl.Range.First = decl.Range.Last = inbase + i; 118601e04c3fSmrg decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 118701e04c3fSmrg decl.Semantic.Index = info->input_semantic_index[in_idx]; 118801e04c3fSmrg decl.Declaration.Interpolate = true; 118901e04c3fSmrg decl.Interp.Interpolate = info->input_interpolate[in_idx]; 119001e04c3fSmrg decl.Interp.Location = info->input_interpolate_loc[in_idx]; 119101e04c3fSmrg tctx->emit_declaration(tctx, &decl); 119201e04c3fSmrg } 119301e04c3fSmrg 119401e04c3fSmrg /* additional input for FACE */ 119501e04c3fSmrg if (ctx->two_side_colors && (ctx->face_idx == -1)) { 119601e04c3fSmrg decl = tgsi_default_full_declaration(); 119701e04c3fSmrg decl.Declaration.File = TGSI_FILE_INPUT; 119801e04c3fSmrg decl.Declaration.Semantic = true; 119901e04c3fSmrg decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 120001e04c3fSmrg decl.Semantic.Name = TGSI_SEMANTIC_FACE; 120101e04c3fSmrg decl.Semantic.Index = 0; 120201e04c3fSmrg tctx->emit_declaration(tctx, &decl); 120301e04c3fSmrg 120401e04c3fSmrg ctx->face_idx = decl.Range.First; 120501e04c3fSmrg } 120601e04c3fSmrg 120701e04c3fSmrg /* additional temps for COLOR/BCOLOR selection: */ 120801e04c3fSmrg for (i = 0; i < ctx->two_side_colors; i++) { 120901e04c3fSmrg decl = tgsi_default_full_declaration(); 121001e04c3fSmrg decl.Declaration.File = TGSI_FILE_TEMPORARY; 121101e04c3fSmrg decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 121201e04c3fSmrg tctx->emit_declaration(tctx, &decl); 121301e04c3fSmrg } 121401e04c3fSmrg 121501e04c3fSmrg /* and finally additional instructions to select COLOR/BCOLOR: */ 121601e04c3fSmrg for (i = 0; i < ctx->two_side_colors; i++) { 121701e04c3fSmrg new_inst = tgsi_default_full_instruction(); 121801e04c3fSmrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 121901e04c3fSmrg 122001e04c3fSmrg new_inst.Instruction.NumDstRegs = 1; 122101e04c3fSmrg new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 122201e04c3fSmrg new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 122301e04c3fSmrg new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 122401e04c3fSmrg 122501e04c3fSmrg new_inst.Instruction.NumSrcRegs = 3; 122601e04c3fSmrg new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 122701e04c3fSmrg new_inst.Src[0].Register.Index = ctx->face_idx; 122801e04c3fSmrg new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 122901e04c3fSmrg new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 123001e04c3fSmrg new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 123101e04c3fSmrg new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 123201e04c3fSmrg new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 123301e04c3fSmrg new_inst.Src[1].Register.Index = inbase + i; 123401e04c3fSmrg new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 123501e04c3fSmrg new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 123601e04c3fSmrg new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 123701e04c3fSmrg new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 123801e04c3fSmrg new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 123901e04c3fSmrg new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 124001e04c3fSmrg new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 124101e04c3fSmrg new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 124201e04c3fSmrg new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 124301e04c3fSmrg new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 124401e04c3fSmrg 124501e04c3fSmrg tctx->emit_instruction(tctx, &new_inst); 124601e04c3fSmrg } 124701e04c3fSmrg} 124801e04c3fSmrg 124901e04c3fSmrgstatic void 125001e04c3fSmrgemit_decls(struct tgsi_transform_context *tctx) 125101e04c3fSmrg{ 125201e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 125301e04c3fSmrg struct tgsi_shader_info *info = ctx->info; 125401e04c3fSmrg struct tgsi_full_declaration decl; 125501e04c3fSmrg struct tgsi_full_immediate immed; 125601e04c3fSmrg unsigned tmpbase; 125701e04c3fSmrg unsigned i; 125801e04c3fSmrg 125901e04c3fSmrg tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 126001e04c3fSmrg 126101e04c3fSmrg ctx->color_base = tmpbase + ctx->numtmp; 126201e04c3fSmrg 126301e04c3fSmrg /* declare immediate: */ 126401e04c3fSmrg immed = tgsi_default_full_immediate(); 126501e04c3fSmrg immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 126601e04c3fSmrg immed.u[0].Float = 0.0; 126701e04c3fSmrg immed.u[1].Float = 1.0; 126801e04c3fSmrg immed.u[2].Float = 128.0; 126901e04c3fSmrg immed.u[3].Float = 0.0; 127001e04c3fSmrg tctx->emit_immediate(tctx, &immed); 127101e04c3fSmrg 127201e04c3fSmrg ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 127301e04c3fSmrg ctx->imm.Register.Index = info->immediate_count; 127401e04c3fSmrg ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 127501e04c3fSmrg ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 127601e04c3fSmrg ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 127701e04c3fSmrg ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 127801e04c3fSmrg 127901e04c3fSmrg /* declare temp regs: */ 128001e04c3fSmrg for (i = 0; i < ctx->numtmp; i++) { 128101e04c3fSmrg decl = tgsi_default_full_declaration(); 128201e04c3fSmrg decl.Declaration.File = TGSI_FILE_TEMPORARY; 128301e04c3fSmrg decl.Range.First = decl.Range.Last = tmpbase + i; 128401e04c3fSmrg tctx->emit_declaration(tctx, &decl); 128501e04c3fSmrg 128601e04c3fSmrg ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 128701e04c3fSmrg ctx->tmp[i].src.Register.Index = tmpbase + i; 128801e04c3fSmrg ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 128901e04c3fSmrg ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 129001e04c3fSmrg ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 129101e04c3fSmrg ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 129201e04c3fSmrg 129301e04c3fSmrg ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 129401e04c3fSmrg ctx->tmp[i].dst.Register.Index = tmpbase + i; 129501e04c3fSmrg ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 129601e04c3fSmrg } 129701e04c3fSmrg 129801e04c3fSmrg if (ctx->two_side_colors) 129901e04c3fSmrg emit_twoside(tctx); 130001e04c3fSmrg} 130101e04c3fSmrg 130201e04c3fSmrgstatic void 130301e04c3fSmrgrename_color_inputs(struct tgsi_lowering_context *ctx, 130401e04c3fSmrg struct tgsi_full_instruction *inst) 130501e04c3fSmrg{ 130601e04c3fSmrg unsigned i, j; 130701e04c3fSmrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 130801e04c3fSmrg struct tgsi_src_register *src = &inst->Src[i].Register; 130901e04c3fSmrg if (src->File == TGSI_FILE_INPUT) { 131001e04c3fSmrg for (j = 0; j < ctx->two_side_colors; j++) { 131101e04c3fSmrg if (src->Index == (int)ctx->two_side_idx[j]) { 131201e04c3fSmrg src->File = TGSI_FILE_TEMPORARY; 131301e04c3fSmrg src->Index = ctx->color_base + j; 131401e04c3fSmrg break; 131501e04c3fSmrg } 131601e04c3fSmrg } 131701e04c3fSmrg } 131801e04c3fSmrg } 131901e04c3fSmrg 132001e04c3fSmrg} 132101e04c3fSmrg 132201e04c3fSmrgstatic void 132301e04c3fSmrgtransform_instr(struct tgsi_transform_context *tctx, 132401e04c3fSmrg struct tgsi_full_instruction *inst) 132501e04c3fSmrg{ 132601e04c3fSmrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 132701e04c3fSmrg 132801e04c3fSmrg if (!ctx->emitted_decls) { 132901e04c3fSmrg emit_decls(tctx); 133001e04c3fSmrg ctx->emitted_decls = 1; 133101e04c3fSmrg } 133201e04c3fSmrg 133301e04c3fSmrg /* if emulating two-sided-color, we need to re-write some 133401e04c3fSmrg * src registers: 133501e04c3fSmrg */ 133601e04c3fSmrg if (ctx->two_side_colors) 133701e04c3fSmrg rename_color_inputs(ctx, inst); 133801e04c3fSmrg 133901e04c3fSmrg switch (inst->Instruction.Opcode) { 134001e04c3fSmrg case TGSI_OPCODE_DST: 134101e04c3fSmrg if (!ctx->config->lower_DST) 134201e04c3fSmrg goto skip; 134301e04c3fSmrg transform_dst(tctx, inst); 134401e04c3fSmrg break; 134501e04c3fSmrg case TGSI_OPCODE_LRP: 134601e04c3fSmrg if (!ctx->config->lower_LRP) 134701e04c3fSmrg goto skip; 134801e04c3fSmrg transform_lrp(tctx, inst); 134901e04c3fSmrg break; 135001e04c3fSmrg case TGSI_OPCODE_FRC: 135101e04c3fSmrg if (!ctx->config->lower_FRC) 135201e04c3fSmrg goto skip; 135301e04c3fSmrg transform_frc(tctx, inst); 135401e04c3fSmrg break; 135501e04c3fSmrg case TGSI_OPCODE_POW: 135601e04c3fSmrg if (!ctx->config->lower_POW) 135701e04c3fSmrg goto skip; 135801e04c3fSmrg transform_pow(tctx, inst); 135901e04c3fSmrg break; 136001e04c3fSmrg case TGSI_OPCODE_LIT: 136101e04c3fSmrg if (!ctx->config->lower_LIT) 136201e04c3fSmrg goto skip; 136301e04c3fSmrg transform_lit(tctx, inst); 136401e04c3fSmrg break; 136501e04c3fSmrg case TGSI_OPCODE_EXP: 136601e04c3fSmrg if (!ctx->config->lower_EXP) 136701e04c3fSmrg goto skip; 136801e04c3fSmrg transform_exp(tctx, inst); 136901e04c3fSmrg break; 137001e04c3fSmrg case TGSI_OPCODE_LOG: 137101e04c3fSmrg if (!ctx->config->lower_LOG) 137201e04c3fSmrg goto skip; 137301e04c3fSmrg transform_log(tctx, inst); 137401e04c3fSmrg break; 137501e04c3fSmrg case TGSI_OPCODE_DP4: 137601e04c3fSmrg if (!ctx->config->lower_DP4) 137701e04c3fSmrg goto skip; 137801e04c3fSmrg transform_dotp(tctx, inst); 137901e04c3fSmrg break; 138001e04c3fSmrg case TGSI_OPCODE_DP3: 138101e04c3fSmrg if (!ctx->config->lower_DP3) 138201e04c3fSmrg goto skip; 138301e04c3fSmrg transform_dotp(tctx, inst); 138401e04c3fSmrg break; 138501e04c3fSmrg case TGSI_OPCODE_DP2: 138601e04c3fSmrg if (!ctx->config->lower_DP2) 138701e04c3fSmrg goto skip; 138801e04c3fSmrg transform_dotp(tctx, inst); 138901e04c3fSmrg break; 139001e04c3fSmrg case TGSI_OPCODE_FLR: 139101e04c3fSmrg if (!ctx->config->lower_FLR) 139201e04c3fSmrg goto skip; 139301e04c3fSmrg transform_flr_ceil(tctx, inst); 139401e04c3fSmrg break; 139501e04c3fSmrg case TGSI_OPCODE_CEIL: 139601e04c3fSmrg if (!ctx->config->lower_CEIL) 139701e04c3fSmrg goto skip; 139801e04c3fSmrg transform_flr_ceil(tctx, inst); 139901e04c3fSmrg break; 140001e04c3fSmrg case TGSI_OPCODE_TRUNC: 140101e04c3fSmrg if (!ctx->config->lower_TRUNC) 140201e04c3fSmrg goto skip; 140301e04c3fSmrg transform_trunc(tctx, inst); 140401e04c3fSmrg break; 140501e04c3fSmrg case TGSI_OPCODE_TEX: 140601e04c3fSmrg case TGSI_OPCODE_TXP: 140701e04c3fSmrg case TGSI_OPCODE_TXB: 140801e04c3fSmrg case TGSI_OPCODE_TXB2: 140901e04c3fSmrg case TGSI_OPCODE_TXL: 141001e04c3fSmrg if (transform_samp(tctx, inst)) 141101e04c3fSmrg goto skip; 141201e04c3fSmrg break; 141301e04c3fSmrg default: 141401e04c3fSmrg skip: 141501e04c3fSmrg tctx->emit_instruction(tctx, inst); 141601e04c3fSmrg break; 141701e04c3fSmrg } 141801e04c3fSmrg} 141901e04c3fSmrg 142001e04c3fSmrg/* returns NULL if no lowering required, else returns the new 142101e04c3fSmrg * tokens (which caller is required to free()). In either case 142201e04c3fSmrg * returns the current info. 142301e04c3fSmrg */ 142401e04c3fSmrgconst struct tgsi_token * 142501e04c3fSmrgtgsi_transform_lowering(const struct tgsi_lowering_config *config, 142601e04c3fSmrg const struct tgsi_token *tokens, 142701e04c3fSmrg struct tgsi_shader_info *info) 142801e04c3fSmrg{ 142901e04c3fSmrg struct tgsi_lowering_context ctx; 143001e04c3fSmrg struct tgsi_token *newtoks; 143101e04c3fSmrg int newlen, numtmp; 143201e04c3fSmrg 143301e04c3fSmrg /* sanity check in case limit is ever increased: */ 143401e04c3fSmrg STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 143501e04c3fSmrg 143601e04c3fSmrg /* sanity check the lowering */ 143701e04c3fSmrg assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 143801e04c3fSmrg assert(!(config->lower_FRC && config->lower_TRUNC)); 143901e04c3fSmrg 144001e04c3fSmrg memset(&ctx, 0, sizeof(ctx)); 144101e04c3fSmrg ctx.base.transform_instruction = transform_instr; 144201e04c3fSmrg ctx.info = info; 144301e04c3fSmrg ctx.config = config; 144401e04c3fSmrg 144501e04c3fSmrg tgsi_scan_shader(tokens, info); 144601e04c3fSmrg 144701e04c3fSmrg /* if we are adding fragment shader support to emulate two-sided 144801e04c3fSmrg * color, then figure out the number of additional inputs we need 144901e04c3fSmrg * to create for BCOLOR's.. 145001e04c3fSmrg */ 145101e04c3fSmrg if ((info->processor == PIPE_SHADER_FRAGMENT) && 145201e04c3fSmrg config->color_two_side) { 145301e04c3fSmrg int i; 145401e04c3fSmrg ctx.face_idx = -1; 145501e04c3fSmrg for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 145601e04c3fSmrg if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 145701e04c3fSmrg ctx.two_side_idx[ctx.two_side_colors++] = i; 145801e04c3fSmrg if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 145901e04c3fSmrg ctx.face_idx = i; 146001e04c3fSmrg } 146101e04c3fSmrg } 146201e04c3fSmrg 146301e04c3fSmrg ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 146401e04c3fSmrg 146501e04c3fSmrg#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) 146601e04c3fSmrg /* if there are no instructions to lower, then we are done: */ 146701e04c3fSmrg if (!(OPCS(DST) || 146801e04c3fSmrg OPCS(LRP) || 146901e04c3fSmrg OPCS(FRC) || 147001e04c3fSmrg OPCS(POW) || 147101e04c3fSmrg OPCS(LIT) || 147201e04c3fSmrg OPCS(EXP) || 147301e04c3fSmrg OPCS(LOG) || 147401e04c3fSmrg OPCS(DP4) || 147501e04c3fSmrg OPCS(DP3) || 147601e04c3fSmrg OPCS(DP2) || 147701e04c3fSmrg OPCS(FLR) || 147801e04c3fSmrg OPCS(CEIL) || 147901e04c3fSmrg OPCS(TRUNC) || 148001e04c3fSmrg OPCS(TXP) || 148101e04c3fSmrg ctx.two_side_colors || 148201e04c3fSmrg ctx.saturate)) 148301e04c3fSmrg return NULL; 148401e04c3fSmrg 148501e04c3fSmrg#if 0 /* debug */ 148601e04c3fSmrg _debug_printf("BEFORE:"); 148701e04c3fSmrg tgsi_dump(tokens, 0); 148801e04c3fSmrg#endif 148901e04c3fSmrg 149001e04c3fSmrg numtmp = 0; 149101e04c3fSmrg newlen = tgsi_num_tokens(tokens); 149201e04c3fSmrg if (OPCS(DST)) { 149301e04c3fSmrg newlen += DST_GROW * OPCS(DST); 149401e04c3fSmrg numtmp = MAX2(numtmp, DST_TMP); 149501e04c3fSmrg } 149601e04c3fSmrg if (OPCS(LRP)) { 149701e04c3fSmrg newlen += LRP_GROW * OPCS(LRP); 149801e04c3fSmrg numtmp = MAX2(numtmp, LRP_TMP); 149901e04c3fSmrg } 150001e04c3fSmrg if (OPCS(FRC)) { 150101e04c3fSmrg newlen += FRC_GROW * OPCS(FRC); 150201e04c3fSmrg numtmp = MAX2(numtmp, FRC_TMP); 150301e04c3fSmrg } 150401e04c3fSmrg if (OPCS(POW)) { 150501e04c3fSmrg newlen += POW_GROW * OPCS(POW); 150601e04c3fSmrg numtmp = MAX2(numtmp, POW_TMP); 150701e04c3fSmrg } 150801e04c3fSmrg if (OPCS(LIT)) { 150901e04c3fSmrg newlen += LIT_GROW * OPCS(LIT); 151001e04c3fSmrg numtmp = MAX2(numtmp, LIT_TMP); 151101e04c3fSmrg } 151201e04c3fSmrg if (OPCS(EXP)) { 151301e04c3fSmrg newlen += EXP_GROW * OPCS(EXP); 151401e04c3fSmrg numtmp = MAX2(numtmp, EXP_TMP); 151501e04c3fSmrg } 151601e04c3fSmrg if (OPCS(LOG)) { 151701e04c3fSmrg newlen += LOG_GROW * OPCS(LOG); 151801e04c3fSmrg numtmp = MAX2(numtmp, LOG_TMP); 151901e04c3fSmrg } 152001e04c3fSmrg if (OPCS(DP4)) { 152101e04c3fSmrg newlen += DP4_GROW * OPCS(DP4); 152201e04c3fSmrg numtmp = MAX2(numtmp, DOTP_TMP); 152301e04c3fSmrg } 152401e04c3fSmrg if (OPCS(DP3)) { 152501e04c3fSmrg newlen += DP3_GROW * OPCS(DP3); 152601e04c3fSmrg numtmp = MAX2(numtmp, DOTP_TMP); 152701e04c3fSmrg } 152801e04c3fSmrg if (OPCS(DP2)) { 152901e04c3fSmrg newlen += DP2_GROW * OPCS(DP2); 153001e04c3fSmrg numtmp = MAX2(numtmp, DOTP_TMP); 153101e04c3fSmrg } 153201e04c3fSmrg if (OPCS(FLR)) { 153301e04c3fSmrg newlen += FLR_GROW * OPCS(FLR); 153401e04c3fSmrg numtmp = MAX2(numtmp, FLR_TMP); 153501e04c3fSmrg } 153601e04c3fSmrg if (OPCS(CEIL)) { 153701e04c3fSmrg newlen += CEIL_GROW * OPCS(CEIL); 153801e04c3fSmrg numtmp = MAX2(numtmp, CEIL_TMP); 153901e04c3fSmrg } 154001e04c3fSmrg if (OPCS(TRUNC)) { 154101e04c3fSmrg newlen += TRUNC_GROW * OPCS(TRUNC); 154201e04c3fSmrg numtmp = MAX2(numtmp, TRUNC_TMP); 154301e04c3fSmrg } 154401e04c3fSmrg if (ctx.saturate || config->lower_TXP) { 154501e04c3fSmrg int n = 0; 154601e04c3fSmrg 154701e04c3fSmrg if (ctx.saturate) { 154801e04c3fSmrg n = info->opcode_count[TGSI_OPCODE_TEX] + 154901e04c3fSmrg info->opcode_count[TGSI_OPCODE_TXP] + 155001e04c3fSmrg info->opcode_count[TGSI_OPCODE_TXB] + 155101e04c3fSmrg info->opcode_count[TGSI_OPCODE_TXB2] + 155201e04c3fSmrg info->opcode_count[TGSI_OPCODE_TXL]; 155301e04c3fSmrg } else if (config->lower_TXP) { 155401e04c3fSmrg n = info->opcode_count[TGSI_OPCODE_TXP]; 155501e04c3fSmrg } 155601e04c3fSmrg 155701e04c3fSmrg newlen += SAMP_GROW * n; 155801e04c3fSmrg numtmp = MAX2(numtmp, SAMP_TMP); 155901e04c3fSmrg } 156001e04c3fSmrg 156101e04c3fSmrg /* specifically don't include two_side_colors temps in the count: */ 156201e04c3fSmrg ctx.numtmp = numtmp; 156301e04c3fSmrg 156401e04c3fSmrg if (ctx.two_side_colors) { 156501e04c3fSmrg newlen += TWOSIDE_GROW(ctx.two_side_colors); 156601e04c3fSmrg /* note: we permanently consume temp regs, re-writing references 156701e04c3fSmrg * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 156801e04c3fSmrg * instruction that selects which varying to use): 156901e04c3fSmrg */ 157001e04c3fSmrg numtmp += ctx.two_side_colors; 157101e04c3fSmrg } 157201e04c3fSmrg 157301e04c3fSmrg newlen += 2 * numtmp; 157401e04c3fSmrg newlen += 5; /* immediate */ 157501e04c3fSmrg 157601e04c3fSmrg newtoks = tgsi_alloc_tokens(newlen); 157701e04c3fSmrg if (!newtoks) 157801e04c3fSmrg return NULL; 157901e04c3fSmrg 158001e04c3fSmrg tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 158101e04c3fSmrg 158201e04c3fSmrg tgsi_scan_shader(newtoks, info); 158301e04c3fSmrg 158401e04c3fSmrg#if 0 /* debug */ 158501e04c3fSmrg _debug_printf("AFTER:"); 158601e04c3fSmrg tgsi_dump(newtoks, 0); 158701e04c3fSmrg#endif 158801e04c3fSmrg 158901e04c3fSmrg return newtoks; 159001e04c3fSmrg} 1591