/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark <robclark@freedesktop.org> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "tgsi/tgsi_transform.h" 28b8e80941Smrg#include "tgsi/tgsi_scan.h" 29b8e80941Smrg#include "tgsi/tgsi_dump.h" 30b8e80941Smrg 31b8e80941Smrg#include "util/u_debug.h" 32b8e80941Smrg#include "util/u_math.h" 33b8e80941Smrg 34b8e80941Smrg#include "tgsi_lowering.h" 35b8e80941Smrg 36b8e80941Smrgstruct tgsi_lowering_context { 37b8e80941Smrg struct tgsi_transform_context base; 38b8e80941Smrg const struct tgsi_lowering_config *config; 39b8e80941Smrg struct tgsi_shader_info *info; 40b8e80941Smrg unsigned two_side_colors; 41b8e80941Smrg unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 42b8e80941Smrg unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 43b8e80941Smrg int face_idx; 44b8e80941Smrg unsigned numtmp; 45b8e80941Smrg struct { 46b8e80941Smrg struct tgsi_full_src_register src; 47b8e80941Smrg struct tgsi_full_dst_register dst; 48b8e80941Smrg } tmp[2]; 49b8e80941Smrg#define A 0 50b8e80941Smrg#define B 1 51b8e80941Smrg struct tgsi_full_src_register imm; 52b8e80941Smrg int emitted_decls; 53b8e80941Smrg unsigned saturate; 54b8e80941Smrg}; 55b8e80941Smrg 56b8e80941Smrgstatic inline struct tgsi_lowering_context * 57b8e80941Smrgtgsi_lowering_context(struct tgsi_transform_context *tctx) 58b8e80941Smrg{ 59b8e80941Smrg return (struct tgsi_lowering_context *)tctx; 60b8e80941Smrg} 61b8e80941Smrg 62b8e80941Smrg/* 63b8e80941Smrg * Utility helpers: 64b8e80941Smrg */ 65b8e80941Smrg 66b8e80941Smrgstatic void 67b8e80941Smrgreg_dst(struct tgsi_full_dst_register *dst, 68b8e80941Smrg const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 69b8e80941Smrg{ 70b8e80941Smrg *dst = *orig_dst; 71b8e80941Smrg dst->Register.WriteMask &= wrmask; 72b8e80941Smrg assert(dst->Register.WriteMask); 73b8e80941Smrg} 74b8e80941Smrg 75b8e80941Smrgstatic inline void 76b8e80941Smrgget_swiz(unsigned *swiz, const struct tgsi_src_register *src) 
77b8e80941Smrg{ 78b8e80941Smrg swiz[0] = src->SwizzleX; 79b8e80941Smrg swiz[1] = src->SwizzleY; 80b8e80941Smrg swiz[2] = src->SwizzleZ; 81b8e80941Smrg swiz[3] = src->SwizzleW; 82b8e80941Smrg} 83b8e80941Smrg 84b8e80941Smrgstatic void 85b8e80941Smrgreg_src(struct tgsi_full_src_register *src, 86b8e80941Smrg const struct tgsi_full_src_register *orig_src, 87b8e80941Smrg unsigned sx, unsigned sy, unsigned sz, unsigned sw) 88b8e80941Smrg{ 89b8e80941Smrg unsigned swiz[4]; 90b8e80941Smrg get_swiz(swiz, &orig_src->Register); 91b8e80941Smrg *src = *orig_src; 92b8e80941Smrg src->Register.SwizzleX = swiz[sx]; 93b8e80941Smrg src->Register.SwizzleY = swiz[sy]; 94b8e80941Smrg src->Register.SwizzleZ = swiz[sz]; 95b8e80941Smrg src->Register.SwizzleW = swiz[sw]; 96b8e80941Smrg} 97b8e80941Smrg 98b8e80941Smrg#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 99b8e80941Smrg#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 100b8e80941Smrg TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 101b8e80941Smrg 102b8e80941Smrg/* 103b8e80941Smrg * if (dst.x aliases src.x) { 104b8e80941Smrg * MOV tmpA.x, src.x 105b8e80941Smrg * src = tmpA 106b8e80941Smrg * } 107b8e80941Smrg * COS dst.x, src.x 108b8e80941Smrg * SIN dst.y, src.x 109b8e80941Smrg * MOV dst.zw, imm{0.0, 1.0} 110b8e80941Smrg */ 111b8e80941Smrgstatic bool 112b8e80941Smrgaliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 113b8e80941Smrg const struct tgsi_full_src_register *src, unsigned src_mask) 114b8e80941Smrg{ 115b8e80941Smrg if ((dst->Register.File == src->Register.File) && 116b8e80941Smrg (dst->Register.Index == src->Register.Index)) { 117b8e80941Smrg unsigned i, actual_mask = 0; 118b8e80941Smrg unsigned swiz[4]; 119b8e80941Smrg get_swiz(swiz, &src->Register); 120b8e80941Smrg for (i = 0; i < 4; i++) 121b8e80941Smrg if (src_mask & (1 << i)) 122b8e80941Smrg actual_mask |= (1 << swiz[i]); 123b8e80941Smrg if (actual_mask & dst_mask) 124b8e80941Smrg return true; 125b8e80941Smrg } 126b8e80941Smrg return 
false; 127b8e80941Smrg} 128b8e80941Smrg 129b8e80941Smrgstatic void 130b8e80941Smrgcreate_mov(struct tgsi_transform_context *tctx, 131b8e80941Smrg const struct tgsi_full_dst_register *dst, 132b8e80941Smrg const struct tgsi_full_src_register *src, 133b8e80941Smrg unsigned mask, unsigned saturate) 134b8e80941Smrg{ 135b8e80941Smrg struct tgsi_full_instruction new_inst; 136b8e80941Smrg 137b8e80941Smrg new_inst = tgsi_default_full_instruction(); 138b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 139b8e80941Smrg new_inst.Instruction.Saturate = saturate; 140b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 141b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, mask); 142b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 143b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 144b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 145b8e80941Smrg} 146b8e80941Smrg 147b8e80941Smrg/* to help calculate # of tgsi tokens for a lowering.. we assume 148b8e80941Smrg * the worst case, ie. removed instructions don't have ADDR[] or 149b8e80941Smrg * anything which increases the # of tokens per src/dst and the 150b8e80941Smrg * inserted instructions do. 
151b8e80941Smrg * 152b8e80941Smrg * OINST() - old instruction 153b8e80941Smrg * 1 : instruction itself 154b8e80941Smrg * 1 : dst 155b8e80941Smrg * 1 * nargs : srcN 156b8e80941Smrg * 157b8e80941Smrg * NINST() - new instruction 158b8e80941Smrg * 1 : instruction itself 159b8e80941Smrg * 2 : dst 160b8e80941Smrg * 2 * nargs : srcN 161b8e80941Smrg */ 162b8e80941Smrg 163b8e80941Smrg#define OINST(nargs) (1 + 1 + 1 * (nargs)) 164b8e80941Smrg#define NINST(nargs) (1 + 2 + 2 * (nargs)) 165b8e80941Smrg 166b8e80941Smrg/* 167b8e80941Smrg * Lowering Translators: 168b8e80941Smrg */ 169b8e80941Smrg 170b8e80941Smrg/* DST - Distance Vector 171b8e80941Smrg * dst.x = 1.0 172b8e80941Smrg * dst.y = src0.y \times src1.y 173b8e80941Smrg * dst.z = src0.z 174b8e80941Smrg * dst.w = src1.w 175b8e80941Smrg * 176b8e80941Smrg * ; note: could be more clever and use just a single temp 177b8e80941Smrg * ; if I was clever enough to re-write the swizzles. 178b8e80941Smrg * ; needs: 2 tmp, imm{1.0} 179b8e80941Smrg * if (dst.y aliases src0.z) { 180b8e80941Smrg * MOV tmpA.yz, src0.yz 181b8e80941Smrg * src0 = tmpA 182b8e80941Smrg * } 183b8e80941Smrg * if (dst.yz aliases src1.w) { 184b8e80941Smrg * MOV tmpB.yw, src1.yw 185b8e80941Smrg * src1 = tmpB 186b8e80941Smrg * } 187b8e80941Smrg * MUL dst.y, src0.y, src1.y 188b8e80941Smrg * MOV dst.z, src0.z 189b8e80941Smrg * MOV dst.w, src1.w 190b8e80941Smrg * MOV dst.x, imm{1.0} 191b8e80941Smrg */ 192b8e80941Smrg#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 193b8e80941Smrg NINST(1) + NINST(1) - OINST(2)) 194b8e80941Smrg#define DST_TMP 2 195b8e80941Smrgstatic void 196b8e80941Smrgtransform_dst(struct tgsi_transform_context *tctx, 197b8e80941Smrg struct tgsi_full_instruction *inst) 198b8e80941Smrg{ 199b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 200b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 201b8e80941Smrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 202b8e80941Smrg struct 
tgsi_full_src_register *src1 = &inst->Src[1]; 203b8e80941Smrg struct tgsi_full_instruction new_inst; 204b8e80941Smrg 205b8e80941Smrg if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 206b8e80941Smrg create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 207b8e80941Smrg src0 = &ctx->tmp[A].src; 208b8e80941Smrg } 209b8e80941Smrg 210b8e80941Smrg if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 211b8e80941Smrg create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 212b8e80941Smrg src1 = &ctx->tmp[B].src; 213b8e80941Smrg } 214b8e80941Smrg 215b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 216b8e80941Smrg /* MUL dst.y, src0.y, src1.y */ 217b8e80941Smrg new_inst = tgsi_default_full_instruction(); 218b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 219b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 220b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 221b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 222b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 223b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 224b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 225b8e80941Smrg } 226b8e80941Smrg 227b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 228b8e80941Smrg /* MOV dst.z, src0.z */ 229b8e80941Smrg new_inst = tgsi_default_full_instruction(); 230b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 231b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 232b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 233b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 234b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 235b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 236b8e80941Smrg } 237b8e80941Smrg 238b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 239b8e80941Smrg /* MOV dst.w, src1.w */ 240b8e80941Smrg new_inst = tgsi_default_full_instruction(); 241b8e80941Smrg new_inst.Instruction.Opcode = 
TGSI_OPCODE_MOV; 242b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 243b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 244b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 245b8e80941Smrg reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 246b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 247b8e80941Smrg } 248b8e80941Smrg 249b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 250b8e80941Smrg /* MOV dst.x, imm{1.0} */ 251b8e80941Smrg new_inst = tgsi_default_full_instruction(); 252b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 253b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 254b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 255b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 256b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 257b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 258b8e80941Smrg } 259b8e80941Smrg} 260b8e80941Smrg 261b8e80941Smrg/* LRP - Linear Interpolate 262b8e80941Smrg * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 263b8e80941Smrg * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 264b8e80941Smrg * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 265b8e80941Smrg * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 266b8e80941Smrg * 267b8e80941Smrg * This becomes: src0 \times src1 + src2 - src0 \times src2, which 268b8e80941Smrg * can then become: src0 \times src1 - (src0 \times src2 - src2) 269b8e80941Smrg * 270b8e80941Smrg * ; needs: 1 tmp 271b8e80941Smrg * MAD tmpA, src0, src2, -src2 272b8e80941Smrg * MAD dst, src0, src1, -tmpA 273b8e80941Smrg */ 274b8e80941Smrg#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 275b8e80941Smrg#define LRP_TMP 1 276b8e80941Smrgstatic void 277b8e80941Smrgtransform_lrp(struct tgsi_transform_context *tctx, 278b8e80941Smrg struct tgsi_full_instruction *inst) 279b8e80941Smrg{ 280b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 281b8e80941Smrg struct 
tgsi_full_dst_register *dst = &inst->Dst[0]; 282b8e80941Smrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 283b8e80941Smrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 284b8e80941Smrg struct tgsi_full_src_register *src2 = &inst->Src[2]; 285b8e80941Smrg struct tgsi_full_instruction new_inst; 286b8e80941Smrg 287b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 288b8e80941Smrg /* MAD tmpA, src0, src2, -src2 */ 289b8e80941Smrg new_inst = tgsi_default_full_instruction(); 290b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 291b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 292b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 293b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 294b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 295b8e80941Smrg reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 296b8e80941Smrg reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 297b8e80941Smrg new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 298b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 299b8e80941Smrg 300b8e80941Smrg /* MAD dst, src0, src1, -tmpA */ 301b8e80941Smrg new_inst = tgsi_default_full_instruction(); 302b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 303b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 304b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 305b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 306b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 307b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 308b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 309b8e80941Smrg new_inst.Src[2].Register.Negate = true; 310b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 311b8e80941Smrg } 312b8e80941Smrg} 313b8e80941Smrg 314b8e80941Smrg/* FRC - Fraction 315b8e80941Smrg * dst.x = src.x - \lfloor src.x\rfloor 316b8e80941Smrg * dst.y = src.y - \lfloor src.y\rfloor 317b8e80941Smrg * 
dst.z = src.z - \lfloor src.z\rfloor 318b8e80941Smrg * dst.w = src.w - \lfloor src.w\rfloor 319b8e80941Smrg * 320b8e80941Smrg * ; needs: 1 tmp 321b8e80941Smrg * FLR tmpA, src 322b8e80941Smrg * SUB dst, src, tmpA 323b8e80941Smrg */ 324b8e80941Smrg#define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 325b8e80941Smrg#define FRC_TMP 1 326b8e80941Smrgstatic void 327b8e80941Smrgtransform_frc(struct tgsi_transform_context *tctx, 328b8e80941Smrg struct tgsi_full_instruction *inst) 329b8e80941Smrg{ 330b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 331b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 332b8e80941Smrg struct tgsi_full_src_register *src = &inst->Src[0]; 333b8e80941Smrg struct tgsi_full_instruction new_inst; 334b8e80941Smrg 335b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 336b8e80941Smrg /* FLR tmpA, src */ 337b8e80941Smrg new_inst = tgsi_default_full_instruction(); 338b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 339b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 340b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 341b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 342b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 343b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 344b8e80941Smrg 345b8e80941Smrg /* SUB dst, src, tmpA */ 346b8e80941Smrg new_inst = tgsi_default_full_instruction(); 347b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 348b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 349b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 350b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 351b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 352b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 353b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 354b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 355b8e80941Smrg } 356b8e80941Smrg} 357b8e80941Smrg 358b8e80941Smrg/* 
POW - Power 359b8e80941Smrg * dst.x = src0.x^{src1.x} 360b8e80941Smrg * dst.y = src0.x^{src1.x} 361b8e80941Smrg * dst.z = src0.x^{src1.x} 362b8e80941Smrg * dst.w = src0.x^{src1.x} 363b8e80941Smrg * 364b8e80941Smrg * ; needs: 1 tmp 365b8e80941Smrg * LG2 tmpA.x, src0.x 366b8e80941Smrg * MUL tmpA.x, src1.x, tmpA.x 367b8e80941Smrg * EX2 dst, tmpA.x 368b8e80941Smrg */ 369b8e80941Smrg#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 370b8e80941Smrg#define POW_TMP 1 371b8e80941Smrgstatic void 372b8e80941Smrgtransform_pow(struct tgsi_transform_context *tctx, 373b8e80941Smrg struct tgsi_full_instruction *inst) 374b8e80941Smrg{ 375b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 376b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 377b8e80941Smrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 378b8e80941Smrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 379b8e80941Smrg struct tgsi_full_instruction new_inst; 380b8e80941Smrg 381b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 382b8e80941Smrg /* LG2 tmpA.x, src0.x */ 383b8e80941Smrg new_inst = tgsi_default_full_instruction(); 384b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 385b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 386b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 387b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 388b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 389b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 390b8e80941Smrg 391b8e80941Smrg /* MUL tmpA.x, src1.x, tmpA.x */ 392b8e80941Smrg new_inst = tgsi_default_full_instruction(); 393b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 394b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 395b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 396b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 397b8e80941Smrg reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 398b8e80941Smrg 
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 399b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 400b8e80941Smrg 401b8e80941Smrg /* EX2 dst, tmpA.x */ 402b8e80941Smrg new_inst = tgsi_default_full_instruction(); 403b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 404b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 405b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 406b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 407b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 408b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 409b8e80941Smrg } 410b8e80941Smrg} 411b8e80941Smrg 412b8e80941Smrg/* LIT - Light Coefficients 413b8e80941Smrg * dst.x = 1.0 414b8e80941Smrg * dst.y = max(src.x, 0.0) 415b8e80941Smrg * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 416b8e80941Smrg * dst.w = 1.0 417b8e80941Smrg * 418b8e80941Smrg * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 419b8e80941Smrg * MAX tmpA.xy, src.xy, imm{0.0} 420b8e80941Smrg * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 421b8e80941Smrg * LG2 tmpA.y, tmpA.y 422b8e80941Smrg * MUL tmpA.y, tmpA.z, tmpA.y 423b8e80941Smrg * EX2 tmpA.y, tmpA.y 424b8e80941Smrg * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 425b8e80941Smrg * MOV dst.yz, tmpA.xy 426b8e80941Smrg * MOV dst.xw, imm{1.0} 427b8e80941Smrg */ 428b8e80941Smrg#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 429b8e80941Smrg NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 430b8e80941Smrg#define LIT_TMP 1 431b8e80941Smrgstatic void 432b8e80941Smrgtransform_lit(struct tgsi_transform_context *tctx, 433b8e80941Smrg struct tgsi_full_instruction *inst) 434b8e80941Smrg{ 435b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 436b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 437b8e80941Smrg struct tgsi_full_src_register *src = &inst->Src[0]; 438b8e80941Smrg struct tgsi_full_instruction new_inst; 439b8e80941Smrg 
440b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 441b8e80941Smrg /* MAX tmpA.xy, src.xy, imm{0.0} */ 442b8e80941Smrg new_inst = tgsi_default_full_instruction(); 443b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 444b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 445b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 446b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 447b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 448b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 449b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 450b8e80941Smrg 451b8e80941Smrg /* MIN tmpA.z, src.w, imm{128.0} */ 452b8e80941Smrg new_inst = tgsi_default_full_instruction(); 453b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; 454b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 455b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 456b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 457b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 458b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 459b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 460b8e80941Smrg 461b8e80941Smrg /* MAX tmpA.z, tmpA.z, -imm{128.0} */ 462b8e80941Smrg new_inst = tgsi_default_full_instruction(); 463b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 464b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 465b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 466b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 467b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); 468b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 469b8e80941Smrg new_inst.Src[1].Register.Negate = true; 470b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 471b8e80941Smrg 472b8e80941Smrg /* LG2 tmpA.y, tmpA.y */ 473b8e80941Smrg new_inst = tgsi_default_full_instruction(); 474b8e80941Smrg new_inst.Instruction.Opcode = 
TGSI_OPCODE_LG2; 475b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 476b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 477b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 478b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 479b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 480b8e80941Smrg 481b8e80941Smrg /* MUL tmpA.y, tmpA.z, tmpA.y */ 482b8e80941Smrg new_inst = tgsi_default_full_instruction(); 483b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 484b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 485b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 486b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 487b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 488b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 489b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 490b8e80941Smrg 491b8e80941Smrg /* EX2 tmpA.y, tmpA.y */ 492b8e80941Smrg new_inst = tgsi_default_full_instruction(); 493b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 494b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 495b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 496b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 497b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 498b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 499b8e80941Smrg 500b8e80941Smrg /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 501b8e80941Smrg new_inst = tgsi_default_full_instruction(); 502b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 503b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 504b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 505b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 506b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 507b8e80941Smrg new_inst.Src[0].Register.Negate = true; 508b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, 
Y, _, _)); 509b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 510b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 511b8e80941Smrg 512b8e80941Smrg /* MOV dst.yz, tmpA.xy */ 513b8e80941Smrg new_inst = tgsi_default_full_instruction(); 514b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 515b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 516b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 517b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 518b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 519b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 520b8e80941Smrg } 521b8e80941Smrg 522b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 523b8e80941Smrg /* MOV dst.xw, imm{1.0} */ 524b8e80941Smrg new_inst = tgsi_default_full_instruction(); 525b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 526b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 527b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 528b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 529b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 530b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 531b8e80941Smrg } 532b8e80941Smrg} 533b8e80941Smrg 534b8e80941Smrg/* EXP - Approximate Exponential Base 2 535b8e80941Smrg * dst.x = 2^{\lfloor src.x\rfloor} 536b8e80941Smrg * dst.y = src.x - \lfloor src.x\rfloor 537b8e80941Smrg * dst.z = 2^{src.x} 538b8e80941Smrg * dst.w = 1.0 539b8e80941Smrg * 540b8e80941Smrg * ; needs: 1 tmp, imm{1.0} 541b8e80941Smrg * if (lowering FLR) { 542b8e80941Smrg * FRC tmpA.x, src.x 543b8e80941Smrg * SUB tmpA.x, src.x, tmpA.x 544b8e80941Smrg * } else { 545b8e80941Smrg * FLR tmpA.x, src.x 546b8e80941Smrg * } 547b8e80941Smrg * EX2 tmpA.y, src.x 548b8e80941Smrg * SUB dst.y, src.x, tmpA.x 549b8e80941Smrg * EX2 dst.x, tmpA.x 550b8e80941Smrg * MOV dst.z, tmpA.y 551b8e80941Smrg * MOV dst.w, imm{1.0} 552b8e80941Smrg */ 553b8e80941Smrg#define EXP_GROW (NINST(1) + NINST(2) 
+ NINST(1) + NINST(2) + NINST(1) + \ 554b8e80941Smrg NINST(1)+ NINST(1) - OINST(1)) 555b8e80941Smrg#define EXP_TMP 1 556b8e80941Smrgstatic void 557b8e80941Smrgtransform_exp(struct tgsi_transform_context *tctx, 558b8e80941Smrg struct tgsi_full_instruction *inst) 559b8e80941Smrg{ 560b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 561b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 562b8e80941Smrg struct tgsi_full_src_register *src = &inst->Src[0]; 563b8e80941Smrg struct tgsi_full_instruction new_inst; 564b8e80941Smrg 565b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 566b8e80941Smrg if (ctx->config->lower_FLR) { 567b8e80941Smrg /* FRC tmpA.x, src.x */ 568b8e80941Smrg new_inst = tgsi_default_full_instruction(); 569b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 570b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 571b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 572b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 573b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 574b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 575b8e80941Smrg 576b8e80941Smrg /* SUB tmpA.x, src.x, tmpA.x */ 577b8e80941Smrg new_inst = tgsi_default_full_instruction(); 578b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 579b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 580b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 581b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 582b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 583b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 584b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 585b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 586b8e80941Smrg } else { 587b8e80941Smrg /* FLR tmpA.x, src.x */ 588b8e80941Smrg new_inst = tgsi_default_full_instruction(); 589b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 590b8e80941Smrg 
new_inst.Instruction.NumDstRegs = 1; 591b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 592b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 593b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 594b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 595b8e80941Smrg } 596b8e80941Smrg } 597b8e80941Smrg 598b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 599b8e80941Smrg /* EX2 tmpA.y, src.x */ 600b8e80941Smrg new_inst = tgsi_default_full_instruction(); 601b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 602b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 603b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 604b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 605b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 606b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 607b8e80941Smrg } 608b8e80941Smrg 609b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 610b8e80941Smrg /* SUB dst.y, src.x, tmpA.x */ 611b8e80941Smrg new_inst = tgsi_default_full_instruction(); 612b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 613b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 614b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 615b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 616b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 617b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 618b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 619b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 620b8e80941Smrg } 621b8e80941Smrg 622b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 623b8e80941Smrg /* EX2 dst.x, tmpA.x */ 624b8e80941Smrg new_inst = tgsi_default_full_instruction(); 625b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 626b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 627b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 628b8e80941Smrg 
new_inst.Instruction.NumSrcRegs = 1; 629b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 630b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 631b8e80941Smrg } 632b8e80941Smrg 633b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 634b8e80941Smrg /* MOV dst.z, tmpA.y */ 635b8e80941Smrg new_inst = tgsi_default_full_instruction(); 636b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 637b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 638b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 639b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 640b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 641b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 642b8e80941Smrg } 643b8e80941Smrg 644b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 645b8e80941Smrg /* MOV dst.w, imm{1.0} */ 646b8e80941Smrg new_inst = tgsi_default_full_instruction(); 647b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 648b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 649b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 650b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 651b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 652b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 653b8e80941Smrg } 654b8e80941Smrg} 655b8e80941Smrg 656b8e80941Smrg/* LOG - Approximate Logarithm Base 2 657b8e80941Smrg * dst.x = \lfloor\log_2{|src.x|}\rfloor 658b8e80941Smrg * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 659b8e80941Smrg * dst.z = \log_2{|src.x|} 660b8e80941Smrg * dst.w = 1.0 661b8e80941Smrg * 662b8e80941Smrg * ; needs: 1 tmp, imm{1.0} 663b8e80941Smrg * LG2 tmpA.x, |src.x| 664b8e80941Smrg * if (lowering FLR) { 665b8e80941Smrg * FRC tmpA.y, tmpA.x 666b8e80941Smrg * SUB tmpA.y, tmpA.x, tmpA.y 667b8e80941Smrg * } else { 668b8e80941Smrg * FLR tmpA.y, tmpA.x 669b8e80941Smrg * } 670b8e80941Smrg * EX2 tmpA.z, tmpA.y 671b8e80941Smrg * RCP tmpA.z, tmpA.z 
672b8e80941Smrg * MUL dst.y, |src.x|, tmpA.z 673b8e80941Smrg * MOV dst.xz, tmpA.yx 674b8e80941Smrg * MOV dst.w, imm{1.0} 675b8e80941Smrg */ 676b8e80941Smrg#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 677b8e80941Smrg NINST(2) + NINST(1) + NINST(1) - OINST(1)) 678b8e80941Smrg#define LOG_TMP 1 679b8e80941Smrgstatic void 680b8e80941Smrgtransform_log(struct tgsi_transform_context *tctx, 681b8e80941Smrg struct tgsi_full_instruction *inst) 682b8e80941Smrg{ 683b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 684b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 685b8e80941Smrg struct tgsi_full_src_register *src = &inst->Src[0]; 686b8e80941Smrg struct tgsi_full_instruction new_inst; 687b8e80941Smrg 688b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 689b8e80941Smrg /* LG2 tmpA.x, |src.x| */ 690b8e80941Smrg new_inst = tgsi_default_full_instruction(); 691b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 692b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 693b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 694b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 695b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 696b8e80941Smrg new_inst.Src[0].Register.Absolute = true; 697b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 698b8e80941Smrg } 699b8e80941Smrg 700b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 701b8e80941Smrg if (ctx->config->lower_FLR) { 702b8e80941Smrg /* FRC tmpA.y, tmpA.x */ 703b8e80941Smrg new_inst = tgsi_default_full_instruction(); 704b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 705b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 706b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 707b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 708b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 709b8e80941Smrg tctx->emit_instruction(tctx, 
&new_inst); 710b8e80941Smrg 711b8e80941Smrg /* SUB tmpA.y, tmpA.x, tmpA.y */ 712b8e80941Smrg new_inst = tgsi_default_full_instruction(); 713b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 714b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 715b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 716b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 717b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 718b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 719b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 720b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 721b8e80941Smrg } else { 722b8e80941Smrg /* FLR tmpA.y, tmpA.x */ 723b8e80941Smrg new_inst = tgsi_default_full_instruction(); 724b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 725b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 726b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 727b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 728b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 729b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 730b8e80941Smrg } 731b8e80941Smrg } 732b8e80941Smrg 733b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 734b8e80941Smrg /* EX2 tmpA.z, tmpA.y */ 735b8e80941Smrg new_inst = tgsi_default_full_instruction(); 736b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 737b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 738b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 739b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 740b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 741b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 742b8e80941Smrg 743b8e80941Smrg /* RCP tmpA.z, tmpA.z */ 744b8e80941Smrg new_inst = tgsi_default_full_instruction(); 745b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 746b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 
747b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 748b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 749b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 750b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 751b8e80941Smrg 752b8e80941Smrg /* MUL dst.y, |src.x|, tmpA.z */ 753b8e80941Smrg new_inst = tgsi_default_full_instruction(); 754b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 755b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 756b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 757b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 758b8e80941Smrg reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 759b8e80941Smrg new_inst.Src[0].Register.Absolute = true; 760b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 761b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 762b8e80941Smrg } 763b8e80941Smrg 764b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 765b8e80941Smrg /* MOV dst.xz, tmpA.yx */ 766b8e80941Smrg new_inst = tgsi_default_full_instruction(); 767b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 768b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 769b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 770b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 771b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 772b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 773b8e80941Smrg } 774b8e80941Smrg 775b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 776b8e80941Smrg /* MOV dst.w, imm{1.0} */ 777b8e80941Smrg new_inst = tgsi_default_full_instruction(); 778b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 779b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 780b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 781b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 782b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 783b8e80941Smrg 
tctx->emit_instruction(tctx, &new_inst); 784b8e80941Smrg } 785b8e80941Smrg} 786b8e80941Smrg 787b8e80941Smrg/* DP4 - 4-component Dot Product 788b8e80941Smrg * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 789b8e80941Smrg * 790b8e80941Smrg * DP3 - 3-component Dot Product 791b8e80941Smrg * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 792b8e80941Smrg * 793b8e80941Smrg * DP2 - 2-component Dot Product 794b8e80941Smrg * dst = src0.x \times src1.x + src0.y \times src1.y 795b8e80941Smrg * 796b8e80941Smrg * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 797b8e80941Smrg * operations, which is what you'd prefer for a ISA that is natively 798b8e80941Smrg * scalar. Probably a native vector ISA would at least already have 799b8e80941Smrg * DP4/DP3 instructions, but perhaps there is room for an alternative 800b8e80941Smrg * translation for DP2 using vector instructions. 801b8e80941Smrg * 802b8e80941Smrg * ; needs: 1 tmp 803b8e80941Smrg * MUL tmpA.x, src0.x, src1.x 804b8e80941Smrg * MAD tmpA.x, src0.y, src1.y, tmpA.x 805b8e80941Smrg * if (DP3 || DP4) { 806b8e80941Smrg * MAD tmpA.x, src0.z, src1.z, tmpA.x 807b8e80941Smrg * if (DP4) { 808b8e80941Smrg * MAD tmpA.x, src0.w, src1.w, tmpA.x 809b8e80941Smrg * } 810b8e80941Smrg * } 811b8e80941Smrg * ; fixup last instruction to replicate into dst 812b8e80941Smrg */ 813b8e80941Smrg#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 814b8e80941Smrg#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 815b8e80941Smrg#define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 816b8e80941Smrg#define DOTP_TMP 1 817b8e80941Smrgstatic void 818b8e80941Smrgtransform_dotp(struct tgsi_transform_context *tctx, 819b8e80941Smrg struct tgsi_full_instruction *inst) 820b8e80941Smrg{ 821b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 822b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 823b8e80941Smrg 
struct tgsi_full_src_register *src0 = &inst->Src[0]; 824b8e80941Smrg struct tgsi_full_src_register *src1 = &inst->Src[1]; 825b8e80941Smrg struct tgsi_full_instruction new_inst; 826b8e80941Smrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 827b8e80941Smrg 828b8e80941Smrg /* NOTE: any potential last instruction must replicate src on all 829b8e80941Smrg * components (since it could be re-written to write to final dst) 830b8e80941Smrg */ 831b8e80941Smrg 832b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 833b8e80941Smrg /* MUL tmpA.x, src0.x, src1.x */ 834b8e80941Smrg new_inst = tgsi_default_full_instruction(); 835b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 836b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 837b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 838b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 839b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 840b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 841b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 842b8e80941Smrg 843b8e80941Smrg /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 844b8e80941Smrg new_inst = tgsi_default_full_instruction(); 845b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 846b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 847b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 848b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 849b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 850b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); 851b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 852b8e80941Smrg 853b8e80941Smrg if ((opcode == TGSI_OPCODE_DP3) || 854b8e80941Smrg (opcode == TGSI_OPCODE_DP4)) { 855b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 856b8e80941Smrg 857b8e80941Smrg /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 858b8e80941Smrg new_inst = tgsi_default_full_instruction(); 859b8e80941Smrg 
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 860b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 861b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 862b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 863b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 864b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 865b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 866b8e80941Smrg 867b8e80941Smrg if (opcode == TGSI_OPCODE_DP4) { 868b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 869b8e80941Smrg 870b8e80941Smrg /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 871b8e80941Smrg new_inst = tgsi_default_full_instruction(); 872b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 873b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 874b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 875b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 876b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 877b8e80941Smrg reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 878b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 879b8e80941Smrg } 880b8e80941Smrg } 881b8e80941Smrg 882b8e80941Smrg /* fixup last instruction to write to dst: */ 883b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 884b8e80941Smrg 885b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 886b8e80941Smrg } 887b8e80941Smrg} 888b8e80941Smrg 889b8e80941Smrg/* FLR - floor, CEIL - ceil 890b8e80941Smrg * ; needs: 1 tmp 891b8e80941Smrg * if (CEIL) { 892b8e80941Smrg * FRC tmpA, -src 893b8e80941Smrg * ADD dst, src, tmpA 894b8e80941Smrg * } else { 895b8e80941Smrg * FRC tmpA, src 896b8e80941Smrg * SUB dst, src, tmpA 897b8e80941Smrg * } 898b8e80941Smrg */ 899b8e80941Smrg#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 900b8e80941Smrg#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 901b8e80941Smrg#define FLR_TMP 1 902b8e80941Smrg#define CEIL_TMP 1 903b8e80941Smrgstatic 
void 904b8e80941Smrgtransform_flr_ceil(struct tgsi_transform_context *tctx, 905b8e80941Smrg struct tgsi_full_instruction *inst) 906b8e80941Smrg{ 907b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 908b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 909b8e80941Smrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 910b8e80941Smrg struct tgsi_full_instruction new_inst; 911b8e80941Smrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 912b8e80941Smrg 913b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 914b8e80941Smrg /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 915b8e80941Smrg new_inst = tgsi_default_full_instruction(); 916b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 917b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 918b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 919b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 920b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 921b8e80941Smrg 922b8e80941Smrg if (opcode == TGSI_OPCODE_CEIL) 923b8e80941Smrg new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 924b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 925b8e80941Smrg 926b8e80941Smrg /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 927b8e80941Smrg new_inst = tgsi_default_full_instruction(); 928b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 929b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 930b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 931b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 932b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 933b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 934b8e80941Smrg if (opcode == TGSI_OPCODE_FLR) 935b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 936b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 937b8e80941Smrg } 938b8e80941Smrg} 939b8e80941Smrg 940b8e80941Smrg/* TRUNC - truncate off 
fractional part 941b8e80941Smrg * dst.x = trunc(src.x) 942b8e80941Smrg * dst.y = trunc(src.y) 943b8e80941Smrg * dst.z = trunc(src.z) 944b8e80941Smrg * dst.w = trunc(src.w) 945b8e80941Smrg * 946b8e80941Smrg * ; needs: 1 tmp 947b8e80941Smrg * if (lower FLR) { 948b8e80941Smrg * FRC tmpA, |src| 949b8e80941Smrg * SUB tmpA, |src|, tmpA 950b8e80941Smrg * } else { 951b8e80941Smrg * FLR tmpA, |src| 952b8e80941Smrg * } 953b8e80941Smrg * CMP dst, src, -tmpA, tmpA 954b8e80941Smrg */ 955b8e80941Smrg#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 956b8e80941Smrg#define TRUNC_TMP 1 957b8e80941Smrgstatic void 958b8e80941Smrgtransform_trunc(struct tgsi_transform_context *tctx, 959b8e80941Smrg struct tgsi_full_instruction *inst) 960b8e80941Smrg{ 961b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 962b8e80941Smrg struct tgsi_full_dst_register *dst = &inst->Dst[0]; 963b8e80941Smrg struct tgsi_full_src_register *src0 = &inst->Src[0]; 964b8e80941Smrg struct tgsi_full_instruction new_inst; 965b8e80941Smrg 966b8e80941Smrg if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 967b8e80941Smrg if (ctx->config->lower_FLR) { 968b8e80941Smrg new_inst = tgsi_default_full_instruction(); 969b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 970b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 971b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 972b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 973b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 974b8e80941Smrg new_inst.Src[0].Register.Absolute = true; 975b8e80941Smrg new_inst.Src[0].Register.Negate = false; 976b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 977b8e80941Smrg 978b8e80941Smrg new_inst = tgsi_default_full_instruction(); 979b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 980b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 981b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 982b8e80941Smrg 
new_inst.Instruction.NumSrcRegs = 2; 983b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 984b8e80941Smrg new_inst.Src[0].Register.Absolute = true; 985b8e80941Smrg new_inst.Src[0].Register.Negate = false; 986b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 987b8e80941Smrg new_inst.Src[1].Register.Negate = 1; 988b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 989b8e80941Smrg } else { 990b8e80941Smrg new_inst = tgsi_default_full_instruction(); 991b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 992b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 993b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 994b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 995b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 996b8e80941Smrg new_inst.Src[0].Register.Absolute = true; 997b8e80941Smrg new_inst.Src[0].Register.Negate = false; 998b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 999b8e80941Smrg } 1000b8e80941Smrg 1001b8e80941Smrg new_inst = tgsi_default_full_instruction(); 1002b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1003b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 1004b8e80941Smrg reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1005b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 1006b8e80941Smrg reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1007b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1008b8e80941Smrg new_inst.Src[1].Register.Negate = true; 1009b8e80941Smrg reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1010b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 1011b8e80941Smrg } 1012b8e80941Smrg} 1013b8e80941Smrg 1014b8e80941Smrg/* Inserts a MOV_SAT for the needed components of tex coord. Note that 1015b8e80941Smrg * in the case of TXP, the clamping must happen *after* projection, so 1016b8e80941Smrg * we need to lower TXP to TEX. 
1017b8e80941Smrg * 1018b8e80941Smrg * MOV tmpA, src0 1019b8e80941Smrg * if (opc == TXP) { 1020b8e80941Smrg * ; do perspective division manually before clamping: 1021b8e80941Smrg * RCP tmpB, tmpA.w 1022b8e80941Smrg * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 1023b8e80941Smrg * opc = TEX; 1024b8e80941Smrg * } 1025b8e80941Smrg * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 1026b8e80941Smrg * <opc> dst, tmpA, ... 1027b8e80941Smrg */ 1028b8e80941Smrg#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 1029b8e80941Smrg#define SAMP_TMP 2 1030b8e80941Smrgstatic int 1031b8e80941Smrgtransform_samp(struct tgsi_transform_context *tctx, 1032b8e80941Smrg struct tgsi_full_instruction *inst) 1033b8e80941Smrg{ 1034b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1035b8e80941Smrg struct tgsi_full_src_register *coord = &inst->Src[0]; 1036b8e80941Smrg struct tgsi_full_src_register *samp; 1037b8e80941Smrg struct tgsi_full_instruction new_inst; 1038b8e80941Smrg /* mask is clamped coords, pmask is all coords (for projection): */ 1039b8e80941Smrg unsigned mask = 0, pmask = 0, smask; 1040b8e80941Smrg unsigned tex = inst->Texture.Texture; 1041b8e80941Smrg enum tgsi_opcode opcode = inst->Instruction.Opcode; 1042b8e80941Smrg bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 1043b8e80941Smrg (ctx->config->lower_TXP & (1 << tex)); 1044b8e80941Smrg 1045b8e80941Smrg if (opcode == TGSI_OPCODE_TXB2) { 1046b8e80941Smrg samp = &inst->Src[2]; 1047b8e80941Smrg } else { 1048b8e80941Smrg samp = &inst->Src[1]; 1049b8e80941Smrg } 1050b8e80941Smrg 1051b8e80941Smrg /* convert sampler # to bitmask to test: */ 1052b8e80941Smrg smask = 1 << samp->Register.Index; 1053b8e80941Smrg 1054b8e80941Smrg /* check if we actually need to lower this one: */ 1055b8e80941Smrg if (!(ctx->saturate & smask) && !lower_txp) 1056b8e80941Smrg return -1; 1057b8e80941Smrg 1058b8e80941Smrg /* figure out which coordinates need saturating: 1059b8e80941Smrg * - RECT textures should not get 
saturated 1060b8e80941Smrg * - array index coords should not get saturated 1061b8e80941Smrg */ 1062b8e80941Smrg switch (tex) { 1063b8e80941Smrg case TGSI_TEXTURE_3D: 1064b8e80941Smrg case TGSI_TEXTURE_CUBE: 1065b8e80941Smrg case TGSI_TEXTURE_CUBE_ARRAY: 1066b8e80941Smrg case TGSI_TEXTURE_SHADOWCUBE: 1067b8e80941Smrg case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1068b8e80941Smrg if (ctx->config->saturate_r & smask) 1069b8e80941Smrg mask |= TGSI_WRITEMASK_Z; 1070b8e80941Smrg pmask |= TGSI_WRITEMASK_Z; 1071b8e80941Smrg /* fallthrough */ 1072b8e80941Smrg 1073b8e80941Smrg case TGSI_TEXTURE_2D: 1074b8e80941Smrg case TGSI_TEXTURE_2D_ARRAY: 1075b8e80941Smrg case TGSI_TEXTURE_SHADOW2D: 1076b8e80941Smrg case TGSI_TEXTURE_SHADOW2D_ARRAY: 1077b8e80941Smrg case TGSI_TEXTURE_2D_MSAA: 1078b8e80941Smrg case TGSI_TEXTURE_2D_ARRAY_MSAA: 1079b8e80941Smrg if (ctx->config->saturate_t & smask) 1080b8e80941Smrg mask |= TGSI_WRITEMASK_Y; 1081b8e80941Smrg pmask |= TGSI_WRITEMASK_Y; 1082b8e80941Smrg /* fallthrough */ 1083b8e80941Smrg 1084b8e80941Smrg case TGSI_TEXTURE_1D: 1085b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 1086b8e80941Smrg case TGSI_TEXTURE_SHADOW1D: 1087b8e80941Smrg case TGSI_TEXTURE_SHADOW1D_ARRAY: 1088b8e80941Smrg if (ctx->config->saturate_s & smask) 1089b8e80941Smrg mask |= TGSI_WRITEMASK_X; 1090b8e80941Smrg pmask |= TGSI_WRITEMASK_X; 1091b8e80941Smrg break; 1092b8e80941Smrg 1093b8e80941Smrg case TGSI_TEXTURE_RECT: 1094b8e80941Smrg case TGSI_TEXTURE_SHADOWRECT: 1095b8e80941Smrg /* we don't saturate, but in case of lower_txp we 1096b8e80941Smrg * still need to do the perspective divide: 1097b8e80941Smrg */ 1098b8e80941Smrg pmask = TGSI_WRITEMASK_XY; 1099b8e80941Smrg break; 1100b8e80941Smrg } 1101b8e80941Smrg 1102b8e80941Smrg /* sanity check.. 
driver could be asking to saturate a non- 1103b8e80941Smrg * existent coordinate component: 1104b8e80941Smrg */ 1105b8e80941Smrg if (!mask && !lower_txp) 1106b8e80941Smrg return -1; 1107b8e80941Smrg 1108b8e80941Smrg /* MOV tmpA, src0 */ 1109b8e80941Smrg create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 1110b8e80941Smrg 1111b8e80941Smrg /* This is a bit sad.. we need to clamp *after* the coords 1112b8e80941Smrg * are projected, which means lowering TXP to TEX and doing 1113b8e80941Smrg * the projection ourself. But since I haven't figured out 1114b8e80941Smrg * how to make the lowering code deliver an electric shock 1115b8e80941Smrg * to anyone using GL_CLAMP, we must do this instead: 1116b8e80941Smrg */ 1117b8e80941Smrg if (opcode == TGSI_OPCODE_TXP) { 1118b8e80941Smrg /* RCP tmpB.x tmpA.w */ 1119b8e80941Smrg new_inst = tgsi_default_full_instruction(); 1120b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 1121b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 1122b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 1123b8e80941Smrg new_inst.Instruction.NumSrcRegs = 1; 1124b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 1125b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 1126b8e80941Smrg 1127b8e80941Smrg /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 1128b8e80941Smrg new_inst = tgsi_default_full_instruction(); 1129b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 1130b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 1131b8e80941Smrg reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 1132b8e80941Smrg new_inst.Instruction.NumSrcRegs = 2; 1133b8e80941Smrg reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1134b8e80941Smrg reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 1135b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 1136b8e80941Smrg 1137b8e80941Smrg opcode = TGSI_OPCODE_TEX; 1138b8e80941Smrg } 1139b8e80941Smrg 1140b8e80941Smrg /* MOV_SAT tmpA.<mask>, 
tmpA */ 1141b8e80941Smrg if (mask) { 1142b8e80941Smrg create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 1143b8e80941Smrg } 1144b8e80941Smrg 1145b8e80941Smrg /* modify the texture samp instruction to take fixed up coord: */ 1146b8e80941Smrg new_inst = *inst; 1147b8e80941Smrg new_inst.Instruction.Opcode = opcode; 1148b8e80941Smrg new_inst.Src[0] = ctx->tmp[A].src; 1149b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 1150b8e80941Smrg 1151b8e80941Smrg return 0; 1152b8e80941Smrg} 1153b8e80941Smrg 1154b8e80941Smrg/* Two-sided color emulation: 1155b8e80941Smrg * For each COLOR input, create a corresponding BCOLOR input, plus 1156b8e80941Smrg * CMP instruction to select front or back color based on FACE 1157b8e80941Smrg */ 1158b8e80941Smrg#define TWOSIDE_GROW(n) ( \ 1159b8e80941Smrg 2 + /* FACE */ \ 1160b8e80941Smrg ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 1161b8e80941Smrg ((n) * 1) + /* TEMP[] */ \ 1162b8e80941Smrg ((n) * NINST(3)) /* CMP instr */ \ 1163b8e80941Smrg ) 1164b8e80941Smrg 1165b8e80941Smrgstatic void 1166b8e80941Smrgemit_twoside(struct tgsi_transform_context *tctx) 1167b8e80941Smrg{ 1168b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1169b8e80941Smrg struct tgsi_shader_info *info = ctx->info; 1170b8e80941Smrg struct tgsi_full_declaration decl; 1171b8e80941Smrg struct tgsi_full_instruction new_inst; 1172b8e80941Smrg unsigned inbase, tmpbase; 1173b8e80941Smrg unsigned i; 1174b8e80941Smrg 1175b8e80941Smrg inbase = info->file_max[TGSI_FILE_INPUT] + 1; 1176b8e80941Smrg tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1177b8e80941Smrg 1178b8e80941Smrg /* additional inputs for BCOLOR's */ 1179b8e80941Smrg for (i = 0; i < ctx->two_side_colors; i++) { 1180b8e80941Smrg unsigned in_idx = ctx->two_side_idx[i]; 1181b8e80941Smrg decl = tgsi_default_full_declaration(); 1182b8e80941Smrg decl.Declaration.File = TGSI_FILE_INPUT; 1183b8e80941Smrg decl.Declaration.Semantic = true; 1184b8e80941Smrg decl.Range.First = 
decl.Range.Last = inbase + i; 1185b8e80941Smrg decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 1186b8e80941Smrg decl.Semantic.Index = info->input_semantic_index[in_idx]; 1187b8e80941Smrg decl.Declaration.Interpolate = true; 1188b8e80941Smrg decl.Interp.Interpolate = info->input_interpolate[in_idx]; 1189b8e80941Smrg decl.Interp.Location = info->input_interpolate_loc[in_idx]; 1190b8e80941Smrg decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx]; 1191b8e80941Smrg tctx->emit_declaration(tctx, &decl); 1192b8e80941Smrg } 1193b8e80941Smrg 1194b8e80941Smrg /* additional input for FACE */ 1195b8e80941Smrg if (ctx->two_side_colors && (ctx->face_idx == -1)) { 1196b8e80941Smrg decl = tgsi_default_full_declaration(); 1197b8e80941Smrg decl.Declaration.File = TGSI_FILE_INPUT; 1198b8e80941Smrg decl.Declaration.Semantic = true; 1199b8e80941Smrg decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 1200b8e80941Smrg decl.Semantic.Name = TGSI_SEMANTIC_FACE; 1201b8e80941Smrg decl.Semantic.Index = 0; 1202b8e80941Smrg tctx->emit_declaration(tctx, &decl); 1203b8e80941Smrg 1204b8e80941Smrg ctx->face_idx = decl.Range.First; 1205b8e80941Smrg } 1206b8e80941Smrg 1207b8e80941Smrg /* additional temps for COLOR/BCOLOR selection: */ 1208b8e80941Smrg for (i = 0; i < ctx->two_side_colors; i++) { 1209b8e80941Smrg decl = tgsi_default_full_declaration(); 1210b8e80941Smrg decl.Declaration.File = TGSI_FILE_TEMPORARY; 1211b8e80941Smrg decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 1212b8e80941Smrg tctx->emit_declaration(tctx, &decl); 1213b8e80941Smrg } 1214b8e80941Smrg 1215b8e80941Smrg /* and finally additional instructions to select COLOR/BCOLOR: */ 1216b8e80941Smrg for (i = 0; i < ctx->two_side_colors; i++) { 1217b8e80941Smrg new_inst = tgsi_default_full_instruction(); 1218b8e80941Smrg new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1219b8e80941Smrg 1220b8e80941Smrg new_inst.Instruction.NumDstRegs = 1; 1221b8e80941Smrg new_inst.Dst[0].Register.File = 
TGSI_FILE_TEMPORARY; 1222b8e80941Smrg new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 1223b8e80941Smrg new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 1224b8e80941Smrg 1225b8e80941Smrg new_inst.Instruction.NumSrcRegs = 3; 1226b8e80941Smrg new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 1227b8e80941Smrg new_inst.Src[0].Register.Index = ctx->face_idx; 1228b8e80941Smrg new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 1229b8e80941Smrg new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 1230b8e80941Smrg new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 1231b8e80941Smrg new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 1232b8e80941Smrg new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 1233b8e80941Smrg new_inst.Src[1].Register.Index = inbase + i; 1234b8e80941Smrg new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 1235b8e80941Smrg new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 1236b8e80941Smrg new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1237b8e80941Smrg new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 1238b8e80941Smrg new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 1239b8e80941Smrg new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 1240b8e80941Smrg new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 1241b8e80941Smrg new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 1242b8e80941Smrg new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1243b8e80941Smrg new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 1244b8e80941Smrg 1245b8e80941Smrg tctx->emit_instruction(tctx, &new_inst); 1246b8e80941Smrg } 1247b8e80941Smrg} 1248b8e80941Smrg 1249b8e80941Smrgstatic void 1250b8e80941Smrgemit_decls(struct tgsi_transform_context *tctx) 1251b8e80941Smrg{ 1252b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1253b8e80941Smrg struct tgsi_shader_info *info = ctx->info; 1254b8e80941Smrg struct tgsi_full_declaration decl; 1255b8e80941Smrg struct tgsi_full_immediate immed; 1256b8e80941Smrg unsigned tmpbase; 
1257b8e80941Smrg unsigned i; 1258b8e80941Smrg 1259b8e80941Smrg tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1260b8e80941Smrg 1261b8e80941Smrg ctx->color_base = tmpbase + ctx->numtmp; 1262b8e80941Smrg 1263b8e80941Smrg /* declare immediate: */ 1264b8e80941Smrg immed = tgsi_default_full_immediate(); 1265b8e80941Smrg immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 1266b8e80941Smrg immed.u[0].Float = 0.0; 1267b8e80941Smrg immed.u[1].Float = 1.0; 1268b8e80941Smrg immed.u[2].Float = 128.0; 1269b8e80941Smrg immed.u[3].Float = 0.0; 1270b8e80941Smrg tctx->emit_immediate(tctx, &immed); 1271b8e80941Smrg 1272b8e80941Smrg ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 1273b8e80941Smrg ctx->imm.Register.Index = info->immediate_count; 1274b8e80941Smrg ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 1275b8e80941Smrg ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 1276b8e80941Smrg ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1277b8e80941Smrg ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 1278b8e80941Smrg 1279b8e80941Smrg /* declare temp regs: */ 1280b8e80941Smrg for (i = 0; i < ctx->numtmp; i++) { 1281b8e80941Smrg decl = tgsi_default_full_declaration(); 1282b8e80941Smrg decl.Declaration.File = TGSI_FILE_TEMPORARY; 1283b8e80941Smrg decl.Range.First = decl.Range.Last = tmpbase + i; 1284b8e80941Smrg tctx->emit_declaration(tctx, &decl); 1285b8e80941Smrg 1286b8e80941Smrg ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 1287b8e80941Smrg ctx->tmp[i].src.Register.Index = tmpbase + i; 1288b8e80941Smrg ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 1289b8e80941Smrg ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 1290b8e80941Smrg ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1291b8e80941Smrg ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 1292b8e80941Smrg 1293b8e80941Smrg ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 1294b8e80941Smrg ctx->tmp[i].dst.Register.Index = tmpbase + i; 1295b8e80941Smrg ctx->tmp[i].dst.Register.WriteMask = 
TGSI_WRITEMASK_XYZW; 1296b8e80941Smrg } 1297b8e80941Smrg 1298b8e80941Smrg if (ctx->two_side_colors) 1299b8e80941Smrg emit_twoside(tctx); 1300b8e80941Smrg} 1301b8e80941Smrg 1302b8e80941Smrgstatic void 1303b8e80941Smrgrename_color_inputs(struct tgsi_lowering_context *ctx, 1304b8e80941Smrg struct tgsi_full_instruction *inst) 1305b8e80941Smrg{ 1306b8e80941Smrg unsigned i, j; 1307b8e80941Smrg for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1308b8e80941Smrg struct tgsi_src_register *src = &inst->Src[i].Register; 1309b8e80941Smrg if (src->File == TGSI_FILE_INPUT) { 1310b8e80941Smrg for (j = 0; j < ctx->two_side_colors; j++) { 1311b8e80941Smrg if (src->Index == (int)ctx->two_side_idx[j]) { 1312b8e80941Smrg src->File = TGSI_FILE_TEMPORARY; 1313b8e80941Smrg src->Index = ctx->color_base + j; 1314b8e80941Smrg break; 1315b8e80941Smrg } 1316b8e80941Smrg } 1317b8e80941Smrg } 1318b8e80941Smrg } 1319b8e80941Smrg 1320b8e80941Smrg} 1321b8e80941Smrg 1322b8e80941Smrgstatic void 1323b8e80941Smrgtransform_instr(struct tgsi_transform_context *tctx, 1324b8e80941Smrg struct tgsi_full_instruction *inst) 1325b8e80941Smrg{ 1326b8e80941Smrg struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1327b8e80941Smrg 1328b8e80941Smrg if (!ctx->emitted_decls) { 1329b8e80941Smrg emit_decls(tctx); 1330b8e80941Smrg ctx->emitted_decls = 1; 1331b8e80941Smrg } 1332b8e80941Smrg 1333b8e80941Smrg /* if emulating two-sided-color, we need to re-write some 1334b8e80941Smrg * src registers: 1335b8e80941Smrg */ 1336b8e80941Smrg if (ctx->two_side_colors) 1337b8e80941Smrg rename_color_inputs(ctx, inst); 1338b8e80941Smrg 1339b8e80941Smrg switch (inst->Instruction.Opcode) { 1340b8e80941Smrg case TGSI_OPCODE_DST: 1341b8e80941Smrg if (!ctx->config->lower_DST) 1342b8e80941Smrg goto skip; 1343b8e80941Smrg transform_dst(tctx, inst); 1344b8e80941Smrg break; 1345b8e80941Smrg case TGSI_OPCODE_LRP: 1346b8e80941Smrg if (!ctx->config->lower_LRP) 1347b8e80941Smrg goto skip; 1348b8e80941Smrg transform_lrp(tctx, 
inst); 1349b8e80941Smrg break; 1350b8e80941Smrg case TGSI_OPCODE_FRC: 1351b8e80941Smrg if (!ctx->config->lower_FRC) 1352b8e80941Smrg goto skip; 1353b8e80941Smrg transform_frc(tctx, inst); 1354b8e80941Smrg break; 1355b8e80941Smrg case TGSI_OPCODE_POW: 1356b8e80941Smrg if (!ctx->config->lower_POW) 1357b8e80941Smrg goto skip; 1358b8e80941Smrg transform_pow(tctx, inst); 1359b8e80941Smrg break; 1360b8e80941Smrg case TGSI_OPCODE_LIT: 1361b8e80941Smrg if (!ctx->config->lower_LIT) 1362b8e80941Smrg goto skip; 1363b8e80941Smrg transform_lit(tctx, inst); 1364b8e80941Smrg break; 1365b8e80941Smrg case TGSI_OPCODE_EXP: 1366b8e80941Smrg if (!ctx->config->lower_EXP) 1367b8e80941Smrg goto skip; 1368b8e80941Smrg transform_exp(tctx, inst); 1369b8e80941Smrg break; 1370b8e80941Smrg case TGSI_OPCODE_LOG: 1371b8e80941Smrg if (!ctx->config->lower_LOG) 1372b8e80941Smrg goto skip; 1373b8e80941Smrg transform_log(tctx, inst); 1374b8e80941Smrg break; 1375b8e80941Smrg case TGSI_OPCODE_DP4: 1376b8e80941Smrg if (!ctx->config->lower_DP4) 1377b8e80941Smrg goto skip; 1378b8e80941Smrg transform_dotp(tctx, inst); 1379b8e80941Smrg break; 1380b8e80941Smrg case TGSI_OPCODE_DP3: 1381b8e80941Smrg if (!ctx->config->lower_DP3) 1382b8e80941Smrg goto skip; 1383b8e80941Smrg transform_dotp(tctx, inst); 1384b8e80941Smrg break; 1385b8e80941Smrg case TGSI_OPCODE_DP2: 1386b8e80941Smrg if (!ctx->config->lower_DP2) 1387b8e80941Smrg goto skip; 1388b8e80941Smrg transform_dotp(tctx, inst); 1389b8e80941Smrg break; 1390b8e80941Smrg case TGSI_OPCODE_FLR: 1391b8e80941Smrg if (!ctx->config->lower_FLR) 1392b8e80941Smrg goto skip; 1393b8e80941Smrg transform_flr_ceil(tctx, inst); 1394b8e80941Smrg break; 1395b8e80941Smrg case TGSI_OPCODE_CEIL: 1396b8e80941Smrg if (!ctx->config->lower_CEIL) 1397b8e80941Smrg goto skip; 1398b8e80941Smrg transform_flr_ceil(tctx, inst); 1399b8e80941Smrg break; 1400b8e80941Smrg case TGSI_OPCODE_TRUNC: 1401b8e80941Smrg if (!ctx->config->lower_TRUNC) 1402b8e80941Smrg goto skip; 1403b8e80941Smrg 
transform_trunc(tctx, inst); 1404b8e80941Smrg break; 1405b8e80941Smrg case TGSI_OPCODE_TEX: 1406b8e80941Smrg case TGSI_OPCODE_TXP: 1407b8e80941Smrg case TGSI_OPCODE_TXB: 1408b8e80941Smrg case TGSI_OPCODE_TXB2: 1409b8e80941Smrg case TGSI_OPCODE_TXL: 1410b8e80941Smrg if (transform_samp(tctx, inst)) 1411b8e80941Smrg goto skip; 1412b8e80941Smrg break; 1413b8e80941Smrg default: 1414b8e80941Smrg skip: 1415b8e80941Smrg tctx->emit_instruction(tctx, inst); 1416b8e80941Smrg break; 1417b8e80941Smrg } 1418b8e80941Smrg} 1419b8e80941Smrg 1420b8e80941Smrg/* returns NULL if no lowering required, else returns the new 1421b8e80941Smrg * tokens (which caller is required to free()). In either case 1422b8e80941Smrg * returns the current info. 1423b8e80941Smrg */ 1424b8e80941Smrgconst struct tgsi_token * 1425b8e80941Smrgtgsi_transform_lowering(const struct tgsi_lowering_config *config, 1426b8e80941Smrg const struct tgsi_token *tokens, 1427b8e80941Smrg struct tgsi_shader_info *info) 1428b8e80941Smrg{ 1429b8e80941Smrg struct tgsi_lowering_context ctx; 1430b8e80941Smrg struct tgsi_token *newtoks; 1431b8e80941Smrg int newlen, numtmp; 1432b8e80941Smrg 1433b8e80941Smrg /* sanity check in case limit is ever increased: */ 1434b8e80941Smrg STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 1435b8e80941Smrg 1436b8e80941Smrg /* sanity check the lowering */ 1437b8e80941Smrg assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 1438b8e80941Smrg assert(!(config->lower_FRC && config->lower_TRUNC)); 1439b8e80941Smrg 1440b8e80941Smrg memset(&ctx, 0, sizeof(ctx)); 1441b8e80941Smrg ctx.base.transform_instruction = transform_instr; 1442b8e80941Smrg ctx.info = info; 1443b8e80941Smrg ctx.config = config; 1444b8e80941Smrg 1445b8e80941Smrg tgsi_scan_shader(tokens, info); 1446b8e80941Smrg 1447b8e80941Smrg /* if we are adding fragment shader support to emulate two-sided 1448b8e80941Smrg * color, then figure out the number of additional inputs we need 1449b8e80941Smrg * to 
create for BCOLOR's.. 1450b8e80941Smrg */ 1451b8e80941Smrg if ((info->processor == PIPE_SHADER_FRAGMENT) && 1452b8e80941Smrg config->color_two_side) { 1453b8e80941Smrg int i; 1454b8e80941Smrg ctx.face_idx = -1; 1455b8e80941Smrg for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 1456b8e80941Smrg if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 1457b8e80941Smrg ctx.two_side_idx[ctx.two_side_colors++] = i; 1458b8e80941Smrg if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 1459b8e80941Smrg ctx.face_idx = i; 1460b8e80941Smrg } 1461b8e80941Smrg } 1462b8e80941Smrg 1463b8e80941Smrg ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 1464b8e80941Smrg 1465b8e80941Smrg#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) 1466b8e80941Smrg /* if there are no instructions to lower, then we are done: */ 1467b8e80941Smrg if (!(OPCS(DST) || 1468b8e80941Smrg OPCS(LRP) || 1469b8e80941Smrg OPCS(FRC) || 1470b8e80941Smrg OPCS(POW) || 1471b8e80941Smrg OPCS(LIT) || 1472b8e80941Smrg OPCS(EXP) || 1473b8e80941Smrg OPCS(LOG) || 1474b8e80941Smrg OPCS(DP4) || 1475b8e80941Smrg OPCS(DP3) || 1476b8e80941Smrg OPCS(DP2) || 1477b8e80941Smrg OPCS(FLR) || 1478b8e80941Smrg OPCS(CEIL) || 1479b8e80941Smrg OPCS(TRUNC) || 1480b8e80941Smrg OPCS(TXP) || 1481b8e80941Smrg ctx.two_side_colors || 1482b8e80941Smrg ctx.saturate)) 1483b8e80941Smrg return NULL; 1484b8e80941Smrg 1485b8e80941Smrg#if 0 /* debug */ 1486b8e80941Smrg _debug_printf("BEFORE:"); 1487b8e80941Smrg tgsi_dump(tokens, 0); 1488b8e80941Smrg#endif 1489b8e80941Smrg 1490b8e80941Smrg numtmp = 0; 1491b8e80941Smrg newlen = tgsi_num_tokens(tokens); 1492b8e80941Smrg if (OPCS(DST)) { 1493b8e80941Smrg newlen += DST_GROW * OPCS(DST); 1494b8e80941Smrg numtmp = MAX2(numtmp, DST_TMP); 1495b8e80941Smrg } 1496b8e80941Smrg if (OPCS(LRP)) { 1497b8e80941Smrg newlen += LRP_GROW * OPCS(LRP); 1498b8e80941Smrg numtmp = MAX2(numtmp, LRP_TMP); 1499b8e80941Smrg } 1500b8e80941Smrg if (OPCS(FRC)) { 
1501b8e80941Smrg newlen += FRC_GROW * OPCS(FRC); 1502b8e80941Smrg numtmp = MAX2(numtmp, FRC_TMP); 1503b8e80941Smrg } 1504b8e80941Smrg if (OPCS(POW)) { 1505b8e80941Smrg newlen += POW_GROW * OPCS(POW); 1506b8e80941Smrg numtmp = MAX2(numtmp, POW_TMP); 1507b8e80941Smrg } 1508b8e80941Smrg if (OPCS(LIT)) { 1509b8e80941Smrg newlen += LIT_GROW * OPCS(LIT); 1510b8e80941Smrg numtmp = MAX2(numtmp, LIT_TMP); 1511b8e80941Smrg } 1512b8e80941Smrg if (OPCS(EXP)) { 1513b8e80941Smrg newlen += EXP_GROW * OPCS(EXP); 1514b8e80941Smrg numtmp = MAX2(numtmp, EXP_TMP); 1515b8e80941Smrg } 1516b8e80941Smrg if (OPCS(LOG)) { 1517b8e80941Smrg newlen += LOG_GROW * OPCS(LOG); 1518b8e80941Smrg numtmp = MAX2(numtmp, LOG_TMP); 1519b8e80941Smrg } 1520b8e80941Smrg if (OPCS(DP4)) { 1521b8e80941Smrg newlen += DP4_GROW * OPCS(DP4); 1522b8e80941Smrg numtmp = MAX2(numtmp, DOTP_TMP); 1523b8e80941Smrg } 1524b8e80941Smrg if (OPCS(DP3)) { 1525b8e80941Smrg newlen += DP3_GROW * OPCS(DP3); 1526b8e80941Smrg numtmp = MAX2(numtmp, DOTP_TMP); 1527b8e80941Smrg } 1528b8e80941Smrg if (OPCS(DP2)) { 1529b8e80941Smrg newlen += DP2_GROW * OPCS(DP2); 1530b8e80941Smrg numtmp = MAX2(numtmp, DOTP_TMP); 1531b8e80941Smrg } 1532b8e80941Smrg if (OPCS(FLR)) { 1533b8e80941Smrg newlen += FLR_GROW * OPCS(FLR); 1534b8e80941Smrg numtmp = MAX2(numtmp, FLR_TMP); 1535b8e80941Smrg } 1536b8e80941Smrg if (OPCS(CEIL)) { 1537b8e80941Smrg newlen += CEIL_GROW * OPCS(CEIL); 1538b8e80941Smrg numtmp = MAX2(numtmp, CEIL_TMP); 1539b8e80941Smrg } 1540b8e80941Smrg if (OPCS(TRUNC)) { 1541b8e80941Smrg newlen += TRUNC_GROW * OPCS(TRUNC); 1542b8e80941Smrg numtmp = MAX2(numtmp, TRUNC_TMP); 1543b8e80941Smrg } 1544b8e80941Smrg if (ctx.saturate || config->lower_TXP) { 1545b8e80941Smrg int n = 0; 1546b8e80941Smrg 1547b8e80941Smrg if (ctx.saturate) { 1548b8e80941Smrg n = info->opcode_count[TGSI_OPCODE_TEX] + 1549b8e80941Smrg info->opcode_count[TGSI_OPCODE_TXP] + 1550b8e80941Smrg info->opcode_count[TGSI_OPCODE_TXB] + 1551b8e80941Smrg 
info->opcode_count[TGSI_OPCODE_TXB2] + 1552b8e80941Smrg info->opcode_count[TGSI_OPCODE_TXL]; 1553b8e80941Smrg } else if (config->lower_TXP) { 1554b8e80941Smrg n = info->opcode_count[TGSI_OPCODE_TXP]; 1555b8e80941Smrg } 1556b8e80941Smrg 1557b8e80941Smrg newlen += SAMP_GROW * n; 1558b8e80941Smrg numtmp = MAX2(numtmp, SAMP_TMP); 1559b8e80941Smrg } 1560b8e80941Smrg 1561b8e80941Smrg /* specifically don't include two_side_colors temps in the count: */ 1562b8e80941Smrg ctx.numtmp = numtmp; 1563b8e80941Smrg 1564b8e80941Smrg if (ctx.two_side_colors) { 1565b8e80941Smrg newlen += TWOSIDE_GROW(ctx.two_side_colors); 1566b8e80941Smrg /* note: we permanently consume temp regs, re-writing references 1567b8e80941Smrg * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 1568b8e80941Smrg * instruction that selects which varying to use): 1569b8e80941Smrg */ 1570b8e80941Smrg numtmp += ctx.two_side_colors; 1571b8e80941Smrg } 1572b8e80941Smrg 1573b8e80941Smrg newlen += 2 * numtmp; 1574b8e80941Smrg newlen += 5; /* immediate */ 1575b8e80941Smrg 1576b8e80941Smrg newtoks = tgsi_alloc_tokens(newlen); 1577b8e80941Smrg if (!newtoks) 1578b8e80941Smrg return NULL; 1579b8e80941Smrg 1580b8e80941Smrg tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 1581b8e80941Smrg 1582b8e80941Smrg tgsi_scan_shader(newtoks, info); 1583b8e80941Smrg 1584b8e80941Smrg#if 0 /* debug */ 1585b8e80941Smrg _debug_printf("AFTER:"); 1586b8e80941Smrg tgsi_dump(newtoks, 0); 1587b8e80941Smrg#endif 1588b8e80941Smrg 1589b8e80941Smrg return newtoks; 1590b8e80941Smrg} 1591