tgsi_lowering.c revision 7ec681f3
1/* 2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "tgsi/tgsi_transform.h" 28#include "tgsi/tgsi_scan.h" 29#include "tgsi/tgsi_dump.h" 30 31#include "util/compiler.h" 32#include "util/u_debug.h" 33#include "util/u_math.h" 34 35#include "tgsi_lowering.h" 36 37struct tgsi_lowering_context { 38 struct tgsi_transform_context base; 39 const struct tgsi_lowering_config *config; 40 struct tgsi_shader_info *info; 41 unsigned two_side_colors; 42 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 43 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 44 int face_idx; 45 unsigned numtmp; 46 struct { 47 struct tgsi_full_src_register src; 48 struct tgsi_full_dst_register dst; 49 } tmp[2]; 50#define A 0 51#define B 1 52 struct tgsi_full_src_register imm; 53 int emitted_decls; 54 unsigned saturate; 55}; 56 57static inline struct tgsi_lowering_context * 58tgsi_lowering_context(struct tgsi_transform_context *tctx) 59{ 60 return (struct tgsi_lowering_context *)tctx; 61} 62 63/* 64 * Utility helpers: 65 */ 66 67static void 68reg_dst(struct tgsi_full_dst_register *dst, 69 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 70{ 71 *dst = *orig_dst; 72 dst->Register.WriteMask &= wrmask; 73 assert(dst->Register.WriteMask); 74} 75 76static inline void 77get_swiz(unsigned *swiz, const struct tgsi_src_register *src) 78{ 79 swiz[0] = src->SwizzleX; 80 swiz[1] = src->SwizzleY; 81 swiz[2] = src->SwizzleZ; 82 swiz[3] = src->SwizzleW; 83} 84 85static void 86reg_src(struct tgsi_full_src_register *src, 87 const struct tgsi_full_src_register *orig_src, 88 unsigned sx, unsigned sy, unsigned sz, unsigned sw) 89{ 90 unsigned swiz[4]; 91 get_swiz(swiz, &orig_src->Register); 92 *src = *orig_src; 93 src->Register.SwizzleX = swiz[sx]; 94 src->Register.SwizzleY = swiz[sy]; 95 src->Register.SwizzleZ = swiz[sz]; 96 src->Register.SwizzleW = swiz[sw]; 97} 98 99#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 100#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 101 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 102 103/* 104 * if (dst.x aliases src.x) { 105 * MOV tmpA.x, src.x 106 * src = tmpA 107 * } 108 * COS dst.x, src.x 109 * SIN dst.y, src.x 110 * MOV dst.zw, imm{0.0, 1.0} 111 */ 112static bool 113aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 114 const struct tgsi_full_src_register *src, unsigned src_mask) 115{ 116 if ((dst->Register.File == src->Register.File) && 117 (dst->Register.Index == src->Register.Index)) { 118 unsigned i, actual_mask = 0; 119 unsigned swiz[4]; 120 get_swiz(swiz, &src->Register); 121 for (i = 0; i < 4; i++) 122 if (src_mask & (1 << i)) 123 actual_mask |= (1 << swiz[i]); 124 if (actual_mask & dst_mask) 125 return true; 126 } 127 return false; 128} 129 130static void 131create_mov(struct tgsi_transform_context *tctx, 132 const struct tgsi_full_dst_register *dst, 133 const struct tgsi_full_src_register *src, 134 unsigned mask, unsigned saturate) 135{ 136 struct tgsi_full_instruction new_inst; 137 138 new_inst = tgsi_default_full_instruction(); 139 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 140 new_inst.Instruction.Saturate = saturate; 141 new_inst.Instruction.NumDstRegs = 1; 142 reg_dst(&new_inst.Dst[0], dst, mask); 143 new_inst.Instruction.NumSrcRegs = 1; 144 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 145 tctx->emit_instruction(tctx, &new_inst); 146} 147 148/* to help calculate # of tgsi tokens for a lowering.. we assume 149 * the worst case, ie. removed instructions don't have ADDR[] or 150 * anything which increases the # of tokens per src/dst and the 151 * inserted instructions do. 152 * 153 * OINST() - old instruction 154 * 1 : instruction itself 155 * 1 : dst 156 * 1 * nargs : srcN 157 * 158 * NINST() - new instruction 159 * 1 : instruction itself 160 * 2 : dst 161 * 2 * nargs : srcN 162 */ 163 164#define OINST(nargs) (1 + 1 + 1 * (nargs)) 165#define NINST(nargs) (1 + 2 + 2 * (nargs)) 166 167/* 168 * Lowering Translators: 169 */ 170 171/* DST - Distance Vector 172 * dst.x = 1.0 173 * dst.y = src0.y \times src1.y 174 * dst.z = src0.z 175 * dst.w = src1.w 176 * 177 * ; note: could be more clever and use just a single temp 178 * ; if I was clever enough to re-write the swizzles. 179 * ; needs: 2 tmp, imm{1.0} 180 * if (dst.y aliases src0.z) { 181 * MOV tmpA.yz, src0.yz 182 * src0 = tmpA 183 * } 184 * if (dst.yz aliases src1.w) { 185 * MOV tmpB.yw, src1.yw 186 * src1 = tmpB 187 * } 188 * MUL dst.y, src0.y, src1.y 189 * MOV dst.z, src0.z 190 * MOV dst.w, src1.w 191 * MOV dst.x, imm{1.0} 192 */ 193#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 194 NINST(1) + NINST(1) - OINST(2)) 195#define DST_TMP 2 196static void 197transform_dst(struct tgsi_transform_context *tctx, 198 struct tgsi_full_instruction *inst) 199{ 200 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 201 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 202 struct tgsi_full_src_register *src0 = &inst->Src[0]; 203 struct tgsi_full_src_register *src1 = &inst->Src[1]; 204 struct tgsi_full_instruction new_inst; 205 206 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 207 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 208 src0 = &ctx->tmp[A].src; 209 } 210 211 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 212 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 213 src1 = &ctx->tmp[B].src; 214 } 215 216 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 217 /* MUL dst.y, src0.y, src1.y */ 218 new_inst = tgsi_default_full_instruction(); 219 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 220 new_inst.Instruction.NumDstRegs = 1; 221 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 222 new_inst.Instruction.NumSrcRegs = 2; 223 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 224 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 225 tctx->emit_instruction(tctx, &new_inst); 226 } 227 228 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 229 /* MOV dst.z, src0.z */ 230 new_inst = tgsi_default_full_instruction(); 231 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 232 new_inst.Instruction.NumDstRegs = 1; 233 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 234 new_inst.Instruction.NumSrcRegs = 1; 235 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 236 tctx->emit_instruction(tctx, &new_inst); 237 } 238 239 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 240 /* MOV dst.w, src1.w */ 241 new_inst = tgsi_default_full_instruction(); 242 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 243 new_inst.Instruction.NumDstRegs = 1; 244 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 245 new_inst.Instruction.NumSrcRegs = 1; 246 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 247 tctx->emit_instruction(tctx, &new_inst); 248 } 249 250 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 251 /* MOV dst.x, imm{1.0} */ 252 new_inst = tgsi_default_full_instruction(); 253 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 254 new_inst.Instruction.NumDstRegs = 1; 255 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 256 new_inst.Instruction.NumSrcRegs = 1; 257 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 258 tctx->emit_instruction(tctx, &new_inst); 259 } 260} 261 262/* LRP - Linear Interpolate 263 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 264 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 265 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 266 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 267 * 268 * This becomes: src0 \times src1 + src2 - src0 \times src2, which 269 * can then become: src0 \times src1 - (src0 \times src2 - src2) 270 * 271 * ; needs: 1 tmp 272 * MAD tmpA, src0, src2, -src2 273 * MAD dst, src0, src1, -tmpA 274 */ 275#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 276#define LRP_TMP 1 277static void 278transform_lrp(struct tgsi_transform_context *tctx, 279 struct tgsi_full_instruction *inst) 280{ 281 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 282 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 283 struct tgsi_full_src_register *src0 = &inst->Src[0]; 284 struct tgsi_full_src_register *src1 = &inst->Src[1]; 285 struct tgsi_full_src_register *src2 = &inst->Src[2]; 286 struct tgsi_full_instruction new_inst; 287 288 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 289 /* MAD tmpA, src0, src2, -src2 */ 290 new_inst = tgsi_default_full_instruction(); 291 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 292 new_inst.Instruction.NumDstRegs = 1; 293 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 294 new_inst.Instruction.NumSrcRegs = 3; 295 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 296 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 297 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 298 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 299 tctx->emit_instruction(tctx, &new_inst); 300 301 /* MAD dst, src0, src1, -tmpA */ 302 new_inst = tgsi_default_full_instruction(); 303 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 304 new_inst.Instruction.NumDstRegs = 1; 305 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 306 new_inst.Instruction.NumSrcRegs = 3; 307 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 308 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 309 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 310 new_inst.Src[2].Register.Negate = true; 311 tctx->emit_instruction(tctx, &new_inst); 312 } 313} 314 315/* FRC - Fraction 316 * dst.x = src.x - \lfloor src.x\rfloor 317 * dst.y = src.y - \lfloor src.y\rfloor 318 * dst.z = src.z - \lfloor src.z\rfloor 319 * dst.w = src.w - \lfloor src.w\rfloor 320 * 321 * ; needs: 1 tmp 322 * FLR tmpA, src 323 * SUB dst, src, tmpA 324 */ 325#define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 326#define FRC_TMP 1 327static void 328transform_frc(struct tgsi_transform_context *tctx, 329 struct tgsi_full_instruction *inst) 330{ 331 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 332 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 333 struct tgsi_full_src_register *src = &inst->Src[0]; 334 struct tgsi_full_instruction new_inst; 335 336 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 337 /* FLR tmpA, src */ 338 new_inst = tgsi_default_full_instruction(); 339 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 340 new_inst.Instruction.NumDstRegs = 1; 341 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 342 new_inst.Instruction.NumSrcRegs = 1; 343 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 344 tctx->emit_instruction(tctx, &new_inst); 345 346 /* SUB dst, src, tmpA */ 347 new_inst = tgsi_default_full_instruction(); 348 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 349 new_inst.Instruction.NumDstRegs = 1; 350 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 351 new_inst.Instruction.NumSrcRegs = 2; 352 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 353 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 354 new_inst.Src[1].Register.Negate = 1; 355 tctx->emit_instruction(tctx, &new_inst); 356 } 357} 358 359/* POW - Power 360 * dst.x = src0.x^{src1.x} 361 * dst.y = src0.x^{src1.x} 362 * dst.z = src0.x^{src1.x} 363 * dst.w = src0.x^{src1.x} 364 * 365 * ; needs: 1 tmp 366 * LG2 tmpA.x, src0.x 367 * MUL tmpA.x, src1.x, tmpA.x 368 * EX2 dst, tmpA.x 369 */ 370#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 371#define POW_TMP 1 372static void 373transform_pow(struct tgsi_transform_context *tctx, 374 struct tgsi_full_instruction *inst) 375{ 376 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 377 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 378 struct tgsi_full_src_register *src0 = &inst->Src[0]; 379 struct tgsi_full_src_register *src1 = &inst->Src[1]; 380 struct tgsi_full_instruction new_inst; 381 382 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 383 /* LG2 tmpA.x, src0.x */ 384 new_inst = tgsi_default_full_instruction(); 385 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 386 new_inst.Instruction.NumDstRegs = 1; 387 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 388 new_inst.Instruction.NumSrcRegs = 1; 389 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 390 tctx->emit_instruction(tctx, &new_inst); 391 392 /* MUL tmpA.x, src1.x, tmpA.x */ 393 new_inst = tgsi_default_full_instruction(); 394 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 395 new_inst.Instruction.NumDstRegs = 1; 396 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 397 new_inst.Instruction.NumSrcRegs = 2; 398 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 399 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 400 tctx->emit_instruction(tctx, &new_inst); 401 402 /* EX2 dst, tmpA.x */ 403 new_inst = tgsi_default_full_instruction(); 404 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 405 new_inst.Instruction.NumDstRegs = 1; 406 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 407 new_inst.Instruction.NumSrcRegs = 1; 408 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 409 tctx->emit_instruction(tctx, &new_inst); 410 } 411} 412 413/* LIT - Light Coefficients 414 * dst.x = 1.0 415 * dst.y = max(src.x, 0.0) 416 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 417 * dst.w = 1.0 418 * 419 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 420 * MAX tmpA.xy, src.xy, imm{0.0} 421 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 422 * LG2 tmpA.y, tmpA.y 423 * MUL tmpA.y, tmpA.z, tmpA.y 424 * EX2 tmpA.y, tmpA.y 425 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 426 * MOV dst.yz, tmpA.xy 427 * MOV dst.xw, imm{1.0} 428 */ 429#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 430 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 431#define LIT_TMP 1 432static void 433transform_lit(struct tgsi_transform_context *tctx, 434 struct tgsi_full_instruction *inst) 435{ 436 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 437 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 438 struct tgsi_full_src_register *src = &inst->Src[0]; 439 struct tgsi_full_instruction new_inst; 440 441 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 442 /* MAX tmpA.xy, src.xy, imm{0.0} */ 443 new_inst = tgsi_default_full_instruction(); 444 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 445 new_inst.Instruction.NumDstRegs = 1; 446 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 447 new_inst.Instruction.NumSrcRegs = 2; 448 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 449 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 450 tctx->emit_instruction(tctx, &new_inst); 451 452 /* MIN tmpA.z, src.w, imm{128.0} */ 453 new_inst = tgsi_default_full_instruction(); 454 new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; 455 new_inst.Instruction.NumDstRegs = 1; 456 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 457 new_inst.Instruction.NumSrcRegs = 2; 458 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 459 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 460 tctx->emit_instruction(tctx, &new_inst); 461 462 /* MAX tmpA.z, tmpA.z, -imm{128.0} */ 463 new_inst = tgsi_default_full_instruction(); 464 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 465 new_inst.Instruction.NumDstRegs = 1; 466 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 467 new_inst.Instruction.NumSrcRegs = 2; 468 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); 469 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 470 new_inst.Src[1].Register.Negate = true; 471 tctx->emit_instruction(tctx, &new_inst); 472 473 /* LG2 tmpA.y, tmpA.y */ 474 new_inst = tgsi_default_full_instruction(); 475 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 476 new_inst.Instruction.NumDstRegs = 1; 477 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 478 new_inst.Instruction.NumSrcRegs = 1; 479 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 480 tctx->emit_instruction(tctx, &new_inst); 481 482 /* MUL tmpA.y, tmpA.z, tmpA.y */ 483 new_inst = tgsi_default_full_instruction(); 484 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 485 new_inst.Instruction.NumDstRegs = 1; 486 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 487 new_inst.Instruction.NumSrcRegs = 2; 488 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 489 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 490 tctx->emit_instruction(tctx, &new_inst); 491 492 /* EX2 tmpA.y, tmpA.y */ 493 new_inst = tgsi_default_full_instruction(); 494 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 495 new_inst.Instruction.NumDstRegs = 1; 496 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 497 new_inst.Instruction.NumSrcRegs = 1; 498 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 499 tctx->emit_instruction(tctx, &new_inst); 500 501 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 502 new_inst = tgsi_default_full_instruction(); 503 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 504 new_inst.Instruction.NumDstRegs = 1; 505 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 506 new_inst.Instruction.NumSrcRegs = 3; 507 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 508 new_inst.Src[0].Register.Negate = true; 509 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 510 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 511 tctx->emit_instruction(tctx, &new_inst); 512 513 /* MOV dst.yz, tmpA.xy */ 514 new_inst = tgsi_default_full_instruction(); 515 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 516 new_inst.Instruction.NumDstRegs = 1; 517 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 518 new_inst.Instruction.NumSrcRegs = 1; 519 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 520 tctx->emit_instruction(tctx, &new_inst); 521 } 522 523 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 524 /* MOV dst.xw, imm{1.0} */ 525 new_inst = tgsi_default_full_instruction(); 526 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 527 new_inst.Instruction.NumDstRegs = 1; 528 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 529 new_inst.Instruction.NumSrcRegs = 1; 530 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 531 tctx->emit_instruction(tctx, &new_inst); 532 } 533} 534 535/* EXP - Approximate Exponential Base 2 536 * dst.x = 2^{\lfloor src.x\rfloor} 537 * dst.y = src.x - \lfloor src.x\rfloor 538 * dst.z = 2^{src.x} 539 * dst.w = 1.0 540 * 541 * ; needs: 1 tmp, imm{1.0} 542 * if (lowering FLR) { 543 * FRC tmpA.x, src.x 544 * SUB tmpA.x, src.x, tmpA.x 545 * } else { 546 * FLR tmpA.x, src.x 547 * } 548 * EX2 tmpA.y, src.x 549 * SUB dst.y, src.x, tmpA.x 550 * EX2 dst.x, tmpA.x 551 * MOV dst.z, tmpA.y 552 * MOV dst.w, imm{1.0} 553 */ 554#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ 555 NINST(1)+ NINST(1) - OINST(1)) 556#define EXP_TMP 1 557static void 558transform_exp(struct tgsi_transform_context *tctx, 559 struct tgsi_full_instruction *inst) 560{ 561 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 562 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 563 struct tgsi_full_src_register *src = &inst->Src[0]; 564 struct tgsi_full_instruction new_inst; 565 566 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 567 if (ctx->config->lower_FLR) { 568 /* FRC tmpA.x, src.x */ 569 new_inst = tgsi_default_full_instruction(); 570 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 571 new_inst.Instruction.NumDstRegs = 1; 572 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 573 new_inst.Instruction.NumSrcRegs = 1; 574 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 575 tctx->emit_instruction(tctx, &new_inst); 576 577 /* SUB tmpA.x, src.x, tmpA.x */ 578 new_inst = tgsi_default_full_instruction(); 579 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 580 new_inst.Instruction.NumDstRegs = 1; 581 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 582 new_inst.Instruction.NumSrcRegs = 2; 583 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 584 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 585 new_inst.Src[1].Register.Negate = 1; 586 tctx->emit_instruction(tctx, &new_inst); 587 } else { 588 /* FLR tmpA.x, src.x */ 589 new_inst = tgsi_default_full_instruction(); 590 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 591 new_inst.Instruction.NumDstRegs = 1; 592 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 593 new_inst.Instruction.NumSrcRegs = 1; 594 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 595 tctx->emit_instruction(tctx, &new_inst); 596 } 597 } 598 599 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 600 /* EX2 tmpA.y, src.x */ 601 new_inst = tgsi_default_full_instruction(); 602 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 603 new_inst.Instruction.NumDstRegs = 1; 604 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 605 new_inst.Instruction.NumSrcRegs = 1; 606 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 607 tctx->emit_instruction(tctx, &new_inst); 608 } 609 610 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 611 /* SUB dst.y, src.x, tmpA.x */ 612 new_inst = tgsi_default_full_instruction(); 613 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 614 new_inst.Instruction.NumDstRegs = 1; 615 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 616 new_inst.Instruction.NumSrcRegs = 2; 617 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 618 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 619 new_inst.Src[1].Register.Negate = 1; 620 tctx->emit_instruction(tctx, &new_inst); 621 } 622 623 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 624 /* EX2 dst.x, tmpA.x */ 625 new_inst = tgsi_default_full_instruction(); 626 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 627 new_inst.Instruction.NumDstRegs = 1; 628 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 629 new_inst.Instruction.NumSrcRegs = 1; 630 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 631 tctx->emit_instruction(tctx, &new_inst); 632 } 633 634 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 635 /* MOV dst.z, tmpA.y */ 636 new_inst = tgsi_default_full_instruction(); 637 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 638 new_inst.Instruction.NumDstRegs = 1; 639 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 640 new_inst.Instruction.NumSrcRegs = 1; 641 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 642 tctx->emit_instruction(tctx, &new_inst); 643 } 644 645 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 646 /* MOV dst.w, imm{1.0} */ 647 new_inst = tgsi_default_full_instruction(); 648 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 649 new_inst.Instruction.NumDstRegs = 1; 650 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 651 new_inst.Instruction.NumSrcRegs = 1; 652 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 653 tctx->emit_instruction(tctx, &new_inst); 654 } 655} 656 657/* LOG - Approximate Logarithm Base 2 658 * dst.x = \lfloor\log_2{|src.x|}\rfloor 659 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 660 * dst.z = \log_2{|src.x|} 661 * dst.w = 1.0 662 * 663 * ; needs: 1 tmp, imm{1.0} 664 * LG2 tmpA.x, |src.x| 665 * if (lowering FLR) { 666 * FRC tmpA.y, tmpA.x 667 * SUB tmpA.y, tmpA.x, tmpA.y 668 * } else { 669 * FLR tmpA.y, tmpA.x 670 * } 671 * EX2 tmpA.z, tmpA.y 672 * RCP tmpA.z, tmpA.z 673 * MUL dst.y, |src.x|, tmpA.z 674 * MOV dst.xz, tmpA.yx 675 * MOV dst.w, imm{1.0} 676 */ 677#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 678 NINST(2) + NINST(1) + NINST(1) - OINST(1)) 679#define LOG_TMP 1 680static void 681transform_log(struct tgsi_transform_context *tctx, 682 struct tgsi_full_instruction *inst) 683{ 684 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 685 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 686 struct tgsi_full_src_register *src = &inst->Src[0]; 687 struct tgsi_full_instruction new_inst; 688 689 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 690 /* LG2 tmpA.x, |src.x| */ 691 new_inst = tgsi_default_full_instruction(); 692 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 693 new_inst.Instruction.NumDstRegs = 1; 694 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 695 new_inst.Instruction.NumSrcRegs = 1; 696 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 697 new_inst.Src[0].Register.Absolute = true; 698 tctx->emit_instruction(tctx, &new_inst); 699 } 700 701 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 702 if (ctx->config->lower_FLR) { 703 /* FRC tmpA.y, tmpA.x */ 704 new_inst = tgsi_default_full_instruction(); 705 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 706 new_inst.Instruction.NumDstRegs = 1; 707 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 708 new_inst.Instruction.NumSrcRegs = 1; 709 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 710 tctx->emit_instruction(tctx, &new_inst); 711 712 /* SUB tmpA.y, tmpA.x, tmpA.y */ 713 new_inst = tgsi_default_full_instruction(); 714 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 715 new_inst.Instruction.NumDstRegs = 1; 716 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 717 new_inst.Instruction.NumSrcRegs = 2; 718 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 719 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 720 new_inst.Src[1].Register.Negate = 1; 721 tctx->emit_instruction(tctx, &new_inst); 722 } else { 723 /* FLR tmpA.y, tmpA.x */ 724 new_inst = tgsi_default_full_instruction(); 725 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 726 new_inst.Instruction.NumDstRegs = 1; 727 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 728 new_inst.Instruction.NumSrcRegs = 1; 729 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 730 tctx->emit_instruction(tctx, &new_inst); 731 } 732 } 733 734 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 735 /* EX2 tmpA.z, tmpA.y */ 736 new_inst = tgsi_default_full_instruction(); 737 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 738 new_inst.Instruction.NumDstRegs = 1; 739 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 740 new_inst.Instruction.NumSrcRegs = 1; 741 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 742 tctx->emit_instruction(tctx, &new_inst); 743 744 /* RCP tmpA.z, tmpA.z */ 745 new_inst = tgsi_default_full_instruction(); 746 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 747 new_inst.Instruction.NumDstRegs = 1; 748 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 749 new_inst.Instruction.NumSrcRegs = 1; 750 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 751 tctx->emit_instruction(tctx, &new_inst); 752 753 /* MUL dst.y, |src.x|, tmpA.z */ 754 new_inst = tgsi_default_full_instruction(); 755 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 756 new_inst.Instruction.NumDstRegs = 1; 757 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 758 new_inst.Instruction.NumSrcRegs = 2; 759 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 760 new_inst.Src[0].Register.Absolute = true; 761 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 762 tctx->emit_instruction(tctx, &new_inst); 763 } 764 765 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 766 /* MOV dst.xz, tmpA.yx */ 767 new_inst = tgsi_default_full_instruction(); 768 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 769 new_inst.Instruction.NumDstRegs = 1; 770 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 771 new_inst.Instruction.NumSrcRegs = 1; 772 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 773 tctx->emit_instruction(tctx, &new_inst); 774 } 775 776 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 777 /* MOV dst.w, imm{1.0} */ 778 new_inst = tgsi_default_full_instruction(); 779 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 780 new_inst.Instruction.NumDstRegs = 1; 781 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 782 new_inst.Instruction.NumSrcRegs = 1; 783 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 784 tctx->emit_instruction(tctx, &new_inst); 785 } 786} 787 788/* DP4 - 4-component Dot Product 789 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 790 * 791 * DP3 - 3-component Dot Product 792 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 793 * 794 * DP2 - 2-component Dot Product 795 * dst = src0.x \times src1.x + src0.y \times src1.y 796 * 797 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 798 * operations, which is what you'd prefer for a ISA that is natively 799 * scalar. Probably a native vector ISA would at least already have 800 * DP4/DP3 instructions, but perhaps there is room for an alternative 801 * translation for DP2 using vector instructions. 802 * 803 * ; needs: 1 tmp 804 * MUL tmpA.x, src0.x, src1.x 805 * MAD tmpA.x, src0.y, src1.y, tmpA.x 806 * if (DP3 || DP4) { 807 * MAD tmpA.x, src0.z, src1.z, tmpA.x 808 * if (DP4) { 809 * MAD tmpA.x, src0.w, src1.w, tmpA.x 810 * } 811 * } 812 * ; fixup last instruction to replicate into dst 813 */ 814#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 815#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 816#define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 817#define DOTP_TMP 1 818static void 819transform_dotp(struct tgsi_transform_context *tctx, 820 struct tgsi_full_instruction *inst) 821{ 822 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 823 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 824 struct tgsi_full_src_register *src0 = &inst->Src[0]; 825 struct tgsi_full_src_register *src1 = &inst->Src[1]; 826 struct tgsi_full_instruction new_inst; 827 enum tgsi_opcode opcode = inst->Instruction.Opcode; 828 829 /* NOTE: any potential last instruction must replicate src on all 830 * components (since it could be re-written to write to final dst) 831 */ 832 833 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 834 /* MUL tmpA.x, src0.x, src1.x */ 835 new_inst = tgsi_default_full_instruction(); 836 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 837 new_inst.Instruction.NumDstRegs = 1; 838 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 839 new_inst.Instruction.NumSrcRegs = 2; 840 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 841 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 842 tctx->emit_instruction(tctx, &new_inst); 843 844 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 845 new_inst = tgsi_default_full_instruction(); 846 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 847 new_inst.Instruction.NumDstRegs = 1; 848 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 849 new_inst.Instruction.NumSrcRegs = 3; 850 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 851 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); 852 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 853 854 if ((opcode == TGSI_OPCODE_DP3) || 855 (opcode == TGSI_OPCODE_DP4)) { 856 tctx->emit_instruction(tctx, &new_inst); 857 858 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 859 new_inst = tgsi_default_full_instruction(); 860 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 861 new_inst.Instruction.NumDstRegs = 1; 862 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 863 new_inst.Instruction.NumSrcRegs = 3; 864 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 865 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 866 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 867 868 if (opcode == TGSI_OPCODE_DP4) { 869 tctx->emit_instruction(tctx, &new_inst); 870 871 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 872 new_inst = tgsi_default_full_instruction(); 873 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 874 new_inst.Instruction.NumDstRegs = 1; 875 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 876 new_inst.Instruction.NumSrcRegs = 3; 877 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 878 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 879 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 880 } 881 } 882 883 /* fixup last instruction to write to dst: */ 884 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 885 886 tctx->emit_instruction(tctx, &new_inst); 887 } 888} 889 890/* FLR - floor, CEIL - ceil 891 * ; needs: 1 tmp 892 * if (CEIL) { 893 * FRC tmpA, -src 894 * ADD dst, src, tmpA 895 * } else { 896 * FRC tmpA, src 897 * SUB dst, src, tmpA 898 * } 899 */ 900#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 901#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 902#define FLR_TMP 1 903#define CEIL_TMP 1 904static void 905transform_flr_ceil(struct tgsi_transform_context *tctx, 906 struct tgsi_full_instruction *inst) 907{ 908 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 909 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 910 struct tgsi_full_src_register *src0 = &inst->Src[0]; 911 struct tgsi_full_instruction new_inst; 912 enum tgsi_opcode opcode = inst->Instruction.Opcode; 913 914 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 915 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 916 new_inst = tgsi_default_full_instruction(); 917 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 918 new_inst.Instruction.NumDstRegs = 1; 919 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 920 new_inst.Instruction.NumSrcRegs = 1; 921 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 922 923 if (opcode == TGSI_OPCODE_CEIL) 924 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 925 tctx->emit_instruction(tctx, &new_inst); 926 927 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 928 new_inst = tgsi_default_full_instruction(); 929 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 930 new_inst.Instruction.NumDstRegs = 1; 931 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 932 new_inst.Instruction.NumSrcRegs = 2; 933 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 934 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 935 if (opcode == TGSI_OPCODE_FLR) 936 new_inst.Src[1].Register.Negate = 1; 937 tctx->emit_instruction(tctx, &new_inst); 938 } 939} 940 941/* TRUNC - truncate off fractional part 942 * dst.x = trunc(src.x) 943 * dst.y = trunc(src.y) 944 * dst.z = trunc(src.z) 945 * dst.w = trunc(src.w) 946 * 947 * ; needs: 1 tmp 948 * if (lower FLR) { 949 * FRC tmpA, |src| 950 * SUB tmpA, |src|, tmpA 951 * } else { 952 * FLR tmpA, |src| 953 * } 954 * CMP dst, src, -tmpA, tmpA 955 */ 956#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 957#define TRUNC_TMP 1 958static void 959transform_trunc(struct tgsi_transform_context *tctx, 960 struct tgsi_full_instruction *inst) 961{ 962 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 963 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 964 struct tgsi_full_src_register *src0 = &inst->Src[0]; 965 struct tgsi_full_instruction new_inst; 966 967 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 968 if (ctx->config->lower_FLR) { 969 new_inst = tgsi_default_full_instruction(); 970 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 971 new_inst.Instruction.NumDstRegs = 1; 972 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 973 new_inst.Instruction.NumSrcRegs = 1; 974 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 975 new_inst.Src[0].Register.Absolute = true; 976 new_inst.Src[0].Register.Negate = false; 977 tctx->emit_instruction(tctx, &new_inst); 978 979 new_inst = tgsi_default_full_instruction(); 980 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 981 new_inst.Instruction.NumDstRegs = 1; 982 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 983 new_inst.Instruction.NumSrcRegs = 2; 984 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 985 new_inst.Src[0].Register.Absolute = true; 986 new_inst.Src[0].Register.Negate = false; 987 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 988 new_inst.Src[1].Register.Negate = 1; 989 tctx->emit_instruction(tctx, &new_inst); 990 } else { 991 new_inst = tgsi_default_full_instruction(); 992 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 993 new_inst.Instruction.NumDstRegs = 1; 994 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 995 new_inst.Instruction.NumSrcRegs = 1; 996 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 997 new_inst.Src[0].Register.Absolute = true; 998 new_inst.Src[0].Register.Negate = false; 999 tctx->emit_instruction(tctx, &new_inst); 1000 } 1001 1002 new_inst = tgsi_default_full_instruction(); 1003 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1004 new_inst.Instruction.NumDstRegs = 1; 1005 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1006 new_inst.Instruction.NumSrcRegs = 3; 1007 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1008 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1009 new_inst.Src[1].Register.Negate = true; 1010 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1011 tctx->emit_instruction(tctx, &new_inst); 1012 } 1013} 1014 1015/* Inserts a MOV_SAT for the needed components of tex coord. Note that 1016 * in the case of TXP, the clamping must happen *after* projection, so 1017 * we need to lower TXP to TEX. 1018 * 1019 * MOV tmpA, src0 1020 * if (opc == TXP) { 1021 * ; do perspective division manually before clamping: 1022 * RCP tmpB, tmpA.w 1023 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 1024 * opc = TEX; 1025 * } 1026 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 1027 * <opc> dst, tmpA, ... 1028 */ 1029#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 1030#define SAMP_TMP 2 1031static int 1032transform_samp(struct tgsi_transform_context *tctx, 1033 struct tgsi_full_instruction *inst) 1034{ 1035 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1036 struct tgsi_full_src_register *coord = &inst->Src[0]; 1037 struct tgsi_full_src_register *samp; 1038 struct tgsi_full_instruction new_inst; 1039 /* mask is clamped coords, pmask is all coords (for projection): */ 1040 unsigned mask = 0, pmask = 0, smask; 1041 unsigned tex = inst->Texture.Texture; 1042 enum tgsi_opcode opcode = inst->Instruction.Opcode; 1043 bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 1044 (ctx->config->lower_TXP & (1 << tex)); 1045 1046 if (opcode == TGSI_OPCODE_TXB2) { 1047 samp = &inst->Src[2]; 1048 } else { 1049 samp = &inst->Src[1]; 1050 } 1051 1052 /* convert sampler # to bitmask to test: */ 1053 smask = 1 << samp->Register.Index; 1054 1055 /* check if we actually need to lower this one: */ 1056 if (!(ctx->saturate & smask) && !lower_txp) 1057 return -1; 1058 1059 /* figure out which coordinates need saturating: 1060 * - RECT textures should not get saturated 1061 * - array index coords should not get saturated 1062 */ 1063 switch (tex) { 1064 case TGSI_TEXTURE_3D: 1065 case TGSI_TEXTURE_CUBE: 1066 case TGSI_TEXTURE_CUBE_ARRAY: 1067 case TGSI_TEXTURE_SHADOWCUBE: 1068 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1069 if (ctx->config->saturate_r & smask) 1070 mask |= TGSI_WRITEMASK_Z; 1071 pmask |= TGSI_WRITEMASK_Z; 1072 FALLTHROUGH; 1073 1074 case TGSI_TEXTURE_2D: 1075 case TGSI_TEXTURE_2D_ARRAY: 1076 case TGSI_TEXTURE_SHADOW2D: 1077 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1078 case TGSI_TEXTURE_2D_MSAA: 1079 case TGSI_TEXTURE_2D_ARRAY_MSAA: 1080 if (ctx->config->saturate_t & smask) 1081 mask |= TGSI_WRITEMASK_Y; 1082 pmask |= TGSI_WRITEMASK_Y; 1083 FALLTHROUGH; 1084 1085 case TGSI_TEXTURE_1D: 1086 case TGSI_TEXTURE_1D_ARRAY: 1087 case TGSI_TEXTURE_SHADOW1D: 1088 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1089 if (ctx->config->saturate_s & smask) 1090 mask |= TGSI_WRITEMASK_X; 1091 pmask |= TGSI_WRITEMASK_X; 1092 break; 1093 1094 case TGSI_TEXTURE_RECT: 1095 case TGSI_TEXTURE_SHADOWRECT: 1096 /* we don't saturate, but in case of lower_txp we 1097 * still need to do the perspective divide: 1098 */ 1099 pmask = TGSI_WRITEMASK_XY; 1100 break; 1101 } 1102 1103 /* sanity check.. driver could be asking to saturate a non- 1104 * existent coordinate component: 1105 */ 1106 if (!mask && !lower_txp) 1107 return -1; 1108 1109 /* MOV tmpA, src0 */ 1110 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 1111 1112 /* This is a bit sad.. we need to clamp *after* the coords 1113 * are projected, which means lowering TXP to TEX and doing 1114 * the projection ourself. But since I haven't figured out 1115 * how to make the lowering code deliver an electric shock 1116 * to anyone using GL_CLAMP, we must do this instead: 1117 */ 1118 if (opcode == TGSI_OPCODE_TXP) { 1119 /* RCP tmpB.x tmpA.w */ 1120 new_inst = tgsi_default_full_instruction(); 1121 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 1122 new_inst.Instruction.NumDstRegs = 1; 1123 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 1124 new_inst.Instruction.NumSrcRegs = 1; 1125 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 1126 tctx->emit_instruction(tctx, &new_inst); 1127 1128 /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 1129 new_inst = tgsi_default_full_instruction(); 1130 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 1131 new_inst.Instruction.NumDstRegs = 1; 1132 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 1133 new_inst.Instruction.NumSrcRegs = 2; 1134 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1135 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 1136 tctx->emit_instruction(tctx, &new_inst); 1137 1138 opcode = TGSI_OPCODE_TEX; 1139 } 1140 1141 /* MOV_SAT tmpA.<mask>, tmpA */ 1142 if (mask) { 1143 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 1144 } 1145 1146 /* modify the texture samp instruction to take fixed up coord: */ 1147 new_inst = *inst; 1148 new_inst.Instruction.Opcode = opcode; 1149 new_inst.Src[0] = ctx->tmp[A].src; 1150 tctx->emit_instruction(tctx, &new_inst); 1151 1152 return 0; 1153} 1154 1155/* Two-sided color emulation: 1156 * For each COLOR input, create a corresponding BCOLOR input, plus 1157 * CMP instruction to select front or back color based on FACE 1158 */ 1159#define TWOSIDE_GROW(n) ( \ 1160 2 + /* FACE */ \ 1161 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 1162 ((n) * 1) + /* TEMP[] */ \ 1163 ((n) * NINST(3)) /* CMP instr */ \ 1164 ) 1165 1166static void 1167emit_twoside(struct tgsi_transform_context *tctx) 1168{ 1169 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1170 struct tgsi_shader_info *info = ctx->info; 1171 struct tgsi_full_declaration decl; 1172 struct tgsi_full_instruction new_inst; 1173 unsigned inbase, tmpbase; 1174 unsigned i; 1175 1176 inbase = info->file_max[TGSI_FILE_INPUT] + 1; 1177 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1178 1179 /* additional inputs for BCOLOR's */ 1180 for (i = 0; i < ctx->two_side_colors; i++) { 1181 unsigned in_idx = ctx->two_side_idx[i]; 1182 decl = tgsi_default_full_declaration(); 1183 decl.Declaration.File = TGSI_FILE_INPUT; 1184 decl.Declaration.Semantic = true; 1185 decl.Range.First = decl.Range.Last = inbase + i; 1186 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 1187 decl.Semantic.Index = info->input_semantic_index[in_idx]; 1188 decl.Declaration.Interpolate = true; 1189 decl.Interp.Interpolate = info->input_interpolate[in_idx]; 1190 decl.Interp.Location = info->input_interpolate_loc[in_idx]; 1191 tctx->emit_declaration(tctx, &decl); 1192 } 1193 1194 /* additional input for FACE */ 1195 if (ctx->two_side_colors && (ctx->face_idx == -1)) { 1196 decl = tgsi_default_full_declaration(); 1197 decl.Declaration.File = TGSI_FILE_INPUT; 1198 decl.Declaration.Semantic = true; 1199 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 1200 decl.Semantic.Name = TGSI_SEMANTIC_FACE; 1201 decl.Semantic.Index = 0; 1202 tctx->emit_declaration(tctx, &decl); 1203 1204 ctx->face_idx = decl.Range.First; 1205 } 1206 1207 /* additional temps for COLOR/BCOLOR selection: */ 1208 for (i = 0; i < ctx->two_side_colors; i++) { 1209 decl = tgsi_default_full_declaration(); 1210 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1211 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 1212 tctx->emit_declaration(tctx, &decl); 1213 } 1214 1215 /* and finally additional instructions to select COLOR/BCOLOR: */ 1216 for (i = 0; i < ctx->two_side_colors; i++) { 1217 new_inst = tgsi_default_full_instruction(); 1218 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1219 1220 new_inst.Instruction.NumDstRegs = 1; 1221 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 1222 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 1223 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 1224 1225 new_inst.Instruction.NumSrcRegs = 3; 1226 new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 1227 new_inst.Src[0].Register.Index = ctx->face_idx; 1228 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 1229 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 1230 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 1231 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 1232 new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 1233 new_inst.Src[1].Register.Index = inbase + i; 1234 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 1235 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 1236 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1237 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 1238 new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 1239 new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 1240 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 1241 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 1242 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1243 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 1244 1245 tctx->emit_instruction(tctx, &new_inst); 1246 } 1247} 1248 1249static void 1250emit_decls(struct tgsi_transform_context *tctx) 1251{ 1252 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1253 struct tgsi_shader_info *info = ctx->info; 1254 struct tgsi_full_declaration decl; 1255 struct tgsi_full_immediate immed; 1256 unsigned tmpbase; 1257 unsigned i; 1258 1259 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1260 1261 ctx->color_base = tmpbase + ctx->numtmp; 1262 1263 /* declare immediate: */ 1264 immed = tgsi_default_full_immediate(); 1265 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 1266 immed.u[0].Float = 0.0; 1267 immed.u[1].Float = 1.0; 1268 immed.u[2].Float = 128.0; 1269 immed.u[3].Float = 0.0; 1270 tctx->emit_immediate(tctx, &immed); 1271 1272 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 1273 ctx->imm.Register.Index = info->immediate_count; 1274 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 1275 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 1276 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1277 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 1278 1279 /* declare temp regs: */ 1280 for (i = 0; i < ctx->numtmp; i++) { 1281 decl = tgsi_default_full_declaration(); 1282 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1283 decl.Range.First = decl.Range.Last = tmpbase + i; 1284 tctx->emit_declaration(tctx, &decl); 1285 1286 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 1287 ctx->tmp[i].src.Register.Index = tmpbase + i; 1288 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 1289 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 1290 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1291 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 1292 1293 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 1294 ctx->tmp[i].dst.Register.Index = tmpbase + i; 1295 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1296 } 1297 1298 if (ctx->two_side_colors) 1299 emit_twoside(tctx); 1300} 1301 1302static void 1303rename_color_inputs(struct tgsi_lowering_context *ctx, 1304 struct tgsi_full_instruction *inst) 1305{ 1306 unsigned i, j; 1307 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1308 struct tgsi_src_register *src = &inst->Src[i].Register; 1309 if (src->File == TGSI_FILE_INPUT) { 1310 for (j = 0; j < ctx->two_side_colors; j++) { 1311 if (src->Index == (int)ctx->two_side_idx[j]) { 1312 src->File = TGSI_FILE_TEMPORARY; 1313 src->Index = ctx->color_base + j; 1314 break; 1315 } 1316 } 1317 } 1318 } 1319 1320} 1321 1322static void 1323transform_instr(struct tgsi_transform_context *tctx, 1324 struct tgsi_full_instruction *inst) 1325{ 1326 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1327 1328 if (!ctx->emitted_decls) { 1329 emit_decls(tctx); 1330 ctx->emitted_decls = 1; 1331 } 1332 1333 /* if emulating two-sided-color, we need to re-write some 1334 * src registers: 1335 */ 1336 if (ctx->two_side_colors) 1337 rename_color_inputs(ctx, inst); 1338 1339 switch (inst->Instruction.Opcode) { 1340 case TGSI_OPCODE_DST: 1341 if (!ctx->config->lower_DST) 1342 goto skip; 1343 transform_dst(tctx, inst); 1344 break; 1345 case TGSI_OPCODE_LRP: 1346 if (!ctx->config->lower_LRP) 1347 goto skip; 1348 transform_lrp(tctx, inst); 1349 break; 1350 case TGSI_OPCODE_FRC: 1351 if (!ctx->config->lower_FRC) 1352 goto skip; 1353 transform_frc(tctx, inst); 1354 break; 1355 case TGSI_OPCODE_POW: 1356 if (!ctx->config->lower_POW) 1357 goto skip; 1358 transform_pow(tctx, inst); 1359 break; 1360 case TGSI_OPCODE_LIT: 1361 if (!ctx->config->lower_LIT) 1362 goto skip; 1363 transform_lit(tctx, inst); 1364 break; 1365 case TGSI_OPCODE_EXP: 1366 if (!ctx->config->lower_EXP) 1367 goto skip; 1368 transform_exp(tctx, inst); 1369 break; 1370 case TGSI_OPCODE_LOG: 1371 if (!ctx->config->lower_LOG) 1372 goto skip; 1373 transform_log(tctx, inst); 1374 break; 1375 case TGSI_OPCODE_DP4: 1376 if (!ctx->config->lower_DP4) 1377 goto skip; 1378 transform_dotp(tctx, inst); 1379 break; 1380 case TGSI_OPCODE_DP3: 1381 if (!ctx->config->lower_DP3) 1382 goto skip; 1383 transform_dotp(tctx, inst); 1384 break; 1385 case TGSI_OPCODE_DP2: 1386 if (!ctx->config->lower_DP2) 1387 goto skip; 1388 transform_dotp(tctx, inst); 1389 break; 1390 case TGSI_OPCODE_FLR: 1391 if (!ctx->config->lower_FLR) 1392 goto skip; 1393 transform_flr_ceil(tctx, inst); 1394 break; 1395 case TGSI_OPCODE_CEIL: 1396 if (!ctx->config->lower_CEIL) 1397 goto skip; 1398 transform_flr_ceil(tctx, inst); 1399 break; 1400 case TGSI_OPCODE_TRUNC: 1401 if (!ctx->config->lower_TRUNC) 1402 goto skip; 1403 transform_trunc(tctx, inst); 1404 break; 1405 case TGSI_OPCODE_TEX: 1406 case TGSI_OPCODE_TXP: 1407 case TGSI_OPCODE_TXB: 1408 case TGSI_OPCODE_TXB2: 1409 case TGSI_OPCODE_TXL: 1410 if (transform_samp(tctx, inst)) 1411 goto skip; 1412 break; 1413 default: 1414 skip: 1415 tctx->emit_instruction(tctx, inst); 1416 break; 1417 } 1418} 1419 1420/* returns NULL if no lowering required, else returns the new 1421 * tokens (which caller is required to free()). In either case 1422 * returns the current info. 1423 */ 1424const struct tgsi_token * 1425tgsi_transform_lowering(const struct tgsi_lowering_config *config, 1426 const struct tgsi_token *tokens, 1427 struct tgsi_shader_info *info) 1428{ 1429 struct tgsi_lowering_context ctx; 1430 struct tgsi_token *newtoks; 1431 int newlen, numtmp; 1432 1433 /* sanity check in case limit is ever increased: */ 1434 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 1435 1436 /* sanity check the lowering */ 1437 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 1438 assert(!(config->lower_FRC && config->lower_TRUNC)); 1439 1440 memset(&ctx, 0, sizeof(ctx)); 1441 ctx.base.transform_instruction = transform_instr; 1442 ctx.info = info; 1443 ctx.config = config; 1444 1445 tgsi_scan_shader(tokens, info); 1446 1447 /* if we are adding fragment shader support to emulate two-sided 1448 * color, then figure out the number of additional inputs we need 1449 * to create for BCOLOR's.. 1450 */ 1451 if ((info->processor == PIPE_SHADER_FRAGMENT) && 1452 config->color_two_side) { 1453 int i; 1454 ctx.face_idx = -1; 1455 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 1456 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 1457 ctx.two_side_idx[ctx.two_side_colors++] = i; 1458 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 1459 ctx.face_idx = i; 1460 } 1461 } 1462 1463 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 1464 1465#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) 1466 /* if there are no instructions to lower, then we are done: */ 1467 if (!(OPCS(DST) || 1468 OPCS(LRP) || 1469 OPCS(FRC) || 1470 OPCS(POW) || 1471 OPCS(LIT) || 1472 OPCS(EXP) || 1473 OPCS(LOG) || 1474 OPCS(DP4) || 1475 OPCS(DP3) || 1476 OPCS(DP2) || 1477 OPCS(FLR) || 1478 OPCS(CEIL) || 1479 OPCS(TRUNC) || 1480 OPCS(TXP) || 1481 ctx.two_side_colors || 1482 ctx.saturate)) 1483 return NULL; 1484 1485#if 0 /* debug */ 1486 _debug_printf("BEFORE:"); 1487 tgsi_dump(tokens, 0); 1488#endif 1489 1490 numtmp = 0; 1491 newlen = tgsi_num_tokens(tokens); 1492 if (OPCS(DST)) { 1493 newlen += DST_GROW * OPCS(DST); 1494 numtmp = MAX2(numtmp, DST_TMP); 1495 } 1496 if (OPCS(LRP)) { 1497 newlen += LRP_GROW * OPCS(LRP); 1498 numtmp = MAX2(numtmp, LRP_TMP); 1499 } 1500 if (OPCS(FRC)) { 1501 newlen += FRC_GROW * OPCS(FRC); 1502 numtmp = MAX2(numtmp, FRC_TMP); 1503 } 1504 if (OPCS(POW)) { 1505 newlen += POW_GROW * OPCS(POW); 1506 numtmp = MAX2(numtmp, POW_TMP); 1507 } 1508 if (OPCS(LIT)) { 1509 newlen += LIT_GROW * OPCS(LIT); 1510 numtmp = MAX2(numtmp, LIT_TMP); 1511 } 1512 if (OPCS(EXP)) { 1513 newlen += EXP_GROW * OPCS(EXP); 1514 numtmp = MAX2(numtmp, EXP_TMP); 1515 } 1516 if (OPCS(LOG)) { 1517 newlen += LOG_GROW * OPCS(LOG); 1518 numtmp = MAX2(numtmp, LOG_TMP); 1519 } 1520 if (OPCS(DP4)) { 1521 newlen += DP4_GROW * OPCS(DP4); 1522 numtmp = MAX2(numtmp, DOTP_TMP); 1523 } 1524 if (OPCS(DP3)) { 1525 newlen += DP3_GROW * OPCS(DP3); 1526 numtmp = MAX2(numtmp, DOTP_TMP); 1527 } 1528 if (OPCS(DP2)) { 1529 newlen += DP2_GROW * OPCS(DP2); 1530 numtmp = MAX2(numtmp, DOTP_TMP); 1531 } 1532 if (OPCS(FLR)) { 1533 newlen += FLR_GROW * OPCS(FLR); 1534 numtmp = MAX2(numtmp, FLR_TMP); 1535 } 1536 if (OPCS(CEIL)) { 1537 newlen += CEIL_GROW * OPCS(CEIL); 1538 numtmp = MAX2(numtmp, CEIL_TMP); 1539 } 1540 if (OPCS(TRUNC)) { 1541 newlen += TRUNC_GROW * OPCS(TRUNC); 1542 numtmp = MAX2(numtmp, TRUNC_TMP); 1543 } 1544 if (ctx.saturate || config->lower_TXP) { 1545 int n = 0; 1546 1547 if (ctx.saturate) { 1548 n = info->opcode_count[TGSI_OPCODE_TEX] + 1549 info->opcode_count[TGSI_OPCODE_TXP] + 1550 info->opcode_count[TGSI_OPCODE_TXB] + 1551 info->opcode_count[TGSI_OPCODE_TXB2] + 1552 info->opcode_count[TGSI_OPCODE_TXL]; 1553 } else if (config->lower_TXP) { 1554 n = info->opcode_count[TGSI_OPCODE_TXP]; 1555 } 1556 1557 newlen += SAMP_GROW * n; 1558 numtmp = MAX2(numtmp, SAMP_TMP); 1559 } 1560 1561 /* specifically don't include two_side_colors temps in the count: */ 1562 ctx.numtmp = numtmp; 1563 1564 if (ctx.two_side_colors) { 1565 newlen += TWOSIDE_GROW(ctx.two_side_colors); 1566 /* note: we permanently consume temp regs, re-writing references 1567 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 1568 * instruction that selects which varying to use): 1569 */ 1570 numtmp += ctx.two_side_colors; 1571 } 1572 1573 newlen += 2 * numtmp; 1574 newlen += 5; /* immediate */ 1575 1576 newtoks = tgsi_alloc_tokens(newlen); 1577 if (!newtoks) 1578 return NULL; 1579 1580 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 1581 1582 tgsi_scan_shader(newtoks, info); 1583 1584#if 0 /* debug */ 1585 _debug_printf("AFTER:"); 1586 tgsi_dump(newtoks, 0); 1587#endif 1588 1589 return newtoks; 1590} 1591