1/* 2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com> 3 * Copyright 2013 Christoph Bumiller 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24#include "nine_shader.h" 25 26#include "device9.h" 27#include "nine_debug.h" 28#include "nine_state.h" 29#include "vertexdeclaration9.h" 30 31#include "util/macros.h" 32#include "util/u_memory.h" 33#include "util/u_inlines.h" 34#include "pipe/p_shader_tokens.h" 35#include "tgsi/tgsi_ureg.h" 36#include "tgsi/tgsi_dump.h" 37 38#define DBG_CHANNEL DBG_SHADER 39 40#define DUMP(args...) 
_nine_debug_printf(DBG_CHANNEL, NULL, args) 41 42 43struct shader_translator; 44 45typedef HRESULT (*translate_instruction_func)(struct shader_translator *); 46 47static inline const char *d3dsio_to_string(unsigned opcode); 48 49 50#define NINED3D_SM1_VS 0xfffe 51#define NINED3D_SM1_PS 0xffff 52 53#define NINE_MAX_COND_DEPTH 64 54#define NINE_MAX_LOOP_DEPTH 64 55 56#define NINED3DSP_END 0x0000ffff 57 58#define NINED3DSPTYPE_FLOAT4 0 59#define NINED3DSPTYPE_INT4 1 60#define NINED3DSPTYPE_BOOL 2 61 62#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) 63 64#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL 65#define NINED3DSP_WRITEMASK_SHIFT 16 66 67#define NINED3DSHADER_INST_PREDICATED (1 << 28) 68 69#define NINED3DSHADER_REL_OP_GT 1 70#define NINED3DSHADER_REL_OP_EQ 2 71#define NINED3DSHADER_REL_OP_GE 3 72#define NINED3DSHADER_REL_OP_LT 4 73#define NINED3DSHADER_REL_OP_NE 5 74#define NINED3DSHADER_REL_OP_LE 6 75 76#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16 77#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT) 78 79#define NINED3DSI_TEXLD_PROJECT 0x1 80#define NINED3DSI_TEXLD_BIAS 0x2 81 82#define NINED3DSP_WRITEMASK_0 0x1 83#define NINED3DSP_WRITEMASK_1 0x2 84#define NINED3DSP_WRITEMASK_2 0x4 85#define NINED3DSP_WRITEMASK_3 0x8 86#define NINED3DSP_WRITEMASK_ALL 0xf 87 88#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6)) 89 90#define NINE_SWIZZLE4(x,y,z,w) \ 91 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w 92 93#define NINE_APPLY_SWIZZLE(src, s) \ 94 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) 95 96#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) 97#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) 98#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) 99 100/* 101 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4 102 * BIAS <= PS 1.4 (x-0.5) 103 * BIASNEG <= PS 1.4 (-(x-0.5)) 104 * SIGN <= PS 1.4 (2(x-0.5)) 105 * SIGNNEG <= PS 
1.4 (-2(x-0.5)) 106 * COMP <= PS 1.4 (1-x) 107 * X2 = PS 1.4 (2x) 108 * X2NEG = PS 1.4 (-2x) 109 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11 110 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11 111 * ABS >= SM 3.0 (abs(x)) 112 * ABSNEG >= SM 3.0 (-abs(x)) 113 * NOT >= SM 2.0 pedication only 114 */ 115#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT) 116#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT) 117#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT) 118#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT) 119#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT) 120#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT) 121#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT) 122#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT) 123#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT) 124#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT) 125#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT) 126#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT) 127#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT) 128#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT) 129 130static const char *sm1_mod_str[] = 131{ 132 [NINED3DSPSM_NONE] = "", 133 [NINED3DSPSM_NEG] = "-", 134 [NINED3DSPSM_BIAS] = "bias", 135 [NINED3DSPSM_BIASNEG] = "biasneg", 136 [NINED3DSPSM_SIGN] = "sign", 137 [NINED3DSPSM_SIGNNEG] = "signneg", 138 [NINED3DSPSM_COMP] = "comp", 139 [NINED3DSPSM_X2] = "x2", 140 [NINED3DSPSM_X2NEG] = "x2neg", 141 [NINED3DSPSM_DZ] = "dz", 142 [NINED3DSPSM_DW] = "dw", 143 [NINED3DSPSM_ABS] = "abs", 144 [NINED3DSPSM_ABSNEG] = "-abs", 145 [NINED3DSPSM_NOT] = "not" 146}; 147 148static void 149sm1_dump_writemask(BYTE mask) 150{ 151 if (mask & 1) DUMP("x"); else DUMP("_"); 152 if (mask & 2) DUMP("y"); else DUMP("_"); 153 if (mask & 4) DUMP("z"); else DUMP("_"); 154 if (mask & 8) DUMP("w"); else DUMP("_"); 155} 156 157static void 
158sm1_dump_swizzle(BYTE s) 159{ 160 char c[4] = { 'x', 'y', 'z', 'w' }; 161 DUMP("%c%c%c%c", 162 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]); 163} 164 165static const char sm1_file_char[] = 166{ 167 [D3DSPR_TEMP] = 'r', 168 [D3DSPR_INPUT] = 'v', 169 [D3DSPR_CONST] = 'c', 170 [D3DSPR_ADDR] = 'A', 171 [D3DSPR_RASTOUT] = 'R', 172 [D3DSPR_ATTROUT] = 'D', 173 [D3DSPR_OUTPUT] = 'o', 174 [D3DSPR_CONSTINT] = 'I', 175 [D3DSPR_COLOROUT] = 'C', 176 [D3DSPR_DEPTHOUT] = 'D', 177 [D3DSPR_SAMPLER] = 's', 178 [D3DSPR_CONST2] = 'c', 179 [D3DSPR_CONST3] = 'c', 180 [D3DSPR_CONST4] = 'c', 181 [D3DSPR_CONSTBOOL] = 'B', 182 [D3DSPR_LOOP] = 'L', 183 [D3DSPR_TEMPFLOAT16] = 'h', 184 [D3DSPR_MISCTYPE] = 'M', 185 [D3DSPR_LABEL] = 'X', 186 [D3DSPR_PREDICATE] = 'p' 187}; 188 189static void 190sm1_dump_reg(BYTE file, INT index) 191{ 192 switch (file) { 193 case D3DSPR_LOOP: 194 DUMP("aL"); 195 break; 196 case D3DSPR_COLOROUT: 197 DUMP("oC%i", index); 198 break; 199 case D3DSPR_DEPTHOUT: 200 DUMP("oDepth"); 201 break; 202 case D3DSPR_RASTOUT: 203 DUMP("oRast%i", index); 204 break; 205 case D3DSPR_CONSTINT: 206 DUMP("iconst[%i]", index); 207 break; 208 case D3DSPR_CONSTBOOL: 209 DUMP("bconst[%i]", index); 210 break; 211 default: 212 DUMP("%c%i", sm1_file_char[file], index); 213 break; 214 } 215} 216 217struct sm1_src_param 218{ 219 INT idx; 220 struct sm1_src_param *rel; 221 BYTE file; 222 BYTE swizzle; 223 BYTE mod; 224 BYTE type; 225 union { 226 DWORD d[4]; 227 float f[4]; 228 int i[4]; 229 BOOL b; 230 } imm; 231}; 232static void 233sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *); 234 235struct sm1_dst_param 236{ 237 INT idx; 238 struct sm1_src_param *rel; 239 BYTE file; 240 BYTE mask; 241 BYTE mod; 242 int8_t shift; /* sint4 */ 243 BYTE type; 244}; 245 246static inline void 247assert_replicate_swizzle(const struct ureg_src *reg) 248{ 249 assert(reg->SwizzleY == reg->SwizzleX && 250 reg->SwizzleZ == reg->SwizzleX && 251 reg->SwizzleW == 
reg->SwizzleX); 252} 253 254static void 255sm1_dump_immediate(const struct sm1_src_param *param) 256{ 257 switch (param->type) { 258 case NINED3DSPTYPE_FLOAT4: 259 DUMP("{ %f %f %f %f }", 260 param->imm.f[0], param->imm.f[1], 261 param->imm.f[2], param->imm.f[3]); 262 break; 263 case NINED3DSPTYPE_INT4: 264 DUMP("{ %i %i %i %i }", 265 param->imm.i[0], param->imm.i[1], 266 param->imm.i[2], param->imm.i[3]); 267 break; 268 case NINED3DSPTYPE_BOOL: 269 DUMP("%s", param->imm.b ? "TRUE" : "FALSE"); 270 break; 271 default: 272 assert(0); 273 break; 274 } 275} 276 277static void 278sm1_dump_src_param(const struct sm1_src_param *param) 279{ 280 if (param->file == NINED3DSPR_IMMEDIATE) { 281 assert(!param->mod && 282 !param->rel && 283 param->swizzle == NINED3DSP_NOSWIZZLE); 284 sm1_dump_immediate(param); 285 return; 286 } 287 288 if (param->mod) 289 DUMP("%s(", sm1_mod_str[param->mod]); 290 if (param->rel) { 291 DUMP("%c[", sm1_file_char[param->file]); 292 sm1_dump_src_param(param->rel); 293 DUMP("+%i]", param->idx); 294 } else { 295 sm1_dump_reg(param->file, param->idx); 296 } 297 if (param->mod) 298 DUMP(")"); 299 if (param->swizzle != NINED3DSP_NOSWIZZLE) { 300 DUMP("."); 301 sm1_dump_swizzle(param->swizzle); 302 } 303} 304 305static void 306sm1_dump_dst_param(const struct sm1_dst_param *param) 307{ 308 if (param->mod & NINED3DSPDM_SATURATE) 309 DUMP("sat "); 310 if (param->mod & NINED3DSPDM_PARTIALP) 311 DUMP("pp "); 312 if (param->mod & NINED3DSPDM_CENTROID) 313 DUMP("centroid "); 314 if (param->shift < 0) 315 DUMP("/%u ", 1 << -param->shift); 316 if (param->shift > 0) 317 DUMP("*%u ", 1 << param->shift); 318 319 if (param->rel) { 320 DUMP("%c[", sm1_file_char[param->file]); 321 sm1_dump_src_param(param->rel); 322 DUMP("+%i]", param->idx); 323 } else { 324 sm1_dump_reg(param->file, param->idx); 325 } 326 if (param->mask != NINED3DSP_WRITEMASK_ALL) { 327 DUMP("."); 328 sm1_dump_writemask(param->mask); 329 } 330} 331 332struct sm1_semantic 333{ 334 struct sm1_dst_param 
reg; 335 BYTE sampler_type; 336 D3DDECLUSAGE usage; 337 BYTE usage_idx; 338}; 339 340struct sm1_op_info 341{ 342 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter 343 * should be ignored completely */ 344 unsigned sio; 345 unsigned opcode; /* TGSI_OPCODE_x */ 346 347 /* versions are still set even handler is set */ 348 struct { 349 unsigned min; 350 unsigned max; 351 } vert_version, frag_version; 352 353 /* number of regs parsed outside of special handler */ 354 unsigned ndst; 355 unsigned nsrc; 356 357 /* some instructions don't map perfectly, so use a special handler */ 358 translate_instruction_func handler; 359}; 360 361struct sm1_instruction 362{ 363 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; 364 BYTE flags; 365 BOOL coissue; 366 BOOL predicated; 367 BYTE ndst; 368 BYTE nsrc; 369 struct sm1_src_param src[4]; 370 struct sm1_src_param src_rel[4]; 371 struct sm1_src_param pred; 372 struct sm1_src_param dst_rel[1]; 373 struct sm1_dst_param dst[1]; 374 375 const struct sm1_op_info *info; 376}; 377 378static void 379sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent) 380{ 381 unsigned i; 382 383 /* no info stored for these: */ 384 if (insn->opcode == D3DSIO_DCL) 385 return; 386 for (i = 0; i < indent; ++i) 387 DUMP(" "); 388 389 if (insn->predicated) { 390 DUMP("@"); 391 sm1_dump_src_param(&insn->pred); 392 DUMP(" "); 393 } 394 DUMP("%s", d3dsio_to_string(insn->opcode)); 395 if (insn->flags) { 396 switch (insn->opcode) { 397 case D3DSIO_TEX: 398 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? 
"p" : "b"); 399 break; 400 default: 401 DUMP("_%x", insn->flags); 402 break; 403 } 404 } 405 if (insn->coissue) 406 DUMP("_co"); 407 DUMP(" "); 408 409 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) { 410 sm1_dump_dst_param(&insn->dst[i]); 411 DUMP(" "); 412 } 413 414 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) { 415 sm1_dump_src_param(&insn->src[i]); 416 DUMP(" "); 417 } 418 if (insn->opcode == D3DSIO_DEF || 419 insn->opcode == D3DSIO_DEFI || 420 insn->opcode == D3DSIO_DEFB) 421 sm1_dump_immediate(&insn->src[0]); 422 423 DUMP("\n"); 424} 425 426struct sm1_local_const 427{ 428 INT idx; 429 struct ureg_src reg; 430 float f[4]; /* for indirect addressing of float constants */ 431}; 432 433struct shader_translator 434{ 435 const DWORD *byte_code; 436 const DWORD *parse; 437 const DWORD *parse_next; 438 439 struct ureg_program *ureg; 440 441 /* shader version */ 442 struct { 443 BYTE major; 444 BYTE minor; 445 } version; 446 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */ 447 unsigned num_constf_allowed; 448 unsigned num_consti_allowed; 449 unsigned num_constb_allowed; 450 451 boolean native_integers; 452 boolean inline_subroutines; 453 boolean want_texcoord; 454 boolean shift_wpos; 455 boolean wpos_is_sysval; 456 boolean face_is_sysval_integer; 457 boolean mul_zero_wins; 458 unsigned texcoord_sn; 459 460 struct sm1_instruction insn; /* current instruction */ 461 462 struct { 463 struct ureg_dst *r; 464 struct ureg_dst oPos; 465 struct ureg_dst oPos_out; /* the real output when doing streamout */ 466 struct ureg_dst oFog; 467 struct ureg_dst oPts; 468 struct ureg_dst oCol[4]; 469 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS]; 470 struct ureg_dst oDepth; 471 struct ureg_src v[PIPE_MAX_SHADER_INPUTS]; 472 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */ 473 struct ureg_src vPos; 474 struct ureg_src vFace; 475 struct ureg_src s; 476 struct ureg_dst p; 477 struct ureg_dst address; 478 struct 
ureg_dst a0; 479 struct ureg_dst predicate; 480 struct ureg_dst predicate_tmp; 481 struct ureg_dst predicate_dst; 482 struct ureg_dst tS[8]; /* texture stage registers */ 483 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ 484 struct ureg_dst t[8]; /* scratch TEMPs */ 485 struct ureg_src vC[2]; /* PS color in */ 486 struct ureg_src vT[8]; /* PS texcoord in */ 487 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ 488 } regs; 489 unsigned num_temp; /* ARRAY_SIZE(regs.r) */ 490 unsigned num_scratch; 491 unsigned loop_depth; 492 unsigned loop_depth_max; 493 unsigned cond_depth; 494 unsigned loop_labels[NINE_MAX_LOOP_DEPTH]; 495 unsigned cond_labels[NINE_MAX_COND_DEPTH]; 496 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */ 497 boolean predicated_activated; 498 499 unsigned *inst_labels; /* LABEL op */ 500 unsigned num_inst_labels; 501 502 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */ 503 504 struct sm1_local_const *lconstf; 505 unsigned num_lconstf; 506 struct sm1_local_const *lconsti; 507 unsigned num_lconsti; 508 struct sm1_local_const *lconstb; 509 unsigned num_lconstb; 510 511 boolean slots_used[NINE_MAX_CONST_ALL]; 512 unsigned *slot_map; 513 unsigned num_slots; 514 515 boolean indirect_const_access; 516 boolean failure; 517 518 struct nine_vs_output_info output_info[16]; 519 int num_outputs; 520 521 struct nine_shader_info *info; 522 523 int16_t op_info_map[D3DSIO_BREAKP + 1]; 524}; 525 526#define IS_VS (tx->processor == PIPE_SHADER_VERTEX) 527#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT) 528 529#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} 530 531static void 532sm1_read_semantic(struct shader_translator *, struct sm1_semantic *); 533 534static void 535sm1_instruction_check(const struct sm1_instruction *insn) 536{ 537 if (insn->opcode == D3DSIO_CRS) 538 { 539 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3) 540 { 541 DBG("CRS.mask.w\n"); 542 } 543 } 544} 545 546static void 
547nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, 548 int mask, int output_index) 549{ 550 tx->output_info[tx->num_outputs].output_semantic = Usage; 551 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; 552 tx->output_info[tx->num_outputs].mask = mask; 553 tx->output_info[tx->num_outputs].output_index = output_index; 554 tx->num_outputs++; 555} 556 557static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx) 558{ 559 struct ureg_src src; 560 561 if (tx->slot_map) 562 idx = tx->slot_map[idx]; 563 /* vswp constant handling: we use two buffers 564 * to fit all the float constants. The special handling 565 * doesn't need to be elsewhere, because all the instructions 566 * accessing the constants directly are VS1, and swvp 567 * is VS >= 2 */ 568 if (tx->info->swvp_on && idx >= 4096) { 569 /* TODO: swvp rel is broken if many constants are used */ 570 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096); 571 src = ureg_src_dimension(src, 1); 572 } else { 573 src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 574 src = ureg_src_dimension(src, 0); 575 } 576 577 if (!tx->info->swvp_on) 578 tx->slots_used[idx] = TRUE; 579 if (tx->info->const_float_slots < (idx + 1)) 580 tx->info->const_float_slots = idx + 1; 581 if (tx->num_slots < (idx + 1)) 582 tx->num_slots = idx + 1; 583 584 return src; 585} 586 587static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx) 588{ 589 struct ureg_src src; 590 591 if (tx->info->swvp_on) { 592 src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 593 src = ureg_src_dimension(src, 2); 594 } else { 595 unsigned slot_idx = tx->info->const_i_base + idx; 596 if (tx->slot_map) 597 slot_idx = tx->slot_map[slot_idx]; 598 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 599 src = ureg_src_dimension(src, 0); 600 tx->slots_used[slot_idx] = TRUE; 601 tx->info->int_slots_used[idx] = TRUE; 602 if (tx->num_slots < (slot_idx + 1)) 603 tx->num_slots 
= slot_idx + 1; 604 } 605 606 if (tx->info->const_int_slots < (idx + 1)) 607 tx->info->const_int_slots = idx + 1; 608 609 return src; 610} 611 612static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx) 613{ 614 struct ureg_src src; 615 616 char r = idx / 4; 617 char s = idx & 3; 618 619 if (tx->info->swvp_on) { 620 src = ureg_src_register(TGSI_FILE_CONSTANT, r); 621 src = ureg_src_dimension(src, 3); 622 } else { 623 unsigned slot_idx = tx->info->const_b_base + r; 624 if (tx->slot_map) 625 slot_idx = tx->slot_map[slot_idx]; 626 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 627 src = ureg_src_dimension(src, 0); 628 tx->slots_used[slot_idx] = TRUE; 629 tx->info->bool_slots_used[idx] = TRUE; 630 if (tx->num_slots < (slot_idx + 1)) 631 tx->num_slots = slot_idx + 1; 632 } 633 src = ureg_swizzle(src, s, s, s, s); 634 635 if (tx->info->const_bool_slots < (idx + 1)) 636 tx->info->const_bool_slots = idx + 1; 637 638 return src; 639} 640 641static boolean 642tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) 643{ 644 INT i; 645 646 if (index < 0 || index >= tx->num_constf_allowed) { 647 tx->failure = TRUE; 648 return FALSE; 649 } 650 for (i = 0; i < tx->num_lconstf; ++i) { 651 if (tx->lconstf[i].idx == index) { 652 *src = tx->lconstf[i].reg; 653 return TRUE; 654 } 655 } 656 return FALSE; 657} 658static boolean 659tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) 660{ 661 int i; 662 663 if (index < 0 || index >= tx->num_consti_allowed) { 664 tx->failure = TRUE; 665 return FALSE; 666 } 667 for (i = 0; i < tx->num_lconsti; ++i) { 668 if (tx->lconsti[i].idx == index) { 669 *src = tx->lconsti[i].reg; 670 return TRUE; 671 } 672 } 673 return FALSE; 674} 675static boolean 676tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) 677{ 678 int i; 679 680 if (index < 0 || index >= tx->num_constb_allowed) { 681 tx->failure = TRUE; 682 return FALSE; 683 } 684 for (i = 0; i < 
tx->num_lconstb; ++i) { 685 if (tx->lconstb[i].idx == index) { 686 *src = tx->lconstb[i].reg; 687 return TRUE; 688 } 689 } 690 return FALSE; 691} 692 693static void 694tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) 695{ 696 unsigned n; 697 698 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) 699 700 for (n = 0; n < tx->num_lconstf; ++n) 701 if (tx->lconstf[n].idx == index) 702 break; 703 if (n == tx->num_lconstf) { 704 if ((n % 8) == 0) { 705 tx->lconstf = REALLOC(tx->lconstf, 706 (n + 0) * sizeof(tx->lconstf[0]), 707 (n + 8) * sizeof(tx->lconstf[0])); 708 assert(tx->lconstf); 709 } 710 tx->num_lconstf++; 711 } 712 tx->lconstf[n].idx = index; 713 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); 714 715 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); 716} 717static void 718tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) 719{ 720 unsigned n; 721 722 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) 723 724 for (n = 0; n < tx->num_lconsti; ++n) 725 if (tx->lconsti[n].idx == index) 726 break; 727 if (n == tx->num_lconsti) { 728 if ((n % 8) == 0) { 729 tx->lconsti = REALLOC(tx->lconsti, 730 (n + 0) * sizeof(tx->lconsti[0]), 731 (n + 8) * sizeof(tx->lconsti[0])); 732 assert(tx->lconsti); 733 } 734 tx->num_lconsti++; 735 } 736 737 tx->lconsti[n].idx = index; 738 tx->lconsti[n].reg = tx->native_integers ? 
739 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : 740 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); 741} 742static void 743tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) 744{ 745 unsigned n; 746 747 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) 748 749 for (n = 0; n < tx->num_lconstb; ++n) 750 if (tx->lconstb[n].idx == index) 751 break; 752 if (n == tx->num_lconstb) { 753 if ((n % 8) == 0) { 754 tx->lconstb = REALLOC(tx->lconstb, 755 (n + 0) * sizeof(tx->lconstb[0]), 756 (n + 8) * sizeof(tx->lconstb[0])); 757 assert(tx->lconstb); 758 } 759 tx->num_lconstb++; 760 } 761 762 tx->lconstb[n].idx = index; 763 tx->lconstb[n].reg = tx->native_integers ? 764 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : 765 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f); 766} 767 768static inline struct ureg_dst 769tx_scratch(struct shader_translator *tx) 770{ 771 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) { 772 tx->failure = TRUE; 773 return tx->regs.t[0]; 774 } 775 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) 776 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); 777 return tx->regs.t[tx->num_scratch++]; 778} 779 780static inline struct ureg_dst 781tx_scratch_scalar(struct shader_translator *tx) 782{ 783 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 784} 785 786static inline struct ureg_src 787tx_src_scalar(struct ureg_dst dst) 788{ 789 struct ureg_src src = ureg_src(dst); 790 int c = ffs(dst.WriteMask) - 1; 791 if (dst.WriteMask == (1 << c)) 792 src = ureg_scalar(src, c); 793 return src; 794} 795 796static inline void 797tx_temp_alloc(struct shader_translator *tx, INT idx) 798{ 799 assert(idx >= 0); 800 if (idx >= tx->num_temp) { 801 unsigned k = tx->num_temp; 802 unsigned n = idx + 1; 803 tx->regs.r = REALLOC(tx->regs.r, 804 k * sizeof(tx->regs.r[0]), 805 n * sizeof(tx->regs.r[0])); 806 for (; k < n; ++k) 807 tx->regs.r[k] = ureg_dst_undef(); 808 tx->num_temp = n; 809 } 810 if (ureg_dst_is_undef(tx->regs.r[idx])) 811 
tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); 812} 813 814static inline void 815tx_addr_alloc(struct shader_translator *tx, INT idx) 816{ 817 assert(idx == 0); 818 if (ureg_dst_is_undef(tx->regs.address)) 819 tx->regs.address = ureg_DECL_address(tx->ureg); 820 if (ureg_dst_is_undef(tx->regs.a0)) 821 tx->regs.a0 = ureg_DECL_temporary(tx->ureg); 822} 823 824/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions 825 * the projection should be applied on the texture. It doesn't 826 * apply on texkill. 827 * The doc is very imprecise here (it says the projection is done 828 * before rasterization, thus in vs, which seems wrong since ps instructions 829 * are affected differently) 830 * For now we only apply to the ps TEX instruction and TEXBEM. 831 * Perhaps some other instructions would need it */ 832static inline void 833apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 834 struct ureg_src src, INT idx) 835{ 836 struct ureg_dst tmp; 837 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 838 839 /* no projection */ 840 if (dim == 1) { 841 ureg_MOV(tx->ureg, dst, src); 842 } else { 843 tmp = tx_scratch_scalar(tx); 844 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); 845 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); 846 } 847} 848 849static inline void 850TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 851 unsigned target, struct ureg_src src0, 852 struct ureg_src src1, INT idx) 853{ 854 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 855 struct ureg_dst tmp; 856 boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx)); 857 858 /* dim == 1: no projection 859 * Looks like must be disabled when it makes no 860 * sense according the texture dimensions 861 */ 862 if (dim == 1 || (dim <= target && !shadow)) { 863 ureg_TEX(tx->ureg, dst, target, src0, src1); 864 } else if (dim == 4) { 865 ureg_TXP(tx->ureg, dst, target, src0, src1); 866 } else { 867 tmp = tx_scratch(tx); 868 
apply_ps1x_projection(tx, tmp, src0, idx); 869 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); 870 } 871} 872 873static inline void 874tx_texcoord_alloc(struct shader_translator *tx, INT idx) 875{ 876 assert(IS_PS); 877 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT)); 878 if (ureg_src_is_undef(tx->regs.vT[idx])) 879 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx, 880 TGSI_INTERPOLATE_PERSPECTIVE); 881} 882 883static inline unsigned * 884tx_bgnloop(struct shader_translator *tx) 885{ 886 tx->loop_depth++; 887 if (tx->loop_depth_max < tx->loop_depth) 888 tx->loop_depth_max = tx->loop_depth; 889 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH); 890 return &tx->loop_labels[tx->loop_depth - 1]; 891} 892 893static inline unsigned * 894tx_endloop(struct shader_translator *tx) 895{ 896 assert(tx->loop_depth); 897 tx->loop_depth--; 898 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth], 899 ureg_get_instruction_number(tx->ureg)); 900 return &tx->loop_labels[tx->loop_depth]; 901} 902 903static struct ureg_dst 904tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep) 905{ 906 const unsigned l = tx->loop_depth - 1; 907 908 if (!tx->loop_depth) 909 { 910 DBG("loop counter requested outside of loop\n"); 911 return ureg_dst_undef(); 912 } 913 914 if (ureg_dst_is_undef(tx->regs.rL[l])) { 915 /* loop or rep ctr creation */ 916 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg); 917 tx->loop_or_rep[l] = loop_or_rep; 918 } 919 /* loop - rep - endloop - endrep not allowed */ 920 assert(tx->loop_or_rep[l] == loop_or_rep); 921 922 return tx->regs.rL[l]; 923} 924 925static struct ureg_src 926tx_get_loopal(struct shader_translator *tx) 927{ 928 int loop_level = tx->loop_depth - 1; 929 930 while (loop_level >= 0) { 931 /* handle loop - rep - endrep - endloop case */ 932 if (tx->loop_or_rep[loop_level]) 933 /* the value is in the loop counter y component (nine implementation) */ 934 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), 
TGSI_SWIZZLE_Y); 935 loop_level--; 936 } 937 938 DBG("aL counter requested outside of loop\n"); 939 return ureg_src_undef(); 940} 941 942static inline unsigned * 943tx_cond(struct shader_translator *tx) 944{ 945 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); 946 tx->cond_depth++; 947 return &tx->cond_labels[tx->cond_depth - 1]; 948} 949 950static inline unsigned * 951tx_elsecond(struct shader_translator *tx) 952{ 953 assert(tx->cond_depth); 954 return &tx->cond_labels[tx->cond_depth - 1]; 955} 956 957static inline void 958tx_endcond(struct shader_translator *tx) 959{ 960 assert(tx->cond_depth); 961 tx->cond_depth--; 962 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth], 963 ureg_get_instruction_number(tx->ureg)); 964} 965 966static inline struct ureg_dst 967nine_ureg_dst_register(unsigned file, int index) 968{ 969 return ureg_dst(ureg_src_register(file, index)); 970} 971 972static inline struct ureg_src 973nine_get_position_input(struct shader_translator *tx) 974{ 975 struct ureg_program *ureg = tx->ureg; 976 977 if (tx->wpos_is_sysval) 978 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 979 else 980 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 981 0, TGSI_INTERPOLATE_LINEAR); 982} 983 984static struct ureg_src 985tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) 986{ 987 struct ureg_program *ureg = tx->ureg; 988 struct ureg_src src; 989 struct ureg_dst tmp; 990 991 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) || 992 (D3DSPR_ADDR && tx->version.major == 3)); 993 994 switch (param->file) 995 { 996 case D3DSPR_TEMP: 997 tx_temp_alloc(tx, param->idx); 998 src = ureg_src(tx->regs.r[param->idx]); 999 break; 1000 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 1001 case D3DSPR_ADDR: 1002 if (IS_VS) { 1003 assert(param->idx == 0); 1004 /* the address register (vs only) must be 1005 * assigned before use */ 1006 assert(!ureg_dst_is_undef(tx->regs.a0)); 1007 /* Round to lowest for vs1.1 (contrary to 
the doc), else 1008 * round to nearest */ 1009 if (tx->version.major < 2 && tx->version.minor < 2) 1010 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 1011 else 1012 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 1013 src = ureg_src(tx->regs.address); 1014 } else { 1015 if (tx->version.major < 2 && tx->version.minor < 4) { 1016 /* no subroutines, so should be defined */ 1017 src = ureg_src(tx->regs.tS[param->idx]); 1018 } else { 1019 tx_texcoord_alloc(tx, param->idx); 1020 src = tx->regs.vT[param->idx]; 1021 } 1022 } 1023 break; 1024 case D3DSPR_INPUT: 1025 if (IS_VS) { 1026 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 1027 } else { 1028 if (tx->version.major < 3) { 1029 src = ureg_DECL_fs_input_cyl_centroid( 1030 ureg, TGSI_SEMANTIC_COLOR, param->idx, 1031 TGSI_INTERPOLATE_COLOR, 0, 1032 tx->info->force_color_in_centroid ? 1033 TGSI_INTERPOLATE_LOC_CENTROID : 0, 1034 0, 1); 1035 } else { 1036 if(param->rel) { 1037 /* Copy all inputs (non consecutive) 1038 * to temp array (consecutive). 1039 * This is not good for performance. 1040 * A better way would be to have inputs 1041 * consecutive (would need implement alternative 1042 * way to match vs outputs and ps inputs). 1043 * However even with the better way, the temp array 1044 * copy would need to be used if some inputs 1045 * are not GENERIC or if they have different 1046 * interpolation flag. 
*/ 1047 if (ureg_src_is_undef(tx->regs.v_consecutive)) { 1048 int i; 1049 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0)); 1050 for (i = 0; i < 10; i++) { 1051 if (!ureg_src_is_undef(tx->regs.v[i])) 1052 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]); 1053 else 1054 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 1055 } 1056 } 1057 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx); 1058 } else { 1059 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 1060 src = tx->regs.v[param->idx]; 1061 } 1062 } 1063 } 1064 if (param->rel) 1065 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1066 break; 1067 case D3DSPR_PREDICATE: 1068 if (ureg_dst_is_undef(tx->regs.predicate)) { 1069 /* Forbidden to use the predicate register before being set */ 1070 tx->failure = TRUE; 1071 tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 1072 } 1073 src = ureg_src(tx->regs.predicate); 1074 break; 1075 case D3DSPR_SAMPLER: 1076 assert(param->mod == NINED3DSPSM_NONE); 1077 assert(param->swizzle == NINED3DSP_NOSWIZZLE); 1078 src = ureg_DECL_sampler(ureg, param->idx); 1079 break; 1080 case D3DSPR_CONST: 1081 if (param->rel || !tx_lconstf(tx, &src, param->idx)) { 1082 src = nine_float_constant_src(tx, param->idx); 1083 if (param->rel) { 1084 tx->indirect_const_access = TRUE; 1085 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1086 } 1087 } 1088 if (!IS_VS && tx->version.major < 2) { 1089 /* ps 1.X clamps constants */ 1090 tmp = tx_scratch(tx); 1091 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f)); 1092 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 1093 src = ureg_src(tmp); 1094 } 1095 break; 1096 case D3DSPR_CONST2: 1097 case D3DSPR_CONST3: 1098 case D3DSPR_CONST4: 1099 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n"); 1100 assert(!"CONST2/3/4"); 1101 src = ureg_imm1f(ureg, 0.0f); 1102 break; 
1103 case D3DSPR_CONSTINT: 1104 /* relative adressing only possible for float constants in vs */ 1105 if (!tx_lconsti(tx, &src, param->idx)) 1106 src = nine_integer_constant_src(tx, param->idx); 1107 break; 1108 case D3DSPR_CONSTBOOL: 1109 if (!tx_lconstb(tx, &src, param->idx)) 1110 src = nine_boolean_constant_src(tx, param->idx); 1111 break; 1112 case D3DSPR_LOOP: 1113 if (ureg_dst_is_undef(tx->regs.address)) 1114 tx->regs.address = ureg_DECL_address(ureg); 1115 if (!tx->native_integers) 1116 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx)); 1117 else 1118 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx)); 1119 src = ureg_src(tx->regs.address); 1120 break; 1121 case D3DSPR_MISCTYPE: 1122 switch (param->idx) { 1123 case D3DSMO_POSITION: 1124 if (ureg_src_is_undef(tx->regs.vPos)) 1125 tx->regs.vPos = nine_get_position_input(tx); 1126 if (tx->shift_wpos) { 1127 /* TODO: do this only once */ 1128 struct ureg_dst wpos = tx_scratch(tx); 1129 ureg_ADD(ureg, wpos, tx->regs.vPos, 1130 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f)); 1131 src = ureg_src(wpos); 1132 } else { 1133 src = tx->regs.vPos; 1134 } 1135 break; 1136 case D3DSMO_FACE: 1137 if (ureg_src_is_undef(tx->regs.vFace)) { 1138 if (tx->face_is_sysval_integer) { 1139 tmp = ureg_DECL_temporary(ureg); 1140 tx->regs.vFace = 1141 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0); 1142 1143 /* convert bool to float */ 1144 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X), 1145 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1)); 1146 tx->regs.vFace = ureg_src(tmp); 1147 } else { 1148 tx->regs.vFace = ureg_DECL_fs_input(ureg, 1149 TGSI_SEMANTIC_FACE, 0, 1150 TGSI_INTERPOLATE_CONSTANT); 1151 } 1152 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X); 1153 } 1154 src = tx->regs.vFace; 1155 break; 1156 default: 1157 assert(!"invalid src D3DSMO"); 1158 break; 1159 } 1160 break; 1161 case D3DSPR_TEMPFLOAT16: 1162 break; 1163 default: 1164 assert(!"invalid src D3DSPR"); 1165 } 1166 1167 switch 
(param->mod) { 1168 case NINED3DSPSM_DW: 1169 tmp = tx_scratch(tx); 1170 /* NOTE: app is not allowed to read w with this modifier */ 1171 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W)); 1172 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W))); 1173 src = ureg_src(tmp); 1174 break; 1175 case NINED3DSPSM_DZ: 1176 tmp = tx_scratch(tx); 1177 /* NOTE: app is not allowed to read z with this modifier */ 1178 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z)); 1179 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z))); 1180 src = ureg_src(tmp); 1181 break; 1182 default: 1183 break; 1184 } 1185 1186 if (param->swizzle != NINED3DSP_NOSWIZZLE) 1187 src = ureg_swizzle(src, 1188 (param->swizzle >> 0) & 0x3, 1189 (param->swizzle >> 2) & 0x3, 1190 (param->swizzle >> 4) & 0x3, 1191 (param->swizzle >> 6) & 0x3); 1192 1193 switch (param->mod) { 1194 case NINED3DSPSM_ABS: 1195 src = ureg_abs(src); 1196 break; 1197 case NINED3DSPSM_ABSNEG: 1198 src = ureg_negate(ureg_abs(src)); 1199 break; 1200 case NINED3DSPSM_NEG: 1201 src = ureg_negate(src); 1202 break; 1203 case NINED3DSPSM_BIAS: 1204 tmp = tx_scratch(tx); 1205 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f)); 1206 src = ureg_src(tmp); 1207 break; 1208 case NINED3DSPSM_BIASNEG: 1209 tmp = tx_scratch(tx); 1210 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src)); 1211 src = ureg_src(tmp); 1212 break; 1213 case NINED3DSPSM_NOT: 1214 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) { 1215 tmp = tx_scratch(tx); 1216 ureg_NOT(ureg, tmp, src); 1217 src = ureg_src(tmp); 1218 break; 1219 } else { /* predicate */ 1220 tmp = tx_scratch(tx); 1221 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 1222 src = ureg_src(tmp); 1223 } 1224 /* fall through */ 1225 case NINED3DSPSM_COMP: 1226 tmp = tx_scratch(tx); 1227 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 
1228 src = ureg_src(tmp); 1229 break; 1230 case NINED3DSPSM_DZ: 1231 case NINED3DSPSM_DW: 1232 /* Already handled*/ 1233 break; 1234 case NINED3DSPSM_SIGN: 1235 tmp = tx_scratch(tx); 1236 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1237 src = ureg_src(tmp); 1238 break; 1239 case NINED3DSPSM_SIGNNEG: 1240 tmp = tx_scratch(tx); 1241 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f)); 1242 src = ureg_src(tmp); 1243 break; 1244 case NINED3DSPSM_X2: 1245 tmp = tx_scratch(tx); 1246 ureg_ADD(ureg, tmp, src, src); 1247 src = ureg_src(tmp); 1248 break; 1249 case NINED3DSPSM_X2NEG: 1250 tmp = tx_scratch(tx); 1251 ureg_ADD(ureg, tmp, src, src); 1252 src = ureg_negate(ureg_src(tmp)); 1253 break; 1254 default: 1255 assert(param->mod == NINED3DSPSM_NONE); 1256 break; 1257 } 1258 1259 return src; 1260} 1261 1262static struct ureg_dst 1263_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1264{ 1265 struct ureg_dst dst; 1266 1267 switch (param->file) 1268 { 1269 case D3DSPR_TEMP: 1270 assert(!param->rel); 1271 tx_temp_alloc(tx, param->idx); 1272 dst = tx->regs.r[param->idx]; 1273 break; 1274 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 1275 case D3DSPR_ADDR: 1276 assert(!param->rel); 1277 if (tx->version.major < 2 && !IS_VS) { 1278 if (ureg_dst_is_undef(tx->regs.tS[param->idx])) 1279 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg); 1280 dst = tx->regs.tS[param->idx]; 1281 } else 1282 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */ 1283 tx_texcoord_alloc(tx, param->idx); 1284 dst = ureg_dst(tx->regs.vT[param->idx]); 1285 } else { 1286 tx_addr_alloc(tx, param->idx); 1287 dst = tx->regs.a0; 1288 } 1289 break; 1290 case D3DSPR_RASTOUT: 1291 assert(!param->rel); 1292 switch (param->idx) { 1293 case 0: 1294 if (ureg_dst_is_undef(tx->regs.oPos)) 1295 tx->regs.oPos = 1296 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 1297 dst = tx->regs.oPos; 1298 break; 1299 case 1: 
1300 if (ureg_dst_is_undef(tx->regs.oFog)) 1301 tx->regs.oFog = 1302 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16)); 1303 dst = tx->regs.oFog; 1304 break; 1305 case 2: 1306 if (ureg_dst_is_undef(tx->regs.oPts)) 1307 tx->regs.oPts = ureg_DECL_temporary(tx->ureg); 1308 dst = tx->regs.oPts; 1309 break; 1310 default: 1311 assert(0); 1312 break; 1313 } 1314 break; 1315 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */ 1316 case D3DSPR_OUTPUT: 1317 if (tx->version.major < 3) { 1318 assert(!param->rel); 1319 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx); 1320 } else { 1321 assert(!param->rel); /* TODO */ 1322 assert(param->idx < ARRAY_SIZE(tx->regs.o)); 1323 dst = tx->regs.o[param->idx]; 1324 } 1325 break; 1326 case D3DSPR_ATTROUT: /* VS */ 1327 case D3DSPR_COLOROUT: /* PS */ 1328 assert(param->idx >= 0 && param->idx < 4); 1329 assert(!param->rel); 1330 tx->info->rt_mask |= 1 << param->idx; 1331 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { 1332 /* ps < 3: oCol[0] will have fog blending afterward */ 1333 if (!IS_VS && tx->version.major < 3 && param->idx == 0) { 1334 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); 1335 } else { 1336 tx->regs.oCol[param->idx] = 1337 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); 1338 } 1339 } 1340 dst = tx->regs.oCol[param->idx]; 1341 if (IS_VS && tx->version.major < 3) 1342 dst = ureg_saturate(dst); 1343 break; 1344 case D3DSPR_DEPTHOUT: 1345 assert(!param->rel); 1346 if (ureg_dst_is_undef(tx->regs.oDepth)) 1347 tx->regs.oDepth = 1348 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, 1349 TGSI_WRITEMASK_Z, 0, 1); 1350 dst = tx->regs.oDepth; /* XXX: must write .z component */ 1351 break; 1352 case D3DSPR_PREDICATE: 1353 if (ureg_dst_is_undef(tx->regs.predicate)) 1354 tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 1355 dst = tx->regs.predicate; 1356 break; 1357 case D3DSPR_TEMPFLOAT16: 1358 DBG("unhandled D3DSPR: %u\n", param->file); 1359 break; 1360 default: 1361 
assert(!"invalid dst D3DSPR"); 1362 break; 1363 } 1364 if (param->rel) 1365 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel)); 1366 1367 if (param->mask != NINED3DSP_WRITEMASK_ALL) 1368 dst = ureg_writemask(dst, param->mask); 1369 if (param->mod & NINED3DSPDM_SATURATE) 1370 dst = ureg_saturate(dst); 1371 1372 if (tx->predicated_activated) { 1373 tx->regs.predicate_dst = dst; 1374 dst = tx->regs.predicate_tmp; 1375 } 1376 1377 return dst; 1378} 1379 1380static struct ureg_dst 1381tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1382{ 1383 if (param->shift) { 1384 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask); 1385 return tx->regs.tdst; 1386 } 1387 return _tx_dst_param(tx, param); 1388} 1389 1390static void 1391tx_apply_dst0_modifiers(struct shader_translator *tx) 1392{ 1393 struct ureg_dst rdst; 1394 float f; 1395 1396 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL) 1397 return; 1398 rdst = _tx_dst_param(tx, &tx->insn.dst[0]); 1399 1400 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */ 1401 1402 if (tx->insn.dst[0].shift < 0) 1403 f = 1.0f / (1 << -tx->insn.dst[0].shift); 1404 else 1405 f = 1 << tx->insn.dst[0].shift; 1406 1407 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f)); 1408} 1409 1410static struct ureg_src 1411tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param) 1412{ 1413 struct ureg_src src; 1414 1415 assert(!param->shift); 1416 assert(!(param->mod & NINED3DSPDM_SATURATE)); 1417 1418 switch (param->file) { 1419 case D3DSPR_INPUT: 1420 if (IS_VS) { 1421 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 1422 } else { 1423 assert(!param->rel); 1424 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 1425 src = tx->regs.v[param->idx]; 1426 } 1427 break; 1428 default: 1429 src = ureg_src(tx_dst_param(tx, param)); 1430 break; 1431 } 1432 if (param->rel) 1433 src = ureg_src_indirect(src, 
tx_src_param(tx, param->rel)); 1434 1435 if (!param->mask) 1436 WARN("mask is 0, using identity swizzle\n"); 1437 1438 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) { 1439 char s[4]; 1440 int n; 1441 int c; 1442 for (n = 0, c = 0; c < 4; ++c) 1443 if (param->mask & (1 << c)) 1444 s[n++] = c; 1445 assert(n); 1446 for (c = n; c < 4; ++c) 1447 s[c] = s[n - 1]; 1448 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]); 1449 } 1450 return src; 1451} 1452 1453static HRESULT 1454NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n) 1455{ 1456 struct ureg_program *ureg = tx->ureg; 1457 struct ureg_dst dst; 1458 struct ureg_src src[2]; 1459 struct sm1_src_param *src_mat = &tx->insn.src[1]; 1460 unsigned i; 1461 1462 dst = tx_dst_param(tx, &tx->insn.dst[0]); 1463 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1464 1465 for (i = 0; i < n; i++) 1466 { 1467 const unsigned m = (1 << i); 1468 1469 src[1] = tx_src_param(tx, src_mat); 1470 src_mat->idx++; 1471 1472 if (!(dst.WriteMask & m)) 1473 continue; 1474 1475 /* XXX: src == dst case ? 
*/ 1476 1477 switch (k) { 1478 case 3: 1479 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]); 1480 break; 1481 case 4: 1482 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]); 1483 break; 1484 default: 1485 DBG("invalid operation: M%ux%u\n", m, n); 1486 break; 1487 } 1488 } 1489 1490 return D3D_OK; 1491} 1492 1493#define VNOTSUPPORTED 0, 0 1494#define V(maj, min) (((maj) << 8) | (min)) 1495 1496static inline const char * 1497d3dsio_to_string( unsigned opcode ) 1498{ 1499 static const char *names[] = { 1500 "NOP", 1501 "MOV", 1502 "ADD", 1503 "SUB", 1504 "MAD", 1505 "MUL", 1506 "RCP", 1507 "RSQ", 1508 "DP3", 1509 "DP4", 1510 "MIN", 1511 "MAX", 1512 "SLT", 1513 "SGE", 1514 "EXP", 1515 "LOG", 1516 "LIT", 1517 "DST", 1518 "LRP", 1519 "FRC", 1520 "M4x4", 1521 "M4x3", 1522 "M3x4", 1523 "M3x3", 1524 "M3x2", 1525 "CALL", 1526 "CALLNZ", 1527 "LOOP", 1528 "RET", 1529 "ENDLOOP", 1530 "LABEL", 1531 "DCL", 1532 "POW", 1533 "CRS", 1534 "SGN", 1535 "ABS", 1536 "NRM", 1537 "SINCOS", 1538 "REP", 1539 "ENDREP", 1540 "IF", 1541 "IFC", 1542 "ELSE", 1543 "ENDIF", 1544 "BREAK", 1545 "BREAKC", 1546 "MOVA", 1547 "DEFB", 1548 "DEFI", 1549 NULL, 1550 NULL, 1551 NULL, 1552 NULL, 1553 NULL, 1554 NULL, 1555 NULL, 1556 NULL, 1557 NULL, 1558 NULL, 1559 NULL, 1560 NULL, 1561 NULL, 1562 NULL, 1563 NULL, 1564 "TEXCOORD", 1565 "TEXKILL", 1566 "TEX", 1567 "TEXBEM", 1568 "TEXBEML", 1569 "TEXREG2AR", 1570 "TEXREG2GB", 1571 "TEXM3x2PAD", 1572 "TEXM3x2TEX", 1573 "TEXM3x3PAD", 1574 "TEXM3x3TEX", 1575 NULL, 1576 "TEXM3x3SPEC", 1577 "TEXM3x3VSPEC", 1578 "EXPP", 1579 "LOGP", 1580 "CND", 1581 "DEF", 1582 "TEXREG2RGB", 1583 "TEXDP3TEX", 1584 "TEXM3x2DEPTH", 1585 "TEXDP3", 1586 "TEXM3x3", 1587 "TEXDEPTH", 1588 "CMP", 1589 "BEM", 1590 "DP2ADD", 1591 "DSX", 1592 "DSY", 1593 "TEXLDD", 1594 "SETP", 1595 "TEXLDL", 1596 "BREAKP" 1597 }; 1598 1599 if (opcode < ARRAY_SIZE(names)) return names[opcode]; 1600 1601 switch (opcode) { 1602 case D3DSIO_PHASE: return "PHASE"; 1603 case D3DSIO_COMMENT: return 
"COMMENT"; 1604 case D3DSIO_END: return "END"; 1605 default: 1606 return NULL; 1607 } 1608} 1609 1610#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL } 1611#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \ 1612 (inst).vert_version.max | \ 1613 (inst).frag_version.min | \ 1614 (inst).frag_version.max) 1615 1616#define SPECIAL(name) \ 1617 NineTranslateInstruction_##name 1618 1619#define DECL_SPECIAL(name) \ 1620 static HRESULT \ 1621 NineTranslateInstruction_##name( struct shader_translator *tx ) 1622 1623static HRESULT 1624NineTranslateInstruction_Generic(struct shader_translator *); 1625 1626DECL_SPECIAL(NOP) 1627{ 1628 /* Nothing to do. NOP was used to avoid hangs 1629 * with very old d3d drivers. */ 1630 return D3D_OK; 1631} 1632 1633DECL_SPECIAL(SUB) 1634{ 1635 struct ureg_program *ureg = tx->ureg; 1636 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1637 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 1638 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 1639 1640 ureg_ADD(ureg, dst, src0, ureg_negate(src1)); 1641 return D3D_OK; 1642} 1643 1644DECL_SPECIAL(ABS) 1645{ 1646 struct ureg_program *ureg = tx->ureg; 1647 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1648 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1649 1650 ureg_MOV(ureg, dst, ureg_abs(src)); 1651 return D3D_OK; 1652} 1653 1654DECL_SPECIAL(XPD) 1655{ 1656 struct ureg_program *ureg = tx->ureg; 1657 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1658 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 1659 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 1660 1661 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 1662 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, 1663 TGSI_SWIZZLE_X, 0), 1664 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 1665 TGSI_SWIZZLE_Y, 0)); 1666 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 1667 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 
1668 TGSI_SWIZZLE_Y, 0), 1669 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y, 1670 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)), 1671 ureg_src(dst)); 1672 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), 1673 ureg_imm1f(ureg, 1)); 1674 return D3D_OK; 1675} 1676 1677DECL_SPECIAL(M4x4) 1678{ 1679 return NineTranslateInstruction_Mkxn(tx, 4, 4); 1680} 1681 1682DECL_SPECIAL(M4x3) 1683{ 1684 return NineTranslateInstruction_Mkxn(tx, 4, 3); 1685} 1686 1687DECL_SPECIAL(M3x4) 1688{ 1689 return NineTranslateInstruction_Mkxn(tx, 3, 4); 1690} 1691 1692DECL_SPECIAL(M3x3) 1693{ 1694 return NineTranslateInstruction_Mkxn(tx, 3, 3); 1695} 1696 1697DECL_SPECIAL(M3x2) 1698{ 1699 return NineTranslateInstruction_Mkxn(tx, 3, 2); 1700} 1701 1702DECL_SPECIAL(CMP) 1703{ 1704 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]), 1705 tx_src_param(tx, &tx->insn.src[0]), 1706 tx_src_param(tx, &tx->insn.src[2]), 1707 tx_src_param(tx, &tx->insn.src[1])); 1708 return D3D_OK; 1709} 1710 1711DECL_SPECIAL(CND) 1712{ 1713 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1714 struct ureg_dst cgt; 1715 struct ureg_src cnd; 1716 1717 /* the coissue flag was a tip for compilers to advise to 1718 * execute two operations at the same time, in cases 1719 * the two executions had same dst with different channels. 1720 * It has no effect on current hw. However it seems CND 1721 * is affected. 
The handling of this very specific case 1722 * handled below mimick wine behaviour */ 1723 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) { 1724 ureg_MOV(tx->ureg, 1725 dst, tx_src_param(tx, &tx->insn.src[1])); 1726 return D3D_OK; 1727 } 1728 1729 cnd = tx_src_param(tx, &tx->insn.src[0]); 1730 cgt = tx_scratch(tx); 1731 1732 if (tx->version.major == 1 && tx->version.minor < 4) 1733 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); 1734 1735 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f)); 1736 1737 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)), 1738 tx_src_param(tx, &tx->insn.src[1]), 1739 tx_src_param(tx, &tx->insn.src[2])); 1740 return D3D_OK; 1741} 1742 1743DECL_SPECIAL(CALL) 1744{ 1745 assert(tx->insn.src[0].idx < tx->num_inst_labels); 1746 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1747 return D3D_OK; 1748} 1749 1750DECL_SPECIAL(CALLNZ) 1751{ 1752 struct ureg_program *ureg = tx->ureg; 1753 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1754 1755 if (!tx->native_integers) 1756 ureg_IF(ureg, src, tx_cond(tx)); 1757 else 1758 ureg_UIF(ureg, src, tx_cond(tx)); 1759 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1760 tx_endcond(tx); 1761 ureg_ENDIF(ureg); 1762 return D3D_OK; 1763} 1764 1765DECL_SPECIAL(LOOP) 1766{ 1767 struct ureg_program *ureg = tx->ureg; 1768 unsigned *label; 1769 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1770 struct ureg_dst ctr; 1771 struct ureg_dst tmp; 1772 struct ureg_src ctrx; 1773 1774 label = tx_bgnloop(tx); 1775 ctr = tx_get_loopctr(tx, TRUE); 1776 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1777 1778 /* src: num_iterations - start_value of al - step for al - 0 */ 1779 ureg_MOV(ureg, ctr, src); 1780 ureg_BGNLOOP(tx->ureg, label); 1781 tmp = tx_scratch_scalar(tx); 1782 /* Initially ctr.x contains the number of iterations. 1783 * ctr.y will contain the updated value of al. 
1784 * We decrease ctr.x at the end of every iteration, 1785 * and stop when it reaches 0. */ 1786 1787 if (!tx->native_integers) { 1788 /* case src and ctr contain floats */ 1789 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1790 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1791 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1792 } else { 1793 /* case src and ctr contain integers */ 1794 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1795 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1796 } 1797 ureg_BRK(ureg); 1798 tx_endcond(tx); 1799 ureg_ENDIF(ureg); 1800 return D3D_OK; 1801} 1802 1803DECL_SPECIAL(RET) 1804{ 1805 ureg_RET(tx->ureg); 1806 return D3D_OK; 1807} 1808 1809DECL_SPECIAL(ENDLOOP) 1810{ 1811 struct ureg_program *ureg = tx->ureg; 1812 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE); 1813 struct ureg_dst dst_ctrx, dst_al; 1814 struct ureg_src src_ctr, al_counter; 1815 1816 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1817 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1); 1818 src_ctr = ureg_src(ctr); 1819 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z); 1820 1821 /* ctr.x -= 1 1822 * ctr.y (aL) += step */ 1823 if (!tx->native_integers) { 1824 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1825 ureg_ADD(ureg, dst_al, src_ctr, al_counter); 1826 } else { 1827 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1828 ureg_UADD(ureg, dst_al, src_ctr, al_counter); 1829 } 1830 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1831 return D3D_OK; 1832} 1833 1834DECL_SPECIAL(LABEL) 1835{ 1836 unsigned k = tx->num_inst_labels; 1837 unsigned n = tx->insn.src[0].idx; 1838 assert(n < 2048); 1839 if (n >= k) 1840 tx->inst_labels = REALLOC(tx->inst_labels, 1841 k * sizeof(tx->inst_labels[0]), 1842 n * sizeof(tx->inst_labels[0])); 1843 1844 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg); 1845 return D3D_OK; 1846} 1847 1848DECL_SPECIAL(SINCOS) 1849{ 1850 struct ureg_program *ureg = tx->ureg; 1851 struct ureg_dst dst 
= tx_dst_param(tx, &tx->insn.dst[0]); 1852 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1853 struct ureg_dst tmp = tx_scratch_scalar(tx); 1854 1855 assert(!(dst.WriteMask & 0xc)); 1856 1857 /* Copying to a temporary register avoids src/dst aliasing. 1858 * src is supposed to have replicated swizzle. */ 1859 ureg_MOV(ureg, tmp, src); 1860 1861 /* z undefined, w untouched */ 1862 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), 1863 tx_src_scalar(tmp)); 1864 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), 1865 tx_src_scalar(tmp)); 1866 return D3D_OK; 1867} 1868 1869DECL_SPECIAL(SGN) 1870{ 1871 ureg_SSG(tx->ureg, 1872 tx_dst_param(tx, &tx->insn.dst[0]), 1873 tx_src_param(tx, &tx->insn.src[0])); 1874 return D3D_OK; 1875} 1876 1877DECL_SPECIAL(REP) 1878{ 1879 struct ureg_program *ureg = tx->ureg; 1880 unsigned *label; 1881 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); 1882 struct ureg_dst ctr; 1883 struct ureg_dst tmp; 1884 struct ureg_src ctrx; 1885 1886 label = tx_bgnloop(tx); 1887 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); 1888 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1889 1890 /* NOTE: rep must be constant, so we don't have to save the count */ 1891 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); 1892 1893 /* rep: num_iterations - 0 - 0 - 0 */ 1894 ureg_MOV(ureg, ctr, rep); 1895 ureg_BGNLOOP(ureg, label); 1896 tmp = tx_scratch_scalar(tx); 1897 /* Initially ctr.x contains the number of iterations. 1898 * We decrease ctr.x at the end of every iteration, 1899 * and stop when it reaches 0. 
*/ 1900 1901 if (!tx->native_integers) { 1902 /* case src and ctr contain floats */ 1903 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1904 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1905 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1906 } else { 1907 /* case src and ctr contain integers */ 1908 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1909 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1910 } 1911 ureg_BRK(ureg); 1912 tx_endcond(tx); 1913 ureg_ENDIF(ureg); 1914 1915 return D3D_OK; 1916} 1917 1918DECL_SPECIAL(ENDREP) 1919{ 1920 struct ureg_program *ureg = tx->ureg; 1921 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); 1922 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1923 struct ureg_src src_ctr = ureg_src(ctr); 1924 1925 /* ctr.x -= 1 */ 1926 if (!tx->native_integers) 1927 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1928 else 1929 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1930 1931 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1932 return D3D_OK; 1933} 1934 1935DECL_SPECIAL(ENDIF) 1936{ 1937 tx_endcond(tx); 1938 ureg_ENDIF(tx->ureg); 1939 return D3D_OK; 1940} 1941 1942DECL_SPECIAL(IF) 1943{ 1944 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1945 1946 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL) 1947 ureg_UIF(tx->ureg, src, tx_cond(tx)); 1948 else 1949 ureg_IF(tx->ureg, src, tx_cond(tx)); 1950 1951 return D3D_OK; 1952} 1953 1954static inline unsigned 1955sm1_insn_flags_to_tgsi_setop(BYTE flags) 1956{ 1957 switch (flags) { 1958 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT; 1959 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ; 1960 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE; 1961 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT; 1962 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE; 1963 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; 1964 default: 1965 assert(!"invalid comparison flags"); 1966 return 
TGSI_OPCODE_SGT; 1967 } 1968} 1969 1970DECL_SPECIAL(IFC) 1971{ 1972 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 1973 struct ureg_src src[2]; 1974 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 1975 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1976 src[1] = tx_src_param(tx, &tx->insn.src[1]); 1977 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 1978 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 1979 return D3D_OK; 1980} 1981 1982DECL_SPECIAL(ELSE) 1983{ 1984 ureg_ELSE(tx->ureg, tx_elsecond(tx)); 1985 return D3D_OK; 1986} 1987 1988DECL_SPECIAL(BREAKC) 1989{ 1990 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 1991 struct ureg_src src[2]; 1992 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 1993 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1994 src[1] = tx_src_param(tx, &tx->insn.src[1]); 1995 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 1996 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 1997 ureg_BRK(tx->ureg); 1998 tx_endcond(tx); 1999 ureg_ENDIF(tx->ureg); 2000 return D3D_OK; 2001} 2002 2003static const char *sm1_declusage_names[] = 2004{ 2005 [D3DDECLUSAGE_POSITION] = "POSITION", 2006 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT", 2007 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES", 2008 [D3DDECLUSAGE_NORMAL] = "NORMAL", 2009 [D3DDECLUSAGE_PSIZE] = "PSIZE", 2010 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD", 2011 [D3DDECLUSAGE_TANGENT] = "TANGENT", 2012 [D3DDECLUSAGE_BINORMAL] = "BINORMAL", 2013 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR", 2014 [D3DDECLUSAGE_POSITIONT] = "POSITIONT", 2015 [D3DDECLUSAGE_COLOR] = "COLOR", 2016 [D3DDECLUSAGE_FOG] = "FOG", 2017 [D3DDECLUSAGE_DEPTH] = "DEPTH", 2018 [D3DDECLUSAGE_SAMPLE] = "SAMPLE" 2019}; 2020 2021static inline unsigned 2022sm1_to_nine_declusage(struct sm1_semantic *dcl) 2023{ 2024 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); 2025} 2026 2027static void 
2028sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, 2029 boolean tc, 2030 struct sm1_semantic *dcl) 2031{ 2032 BYTE index = dcl->usage_idx; 2033 2034 /* For everything that is not matching to a TGSI_SEMANTIC_****, 2035 * we match to a TGSI_SEMANTIC_GENERIC with index. 2036 * 2037 * The index can be anything UINT16 and usage_idx is BYTE, 2038 * so we can fit everything. It doesn't matter if indices 2039 * are close together or low. 2040 * 2041 * 2042 * POSITION >= 1: 10 * index + 7 2043 * COLOR >= 2: 10 * (index-1) + 8 2044 * FOG: 16 2045 * TEXCOORD[0..15]: index 2046 * BLENDWEIGHT: 10 * index + 19 2047 * BLENDINDICES: 10 * index + 20 2048 * NORMAL: 10 * index + 21 2049 * TANGENT: 10 * index + 22 2050 * BINORMAL: 10 * index + 23 2051 * TESSFACTOR: 10 * index + 24 2052 */ 2053 2054 switch (dcl->usage) { 2055 case D3DDECLUSAGE_POSITION: 2056 case D3DDECLUSAGE_POSITIONT: 2057 case D3DDECLUSAGE_DEPTH: 2058 if (index == 0) { 2059 sem->Name = TGSI_SEMANTIC_POSITION; 2060 sem->Index = 0; 2061 } else { 2062 sem->Name = TGSI_SEMANTIC_GENERIC; 2063 sem->Index = 10 * index + 7; 2064 } 2065 break; 2066 case D3DDECLUSAGE_COLOR: 2067 if (index < 2) { 2068 sem->Name = TGSI_SEMANTIC_COLOR; 2069 sem->Index = index; 2070 } else { 2071 sem->Name = TGSI_SEMANTIC_GENERIC; 2072 sem->Index = 10 * (index-1) + 8; 2073 } 2074 break; 2075 case D3DDECLUSAGE_FOG: 2076 assert(index == 0); 2077 sem->Name = TGSI_SEMANTIC_GENERIC; 2078 sem->Index = 16; 2079 break; 2080 case D3DDECLUSAGE_PSIZE: 2081 assert(index == 0); 2082 sem->Name = TGSI_SEMANTIC_PSIZE; 2083 sem->Index = 0; 2084 break; 2085 case D3DDECLUSAGE_TEXCOORD: 2086 assert(index < 16); 2087 if (index < 8 && tc) 2088 sem->Name = TGSI_SEMANTIC_TEXCOORD; 2089 else 2090 sem->Name = TGSI_SEMANTIC_GENERIC; 2091 sem->Index = index; 2092 break; 2093 case D3DDECLUSAGE_BLENDWEIGHT: 2094 sem->Name = TGSI_SEMANTIC_GENERIC; 2095 sem->Index = 10 * index + 19; 2096 break; 2097 case D3DDECLUSAGE_BLENDINDICES: 2098 sem->Name = 
TGSI_SEMANTIC_GENERIC; 2099 sem->Index = 10 * index + 20; 2100 break; 2101 case D3DDECLUSAGE_NORMAL: 2102 sem->Name = TGSI_SEMANTIC_GENERIC; 2103 sem->Index = 10 * index + 21; 2104 break; 2105 case D3DDECLUSAGE_TANGENT: 2106 sem->Name = TGSI_SEMANTIC_GENERIC; 2107 sem->Index = 10 * index + 22; 2108 break; 2109 case D3DDECLUSAGE_BINORMAL: 2110 sem->Name = TGSI_SEMANTIC_GENERIC; 2111 sem->Index = 10 * index + 23; 2112 break; 2113 case D3DDECLUSAGE_TESSFACTOR: 2114 sem->Name = TGSI_SEMANTIC_GENERIC; 2115 sem->Index = 10 * index + 24; 2116 break; 2117 case D3DDECLUSAGE_SAMPLE: 2118 sem->Name = TGSI_SEMANTIC_COUNT; 2119 sem->Index = 0; 2120 break; 2121 default: 2122 unreachable("Invalid DECLUSAGE."); 2123 break; 2124 } 2125} 2126 2127#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT) 2128#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) 2129#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) 2130#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) 2131static inline unsigned 2132d3dstt_to_tgsi_tex(BYTE sampler_type) 2133{ 2134 switch (sampler_type) { 2135 case NINED3DSTT_1D: return TGSI_TEXTURE_1D; 2136 case NINED3DSTT_2D: return TGSI_TEXTURE_2D; 2137 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D; 2138 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE; 2139 default: 2140 assert(0); 2141 return TGSI_TEXTURE_UNKNOWN; 2142 } 2143} 2144static inline unsigned 2145d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) 2146{ 2147 switch (sampler_type) { 2148 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D; 2149 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D; 2150 case NINED3DSTT_VOLUME: 2151 case NINED3DSTT_CUBE: 2152 default: 2153 assert(0); 2154 return TGSI_TEXTURE_UNKNOWN; 2155 } 2156} 2157static inline unsigned 2158ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) 2159{ 2160 boolean shadow = !!(info->sampler_mask_shadow & (1 << stage)); 2161 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { 2162 case 1: 
return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D; 2163 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D; 2164 case 3: return TGSI_TEXTURE_3D; 2165 default: 2166 return TGSI_TEXTURE_CUBE; 2167 } 2168} 2169 2170static const char * 2171sm1_sampler_type_name(BYTE sampler_type) 2172{ 2173 switch (sampler_type) { 2174 case NINED3DSTT_1D: return "1D"; 2175 case NINED3DSTT_2D: return "2D"; 2176 case NINED3DSTT_VOLUME: return "VOLUME"; 2177 case NINED3DSTT_CUBE: return "CUBE"; 2178 default: 2179 return "(D3DSTT_?)"; 2180 } 2181} 2182 2183static inline unsigned 2184nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) 2185{ 2186 switch (sem->Name) { 2187 case TGSI_SEMANTIC_POSITION: 2188 case TGSI_SEMANTIC_NORMAL: 2189 return TGSI_INTERPOLATE_LINEAR; 2190 case TGSI_SEMANTIC_BCOLOR: 2191 case TGSI_SEMANTIC_COLOR: 2192 return TGSI_INTERPOLATE_COLOR; 2193 case TGSI_SEMANTIC_FOG: 2194 case TGSI_SEMANTIC_GENERIC: 2195 case TGSI_SEMANTIC_TEXCOORD: 2196 case TGSI_SEMANTIC_CLIPDIST: 2197 case TGSI_SEMANTIC_CLIPVERTEX: 2198 return TGSI_INTERPOLATE_PERSPECTIVE; 2199 case TGSI_SEMANTIC_EDGEFLAG: 2200 case TGSI_SEMANTIC_FACE: 2201 case TGSI_SEMANTIC_INSTANCEID: 2202 case TGSI_SEMANTIC_PCOORD: 2203 case TGSI_SEMANTIC_PRIMID: 2204 case TGSI_SEMANTIC_PSIZE: 2205 case TGSI_SEMANTIC_VERTEXID: 2206 return TGSI_INTERPOLATE_CONSTANT; 2207 default: 2208 assert(0); 2209 return TGSI_INTERPOLATE_CONSTANT; 2210 } 2211} 2212 2213DECL_SPECIAL(DCL) 2214{ 2215 struct ureg_program *ureg = tx->ureg; 2216 boolean is_input; 2217 boolean is_sampler; 2218 struct tgsi_declaration_semantic tgsi; 2219 struct sm1_semantic sem; 2220 sm1_read_semantic(tx, &sem); 2221 2222 is_input = sem.reg.file == D3DSPR_INPUT; 2223 is_sampler = 2224 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER; 2225 2226 DUMP("DCL "); 2227 sm1_dump_dst_param(&sem.reg); 2228 if (is_sampler) 2229 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type)); 2230 else 2231 if (tx->version.major >= 3) 
2232 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx); 2233 else 2234 if (sem.usage | sem.usage_idx) 2235 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx); 2236 else 2237 DUMP("\n"); 2238 2239 if (is_sampler) { 2240 const unsigned m = 1 << sem.reg.idx; 2241 ureg_DECL_sampler(ureg, sem.reg.idx); 2242 tx->info->sampler_mask |= m; 2243 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ? 2244 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) : 2245 d3dstt_to_tgsi_tex(sem.sampler_type); 2246 return D3D_OK; 2247 } 2248 2249 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem); 2250 if (IS_VS) { 2251 if (is_input) { 2252 /* linkage outside of shader with vertex declaration */ 2253 ureg_DECL_vs_input(ureg, sem.reg.idx); 2254 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); 2255 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); 2256 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); 2257 /* NOTE: preserving order in case of indirect access */ 2258 } else 2259 if (tx->version.major >= 3) { 2260 /* SM2 output semantic determined by file */ 2261 assert(sem.reg.mask != 0); 2262 if (sem.usage == D3DDECLUSAGE_POSITIONT) 2263 tx->info->position_t = TRUE; 2264 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o)); 2265 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); 2266 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( 2267 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); 2268 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); 2269 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { 2270 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; 2271 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2272 tx->regs.oPos = tx->regs.o[sem.reg.idx]; 2273 } 2274 2275 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { 2276 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2277 tx->regs.oPts = tx->regs.o[sem.reg.idx]; 2278 } 2279 } 2280 } else 
{ 2281 if (is_input && tx->version.major >= 3) { 2282 unsigned interp_location = 0; 2283 /* SM3 only, SM2 input semantic determined by file */ 2284 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v)); 2285 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing"); 2286 /* PositionT and tessfactor forbidden */ 2287 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR) 2288 return D3DERR_INVALIDCALL; 2289 2290 if (tgsi.Name == TGSI_SEMANTIC_POSITION) { 2291 /* Position0 is forbidden (likely because vPos already does that) */ 2292 if (sem.usage == D3DDECLUSAGE_POSITION) 2293 return D3DERR_INVALIDCALL; 2294 /* Following code is for depth */ 2295 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); 2296 return D3D_OK; 2297 } 2298 2299 if (sem.reg.mod & NINED3DSPDM_CENTROID || 2300 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) 2301 interp_location = TGSI_INTERPOLATE_LOC_CENTROID; 2302 2303 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid( 2304 ureg, tgsi.Name, tgsi.Index, 2305 nine_tgsi_to_interp_mode(&tgsi), 2306 0, /* cylwrap */ 2307 interp_location, 0, 1); 2308 } else 2309 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ 2310 /* FragColor or FragDepth */ 2311 assert(sem.reg.mask != 0); 2312 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 2313 0, 1); 2314 } 2315 } 2316 return D3D_OK; 2317} 2318 2319DECL_SPECIAL(DEF) 2320{ 2321 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f); 2322 return D3D_OK; 2323} 2324 2325DECL_SPECIAL(DEFB) 2326{ 2327 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b); 2328 return D3D_OK; 2329} 2330 2331DECL_SPECIAL(DEFI) 2332{ 2333 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i); 2334 return D3D_OK; 2335} 2336 2337DECL_SPECIAL(POW) 2338{ 2339 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2340 struct ureg_src src[2] = { 2341 tx_src_param(tx, &tx->insn.src[0]), 2342 
tx_src_param(tx, &tx->insn.src[1]) 2343 }; 2344 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]); 2345 return D3D_OK; 2346} 2347 2348/* Tests results on Win 10: 2349 * NV (NVIDIA GeForce GT 635M) 2350 * AMD (AMD Radeon HD 7730M) 2351 * INTEL (Intel(R) HD Graphics 4000) 2352 * PS2 and PS3: 2353 * RCP and RSQ can generate inf on NV and AMD. 2354 * RCP and RSQ are clamped on INTEL (+- FLT_MAX), 2355 * NV: log not clamped 2356 * AMD: log(0) is -FLT_MAX (but log(inf) is inf) 2357 * INTEL: log(0) is -FLT_MAX and log(inf) is 127 2358 * All devices have 0*anything = 0 2359 * 2360 * INTEL VS2 and VS3: same behaviour. 2361 * Some differences VS2 and VS3 for constants defined with inf/NaN. 2362 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change, 2363 * VS2 seems to clamp to zero (may be test failure). 2364 * AMD VS2: unknown, VS3: very likely behaviour of PS3 2365 * NV VS2 and VS3: very likely behaviour of PS3 2366 * For both, Inf in VS becomes NaN is PS 2367 * "Very likely" because the test was less extensive. 2368 * 2369 * Thus all clamping can be removed for shaders 2 and 3, 2370 * as long as 0*anything = 0. 2371 * Else clamps to enforce 0*anything = 0 (anything being then 2372 * neither inf or NaN, the user being unlikely to pass them 2373 * as constant). 2374 * The status for VS1 and PS1 is unknown. 2375 */ 2376 2377DECL_SPECIAL(RCP) 2378{ 2379 struct ureg_program *ureg = tx->ureg; 2380 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2381 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2382 struct ureg_dst tmp = tx->mul_zero_wins ? 
dst : tx_scratch(tx); 2383 ureg_RCP(ureg, tmp, src); 2384 if (!tx->mul_zero_wins) { 2385 /* FLT_MAX has issues with Rayman */ 2386 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp)); 2387 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp)); 2388 } 2389 return D3D_OK; 2390} 2391 2392DECL_SPECIAL(RSQ) 2393{ 2394 struct ureg_program *ureg = tx->ureg; 2395 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2396 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2397 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx); 2398 ureg_RSQ(ureg, tmp, ureg_abs(src)); 2399 if (!tx->mul_zero_wins) 2400 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); 2401 return D3D_OK; 2402} 2403 2404DECL_SPECIAL(LOG) 2405{ 2406 struct ureg_program *ureg = tx->ureg; 2407 struct ureg_dst tmp = tx_scratch_scalar(tx); 2408 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2409 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2410 ureg_LG2(ureg, tmp, ureg_abs(src)); 2411 if (tx->mul_zero_wins) { 2412 ureg_MOV(ureg, dst, tx_src_scalar(tmp)); 2413 } else { 2414 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp)); 2415 } 2416 return D3D_OK; 2417} 2418 2419DECL_SPECIAL(LIT) 2420{ 2421 struct ureg_program *ureg = tx->ureg; 2422 struct ureg_dst tmp = tx_scratch(tx); 2423 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2424 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2425 ureg_LIT(ureg, tmp, src); 2426 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 2427 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign 2428 * it 0^0 if src.w=0, which value is driver dependent. 
*/ 2429 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 2430 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), 2431 ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); 2432 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); 2433 return D3D_OK; 2434} 2435 2436DECL_SPECIAL(NRM) 2437{ 2438 struct ureg_program *ureg = tx->ureg; 2439 struct ureg_dst tmp = tx_scratch_scalar(tx); 2440 struct ureg_src nrm = tx_src_scalar(tmp); 2441 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2442 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2443 ureg_DP3(ureg, tmp, src, src); 2444 ureg_RSQ(ureg, tmp, nrm); 2445 if (!tx->mul_zero_wins) 2446 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm); 2447 ureg_MUL(ureg, dst, src, nrm); 2448 return D3D_OK; 2449} 2450 2451DECL_SPECIAL(DP2ADD) 2452{ 2453 struct ureg_dst tmp = tx_scratch_scalar(tx); 2454 struct ureg_src dp2 = tx_src_scalar(tmp); 2455 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2456 struct ureg_src src[3]; 2457 int i; 2458 for (i = 0; i < 3; ++i) 2459 src[i] = tx_src_param(tx, &tx->insn.src[i]); 2460 assert_replicate_swizzle(&src[2]); 2461 2462 ureg_DP2(tx->ureg, tmp, src[0], src[1]); 2463 ureg_ADD(tx->ureg, dst, src[2], dp2); 2464 2465 return D3D_OK; 2466} 2467 2468DECL_SPECIAL(TEXCOORD) 2469{ 2470 struct ureg_program *ureg = tx->ureg; 2471 const unsigned s = tx->insn.dst[0].idx; 2472 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2473 2474 tx_texcoord_alloc(tx, s); 2475 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]); 2476 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f)); 2477 2478 return D3D_OK; 2479} 2480 2481DECL_SPECIAL(TEXCOORD_ps14) 2482{ 2483 struct ureg_program *ureg = tx->ureg; 2484 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2485 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2486 2487 assert(tx->insn.src[0].file == D3DSPR_TEXTURE); 2488 2489 ureg_MOV(ureg, dst, 
src); 2490 2491 return D3D_OK; 2492} 2493 2494DECL_SPECIAL(TEXKILL) 2495{ 2496 struct ureg_src reg; 2497 2498 if (tx->version.major > 1 || tx->version.minor > 3) { 2499 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]); 2500 } else { 2501 tx_texcoord_alloc(tx, tx->insn.dst[0].idx); 2502 reg = tx->regs.vT[tx->insn.dst[0].idx]; 2503 } 2504 if (tx->version.major < 2) 2505 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z)); 2506 ureg_KILL_IF(tx->ureg, reg); 2507 2508 return D3D_OK; 2509} 2510 2511DECL_SPECIAL(TEXBEM) 2512{ 2513 struct ureg_program *ureg = tx->ureg; 2514 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2515 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2516 struct ureg_dst tmp, tmp2, texcoord; 2517 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2; 2518 struct ureg_src bumpenvlscale, bumpenvloffset; 2519 const int m = tx->insn.dst[0].idx; 2520 2521 assert(tx->version.major == 1); 2522 2523 sample = ureg_DECL_sampler(ureg, m); 2524 tx->info->sampler_mask |= 1 << m; 2525 2526 tx_texcoord_alloc(tx, m); 2527 2528 tmp = tx_scratch(tx); 2529 tmp2 = tx_scratch(tx); 2530 texcoord = tx_scratch(tx); 2531 /* 2532 * Bump-env-matrix: 2533 * 00 is X 2534 * 01 is Y 2535 * 10 is Z 2536 * 11 is W 2537 */ 2538 c8m = nine_float_constant_src(tx, 8+m); 2539 c16m2 = nine_float_constant_src(tx, 8+8+m/2); 2540 2541 m00 = NINE_APPLY_SWIZZLE(c8m, X); 2542 m01 = NINE_APPLY_SWIZZLE(c8m, Y); 2543 m10 = NINE_APPLY_SWIZZLE(c8m, Z); 2544 m11 = NINE_APPLY_SWIZZLE(c8m, W); 2545 2546 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ 2547 if (m % 2 == 0) { 2548 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X); 2549 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y); 2550 } else { 2551 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z); 2552 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W); 2553 } 2554 2555 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); 2556 2557 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage 
m)*t(n)R */ 2558 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2559 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 2560 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ 2561 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2562 NINE_APPLY_SWIZZLE(src, Y), 2563 NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2564 2565 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ 2566 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2567 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 2568 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ 2569 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2570 NINE_APPLY_SWIZZLE(src, Y), 2571 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2572 2573 /* Now the texture coordinates are in tmp.xy */ 2574 2575 if (tx->insn.opcode == D3DSIO_TEXBEM) { 2576 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2577 } else if (tx->insn.opcode == D3DSIO_TEXBEML) { 2578 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ 2579 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2580 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z), 2581 bumpenvlscale, bumpenvloffset); 2582 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); 2583 } 2584 2585 tx->info->bumpenvmat_needed = 1; 2586 2587 return D3D_OK; 2588} 2589 2590DECL_SPECIAL(TEXREG2AR) 2591{ 2592 struct ureg_program *ureg = tx->ureg; 2593 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2594 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2595 struct ureg_src sample; 2596 const int m = tx->insn.dst[0].idx; 2597 const int n = tx->insn.src[0].idx; 2598 assert(m >= 0 && m > n); 2599 2600 sample = ureg_DECL_sampler(ureg, m); 2601 tx->info->sampler_mask |= 1 << m; 2602 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample); 2603 2604 return D3D_OK; 2605} 2606 
2607DECL_SPECIAL(TEXREG2GB) 2608{ 2609 struct ureg_program *ureg = tx->ureg; 2610 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2611 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2612 struct ureg_src sample; 2613 const int m = tx->insn.dst[0].idx; 2614 const int n = tx->insn.src[0].idx; 2615 assert(m >= 0 && m > n); 2616 2617 sample = ureg_DECL_sampler(ureg, m); 2618 tx->info->sampler_mask |= 1 << m; 2619 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample); 2620 2621 return D3D_OK; 2622} 2623 2624DECL_SPECIAL(TEXM3x2PAD) 2625{ 2626 return D3D_OK; /* this is just padding */ 2627} 2628 2629DECL_SPECIAL(TEXM3x2TEX) 2630{ 2631 struct ureg_program *ureg = tx->ureg; 2632 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2633 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2634 struct ureg_src sample; 2635 const int m = tx->insn.dst[0].idx - 1; 2636 const int n = tx->insn.src[0].idx; 2637 assert(m >= 0 && m > n); 2638 2639 tx_texcoord_alloc(tx, m); 2640 tx_texcoord_alloc(tx, m+1); 2641 2642 /* performs the matrix multiplication */ 2643 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2644 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2645 2646 sample = ureg_DECL_sampler(ureg, m + 1); 2647 tx->info->sampler_mask |= 1 << (m + 1); 2648 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample); 2649 2650 return D3D_OK; 2651} 2652 2653DECL_SPECIAL(TEXM3x3PAD) 2654{ 2655 return D3D_OK; /* this is just padding */ 2656} 2657 2658DECL_SPECIAL(TEXM3x3SPEC) 2659{ 2660 struct ureg_program *ureg = tx->ureg; 2661 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2662 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2663 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]); 2664 struct ureg_src sample; 2665 struct ureg_dst tmp; 2666 const int m = tx->insn.dst[0].idx 
- 2; 2667 const int n = tx->insn.src[0].idx; 2668 assert(m >= 0 && m > n); 2669 2670 tx_texcoord_alloc(tx, m); 2671 tx_texcoord_alloc(tx, m+1); 2672 tx_texcoord_alloc(tx, m+2); 2673 2674 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2675 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2676 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 2677 2678 sample = ureg_DECL_sampler(ureg, m + 2); 2679 tx->info->sampler_mask |= 1 << (m + 2); 2680 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2681 2682 /* At this step, dst = N = (u', w', z'). 2683 * We want dst to be the texture sampled at (u'', w'', z''), with 2684 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2685 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2686 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2687 /* at this step tmp.x = 1/N.N */ 2688 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E); 2689 /* at this step tmp.y = N.E */ 2690 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2691 /* at this step tmp.x = N.E/N.N */ 2692 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2693 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2694 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2695 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E)); 2696 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2697 2698 return D3D_OK; 2699} 2700 2701DECL_SPECIAL(TEXREG2RGB) 2702{ 2703 struct ureg_program *ureg = tx->ureg; 2704 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2705 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2706 struct ureg_src sample; 2707 const int 
m = tx->insn.dst[0].idx; 2708 const int n = tx->insn.src[0].idx; 2709 assert(m >= 0 && m > n); 2710 2711 sample = ureg_DECL_sampler(ureg, m); 2712 tx->info->sampler_mask |= 1 << m; 2713 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample); 2714 2715 return D3D_OK; 2716} 2717 2718DECL_SPECIAL(TEXDP3TEX) 2719{ 2720 struct ureg_program *ureg = tx->ureg; 2721 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2722 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2723 struct ureg_dst tmp; 2724 struct ureg_src sample; 2725 const int m = tx->insn.dst[0].idx; 2726 const int n = tx->insn.src[0].idx; 2727 assert(m >= 0 && m > n); 2728 2729 tx_texcoord_alloc(tx, m); 2730 2731 tmp = tx_scratch(tx); 2732 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2733 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f)); 2734 2735 sample = ureg_DECL_sampler(ureg, m); 2736 tx->info->sampler_mask |= 1 << m; 2737 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2738 2739 return D3D_OK; 2740} 2741 2742DECL_SPECIAL(TEXM3x2DEPTH) 2743{ 2744 struct ureg_program *ureg = tx->ureg; 2745 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2746 struct ureg_dst tmp; 2747 const int m = tx->insn.dst[0].idx - 1; 2748 const int n = tx->insn.src[0].idx; 2749 assert(m >= 0 && m > n); 2750 2751 tx_texcoord_alloc(tx, m); 2752 tx_texcoord_alloc(tx, m+1); 2753 2754 tmp = tx_scratch(tx); 2755 2756 /* performs the matrix multiplication */ 2757 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2758 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2759 2760 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2761 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. 
*/ 2762 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z)); 2763 /* res = 'w' == 0 ? 1.0 : z/w */ 2764 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), 2765 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); 2766 /* replace the depth for depth testing with the result */ 2767 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2768 TGSI_WRITEMASK_Z, 0, 1); 2769 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2770 /* note that we write nothing to the destination, since it's disallowed to use it afterward */ 2771 return D3D_OK; 2772} 2773 2774DECL_SPECIAL(TEXDP3) 2775{ 2776 struct ureg_program *ureg = tx->ureg; 2777 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2778 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2779 const int m = tx->insn.dst[0].idx; 2780 const int n = tx->insn.src[0].idx; 2781 assert(m >= 0 && m > n); 2782 2783 tx_texcoord_alloc(tx, m); 2784 2785 ureg_DP3(ureg, dst, tx->regs.vT[m], src); 2786 2787 return D3D_OK; 2788} 2789 2790DECL_SPECIAL(TEXM3x3) 2791{ 2792 struct ureg_program *ureg = tx->ureg; 2793 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2794 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2795 struct ureg_src sample; 2796 struct ureg_dst E, tmp; 2797 const int m = tx->insn.dst[0].idx - 2; 2798 const int n = tx->insn.src[0].idx; 2799 assert(m >= 0 && m > n); 2800 2801 tx_texcoord_alloc(tx, m); 2802 tx_texcoord_alloc(tx, m+1); 2803 tx_texcoord_alloc(tx, m+2); 2804 2805 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2806 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2807 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 2808 2809 switch (tx->insn.opcode) { 2810 
case D3DSIO_TEXM3x3: 2811 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 2812 break; 2813 case D3DSIO_TEXM3x3TEX: 2814 sample = ureg_DECL_sampler(ureg, m + 2); 2815 tx->info->sampler_mask |= 1 << (m + 2); 2816 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample); 2817 break; 2818 case D3DSIO_TEXM3x3VSPEC: 2819 sample = ureg_DECL_sampler(ureg, m + 2); 2820 tx->info->sampler_mask |= 1 << (m + 2); 2821 E = tx_scratch(tx); 2822 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2823 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W)); 2824 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W)); 2825 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W)); 2826 /* At this step, dst = N = (u', w', z'). 2827 * We want dst to be the texture sampled at (u'', w'', z''), with 2828 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2829 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2830 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2831 /* at this step tmp.x = 1/N.N */ 2832 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E)); 2833 /* at this step tmp.y = N.E */ 2834 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2835 /* at this step tmp.x = N.E/N.N */ 2836 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2837 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2838 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2839 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E))); 2840 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2841 break; 
2842 default: 2843 return D3DERR_INVALIDCALL; 2844 } 2845 return D3D_OK; 2846} 2847 2848DECL_SPECIAL(TEXDEPTH) 2849{ 2850 struct ureg_program *ureg = tx->ureg; 2851 struct ureg_dst r5; 2852 struct ureg_src r5r, r5g; 2853 2854 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */ 2855 2856 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g. 2857 * r5 won't be used afterward, thus we can use r5.ba */ 2858 r5 = tx->regs.r[5]; 2859 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X); 2860 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y); 2861 2862 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g); 2863 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z)); 2864 /* r5.r = r/g */ 2865 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), 2866 r5r, ureg_imm1f(ureg, 1.0f)); 2867 /* replace the depth for depth testing with the result */ 2868 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2869 TGSI_WRITEMASK_Z, 0, 1); 2870 ureg_MOV(ureg, tx->regs.oDepth, r5r); 2871 2872 return D3D_OK; 2873} 2874 2875DECL_SPECIAL(BEM) 2876{ 2877 struct ureg_program *ureg = tx->ureg; 2878 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2879 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 2880 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 2881 struct ureg_src m00, m01, m10, m11, c8m; 2882 const int m = tx->insn.dst[0].idx; 2883 struct ureg_dst tmp; 2884 /* 2885 * Bump-env-matrix: 2886 * 00 is X 2887 * 01 is Y 2888 * 10 is Z 2889 * 11 is W 2890 */ 2891 c8m = nine_float_constant_src(tx, 8+m); 2892 m00 = NINE_APPLY_SWIZZLE(c8m, X); 2893 m01 = NINE_APPLY_SWIZZLE(c8m, Y); 2894 m10 = NINE_APPLY_SWIZZLE(c8m, Z); 2895 m11 = NINE_APPLY_SWIZZLE(c8m, W); 2896 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ 2897 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2898 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, 
X)); 2899 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ 2900 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2901 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2902 2903 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ 2904 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2905 NINE_APPLY_SWIZZLE(src1, X), src0); 2906 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ 2907 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2908 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2909 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); 2910 2911 tx->info->bumpenvmat_needed = 1; 2912 2913 return D3D_OK; 2914} 2915 2916DECL_SPECIAL(TEXLD) 2917{ 2918 struct ureg_program *ureg = tx->ureg; 2919 unsigned target; 2920 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2921 struct ureg_src src[2] = { 2922 tx_src_param(tx, &tx->insn.src[0]), 2923 tx_src_param(tx, &tx->insn.src[1]) 2924 }; 2925 assert(tx->insn.src[1].idx >= 0 && 2926 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2927 target = tx->sampler_targets[tx->insn.src[1].idx]; 2928 2929 switch (tx->insn.flags) { 2930 case 0: 2931 ureg_TEX(ureg, dst, target, src[0], src[1]); 2932 break; 2933 case NINED3DSI_TEXLD_PROJECT: 2934 ureg_TXP(ureg, dst, target, src[0], src[1]); 2935 break; 2936 case NINED3DSI_TEXLD_BIAS: 2937 ureg_TXB(ureg, dst, target, src[0], src[1]); 2938 break; 2939 default: 2940 assert(0); 2941 return D3DERR_INVALIDCALL; 2942 } 2943 return D3D_OK; 2944} 2945 2946DECL_SPECIAL(TEXLD_14) 2947{ 2948 struct ureg_program *ureg = tx->ureg; 2949 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2950 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2951 const unsigned s = tx->insn.dst[0].idx; 2952 const unsigned t = ps1x_sampler_type(tx->info, s); 2953 2954 tx->info->sampler_mask |= 1 << s; 2955 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s)); 2956 
2957 return D3D_OK; 2958} 2959 2960DECL_SPECIAL(TEX) 2961{ 2962 struct ureg_program *ureg = tx->ureg; 2963 const unsigned s = tx->insn.dst[0].idx; 2964 const unsigned t = ps1x_sampler_type(tx->info, s); 2965 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2966 struct ureg_src src[2]; 2967 2968 tx_texcoord_alloc(tx, s); 2969 2970 src[0] = tx->regs.vT[s]; 2971 src[1] = ureg_DECL_sampler(ureg, s); 2972 tx->info->sampler_mask |= 1 << s; 2973 2974 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); 2975 2976 return D3D_OK; 2977} 2978 2979DECL_SPECIAL(TEXLDD) 2980{ 2981 unsigned target; 2982 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2983 struct ureg_src src[4] = { 2984 tx_src_param(tx, &tx->insn.src[0]), 2985 tx_src_param(tx, &tx->insn.src[1]), 2986 tx_src_param(tx, &tx->insn.src[2]), 2987 tx_src_param(tx, &tx->insn.src[3]) 2988 }; 2989 assert(tx->insn.src[1].idx >= 0 && 2990 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2991 target = tx->sampler_targets[tx->insn.src[1].idx]; 2992 2993 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); 2994 return D3D_OK; 2995} 2996 2997DECL_SPECIAL(TEXLDL) 2998{ 2999 unsigned target; 3000 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 3001 struct ureg_src src[2] = { 3002 tx_src_param(tx, &tx->insn.src[0]), 3003 tx_src_param(tx, &tx->insn.src[1]) 3004 }; 3005 assert(tx->insn.src[1].idx >= 0 && 3006 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 3007 target = tx->sampler_targets[tx->insn.src[1].idx]; 3008 3009 ureg_TXL(tx->ureg, dst, target, src[0], src[1]); 3010 return D3D_OK; 3011} 3012 3013DECL_SPECIAL(SETP) 3014{ 3015 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 3016 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 3017 struct ureg_src src[2] = { 3018 tx_src_param(tx, &tx->insn.src[0]), 3019 tx_src_param(tx, &tx->insn.src[1]) 3020 }; 3021 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0); 3022 return D3D_OK; 3023} 3024 
3025DECL_SPECIAL(BREAKP) 3026{ 3027 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 3028 ureg_IF(tx->ureg, src, tx_cond(tx)); 3029 ureg_BRK(tx->ureg); 3030 tx_endcond(tx); 3031 ureg_ENDIF(tx->ureg); 3032 return D3D_OK; 3033} 3034 3035DECL_SPECIAL(PHASE) 3036{ 3037 return D3D_OK; /* we don't care about phase */ 3038} 3039 3040DECL_SPECIAL(COMMENT) 3041{ 3042 return D3D_OK; /* nothing to do */ 3043} 3044 3045 3046#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \ 3047 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h } 3048 3049static const struct sm1_op_info inst_table[] = 3050{ 3051 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */ 3052 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), 3053 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ 3054 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ 3055 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ 3056 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ 3057 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ 3058 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ 3059 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ 3060 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ 3061 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */ 3062 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */ 3063 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */ 3064 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ 3065 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ 3066 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ 3067 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ 3068 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ 3069 _OPI(LRP, LRP, V(0,0), 
V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ 3070 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ 3071 3072 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)), 3073 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)), 3074 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)), 3075 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)), 3076 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)), 3077 3078 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)), 3079 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)), 3080 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)), 3081 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)), 3082 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)), 3083 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)), 3084 3085 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)), 3086 3087 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)), 3088 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */ 3089 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ 3090 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)), 3091 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ 3092 3093 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), 3094 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), 3095 3096 /* More flow control */ 3097 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)), 3098 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)), 3099 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)), 3100 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)), 3101 _OPI(ELSE, ELSE, V(2,0), 
V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)), 3102 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)), 3103 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL), 3104 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)), 3105 /* we don't write to the address register, but a normal register (copied 3106 * when needed to the address register), thus we don't use ARR */ 3107 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 3108 3109 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)), 3110 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)), 3111 3112 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)), 3113 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)), 3114 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)), 3115 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)), 3116 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), 3117 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), 3118 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 3119 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 3120 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), 3121 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), 3122 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), 3123 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)), 3124 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)), 3125 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3126 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)), 3127 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3128 3129 
_OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), 3130 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 3131 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)), 3132 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), 3133 3134 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)), 3135 3136 /* More tex stuff */ 3137 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)), 3138 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)), 3139 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)), 3140 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)), 3141 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3142 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)), 3143 3144 /* Misc */ 3145 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */ 3146 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)), 3147 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), 3148 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 3149 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 3150 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)), 3151 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)), 3152 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)), 3153 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP)) 3154}; 3155 3156static const struct sm1_op_info inst_phase = 3157 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE)); 3158 3159static const struct sm1_op_info inst_comment = 3160 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT)); 3161 3162static void 3163create_op_info_map(struct shader_translator *tx) 3164{ 3165 const unsigned version = (tx->version.major << 
8) | tx->version.minor; 3166 unsigned i; 3167 3168 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i) 3169 tx->op_info_map[i] = -1; 3170 3171 if (tx->processor == PIPE_SHADER_VERTEX) { 3172 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3173 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3174 if (inst_table[i].vert_version.min <= version && 3175 inst_table[i].vert_version.max >= version) 3176 tx->op_info_map[inst_table[i].sio] = i; 3177 } 3178 } else { 3179 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3180 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3181 if (inst_table[i].frag_version.min <= version && 3182 inst_table[i].frag_version.max >= version) 3183 tx->op_info_map[inst_table[i].sio] = i; 3184 } 3185 } 3186} 3187 3188static inline HRESULT 3189NineTranslateInstruction_Generic(struct shader_translator *tx) 3190{ 3191 struct ureg_dst dst[1]; 3192 struct ureg_src src[4]; 3193 unsigned i; 3194 3195 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i) 3196 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]); 3197 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i) 3198 src[i] = tx_src_param(tx, &tx->insn.src[i]); 3199 3200 ureg_insn(tx->ureg, tx->insn.info->opcode, 3201 dst, tx->insn.ndst, 3202 src, tx->insn.nsrc, 0); 3203 return D3D_OK; 3204} 3205 3206static inline DWORD 3207TOKEN_PEEK(struct shader_translator *tx) 3208{ 3209 return *(tx->parse); 3210} 3211 3212static inline DWORD 3213TOKEN_NEXT(struct shader_translator *tx) 3214{ 3215 return *(tx->parse)++; 3216} 3217 3218static inline void 3219TOKEN_JUMP(struct shader_translator *tx) 3220{ 3221 if (tx->parse_next && tx->parse != tx->parse_next) { 3222 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next); 3223 tx->parse = tx->parse_next; 3224 } 3225} 3226 3227static inline boolean 3228sm1_parse_eof(struct shader_translator *tx) 3229{ 3230 return TOKEN_PEEK(tx) == NINED3DSP_END; 3231} 3232 3233static void 3234sm1_read_version(struct shader_translator *tx) 3235{ 3236 const 
/*
 * Read one parameter token from the stream into *reg.
 *
 * If the token has D3DSHADER_ADDRMODE_RELATIVE set, *rel receives the
 * relative-addressing token:
 *  - for shader versions below 2 no such token is read from the stream, so
 *    one referring to the address register (D3DSPR_ADDR, no swizzle) is
 *    synthesised;
 *  - otherwise the next token of the stream is consumed.
 *
 * *rel is left untouched when the parameter is not relatively addressed.
 */
static void
sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
{
    *reg = TOKEN_NEXT(tx);

    if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
    {
        if (tx->version.major < 2)
            /* 1u, not 1: shifting a signed 1 into bit 31 is undefined. */
            *rel = (1u << 31) |
                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
                D3DSP_NOSWIZZLE;
        else
            *rel = TOKEN_NEXT(tx);
    }
}
dst->file = 3313 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT | 3314 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2; 3315 dst->type = TGSI_RETURN_TYPE_FLOAT; 3316 dst->idx = tok & D3DSP_REGNUM_MASK; 3317 dst->rel = NULL; 3318 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT; 3319 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT; 3320 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT; 3321 dst->shift = (shift & 0x7) - (shift & 0x8); 3322} 3323 3324static void 3325sm1_parse_src_param(struct sm1_src_param *src, DWORD tok) 3326{ 3327 src->file = 3328 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | 3329 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2); 3330 src->type = TGSI_RETURN_TYPE_FLOAT; 3331 src->idx = tok & D3DSP_REGNUM_MASK; 3332 src->rel = NULL; 3333 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT; 3334 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT; 3335 3336 switch (src->file) { 3337 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break; 3338 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break; 3339 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break; 3340 default: 3341 break; 3342 } 3343} 3344 3345static void 3346sm1_parse_immediate(struct shader_translator *tx, 3347 struct sm1_src_param *imm) 3348{ 3349 imm->file = NINED3DSPR_IMMEDIATE; 3350 imm->idx = INT_MIN; 3351 imm->rel = NULL; 3352 imm->swizzle = NINED3DSP_NOSWIZZLE; 3353 imm->mod = 0; 3354 switch (tx->insn.opcode) { 3355 case D3DSIO_DEF: 3356 imm->type = NINED3DSPTYPE_FLOAT4; 3357 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3358 tx->parse += 4; 3359 break; 3360 case D3DSIO_DEFI: 3361 imm->type = NINED3DSPTYPE_INT4; 3362 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3363 tx->parse += 4; 3364 break; 3365 case D3DSIO_DEFB: 3366 imm->type = NINED3DSPTYPE_BOOL; 3367 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD)); 3368 tx->parse += 1; 
3369 break; 3370 default: 3371 assert(0); 3372 break; 3373 } 3374} 3375 3376static void 3377sm1_read_dst_param(struct shader_translator *tx, 3378 struct sm1_dst_param *dst, 3379 struct sm1_src_param *rel) 3380{ 3381 DWORD tok_dst, tok_rel = 0; 3382 3383 sm1_parse_get_param(tx, &tok_dst, &tok_rel); 3384 sm1_parse_dst_param(dst, tok_dst); 3385 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) { 3386 sm1_parse_src_param(rel, tok_rel); 3387 dst->rel = rel; 3388 } 3389} 3390 3391static void 3392sm1_read_src_param(struct shader_translator *tx, 3393 struct sm1_src_param *src, 3394 struct sm1_src_param *rel) 3395{ 3396 DWORD tok_src, tok_rel = 0; 3397 3398 sm1_parse_get_param(tx, &tok_src, &tok_rel); 3399 sm1_parse_src_param(src, tok_src); 3400 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) { 3401 assert(rel); 3402 sm1_parse_src_param(rel, tok_rel); 3403 src->rel = rel; 3404 } 3405} 3406 3407static void 3408sm1_read_semantic(struct shader_translator *tx, 3409 struct sm1_semantic *sem) 3410{ 3411 const DWORD tok_usg = TOKEN_NEXT(tx); 3412 const DWORD tok_dst = TOKEN_NEXT(tx); 3413 3414 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT; 3415 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT; 3416 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; 3417 3418 sm1_parse_dst_param(&sem->reg, tok_dst); 3419} 3420 3421static void 3422sm1_parse_instruction(struct shader_translator *tx) 3423{ 3424 struct sm1_instruction *insn = &tx->insn; 3425 HRESULT hr; 3426 DWORD tok; 3427 const struct sm1_op_info *info = NULL; 3428 unsigned i; 3429 3430 sm1_parse_comments(tx, TRUE); 3431 sm1_parse_get_skip(tx); 3432 3433 tok = TOKEN_NEXT(tx); 3434 3435 insn->opcode = tok & D3DSI_OPCODE_MASK; 3436 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT; 3437 insn->coissue = !!(tok & D3DSI_COISSUE); 3438 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED); 3439 3440 if (insn->opcode < 
ARRAY_SIZE(tx->op_info_map)) { 3441 int k = tx->op_info_map[insn->opcode]; 3442 if (k >= 0) { 3443 assert(k < ARRAY_SIZE(inst_table)); 3444 info = &inst_table[k]; 3445 } 3446 } else { 3447 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase; 3448 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment; 3449 } 3450 if (!info) { 3451 DBG("illegal or unhandled opcode: %08x\n", insn->opcode); 3452 TOKEN_JUMP(tx); 3453 return; 3454 } 3455 insn->info = info; 3456 insn->ndst = info->ndst; 3457 insn->nsrc = info->nsrc; 3458 3459 /* check version */ 3460 { 3461 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min; 3462 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max; 3463 unsigned ver = (tx->version.major << 8) | tx->version.minor; 3464 if (ver < min || ver > max) { 3465 DBG("opcode not supported in this shader version: %x <= %x <= %x\n", 3466 min, ver, max); 3467 return; 3468 } 3469 } 3470 3471 for (i = 0; i < insn->ndst; ++i) 3472 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]); 3473 if (insn->predicated) 3474 sm1_read_src_param(tx, &insn->pred, NULL); 3475 for (i = 0; i < insn->nsrc; ++i) 3476 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]); 3477 3478 /* parse here so we can dump them before processing */ 3479 if (insn->opcode == D3DSIO_DEF || 3480 insn->opcode == D3DSIO_DEFI || 3481 insn->opcode == D3DSIO_DEFB) 3482 sm1_parse_immediate(tx, &tx->insn.src[0]); 3483 3484 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth); 3485 sm1_instruction_check(insn); 3486 3487 if (insn->predicated) { 3488 tx->predicated_activated = true; 3489 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) { 3490 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg); 3491 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg); 3492 } 3493 } 3494 3495 if (info->handler) 3496 hr = info->handler(tx); 3497 else 3498 hr = NineTranslateInstruction_Generic(tx); 3499 tx_apply_dst0_modifiers(tx); 3500 3501 if (insn->predicated) { 3502 
tx->predicated_activated = false; 3503 /* TODO: predicate might be allowed on outputs, 3504 * which cannot be src. Workaround it. */ 3505 ureg_CMP(tx->ureg, tx->regs.predicate_dst, 3506 ureg_negate(tx_src_param(tx, &insn->pred)), 3507 ureg_src(tx->regs.predicate_tmp), 3508 ureg_src(tx->regs.predicate_dst)); 3509 } 3510 3511 if (hr != D3D_OK) 3512 tx->failure = TRUE; 3513 tx->num_scratch = 0; /* reset */ 3514 3515 TOKEN_JUMP(tx); 3516} 3517 3518#define GET_CAP(n) screen->get_param( \ 3519 screen, PIPE_CAP_##n) 3520#define GET_SHADER_CAP(n) screen->get_shader_param( \ 3521 screen, info->type, PIPE_SHADER_CAP_##n) 3522 3523static HRESULT 3524tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info) 3525{ 3526 unsigned i; 3527 3528 memset(tx, 0, sizeof(*tx)); 3529 3530 tx->info = info; 3531 3532 tx->byte_code = info->byte_code; 3533 tx->parse = info->byte_code; 3534 3535 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i) 3536 info->input_map[i] = NINE_DECLUSAGE_NONE; 3537 info->num_inputs = 0; 3538 3539 info->position_t = FALSE; 3540 info->point_size = FALSE; 3541 3542 memset(tx->slots_used, 0, sizeof(tx->slots_used)); 3543 memset(info->int_slots_used, 0, sizeof(info->int_slots_used)); 3544 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used)); 3545 3546 tx->info->const_float_slots = 0; 3547 tx->info->const_int_slots = 0; 3548 tx->info->const_bool_slots = 0; 3549 3550 info->sampler_mask = 0x0; 3551 info->rt_mask = 0x0; 3552 3553 info->lconstf.data = NULL; 3554 info->lconstf.ranges = NULL; 3555 3556 info->bumpenvmat_needed = 0; 3557 3558 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) { 3559 tx->regs.rL[i] = ureg_dst_undef(); 3560 } 3561 tx->regs.address = ureg_dst_undef(); 3562 tx->regs.a0 = ureg_dst_undef(); 3563 tx->regs.p = ureg_dst_undef(); 3564 tx->regs.oDepth = ureg_dst_undef(); 3565 tx->regs.vPos = ureg_src_undef(); 3566 tx->regs.vFace = ureg_src_undef(); 3567 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i) 3568 
tx->regs.o[i] = ureg_dst_undef(); 3569 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i) 3570 tx->regs.oCol[i] = ureg_dst_undef(); 3571 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i) 3572 tx->regs.vC[i] = ureg_src_undef(); 3573 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i) 3574 tx->regs.vT[i] = ureg_src_undef(); 3575 3576 sm1_read_version(tx); 3577 3578 info->version = (tx->version.major << 4) | tx->version.minor; 3579 3580 tx->num_outputs = 0; 3581 3582 create_op_info_map(tx); 3583 3584 tx->ureg = ureg_create(info->type); 3585 if (!tx->ureg) { 3586 return E_OUTOFMEMORY; 3587 } 3588 3589 tx->native_integers = GET_SHADER_CAP(INTEGERS); 3590 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES); 3591 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD); 3592 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3593 tx->texcoord_sn = tx->want_texcoord ? 3594 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; 3595 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL); 3596 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL); 3597 3598 if (IS_VS) { 3599 tx->num_constf_allowed = NINE_MAX_CONST_F; 3600 } else if (tx->version.major < 2) {/* IS_PS v1 */ 3601 tx->num_constf_allowed = 8; 3602 } else if (tx->version.major == 2) {/* IS_PS v2 */ 3603 tx->num_constf_allowed = 32; 3604 } else {/* IS_PS v3 */ 3605 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; 3606 } 3607 3608 if (tx->version.major < 2) { 3609 tx->num_consti_allowed = 0; 3610 tx->num_constb_allowed = 0; 3611 } else { 3612 tx->num_consti_allowed = NINE_MAX_CONST_I; 3613 tx->num_constb_allowed = NINE_MAX_CONST_B; 3614 } 3615 3616 if (info->swvp_on && tx->version.major >= 2) { 3617 tx->num_constf_allowed = 8192; 3618 tx->num_consti_allowed = 2048; 3619 tx->num_constb_allowed = 2048; 3620 } 3621 3622 /* VS must always write position. Declare it here to make it the 1st output. 3623 * (Some drivers like nv50 are buggy and rely on that.) 
3624 */ 3625 if (IS_VS) { 3626 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 3627 } else { 3628 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 3629 if (!tx->shift_wpos) 3630 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3631 } 3632 3633 tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS); 3634 if (tx->mul_zero_wins) 3635 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1); 3636 3637 /* Add additional definition of constants */ 3638 if (info->add_constants_defs.c_combination) { 3639 unsigned i; 3640 3641 assert(info->add_constants_defs.int_const_added); 3642 assert(info->add_constants_defs.bool_const_added); 3643 /* We only add constants that are used by the shader 3644 * and that are not defined in the shader */ 3645 for (i = 0; i < NINE_MAX_CONST_I; ++i) { 3646 if ((*info->add_constants_defs.int_const_added)[i]) { 3647 DBG("Defining const i%i : { %i %i %i %i }\n", i, 3648 info->add_constants_defs.c_combination->const_i[i][0], 3649 info->add_constants_defs.c_combination->const_i[i][1], 3650 info->add_constants_defs.c_combination->const_i[i][2], 3651 info->add_constants_defs.c_combination->const_i[i][3]); 3652 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]); 3653 } 3654 } 3655 for (i = 0; i < NINE_MAX_CONST_B; ++i) { 3656 if ((*info->add_constants_defs.bool_const_added)[i]) { 3657 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0)); 3658 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]); 3659 } 3660 } 3661 } 3662 return D3D_OK; 3663} 3664 3665static void 3666tx_dtor(struct shader_translator *tx) 3667{ 3668 if (tx->slot_map) 3669 FREE(tx->slot_map); 3670 if (tx->num_inst_labels) 3671 FREE(tx->inst_labels); 3672 FREE(tx->lconstf); 3673 FREE(tx->regs.r); 3674 FREE(tx); 3675} 3676 3677/* CONST[0].xyz = width/2, -height/2, zmax-zmin 3678 * CONST[1].xyz = 
x+width/2, y+height/2, zmin */ 3679static void 3680shader_add_vs_viewport_transform(struct shader_translator *tx) 3681{ 3682 struct ureg_program *ureg = tx->ureg; 3683 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0); 3684 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1); 3685 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ 3686 3687 c0 = ureg_src_dimension(c0, 4); 3688 c1 = ureg_src_dimension(c1, 4); 3689 /* TODO: find out when we need to apply the viewport transformation or not. 3690 * Likely will be XYZ vs XYZRHW in vdecl_out 3691 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); 3692 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); 3693 */ 3694 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); 3695} 3696 3697static void 3698shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) 3699{ 3700 struct ureg_program *ureg = tx->ureg; 3701 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 3702 struct ureg_src fog_end, fog_coeff, fog_density, fog_params; 3703 struct ureg_src fog_vs, fog_color; 3704 struct ureg_dst fog_factor, depth; 3705 3706 if (!tx->info->fog_enable) { 3707 ureg_MOV(ureg, oCol0, src_col); 3708 return; 3709 } 3710 3711 if (tx->info->fog_mode != D3DFOG_NONE) { 3712 depth = tx_scratch_scalar(tx); 3713 /* Depth used for fog is perspective interpolated */ 3714 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W)); 3715 ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z)); 3716 } 3717 3718 fog_color = nine_float_constant_src(tx, 32); 3719 fog_params = nine_float_constant_src(tx, 33); 3720 fog_factor = tx_scratch_scalar(tx); 3721 3722 if (tx->info->fog_mode == D3DFOG_LINEAR) { 3723 fog_end = NINE_APPLY_SWIZZLE(fog_params, X); 3724 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y); 
3725 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth))); 3726 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); 3727 } else if (tx->info->fog_mode == D3DFOG_EXP) { 3728 fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 3729 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 3730 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3731 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3732 } else if (tx->info->fog_mode == D3DFOG_EXP2) { 3733 fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 3734 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 3735 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); 3736 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3737 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3738 } else { 3739 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, 3740 TGSI_INTERPOLATE_PERSPECTIVE), 3741 TGSI_SWIZZLE_X); 3742 ureg_MOV(ureg, fog_factor, fog_vs); 3743 } 3744 3745 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), 3746 tx_src_scalar(fog_factor), src_col, fog_color); 3747 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); 3748} 3749 3750static void parse_shader(struct shader_translator *tx) 3751{ 3752 struct nine_shader_info *info = tx->info; 3753 3754 while (!sm1_parse_eof(tx) && !tx->failure) 3755 sm1_parse_instruction(tx); 3756 tx->parse++; /* for byte_size */ 3757 3758 if (tx->failure) 3759 return; 3760 3761 if (IS_PS && tx->version.major < 3) { 3762 if (tx->version.major < 2) { 3763 assert(tx->num_temp); /* there must be color output */ 3764 info->rt_mask |= 0x1; 3765 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); 3766 } else { 3767 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); 3768 } 3769 } 3770 3771 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { 3772 
tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16); 3773 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); 3774 } 3775 3776 if (info->position_t) 3777 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 3778 3779 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { 3780 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); 3781 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); 3782 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); 3783 info->point_size = TRUE; 3784 } 3785 3786 if (info->process_vertices) 3787 shader_add_vs_viewport_transform(tx); 3788 3789 ureg_END(tx->ureg); 3790} 3791 3792HRESULT 3793nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe) 3794{ 3795 struct shader_translator *tx; 3796 HRESULT hr = D3D_OK; 3797 const unsigned processor = info->type; 3798 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen; 3799 unsigned *const_ranges = NULL; 3800 3801 user_assert(processor != ~0, D3DERR_INVALIDCALL); 3802 3803 tx = MALLOC_STRUCT(shader_translator); 3804 if (!tx) 3805 return E_OUTOFMEMORY; 3806 3807 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 3808 hr = E_OUTOFMEMORY; 3809 goto out; 3810 } 3811 3812 assert(IS_VS || !info->swvp_on); 3813 3814 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) { 3815 hr = D3DERR_INVALIDCALL; 3816 DBG("Unsupported shader version: %u.%u !\n", 3817 tx->version.major, tx->version.minor); 3818 goto out; 3819 } 3820 if (tx->processor != processor) { 3821 hr = D3DERR_INVALIDCALL; 3822 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor); 3823 goto out; 3824 } 3825 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? 
"VS" : "PS", 3826 tx->version.major, tx->version.minor); 3827 3828 parse_shader(tx); 3829 3830 if (tx->failure) { 3831 /* For VS shaders, we print the warning later, 3832 * we first try with swvp. */ 3833 if (IS_PS) 3834 ERR("Encountered buggy shader\n"); 3835 ureg_destroy(tx->ureg); 3836 hr = D3DERR_INVALIDCALL; 3837 goto out; 3838 } 3839 3840 /* Recompile after compacting constant slots if possible */ 3841 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) { 3842 unsigned *slot_map; 3843 unsigned c; 3844 int i, j, num_ranges, prev; 3845 3846 DBG("Recompiling shader for constant compaction\n"); 3847 ureg_destroy(tx->ureg); 3848 3849 if (tx->num_inst_labels) 3850 FREE(tx->inst_labels); 3851 FREE(tx->lconstf); 3852 FREE(tx->regs.r); 3853 3854 num_ranges = 0; 3855 prev = -2; 3856 for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 3857 if (tx->slots_used[i]) { 3858 if (prev != i - 1) 3859 num_ranges++; 3860 prev = i; 3861 } 3862 } 3863 slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned)); 3864 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */ 3865 if (!slot_map || !const_ranges) { 3866 hr = E_OUTOFMEMORY; 3867 goto out; 3868 } 3869 c = 0; 3870 j = -1; 3871 prev = -2; 3872 for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 3873 if (tx->slots_used[i]) { 3874 if (prev != i - 1) 3875 j++; 3876 /* Initialize first slot of the range */ 3877 if (!const_ranges[2*j+1]) 3878 const_ranges[2*j] = i; 3879 const_ranges[2*j+1]++; 3880 prev = i; 3881 slot_map[i] = c++; 3882 } 3883 } 3884 3885 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 3886 hr = E_OUTOFMEMORY; 3887 goto out; 3888 } 3889 tx->slot_map = slot_map; 3890 parse_shader(tx); 3891 assert(!tx->failure); 3892#if !defined(NDEBUG) 3893 i = 0; 3894 j = 0; 3895 while (const_ranges[i*2+1] != 0) { 3896 j += const_ranges[i*2+1]; 3897 i++; 3898 } 3899 assert(j == tx->num_slots); 3900#endif 3901 } 3902 3903 /* record local constants */ 3904 if (tx->num_lconstf && 
tx->indirect_const_access) { 3905 struct nine_range *ranges; 3906 float *data; 3907 int *indices; 3908 unsigned i, k, n; 3909 3910 hr = E_OUTOFMEMORY; 3911 3912 data = MALLOC(tx->num_lconstf * 4 * sizeof(float)); 3913 if (!data) 3914 goto out; 3915 info->lconstf.data = data; 3916 3917 indices = MALLOC(tx->num_lconstf * sizeof(indices[0])); 3918 if (!indices) 3919 goto out; 3920 3921 /* lazy sort, num_lconstf should be small */ 3922 for (n = 0; n < tx->num_lconstf; ++n) { 3923 for (k = 0, i = 0; i < tx->num_lconstf; ++i) { 3924 if (tx->lconstf[i].idx < tx->lconstf[k].idx) 3925 k = i; 3926 } 3927 indices[n] = tx->lconstf[k].idx; 3928 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); 3929 tx->lconstf[k].idx = INT_MAX; 3930 } 3931 3932 /* count ranges */ 3933 for (n = 1, i = 1; i < tx->num_lconstf; ++i) 3934 if (indices[i] != indices[i - 1] + 1) 3935 ++n; 3936 ranges = MALLOC(n * sizeof(ranges[0])); 3937 if (!ranges) { 3938 FREE(indices); 3939 goto out; 3940 } 3941 info->lconstf.ranges = ranges; 3942 3943 k = 0; 3944 ranges[k].bgn = indices[0]; 3945 for (i = 1; i < tx->num_lconstf; ++i) { 3946 if (indices[i] != indices[i - 1] + 1) { 3947 ranges[k].next = &ranges[k + 1]; 3948 ranges[k].end = indices[i - 1] + 1; 3949 ++k; 3950 ranges[k].bgn = indices[i]; 3951 } 3952 } 3953 ranges[k].end = indices[i - 1] + 1; 3954 ranges[k].next = NULL; 3955 assert(n == (k + 1)); 3956 3957 FREE(indices); 3958 hr = D3D_OK; 3959 } 3960 3961 /* r500 */ 3962 if (info->const_float_slots > device->max_vs_const_f && 3963 (info->const_int_slots || info->const_bool_slots) && 3964 !info->swvp_on) 3965 ERR("Overlapping constant slots. 
The shader is likely to be buggy\n"); 3966 3967 3968 if (tx->indirect_const_access) { /* vs only */ 3969 info->const_float_slots = device->max_vs_const_f; 3970 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f); 3971 } 3972 3973 if (!info->swvp_on) { 3974 info->const_used_size = sizeof(float[4]) * tx->num_slots; 3975 if (tx->num_slots) 3976 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0); 3977 } else { 3978 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); 3979 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); 3980 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); 3981 ureg_DECL_constant2D(tx->ureg, 0, 511, 3); 3982 } 3983 3984 if (info->process_vertices) 3985 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ 3986 3987 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { 3988 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL); 3989 tgsi_dump(toks, 0); 3990 ureg_free_tokens(toks); 3991 } 3992 3993 if (info->process_vertices) { 3994 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, 3995 tx->output_info, 3996 tx->num_outputs, 3997 &(info->so)); 3998 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); 3999 } else 4000 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe); 4001 if (!info->cso) { 4002 hr = D3DERR_DRIVERINTERNALERROR; 4003 FREE(info->lconstf.data); 4004 FREE(info->lconstf.ranges); 4005 goto out; 4006 } 4007 4008 info->const_ranges = const_ranges; 4009 const_ranges = NULL; 4010 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); 4011out: 4012 if (const_ranges) 4013 FREE(const_ranges); 4014 tx_dtor(tx); 4015 return hr; 4016} 4017