/*
 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2013 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nine_shader.h"

#include "device9.h"
#include "nine_debug.h"
#include "nine_state.h"
#include "vertexdeclaration9.h"

#include "util/macros.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_dump.h"
#include "nir/tgsi_to_nir.h"

#define DBG_CHANNEL DBG_SHADER

/* printf-style output on the shader debug channel, with no function name. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-opcode special handler (see sm1_op_info::handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

static inline const char *d3dsio_to_string(unsigned opcode);


#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the cond_labels / loop_labels stacks in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file for immediates: the first value after D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask with the shift already removed: one bit per component. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, selecting 0, 1, 2, 3 in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to the four TGSI_SWIZZLE_x arguments expected by ureg_swizzle(). */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicate component s of src into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier flags, shifted down by D3DSP_DSTMOD_SHIFT. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)

/*
 * Source modifiers and the shader versions that accept them:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
/* Source modifier codes, shifted down by D3DSP_SRCMOD_SHIFT. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
1187ec681f3Smrg#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT) 1197ec681f3Smrg#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT) 1207ec681f3Smrg#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT) 1217ec681f3Smrg#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT) 1227ec681f3Smrg#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT) 1237ec681f3Smrg#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT) 1247ec681f3Smrg#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT) 1257ec681f3Smrg#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT) 1267ec681f3Smrg#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT) 1277ec681f3Smrg#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT) 1287ec681f3Smrg#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT) 1297ec681f3Smrg#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT) 1307ec681f3Smrg 1317ec681f3Smrgstatic const char *sm1_mod_str[] = 1327ec681f3Smrg{ 1337ec681f3Smrg [NINED3DSPSM_NONE] = "", 1347ec681f3Smrg [NINED3DSPSM_NEG] = "-", 1357ec681f3Smrg [NINED3DSPSM_BIAS] = "bias", 1367ec681f3Smrg [NINED3DSPSM_BIASNEG] = "biasneg", 1377ec681f3Smrg [NINED3DSPSM_SIGN] = "sign", 1387ec681f3Smrg [NINED3DSPSM_SIGNNEG] = "signneg", 1397ec681f3Smrg [NINED3DSPSM_COMP] = "comp", 1407ec681f3Smrg [NINED3DSPSM_X2] = "x2", 1417ec681f3Smrg [NINED3DSPSM_X2NEG] = "x2neg", 1427ec681f3Smrg [NINED3DSPSM_DZ] = "dz", 1437ec681f3Smrg [NINED3DSPSM_DW] = "dw", 1447ec681f3Smrg [NINED3DSPSM_ABS] = "abs", 1457ec681f3Smrg [NINED3DSPSM_ABSNEG] = "-abs", 1467ec681f3Smrg [NINED3DSPSM_NOT] = "not" 1477ec681f3Smrg}; 1487ec681f3Smrg 1497ec681f3Smrgstatic void 1507ec681f3Smrgsm1_dump_writemask(BYTE mask) 1517ec681f3Smrg{ 1527ec681f3Smrg if (mask & 1) DUMP("x"); else DUMP("_"); 1537ec681f3Smrg if (mask & 2) DUMP("y"); else DUMP("_"); 1547ec681f3Smrg if (mask & 4) DUMP("z"); else DUMP("_"); 1557ec681f3Smrg if (mask & 8) DUMP("w"); else DUMP("_"); 1567ec681f3Smrg} 
1577ec681f3Smrg 1587ec681f3Smrgstatic void 1597ec681f3Smrgsm1_dump_swizzle(BYTE s) 1607ec681f3Smrg{ 1617ec681f3Smrg char c[4] = { 'x', 'y', 'z', 'w' }; 1627ec681f3Smrg DUMP("%c%c%c%c", 1637ec681f3Smrg c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]); 1647ec681f3Smrg} 1657ec681f3Smrg 1667ec681f3Smrgstatic const char sm1_file_char[] = 1677ec681f3Smrg{ 1687ec681f3Smrg [D3DSPR_TEMP] = 'r', 1697ec681f3Smrg [D3DSPR_INPUT] = 'v', 1707ec681f3Smrg [D3DSPR_CONST] = 'c', 1717ec681f3Smrg [D3DSPR_ADDR] = 'A', 1727ec681f3Smrg [D3DSPR_RASTOUT] = 'R', 1737ec681f3Smrg [D3DSPR_ATTROUT] = 'D', 1747ec681f3Smrg [D3DSPR_OUTPUT] = 'o', 1757ec681f3Smrg [D3DSPR_CONSTINT] = 'I', 1767ec681f3Smrg [D3DSPR_COLOROUT] = 'C', 1777ec681f3Smrg [D3DSPR_DEPTHOUT] = 'D', 1787ec681f3Smrg [D3DSPR_SAMPLER] = 's', 1797ec681f3Smrg [D3DSPR_CONST2] = 'c', 1807ec681f3Smrg [D3DSPR_CONST3] = 'c', 1817ec681f3Smrg [D3DSPR_CONST4] = 'c', 1827ec681f3Smrg [D3DSPR_CONSTBOOL] = 'B', 1837ec681f3Smrg [D3DSPR_LOOP] = 'L', 1847ec681f3Smrg [D3DSPR_TEMPFLOAT16] = 'h', 1857ec681f3Smrg [D3DSPR_MISCTYPE] = 'M', 1867ec681f3Smrg [D3DSPR_LABEL] = 'X', 1877ec681f3Smrg [D3DSPR_PREDICATE] = 'p' 1887ec681f3Smrg}; 1897ec681f3Smrg 1907ec681f3Smrgstatic void 1917ec681f3Smrgsm1_dump_reg(BYTE file, INT index) 1927ec681f3Smrg{ 1937ec681f3Smrg switch (file) { 1947ec681f3Smrg case D3DSPR_LOOP: 1957ec681f3Smrg DUMP("aL"); 1967ec681f3Smrg break; 1977ec681f3Smrg case D3DSPR_COLOROUT: 1987ec681f3Smrg DUMP("oC%i", index); 1997ec681f3Smrg break; 2007ec681f3Smrg case D3DSPR_DEPTHOUT: 2017ec681f3Smrg DUMP("oDepth"); 2027ec681f3Smrg break; 2037ec681f3Smrg case D3DSPR_RASTOUT: 2047ec681f3Smrg DUMP("oRast%i", index); 2057ec681f3Smrg break; 2067ec681f3Smrg case D3DSPR_CONSTINT: 2077ec681f3Smrg DUMP("iconst[%i]", index); 2087ec681f3Smrg break; 2097ec681f3Smrg case D3DSPR_CONSTBOOL: 2107ec681f3Smrg DUMP("bconst[%i]", index); 2117ec681f3Smrg break; 2127ec681f3Smrg default: 2137ec681f3Smrg DUMP("%c%i", sm1_file_char[file], index); 
2147ec681f3Smrg break; 2157ec681f3Smrg } 2167ec681f3Smrg} 2177ec681f3Smrg 2187ec681f3Smrgstruct sm1_src_param 2197ec681f3Smrg{ 2207ec681f3Smrg INT idx; 2217ec681f3Smrg struct sm1_src_param *rel; 2227ec681f3Smrg BYTE file; 2237ec681f3Smrg BYTE swizzle; 2247ec681f3Smrg BYTE mod; 2257ec681f3Smrg BYTE type; 2267ec681f3Smrg union { 2277ec681f3Smrg DWORD d[4]; 2287ec681f3Smrg float f[4]; 2297ec681f3Smrg int i[4]; 2307ec681f3Smrg BOOL b; 2317ec681f3Smrg } imm; 2327ec681f3Smrg}; 2337ec681f3Smrgstatic void 2347ec681f3Smrgsm1_parse_immediate(struct shader_translator *, struct sm1_src_param *); 2357ec681f3Smrg 2367ec681f3Smrgstruct sm1_dst_param 2377ec681f3Smrg{ 2387ec681f3Smrg INT idx; 2397ec681f3Smrg struct sm1_src_param *rel; 2407ec681f3Smrg BYTE file; 2417ec681f3Smrg BYTE mask; 2427ec681f3Smrg BYTE mod; 2437ec681f3Smrg int8_t shift; /* sint4 */ 2447ec681f3Smrg BYTE type; 2457ec681f3Smrg}; 2467ec681f3Smrg 2477ec681f3Smrgstatic inline void 2487ec681f3Smrgassert_replicate_swizzle(const struct ureg_src *reg) 2497ec681f3Smrg{ 2507ec681f3Smrg assert(reg->SwizzleY == reg->SwizzleX && 2517ec681f3Smrg reg->SwizzleZ == reg->SwizzleX && 2527ec681f3Smrg reg->SwizzleW == reg->SwizzleX); 2537ec681f3Smrg} 2547ec681f3Smrg 2557ec681f3Smrgstatic void 2567ec681f3Smrgsm1_dump_immediate(const struct sm1_src_param *param) 2577ec681f3Smrg{ 2587ec681f3Smrg switch (param->type) { 2597ec681f3Smrg case NINED3DSPTYPE_FLOAT4: 2607ec681f3Smrg DUMP("{ %f %f %f %f }", 2617ec681f3Smrg param->imm.f[0], param->imm.f[1], 2627ec681f3Smrg param->imm.f[2], param->imm.f[3]); 2637ec681f3Smrg break; 2647ec681f3Smrg case NINED3DSPTYPE_INT4: 2657ec681f3Smrg DUMP("{ %i %i %i %i }", 2667ec681f3Smrg param->imm.i[0], param->imm.i[1], 2677ec681f3Smrg param->imm.i[2], param->imm.i[3]); 2687ec681f3Smrg break; 2697ec681f3Smrg case NINED3DSPTYPE_BOOL: 2707ec681f3Smrg DUMP("%s", param->imm.b ? 
"TRUE" : "FALSE"); 2717ec681f3Smrg break; 2727ec681f3Smrg default: 2737ec681f3Smrg assert(0); 2747ec681f3Smrg break; 2757ec681f3Smrg } 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrgstatic void 2797ec681f3Smrgsm1_dump_src_param(const struct sm1_src_param *param) 2807ec681f3Smrg{ 2817ec681f3Smrg if (param->file == NINED3DSPR_IMMEDIATE) { 2827ec681f3Smrg assert(!param->mod && 2837ec681f3Smrg !param->rel && 2847ec681f3Smrg param->swizzle == NINED3DSP_NOSWIZZLE); 2857ec681f3Smrg sm1_dump_immediate(param); 2867ec681f3Smrg return; 2877ec681f3Smrg } 2887ec681f3Smrg 2897ec681f3Smrg if (param->mod) 2907ec681f3Smrg DUMP("%s(", sm1_mod_str[param->mod]); 2917ec681f3Smrg if (param->rel) { 2927ec681f3Smrg DUMP("%c[", sm1_file_char[param->file]); 2937ec681f3Smrg sm1_dump_src_param(param->rel); 2947ec681f3Smrg DUMP("+%i]", param->idx); 2957ec681f3Smrg } else { 2967ec681f3Smrg sm1_dump_reg(param->file, param->idx); 2977ec681f3Smrg } 2987ec681f3Smrg if (param->mod) 2997ec681f3Smrg DUMP(")"); 3007ec681f3Smrg if (param->swizzle != NINED3DSP_NOSWIZZLE) { 3017ec681f3Smrg DUMP("."); 3027ec681f3Smrg sm1_dump_swizzle(param->swizzle); 3037ec681f3Smrg } 3047ec681f3Smrg} 3057ec681f3Smrg 3067ec681f3Smrgstatic void 3077ec681f3Smrgsm1_dump_dst_param(const struct sm1_dst_param *param) 3087ec681f3Smrg{ 3097ec681f3Smrg if (param->mod & NINED3DSPDM_SATURATE) 3107ec681f3Smrg DUMP("sat "); 3117ec681f3Smrg if (param->mod & NINED3DSPDM_PARTIALP) 3127ec681f3Smrg DUMP("pp "); 3137ec681f3Smrg if (param->mod & NINED3DSPDM_CENTROID) 3147ec681f3Smrg DUMP("centroid "); 3157ec681f3Smrg if (param->shift < 0) 3167ec681f3Smrg DUMP("/%u ", 1 << -param->shift); 3177ec681f3Smrg if (param->shift > 0) 3187ec681f3Smrg DUMP("*%u ", 1 << param->shift); 3197ec681f3Smrg 3207ec681f3Smrg if (param->rel) { 3217ec681f3Smrg DUMP("%c[", sm1_file_char[param->file]); 3227ec681f3Smrg sm1_dump_src_param(param->rel); 3237ec681f3Smrg DUMP("+%i]", param->idx); 3247ec681f3Smrg } else { 3257ec681f3Smrg sm1_dump_reg(param->file, 
param->idx); 3267ec681f3Smrg } 3277ec681f3Smrg if (param->mask != NINED3DSP_WRITEMASK_ALL) { 3287ec681f3Smrg DUMP("."); 3297ec681f3Smrg sm1_dump_writemask(param->mask); 3307ec681f3Smrg } 3317ec681f3Smrg} 3327ec681f3Smrg 3337ec681f3Smrgstruct sm1_semantic 3347ec681f3Smrg{ 3357ec681f3Smrg struct sm1_dst_param reg; 3367ec681f3Smrg BYTE sampler_type; 3377ec681f3Smrg D3DDECLUSAGE usage; 3387ec681f3Smrg BYTE usage_idx; 3397ec681f3Smrg}; 3407ec681f3Smrg 3417ec681f3Smrgstruct sm1_op_info 3427ec681f3Smrg{ 3437ec681f3Smrg /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter 3447ec681f3Smrg * should be ignored completely */ 3457ec681f3Smrg unsigned sio; 3467ec681f3Smrg unsigned opcode; /* TGSI_OPCODE_x */ 3477ec681f3Smrg 3487ec681f3Smrg /* versions are still set even handler is set */ 3497ec681f3Smrg struct { 3507ec681f3Smrg unsigned min; 3517ec681f3Smrg unsigned max; 3527ec681f3Smrg } vert_version, frag_version; 3537ec681f3Smrg 3547ec681f3Smrg /* number of regs parsed outside of special handler */ 3557ec681f3Smrg unsigned ndst; 3567ec681f3Smrg unsigned nsrc; 3577ec681f3Smrg 3587ec681f3Smrg /* some instructions don't map perfectly, so use a special handler */ 3597ec681f3Smrg translate_instruction_func handler; 3607ec681f3Smrg}; 3617ec681f3Smrg 3627ec681f3Smrgstruct sm1_instruction 3637ec681f3Smrg{ 3647ec681f3Smrg D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; 3657ec681f3Smrg BYTE flags; 3667ec681f3Smrg BOOL coissue; 3677ec681f3Smrg BOOL predicated; 3687ec681f3Smrg BYTE ndst; 3697ec681f3Smrg BYTE nsrc; 3707ec681f3Smrg struct sm1_src_param src[4]; 3717ec681f3Smrg struct sm1_src_param src_rel[4]; 3727ec681f3Smrg struct sm1_src_param pred; 3737ec681f3Smrg struct sm1_src_param dst_rel[1]; 3747ec681f3Smrg struct sm1_dst_param dst[1]; 3757ec681f3Smrg 3767ec681f3Smrg const struct sm1_op_info *info; 3777ec681f3Smrg}; 3787ec681f3Smrg 3797ec681f3Smrgstatic void 3807ec681f3Smrgsm1_dump_instruction(struct sm1_instruction *insn, unsigned indent) 3817ec681f3Smrg{ 
3827ec681f3Smrg unsigned i; 3837ec681f3Smrg 3847ec681f3Smrg /* no info stored for these: */ 3857ec681f3Smrg if (insn->opcode == D3DSIO_DCL) 3867ec681f3Smrg return; 3877ec681f3Smrg for (i = 0; i < indent; ++i) 3887ec681f3Smrg DUMP(" "); 3897ec681f3Smrg 3907ec681f3Smrg if (insn->predicated) { 3917ec681f3Smrg DUMP("@"); 3927ec681f3Smrg sm1_dump_src_param(&insn->pred); 3937ec681f3Smrg DUMP(" "); 3947ec681f3Smrg } 3957ec681f3Smrg DUMP("%s", d3dsio_to_string(insn->opcode)); 3967ec681f3Smrg if (insn->flags) { 3977ec681f3Smrg switch (insn->opcode) { 3987ec681f3Smrg case D3DSIO_TEX: 3997ec681f3Smrg DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b"); 4007ec681f3Smrg break; 4017ec681f3Smrg default: 4027ec681f3Smrg DUMP("_%x", insn->flags); 4037ec681f3Smrg break; 4047ec681f3Smrg } 4057ec681f3Smrg } 4067ec681f3Smrg if (insn->coissue) 4077ec681f3Smrg DUMP("_co"); 4087ec681f3Smrg DUMP(" "); 4097ec681f3Smrg 4107ec681f3Smrg for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) { 4117ec681f3Smrg sm1_dump_dst_param(&insn->dst[i]); 4127ec681f3Smrg DUMP(" "); 4137ec681f3Smrg } 4147ec681f3Smrg 4157ec681f3Smrg for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) { 4167ec681f3Smrg sm1_dump_src_param(&insn->src[i]); 4177ec681f3Smrg DUMP(" "); 4187ec681f3Smrg } 4197ec681f3Smrg if (insn->opcode == D3DSIO_DEF || 4207ec681f3Smrg insn->opcode == D3DSIO_DEFI || 4217ec681f3Smrg insn->opcode == D3DSIO_DEFB) 4227ec681f3Smrg sm1_dump_immediate(&insn->src[0]); 4237ec681f3Smrg 4247ec681f3Smrg DUMP("\n"); 4257ec681f3Smrg} 4267ec681f3Smrg 4277ec681f3Smrgstruct sm1_local_const 4287ec681f3Smrg{ 4297ec681f3Smrg INT idx; 4307ec681f3Smrg struct ureg_src reg; 4317ec681f3Smrg float f[4]; /* for indirect addressing of float constants */ 4327ec681f3Smrg}; 4337ec681f3Smrg 4347ec681f3Smrgstruct shader_translator 4357ec681f3Smrg{ 4367ec681f3Smrg const DWORD *byte_code; 4377ec681f3Smrg const DWORD *parse; 4387ec681f3Smrg const DWORD *parse_next; 4397ec681f3Smrg 4407ec681f3Smrg struct 
ureg_program *ureg; 4417ec681f3Smrg 4427ec681f3Smrg /* shader version */ 4437ec681f3Smrg struct { 4447ec681f3Smrg BYTE major; 4457ec681f3Smrg BYTE minor; 4467ec681f3Smrg } version; 4477ec681f3Smrg unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */ 4487ec681f3Smrg unsigned num_constf_allowed; 4497ec681f3Smrg unsigned num_consti_allowed; 4507ec681f3Smrg unsigned num_constb_allowed; 4517ec681f3Smrg 4527ec681f3Smrg boolean native_integers; 4537ec681f3Smrg boolean inline_subroutines; 4547ec681f3Smrg boolean want_texcoord; 4557ec681f3Smrg boolean shift_wpos; 4567ec681f3Smrg boolean wpos_is_sysval; 4577ec681f3Smrg boolean face_is_sysval_integer; 4587ec681f3Smrg boolean mul_zero_wins; 4597ec681f3Smrg unsigned texcoord_sn; 4607ec681f3Smrg 4617ec681f3Smrg struct sm1_instruction insn; /* current instruction */ 4627ec681f3Smrg 4637ec681f3Smrg struct { 4647ec681f3Smrg struct ureg_dst *r; 4657ec681f3Smrg struct ureg_dst oPos; 4667ec681f3Smrg struct ureg_dst oPos_out; /* the real output when doing streamout */ 4677ec681f3Smrg struct ureg_dst oFog; 4687ec681f3Smrg struct ureg_dst oPts; 4697ec681f3Smrg struct ureg_dst oCol[4]; 4707ec681f3Smrg struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS]; 4717ec681f3Smrg struct ureg_dst oDepth; 4727ec681f3Smrg struct ureg_src v[PIPE_MAX_SHADER_INPUTS]; 4737ec681f3Smrg struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */ 4747ec681f3Smrg struct ureg_src vPos; 4757ec681f3Smrg struct ureg_src vFace; 4767ec681f3Smrg struct ureg_src s; 4777ec681f3Smrg struct ureg_dst p; 4787ec681f3Smrg struct ureg_dst address; 4797ec681f3Smrg struct ureg_dst a0; 4807ec681f3Smrg struct ureg_dst predicate; 4817ec681f3Smrg struct ureg_dst predicate_tmp; 4827ec681f3Smrg struct ureg_dst predicate_dst; 4837ec681f3Smrg struct ureg_dst tS[8]; /* texture stage registers */ 4847ec681f3Smrg struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ 4857ec681f3Smrg struct ureg_dst t[8]; /* scratch TEMPs */ 4867ec681f3Smrg struct ureg_src 
vC[2]; /* PS color in */ 4877ec681f3Smrg struct ureg_src vT[8]; /* PS texcoord in */ 4887ec681f3Smrg struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ 4897ec681f3Smrg } regs; 4907ec681f3Smrg unsigned num_temp; /* ARRAY_SIZE(regs.r) */ 4917ec681f3Smrg unsigned num_scratch; 4927ec681f3Smrg unsigned loop_depth; 4937ec681f3Smrg unsigned loop_depth_max; 4947ec681f3Smrg unsigned cond_depth; 4957ec681f3Smrg unsigned loop_labels[NINE_MAX_LOOP_DEPTH]; 4967ec681f3Smrg unsigned cond_labels[NINE_MAX_COND_DEPTH]; 4977ec681f3Smrg boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */ 4987ec681f3Smrg boolean predicated_activated; 4997ec681f3Smrg 5007ec681f3Smrg unsigned *inst_labels; /* LABEL op */ 5017ec681f3Smrg unsigned num_inst_labels; 5027ec681f3Smrg 5037ec681f3Smrg unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */ 5047ec681f3Smrg 5057ec681f3Smrg struct sm1_local_const *lconstf; 5067ec681f3Smrg unsigned num_lconstf; 5077ec681f3Smrg struct sm1_local_const *lconsti; 5087ec681f3Smrg unsigned num_lconsti; 5097ec681f3Smrg struct sm1_local_const *lconstb; 5107ec681f3Smrg unsigned num_lconstb; 5117ec681f3Smrg 5127ec681f3Smrg boolean slots_used[NINE_MAX_CONST_ALL]; 5137ec681f3Smrg unsigned *slot_map; 5147ec681f3Smrg unsigned num_slots; 5157ec681f3Smrg 5167ec681f3Smrg boolean indirect_const_access; 5177ec681f3Smrg boolean failure; 5187ec681f3Smrg 5197ec681f3Smrg struct nine_vs_output_info output_info[16]; 5207ec681f3Smrg int num_outputs; 5217ec681f3Smrg 5227ec681f3Smrg struct nine_shader_info *info; 5237ec681f3Smrg 5247ec681f3Smrg int16_t op_info_map[D3DSIO_BREAKP + 1]; 5257ec681f3Smrg}; 5267ec681f3Smrg 5277ec681f3Smrg#define IS_VS (tx->processor == PIPE_SHADER_VERTEX) 5287ec681f3Smrg#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT) 5297ec681f3Smrg 5307ec681f3Smrg#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} 5317ec681f3Smrg 5327ec681f3Smrgstatic void 5337ec681f3Smrgsm1_read_semantic(struct shader_translator *, struct 
sm1_semantic *); 5347ec681f3Smrg 5357ec681f3Smrgstatic void 5367ec681f3Smrgsm1_instruction_check(const struct sm1_instruction *insn) 5377ec681f3Smrg{ 5387ec681f3Smrg if (insn->opcode == D3DSIO_CRS) 5397ec681f3Smrg { 5407ec681f3Smrg if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3) 5417ec681f3Smrg { 5427ec681f3Smrg DBG("CRS.mask.w\n"); 5437ec681f3Smrg } 5447ec681f3Smrg } 5457ec681f3Smrg} 5467ec681f3Smrg 5477ec681f3Smrgstatic void 5487ec681f3Smrgnine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, 5497ec681f3Smrg int mask, int output_index) 5507ec681f3Smrg{ 5517ec681f3Smrg tx->output_info[tx->num_outputs].output_semantic = Usage; 5527ec681f3Smrg tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; 5537ec681f3Smrg tx->output_info[tx->num_outputs].mask = mask; 5547ec681f3Smrg tx->output_info[tx->num_outputs].output_index = output_index; 5557ec681f3Smrg tx->num_outputs++; 5567ec681f3Smrg} 5577ec681f3Smrg 5587ec681f3Smrgstatic struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx) 5597ec681f3Smrg{ 5607ec681f3Smrg struct ureg_src src; 5617ec681f3Smrg 5627ec681f3Smrg if (tx->slot_map) 5637ec681f3Smrg idx = tx->slot_map[idx]; 5647ec681f3Smrg /* vswp constant handling: we use two buffers 5657ec681f3Smrg * to fit all the float constants. 
The special handling 5667ec681f3Smrg * doesn't need to be elsewhere, because all the instructions 5677ec681f3Smrg * accessing the constants directly are VS1, and swvp 5687ec681f3Smrg * is VS >= 2 */ 5697ec681f3Smrg if (tx->info->swvp_on && idx >= 4096) { 5707ec681f3Smrg /* TODO: swvp rel is broken if many constants are used */ 5717ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096); 5727ec681f3Smrg src = ureg_src_dimension(src, 1); 5737ec681f3Smrg } else { 5747ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 5757ec681f3Smrg src = ureg_src_dimension(src, 0); 5767ec681f3Smrg } 5777ec681f3Smrg 5787ec681f3Smrg if (!tx->info->swvp_on) 5797ec681f3Smrg tx->slots_used[idx] = TRUE; 5807ec681f3Smrg if (tx->info->const_float_slots < (idx + 1)) 5817ec681f3Smrg tx->info->const_float_slots = idx + 1; 5827ec681f3Smrg if (tx->num_slots < (idx + 1)) 5837ec681f3Smrg tx->num_slots = idx + 1; 5847ec681f3Smrg 5857ec681f3Smrg return src; 5867ec681f3Smrg} 5877ec681f3Smrg 5887ec681f3Smrgstatic struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx) 5897ec681f3Smrg{ 5907ec681f3Smrg struct ureg_src src; 5917ec681f3Smrg 5927ec681f3Smrg if (tx->info->swvp_on) { 5937ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 5947ec681f3Smrg src = ureg_src_dimension(src, 2); 5957ec681f3Smrg } else { 5967ec681f3Smrg unsigned slot_idx = tx->info->const_i_base + idx; 5977ec681f3Smrg if (tx->slot_map) 5987ec681f3Smrg slot_idx = tx->slot_map[slot_idx]; 5997ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 6007ec681f3Smrg src = ureg_src_dimension(src, 0); 6017ec681f3Smrg tx->slots_used[slot_idx] = TRUE; 6027ec681f3Smrg tx->info->int_slots_used[idx] = TRUE; 6037ec681f3Smrg if (tx->num_slots < (slot_idx + 1)) 6047ec681f3Smrg tx->num_slots = slot_idx + 1; 6057ec681f3Smrg } 6067ec681f3Smrg 6077ec681f3Smrg if (tx->info->const_int_slots < (idx + 1)) 6087ec681f3Smrg tx->info->const_int_slots = idx + 1; 6097ec681f3Smrg 6107ec681f3Smrg 
return src; 6117ec681f3Smrg} 6127ec681f3Smrg 6137ec681f3Smrgstatic struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx) 6147ec681f3Smrg{ 6157ec681f3Smrg struct ureg_src src; 6167ec681f3Smrg 6177ec681f3Smrg char r = idx / 4; 6187ec681f3Smrg char s = idx & 3; 6197ec681f3Smrg 6207ec681f3Smrg if (tx->info->swvp_on) { 6217ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, r); 6227ec681f3Smrg src = ureg_src_dimension(src, 3); 6237ec681f3Smrg } else { 6247ec681f3Smrg unsigned slot_idx = tx->info->const_b_base + r; 6257ec681f3Smrg if (tx->slot_map) 6267ec681f3Smrg slot_idx = tx->slot_map[slot_idx]; 6277ec681f3Smrg src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 6287ec681f3Smrg src = ureg_src_dimension(src, 0); 6297ec681f3Smrg tx->slots_used[slot_idx] = TRUE; 6307ec681f3Smrg tx->info->bool_slots_used[idx] = TRUE; 6317ec681f3Smrg if (tx->num_slots < (slot_idx + 1)) 6327ec681f3Smrg tx->num_slots = slot_idx + 1; 6337ec681f3Smrg } 6347ec681f3Smrg src = ureg_swizzle(src, s, s, s, s); 6357ec681f3Smrg 6367ec681f3Smrg if (tx->info->const_bool_slots < (idx + 1)) 6377ec681f3Smrg tx->info->const_bool_slots = idx + 1; 6387ec681f3Smrg 6397ec681f3Smrg return src; 6407ec681f3Smrg} 6417ec681f3Smrg 6427ec681f3Smrgstatic boolean 6437ec681f3Smrgtx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) 6447ec681f3Smrg{ 6457ec681f3Smrg INT i; 6467ec681f3Smrg 6477ec681f3Smrg if (index < 0 || index >= tx->num_constf_allowed) { 6487ec681f3Smrg tx->failure = TRUE; 6497ec681f3Smrg return FALSE; 6507ec681f3Smrg } 6517ec681f3Smrg for (i = 0; i < tx->num_lconstf; ++i) { 6527ec681f3Smrg if (tx->lconstf[i].idx == index) { 6537ec681f3Smrg *src = tx->lconstf[i].reg; 6547ec681f3Smrg return TRUE; 6557ec681f3Smrg } 6567ec681f3Smrg } 6577ec681f3Smrg return FALSE; 6587ec681f3Smrg} 6597ec681f3Smrgstatic boolean 6607ec681f3Smrgtx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) 6617ec681f3Smrg{ 6627ec681f3Smrg int i; 6637ec681f3Smrg 
6647ec681f3Smrg if (index < 0 || index >= tx->num_consti_allowed) { 6657ec681f3Smrg tx->failure = TRUE; 6667ec681f3Smrg return FALSE; 6677ec681f3Smrg } 6687ec681f3Smrg for (i = 0; i < tx->num_lconsti; ++i) { 6697ec681f3Smrg if (tx->lconsti[i].idx == index) { 6707ec681f3Smrg *src = tx->lconsti[i].reg; 6717ec681f3Smrg return TRUE; 6727ec681f3Smrg } 6737ec681f3Smrg } 6747ec681f3Smrg return FALSE; 6757ec681f3Smrg} 6767ec681f3Smrgstatic boolean 6777ec681f3Smrgtx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) 6787ec681f3Smrg{ 6797ec681f3Smrg int i; 6807ec681f3Smrg 6817ec681f3Smrg if (index < 0 || index >= tx->num_constb_allowed) { 6827ec681f3Smrg tx->failure = TRUE; 6837ec681f3Smrg return FALSE; 6847ec681f3Smrg } 6857ec681f3Smrg for (i = 0; i < tx->num_lconstb; ++i) { 6867ec681f3Smrg if (tx->lconstb[i].idx == index) { 6877ec681f3Smrg *src = tx->lconstb[i].reg; 6887ec681f3Smrg return TRUE; 6897ec681f3Smrg } 6907ec681f3Smrg } 6917ec681f3Smrg return FALSE; 6927ec681f3Smrg} 6937ec681f3Smrg 6947ec681f3Smrgstatic void 6957ec681f3Smrgtx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) 6967ec681f3Smrg{ 6977ec681f3Smrg unsigned n; 6987ec681f3Smrg 6997ec681f3Smrg FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) 7007ec681f3Smrg 7017ec681f3Smrg for (n = 0; n < tx->num_lconstf; ++n) 7027ec681f3Smrg if (tx->lconstf[n].idx == index) 7037ec681f3Smrg break; 7047ec681f3Smrg if (n == tx->num_lconstf) { 7057ec681f3Smrg if ((n % 8) == 0) { 7067ec681f3Smrg tx->lconstf = REALLOC(tx->lconstf, 7077ec681f3Smrg (n + 0) * sizeof(tx->lconstf[0]), 7087ec681f3Smrg (n + 8) * sizeof(tx->lconstf[0])); 7097ec681f3Smrg assert(tx->lconstf); 7107ec681f3Smrg } 7117ec681f3Smrg tx->num_lconstf++; 7127ec681f3Smrg } 7137ec681f3Smrg tx->lconstf[n].idx = index; 7147ec681f3Smrg tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); 7157ec681f3Smrg 7167ec681f3Smrg memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); 7177ec681f3Smrg} 7187ec681f3Smrgstatic 
void 7197ec681f3Smrgtx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) 7207ec681f3Smrg{ 7217ec681f3Smrg unsigned n; 7227ec681f3Smrg 7237ec681f3Smrg FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) 7247ec681f3Smrg 7257ec681f3Smrg for (n = 0; n < tx->num_lconsti; ++n) 7267ec681f3Smrg if (tx->lconsti[n].idx == index) 7277ec681f3Smrg break; 7287ec681f3Smrg if (n == tx->num_lconsti) { 7297ec681f3Smrg if ((n % 8) == 0) { 7307ec681f3Smrg tx->lconsti = REALLOC(tx->lconsti, 7317ec681f3Smrg (n + 0) * sizeof(tx->lconsti[0]), 7327ec681f3Smrg (n + 8) * sizeof(tx->lconsti[0])); 7337ec681f3Smrg assert(tx->lconsti); 7347ec681f3Smrg } 7357ec681f3Smrg tx->num_lconsti++; 7367ec681f3Smrg } 7377ec681f3Smrg 7387ec681f3Smrg tx->lconsti[n].idx = index; 7397ec681f3Smrg tx->lconsti[n].reg = tx->native_integers ? 7407ec681f3Smrg ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : 7417ec681f3Smrg ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); 7427ec681f3Smrg} 7437ec681f3Smrgstatic void 7447ec681f3Smrgtx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) 7457ec681f3Smrg{ 7467ec681f3Smrg unsigned n; 7477ec681f3Smrg 7487ec681f3Smrg FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) 7497ec681f3Smrg 7507ec681f3Smrg for (n = 0; n < tx->num_lconstb; ++n) 7517ec681f3Smrg if (tx->lconstb[n].idx == index) 7527ec681f3Smrg break; 7537ec681f3Smrg if (n == tx->num_lconstb) { 7547ec681f3Smrg if ((n % 8) == 0) { 7557ec681f3Smrg tx->lconstb = REALLOC(tx->lconstb, 7567ec681f3Smrg (n + 0) * sizeof(tx->lconstb[0]), 7577ec681f3Smrg (n + 8) * sizeof(tx->lconstb[0])); 7587ec681f3Smrg assert(tx->lconstb); 7597ec681f3Smrg } 7607ec681f3Smrg tx->num_lconstb++; 7617ec681f3Smrg } 7627ec681f3Smrg 7637ec681f3Smrg tx->lconstb[n].idx = index; 7647ec681f3Smrg tx->lconstb[n].reg = tx->native_integers ? 7657ec681f3Smrg ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : 7667ec681f3Smrg ureg_imm1f(tx->ureg, b ? 
1.0f : 0.0f); 7677ec681f3Smrg} 7687ec681f3Smrg 7697ec681f3Smrgstatic inline struct ureg_dst 7707ec681f3Smrgtx_scratch(struct shader_translator *tx) 7717ec681f3Smrg{ 7727ec681f3Smrg if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) { 7737ec681f3Smrg tx->failure = TRUE; 7747ec681f3Smrg return tx->regs.t[0]; 7757ec681f3Smrg } 7767ec681f3Smrg if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) 7777ec681f3Smrg tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); 7787ec681f3Smrg return tx->regs.t[tx->num_scratch++]; 7797ec681f3Smrg} 7807ec681f3Smrg 7817ec681f3Smrgstatic inline struct ureg_dst 7827ec681f3Smrgtx_scratch_scalar(struct shader_translator *tx) 7837ec681f3Smrg{ 7847ec681f3Smrg return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 7857ec681f3Smrg} 7867ec681f3Smrg 7877ec681f3Smrgstatic inline struct ureg_src 7887ec681f3Smrgtx_src_scalar(struct ureg_dst dst) 7897ec681f3Smrg{ 7907ec681f3Smrg struct ureg_src src = ureg_src(dst); 7917ec681f3Smrg int c = ffs(dst.WriteMask) - 1; 7927ec681f3Smrg if (dst.WriteMask == (1 << c)) 7937ec681f3Smrg src = ureg_scalar(src, c); 7947ec681f3Smrg return src; 7957ec681f3Smrg} 7967ec681f3Smrg 7977ec681f3Smrgstatic inline void 7987ec681f3Smrgtx_temp_alloc(struct shader_translator *tx, INT idx) 7997ec681f3Smrg{ 8007ec681f3Smrg assert(idx >= 0); 8017ec681f3Smrg if (idx >= tx->num_temp) { 8027ec681f3Smrg unsigned k = tx->num_temp; 8037ec681f3Smrg unsigned n = idx + 1; 8047ec681f3Smrg tx->regs.r = REALLOC(tx->regs.r, 8057ec681f3Smrg k * sizeof(tx->regs.r[0]), 8067ec681f3Smrg n * sizeof(tx->regs.r[0])); 8077ec681f3Smrg for (; k < n; ++k) 8087ec681f3Smrg tx->regs.r[k] = ureg_dst_undef(); 8097ec681f3Smrg tx->num_temp = n; 8107ec681f3Smrg } 8117ec681f3Smrg if (ureg_dst_is_undef(tx->regs.r[idx])) 8127ec681f3Smrg tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); 8137ec681f3Smrg} 8147ec681f3Smrg 8157ec681f3Smrgstatic inline void 8167ec681f3Smrgtx_addr_alloc(struct shader_translator *tx, INT idx) 8177ec681f3Smrg{ 8187ec681f3Smrg 
assert(idx == 0); 8197ec681f3Smrg if (ureg_dst_is_undef(tx->regs.address)) 8207ec681f3Smrg tx->regs.address = ureg_DECL_address(tx->ureg); 8217ec681f3Smrg if (ureg_dst_is_undef(tx->regs.a0)) 8227ec681f3Smrg tx->regs.a0 = ureg_DECL_temporary(tx->ureg); 8237ec681f3Smrg} 8247ec681f3Smrg 8257ec681f3Smrgstatic inline bool 8267ec681f3SmrgTEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst, 8277ec681f3Smrg unsigned target, struct ureg_src src0, 8287ec681f3Smrg struct ureg_src src1, INT idx) 8297ec681f3Smrg{ 8307ec681f3Smrg struct ureg_dst tmp; 8317ec681f3Smrg struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1}; 8327ec681f3Smrg 8337ec681f3Smrg if (!(tx->info->fetch4 & (1 << idx))) 8347ec681f3Smrg return false; 8357ec681f3Smrg 8367ec681f3Smrg /* TODO: needs more tests, but this feature is not much used at all */ 8377ec681f3Smrg 8387ec681f3Smrg tmp = tx_scratch(tx); 8397ec681f3Smrg ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT, 8407ec681f3Smrg NULL, 0, src_tg4, 3); 8417ec681f3Smrg ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W))); 8427ec681f3Smrg return true; 8437ec681f3Smrg} 8447ec681f3Smrg 8457ec681f3Smrg/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions 8467ec681f3Smrg * the projection should be applied on the texture. It doesn't 8477ec681f3Smrg * apply on texkill. 8487ec681f3Smrg * The doc is very imprecise here (it says the projection is done 8497ec681f3Smrg * before rasterization, thus in vs, which seems wrong since ps instructions 8507ec681f3Smrg * are affected differently) 8517ec681f3Smrg * For now we only apply to the ps TEX instruction and TEXBEM. 
8527ec681f3Smrg * Perhaps some other instructions would need it */ 8537ec681f3Smrgstatic inline void 8547ec681f3Smrgapply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 8557ec681f3Smrg struct ureg_src src, INT idx) 8567ec681f3Smrg{ 8577ec681f3Smrg struct ureg_dst tmp; 8587ec681f3Smrg unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 8597ec681f3Smrg 8607ec681f3Smrg /* no projection */ 8617ec681f3Smrg if (dim == 1) { 8627ec681f3Smrg ureg_MOV(tx->ureg, dst, src); 8637ec681f3Smrg } else { 8647ec681f3Smrg tmp = tx_scratch_scalar(tx); 8657ec681f3Smrg ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); 8667ec681f3Smrg ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); 8677ec681f3Smrg } 8687ec681f3Smrg} 8697ec681f3Smrg 8707ec681f3Smrgstatic inline void 8717ec681f3SmrgTEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 8727ec681f3Smrg unsigned target, struct ureg_src src0, 8737ec681f3Smrg struct ureg_src src1, INT idx) 8747ec681f3Smrg{ 8757ec681f3Smrg unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 8767ec681f3Smrg struct ureg_dst tmp; 8777ec681f3Smrg boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx)); 8787ec681f3Smrg 8797ec681f3Smrg /* dim == 1: no projection 8807ec681f3Smrg * Looks like must be disabled when it makes no 8817ec681f3Smrg * sense according the texture dimensions 8827ec681f3Smrg */ 8837ec681f3Smrg if (dim == 1 || (dim <= target && !shadow)) { 8847ec681f3Smrg ureg_TEX(tx->ureg, dst, target, src0, src1); 8857ec681f3Smrg } else if (dim == 4) { 8867ec681f3Smrg ureg_TXP(tx->ureg, dst, target, src0, src1); 8877ec681f3Smrg } else { 8887ec681f3Smrg tmp = tx_scratch(tx); 8897ec681f3Smrg apply_ps1x_projection(tx, tmp, src0, idx); 8907ec681f3Smrg ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); 8917ec681f3Smrg } 8927ec681f3Smrg} 8937ec681f3Smrg 8947ec681f3Smrgstatic inline void 8957ec681f3Smrgtx_texcoord_alloc(struct shader_translator *tx, INT idx) 8967ec681f3Smrg{ 8977ec681f3Smrg 
assert(IS_PS); 8987ec681f3Smrg assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT)); 8997ec681f3Smrg if (ureg_src_is_undef(tx->regs.vT[idx])) 9007ec681f3Smrg tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx, 9017ec681f3Smrg TGSI_INTERPOLATE_PERSPECTIVE); 9027ec681f3Smrg} 9037ec681f3Smrg 9047ec681f3Smrgstatic inline unsigned * 9057ec681f3Smrgtx_bgnloop(struct shader_translator *tx) 9067ec681f3Smrg{ 9077ec681f3Smrg tx->loop_depth++; 9087ec681f3Smrg if (tx->loop_depth_max < tx->loop_depth) 9097ec681f3Smrg tx->loop_depth_max = tx->loop_depth; 9107ec681f3Smrg assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH); 9117ec681f3Smrg return &tx->loop_labels[tx->loop_depth - 1]; 9127ec681f3Smrg} 9137ec681f3Smrg 9147ec681f3Smrgstatic inline unsigned * 9157ec681f3Smrgtx_endloop(struct shader_translator *tx) 9167ec681f3Smrg{ 9177ec681f3Smrg assert(tx->loop_depth); 9187ec681f3Smrg tx->loop_depth--; 9197ec681f3Smrg ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth], 9207ec681f3Smrg ureg_get_instruction_number(tx->ureg)); 9217ec681f3Smrg return &tx->loop_labels[tx->loop_depth]; 9227ec681f3Smrg} 9237ec681f3Smrg 9247ec681f3Smrgstatic struct ureg_dst 9257ec681f3Smrgtx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep) 9267ec681f3Smrg{ 9277ec681f3Smrg const unsigned l = tx->loop_depth - 1; 9287ec681f3Smrg 9297ec681f3Smrg if (!tx->loop_depth) 9307ec681f3Smrg { 9317ec681f3Smrg DBG("loop counter requested outside of loop\n"); 9327ec681f3Smrg return ureg_dst_undef(); 9337ec681f3Smrg } 9347ec681f3Smrg 9357ec681f3Smrg if (ureg_dst_is_undef(tx->regs.rL[l])) { 9367ec681f3Smrg /* loop or rep ctr creation */ 9377ec681f3Smrg tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg); 9387ec681f3Smrg tx->loop_or_rep[l] = loop_or_rep; 9397ec681f3Smrg } 9407ec681f3Smrg /* loop - rep - endloop - endrep not allowed */ 9417ec681f3Smrg assert(tx->loop_or_rep[l] == loop_or_rep); 9427ec681f3Smrg 9437ec681f3Smrg return tx->regs.rL[l]; 9447ec681f3Smrg} 9457ec681f3Smrg 
9467ec681f3Smrgstatic struct ureg_src 9477ec681f3Smrgtx_get_loopal(struct shader_translator *tx) 9487ec681f3Smrg{ 9497ec681f3Smrg int loop_level = tx->loop_depth - 1; 9507ec681f3Smrg 9517ec681f3Smrg while (loop_level >= 0) { 9527ec681f3Smrg /* handle loop - rep - endrep - endloop case */ 9537ec681f3Smrg if (tx->loop_or_rep[loop_level]) 9547ec681f3Smrg /* the value is in the loop counter y component (nine implementation) */ 9557ec681f3Smrg return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y); 9567ec681f3Smrg loop_level--; 9577ec681f3Smrg } 9587ec681f3Smrg 9597ec681f3Smrg DBG("aL counter requested outside of loop\n"); 9607ec681f3Smrg return ureg_src_undef(); 9617ec681f3Smrg} 9627ec681f3Smrg 9637ec681f3Smrgstatic inline unsigned * 9647ec681f3Smrgtx_cond(struct shader_translator *tx) 9657ec681f3Smrg{ 9667ec681f3Smrg assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); 9677ec681f3Smrg tx->cond_depth++; 9687ec681f3Smrg return &tx->cond_labels[tx->cond_depth - 1]; 9697ec681f3Smrg} 9707ec681f3Smrg 9717ec681f3Smrgstatic inline unsigned * 9727ec681f3Smrgtx_elsecond(struct shader_translator *tx) 9737ec681f3Smrg{ 9747ec681f3Smrg assert(tx->cond_depth); 9757ec681f3Smrg return &tx->cond_labels[tx->cond_depth - 1]; 9767ec681f3Smrg} 9777ec681f3Smrg 9787ec681f3Smrgstatic inline void 9797ec681f3Smrgtx_endcond(struct shader_translator *tx) 9807ec681f3Smrg{ 9817ec681f3Smrg assert(tx->cond_depth); 9827ec681f3Smrg tx->cond_depth--; 9837ec681f3Smrg ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth], 9847ec681f3Smrg ureg_get_instruction_number(tx->ureg)); 9857ec681f3Smrg} 9867ec681f3Smrg 9877ec681f3Smrgstatic inline struct ureg_dst 9887ec681f3Smrgnine_ureg_dst_register(unsigned file, int index) 9897ec681f3Smrg{ 9907ec681f3Smrg return ureg_dst(ureg_src_register(file, index)); 9917ec681f3Smrg} 9927ec681f3Smrg 9937ec681f3Smrgstatic inline struct ureg_src 9947ec681f3Smrgnine_get_position_input(struct shader_translator *tx) 9957ec681f3Smrg{ 9967ec681f3Smrg struct 
ureg_program *ureg = tx->ureg; 9977ec681f3Smrg 9987ec681f3Smrg if (tx->wpos_is_sysval) 9997ec681f3Smrg return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 10007ec681f3Smrg else 10017ec681f3Smrg return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 10027ec681f3Smrg 0, TGSI_INTERPOLATE_LINEAR); 10037ec681f3Smrg} 10047ec681f3Smrg 10057ec681f3Smrgstatic struct ureg_src 10067ec681f3Smrgtx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) 10077ec681f3Smrg{ 10087ec681f3Smrg struct ureg_program *ureg = tx->ureg; 10097ec681f3Smrg struct ureg_src src; 10107ec681f3Smrg struct ureg_dst tmp; 10117ec681f3Smrg 10127ec681f3Smrg assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) || 10137ec681f3Smrg (param->file == D3DSPR_INPUT && tx->version.major == 3)); 10147ec681f3Smrg 10157ec681f3Smrg switch (param->file) 10167ec681f3Smrg { 10177ec681f3Smrg case D3DSPR_TEMP: 10187ec681f3Smrg tx_temp_alloc(tx, param->idx); 10197ec681f3Smrg src = ureg_src(tx->regs.r[param->idx]); 10207ec681f3Smrg break; 10217ec681f3Smrg /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 10227ec681f3Smrg case D3DSPR_ADDR: 10237ec681f3Smrg if (IS_VS) { 10247ec681f3Smrg assert(param->idx == 0); 10257ec681f3Smrg /* the address register (vs only) must be 10267ec681f3Smrg * assigned before use */ 10277ec681f3Smrg assert(!ureg_dst_is_undef(tx->regs.a0)); 10287ec681f3Smrg /* Round to lowest for vs1.1 (contrary to the doc), else 10297ec681f3Smrg * round to nearest */ 10307ec681f3Smrg if (tx->version.major < 2 && tx->version.minor < 2) 10317ec681f3Smrg ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 10327ec681f3Smrg else 10337ec681f3Smrg ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 10347ec681f3Smrg src = ureg_src(tx->regs.address); 10357ec681f3Smrg } else { 10367ec681f3Smrg if (tx->version.major < 2 && tx->version.minor < 4) { 10377ec681f3Smrg /* no subroutines, so should be defined */ 10387ec681f3Smrg src = ureg_src(tx->regs.tS[param->idx]); 10397ec681f3Smrg } 
else { 10407ec681f3Smrg tx_texcoord_alloc(tx, param->idx); 10417ec681f3Smrg src = tx->regs.vT[param->idx]; 10427ec681f3Smrg } 10437ec681f3Smrg } 10447ec681f3Smrg break; 10457ec681f3Smrg case D3DSPR_INPUT: 10467ec681f3Smrg if (IS_VS) { 10477ec681f3Smrg src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 10487ec681f3Smrg } else { 10497ec681f3Smrg if (tx->version.major < 3) { 10507ec681f3Smrg src = ureg_DECL_fs_input_centroid( 10517ec681f3Smrg ureg, TGSI_SEMANTIC_COLOR, param->idx, 10527ec681f3Smrg TGSI_INTERPOLATE_COLOR, 10537ec681f3Smrg tx->info->force_color_in_centroid ? 10547ec681f3Smrg TGSI_INTERPOLATE_LOC_CENTROID : 0, 10557ec681f3Smrg 0, 1); 10567ec681f3Smrg } else { 10577ec681f3Smrg if(param->rel) { 10587ec681f3Smrg /* Copy all inputs (non consecutive) 10597ec681f3Smrg * to temp array (consecutive). 10607ec681f3Smrg * This is not good for performance. 10617ec681f3Smrg * A better way would be to have inputs 10627ec681f3Smrg * consecutive (would need implement alternative 10637ec681f3Smrg * way to match vs outputs and ps inputs). 10647ec681f3Smrg * However even with the better way, the temp array 10657ec681f3Smrg * copy would need to be used if some inputs 10667ec681f3Smrg * are not GENERIC or if they have different 10677ec681f3Smrg * interpolation flag. 
*/ 10687ec681f3Smrg if (ureg_src_is_undef(tx->regs.v_consecutive)) { 10697ec681f3Smrg int i; 10707ec681f3Smrg tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0)); 10717ec681f3Smrg for (i = 0; i < 10; i++) { 10727ec681f3Smrg if (!ureg_src_is_undef(tx->regs.v[i])) 10737ec681f3Smrg ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]); 10747ec681f3Smrg else 10757ec681f3Smrg ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 10767ec681f3Smrg } 10777ec681f3Smrg } 10787ec681f3Smrg src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx); 10797ec681f3Smrg } else { 10807ec681f3Smrg assert(param->idx < ARRAY_SIZE(tx->regs.v)); 10817ec681f3Smrg src = tx->regs.v[param->idx]; 10827ec681f3Smrg } 10837ec681f3Smrg } 10847ec681f3Smrg } 10857ec681f3Smrg if (param->rel) 10867ec681f3Smrg src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 10877ec681f3Smrg break; 10887ec681f3Smrg case D3DSPR_PREDICATE: 10897ec681f3Smrg if (ureg_dst_is_undef(tx->regs.predicate)) { 10907ec681f3Smrg /* Forbidden to use the predicate register before being set */ 10917ec681f3Smrg tx->failure = TRUE; 10927ec681f3Smrg tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 10937ec681f3Smrg } 10947ec681f3Smrg src = ureg_src(tx->regs.predicate); 10957ec681f3Smrg break; 10967ec681f3Smrg case D3DSPR_SAMPLER: 10977ec681f3Smrg assert(param->mod == NINED3DSPSM_NONE); 10987ec681f3Smrg /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */ 10997ec681f3Smrg src = ureg_DECL_sampler(ureg, param->idx); 11007ec681f3Smrg break; 11017ec681f3Smrg case D3DSPR_CONST: 11027ec681f3Smrg if (param->rel || !tx_lconstf(tx, &src, param->idx)) { 11037ec681f3Smrg src = nine_float_constant_src(tx, param->idx); 11047ec681f3Smrg if (param->rel) { 11057ec681f3Smrg tx->indirect_const_access = TRUE; 11067ec681f3Smrg src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 
11077ec681f3Smrg } 11087ec681f3Smrg } 11097ec681f3Smrg if (!IS_VS && tx->version.major < 2) { 11107ec681f3Smrg /* ps 1.X clamps constants */ 11117ec681f3Smrg tmp = tx_scratch(tx); 11127ec681f3Smrg ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f)); 11137ec681f3Smrg ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 11147ec681f3Smrg src = ureg_src(tmp); 11157ec681f3Smrg } 11167ec681f3Smrg break; 11177ec681f3Smrg case D3DSPR_CONST2: 11187ec681f3Smrg case D3DSPR_CONST3: 11197ec681f3Smrg case D3DSPR_CONST4: 11207ec681f3Smrg DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n"); 11217ec681f3Smrg assert(!"CONST2/3/4"); 11227ec681f3Smrg src = ureg_imm1f(ureg, 0.0f); 11237ec681f3Smrg break; 11247ec681f3Smrg case D3DSPR_CONSTINT: 11257ec681f3Smrg /* relative adressing only possible for float constants in vs */ 11267ec681f3Smrg if (!tx_lconsti(tx, &src, param->idx)) 11277ec681f3Smrg src = nine_integer_constant_src(tx, param->idx); 11287ec681f3Smrg break; 11297ec681f3Smrg case D3DSPR_CONSTBOOL: 11307ec681f3Smrg if (!tx_lconstb(tx, &src, param->idx)) 11317ec681f3Smrg src = nine_boolean_constant_src(tx, param->idx); 11327ec681f3Smrg break; 11337ec681f3Smrg case D3DSPR_LOOP: 11347ec681f3Smrg if (ureg_dst_is_undef(tx->regs.address)) 11357ec681f3Smrg tx->regs.address = ureg_DECL_address(ureg); 11367ec681f3Smrg if (!tx->native_integers) 11377ec681f3Smrg ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx)); 11387ec681f3Smrg else 11397ec681f3Smrg ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx)); 11407ec681f3Smrg src = ureg_src(tx->regs.address); 11417ec681f3Smrg break; 11427ec681f3Smrg case D3DSPR_MISCTYPE: 11437ec681f3Smrg switch (param->idx) { 11447ec681f3Smrg case D3DSMO_POSITION: 11457ec681f3Smrg if (ureg_src_is_undef(tx->regs.vPos)) 11467ec681f3Smrg tx->regs.vPos = nine_get_position_input(tx); 11477ec681f3Smrg if (tx->shift_wpos) { 11487ec681f3Smrg /* TODO: do this only once */ 11497ec681f3Smrg struct ureg_dst wpos = tx_scratch(tx); 11507ec681f3Smrg 
ureg_ADD(ureg, wpos, tx->regs.vPos, 11517ec681f3Smrg ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f)); 11527ec681f3Smrg src = ureg_src(wpos); 11537ec681f3Smrg } else { 11547ec681f3Smrg src = tx->regs.vPos; 11557ec681f3Smrg } 11567ec681f3Smrg break; 11577ec681f3Smrg case D3DSMO_FACE: 11587ec681f3Smrg if (ureg_src_is_undef(tx->regs.vFace)) { 11597ec681f3Smrg if (tx->face_is_sysval_integer) { 11607ec681f3Smrg tmp = ureg_DECL_temporary(ureg); 11617ec681f3Smrg tx->regs.vFace = 11627ec681f3Smrg ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0); 11637ec681f3Smrg 11647ec681f3Smrg /* convert bool to float */ 11657ec681f3Smrg ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X), 11667ec681f3Smrg ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1)); 11677ec681f3Smrg tx->regs.vFace = ureg_src(tmp); 11687ec681f3Smrg } else { 11697ec681f3Smrg tx->regs.vFace = ureg_DECL_fs_input(ureg, 11707ec681f3Smrg TGSI_SEMANTIC_FACE, 0, 11717ec681f3Smrg TGSI_INTERPOLATE_CONSTANT); 11727ec681f3Smrg } 11737ec681f3Smrg tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X); 11747ec681f3Smrg } 11757ec681f3Smrg src = tx->regs.vFace; 11767ec681f3Smrg break; 11777ec681f3Smrg default: 11787ec681f3Smrg assert(!"invalid src D3DSMO"); 11797ec681f3Smrg break; 11807ec681f3Smrg } 11817ec681f3Smrg break; 11827ec681f3Smrg case D3DSPR_TEMPFLOAT16: 11837ec681f3Smrg break; 11847ec681f3Smrg default: 11857ec681f3Smrg assert(!"invalid src D3DSPR"); 11867ec681f3Smrg } 11877ec681f3Smrg 11887ec681f3Smrg switch (param->mod) { 11897ec681f3Smrg case NINED3DSPSM_DW: 11907ec681f3Smrg tmp = tx_scratch(tx); 11917ec681f3Smrg /* NOTE: app is not allowed to read w with this modifier */ 11927ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W)); 11937ec681f3Smrg ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W))); 11947ec681f3Smrg src = ureg_src(tmp); 11957ec681f3Smrg break; 11967ec681f3Smrg case NINED3DSPSM_DZ: 11977ec681f3Smrg tmp = 
tx_scratch(tx); 11987ec681f3Smrg /* NOTE: app is not allowed to read z with this modifier */ 11997ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z)); 12007ec681f3Smrg ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z))); 12017ec681f3Smrg src = ureg_src(tmp); 12027ec681f3Smrg break; 12037ec681f3Smrg default: 12047ec681f3Smrg break; 12057ec681f3Smrg } 12067ec681f3Smrg 12077ec681f3Smrg if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER) 12087ec681f3Smrg src = ureg_swizzle(src, 12097ec681f3Smrg (param->swizzle >> 0) & 0x3, 12107ec681f3Smrg (param->swizzle >> 2) & 0x3, 12117ec681f3Smrg (param->swizzle >> 4) & 0x3, 12127ec681f3Smrg (param->swizzle >> 6) & 0x3); 12137ec681f3Smrg 12147ec681f3Smrg switch (param->mod) { 12157ec681f3Smrg case NINED3DSPSM_ABS: 12167ec681f3Smrg src = ureg_abs(src); 12177ec681f3Smrg break; 12187ec681f3Smrg case NINED3DSPSM_ABSNEG: 12197ec681f3Smrg src = ureg_negate(ureg_abs(src)); 12207ec681f3Smrg break; 12217ec681f3Smrg case NINED3DSPSM_NEG: 12227ec681f3Smrg src = ureg_negate(src); 12237ec681f3Smrg break; 12247ec681f3Smrg case NINED3DSPSM_BIAS: 12257ec681f3Smrg tmp = tx_scratch(tx); 12267ec681f3Smrg ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f)); 12277ec681f3Smrg src = ureg_src(tmp); 12287ec681f3Smrg break; 12297ec681f3Smrg case NINED3DSPSM_BIASNEG: 12307ec681f3Smrg tmp = tx_scratch(tx); 12317ec681f3Smrg ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src)); 12327ec681f3Smrg src = ureg_src(tmp); 12337ec681f3Smrg break; 12347ec681f3Smrg case NINED3DSPSM_NOT: 12357ec681f3Smrg if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) { 12367ec681f3Smrg tmp = tx_scratch(tx); 12377ec681f3Smrg ureg_NOT(ureg, tmp, src); 12387ec681f3Smrg src = ureg_src(tmp); 12397ec681f3Smrg break; 12407ec681f3Smrg } else { /* predicate */ 12417ec681f3Smrg tmp = tx_scratch(tx); 12427ec681f3Smrg ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), 
ureg_negate(src)); 12437ec681f3Smrg src = ureg_src(tmp); 12447ec681f3Smrg } 12457ec681f3Smrg FALLTHROUGH; 12467ec681f3Smrg case NINED3DSPSM_COMP: 12477ec681f3Smrg tmp = tx_scratch(tx); 12487ec681f3Smrg ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 12497ec681f3Smrg src = ureg_src(tmp); 12507ec681f3Smrg break; 12517ec681f3Smrg case NINED3DSPSM_DZ: 12527ec681f3Smrg case NINED3DSPSM_DW: 12537ec681f3Smrg /* Already handled*/ 12547ec681f3Smrg break; 12557ec681f3Smrg case NINED3DSPSM_SIGN: 12567ec681f3Smrg tmp = tx_scratch(tx); 12577ec681f3Smrg ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 12587ec681f3Smrg src = ureg_src(tmp); 12597ec681f3Smrg break; 12607ec681f3Smrg case NINED3DSPSM_SIGNNEG: 12617ec681f3Smrg tmp = tx_scratch(tx); 12627ec681f3Smrg ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f)); 12637ec681f3Smrg src = ureg_src(tmp); 12647ec681f3Smrg break; 12657ec681f3Smrg case NINED3DSPSM_X2: 12667ec681f3Smrg tmp = tx_scratch(tx); 12677ec681f3Smrg ureg_ADD(ureg, tmp, src, src); 12687ec681f3Smrg src = ureg_src(tmp); 12697ec681f3Smrg break; 12707ec681f3Smrg case NINED3DSPSM_X2NEG: 12717ec681f3Smrg tmp = tx_scratch(tx); 12727ec681f3Smrg ureg_ADD(ureg, tmp, src, src); 12737ec681f3Smrg src = ureg_negate(ureg_src(tmp)); 12747ec681f3Smrg break; 12757ec681f3Smrg default: 12767ec681f3Smrg assert(param->mod == NINED3DSPSM_NONE); 12777ec681f3Smrg break; 12787ec681f3Smrg } 12797ec681f3Smrg 12807ec681f3Smrg return src; 12817ec681f3Smrg} 12827ec681f3Smrg 12837ec681f3Smrgstatic struct ureg_dst 12847ec681f3Smrg_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 12857ec681f3Smrg{ 12867ec681f3Smrg struct ureg_dst dst; 12877ec681f3Smrg 12887ec681f3Smrg switch (param->file) 12897ec681f3Smrg { 12907ec681f3Smrg case D3DSPR_TEMP: 12917ec681f3Smrg assert(!param->rel); 12927ec681f3Smrg tx_temp_alloc(tx, param->idx); 12937ec681f3Smrg dst = tx->regs.r[param->idx]; 12947ec681f3Smrg break; 
12957ec681f3Smrg /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 12967ec681f3Smrg case D3DSPR_ADDR: 12977ec681f3Smrg assert(!param->rel); 12987ec681f3Smrg if (tx->version.major < 2 && !IS_VS) { 12997ec681f3Smrg if (ureg_dst_is_undef(tx->regs.tS[param->idx])) 13007ec681f3Smrg tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg); 13017ec681f3Smrg dst = tx->regs.tS[param->idx]; 13027ec681f3Smrg } else 13037ec681f3Smrg if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */ 13047ec681f3Smrg tx_texcoord_alloc(tx, param->idx); 13057ec681f3Smrg dst = ureg_dst(tx->regs.vT[param->idx]); 13067ec681f3Smrg } else { 13077ec681f3Smrg tx_addr_alloc(tx, param->idx); 13087ec681f3Smrg dst = tx->regs.a0; 13097ec681f3Smrg } 13107ec681f3Smrg break; 13117ec681f3Smrg case D3DSPR_RASTOUT: 13127ec681f3Smrg assert(!param->rel); 13137ec681f3Smrg switch (param->idx) { 13147ec681f3Smrg case 0: 13157ec681f3Smrg if (ureg_dst_is_undef(tx->regs.oPos)) 13167ec681f3Smrg tx->regs.oPos = 13177ec681f3Smrg ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 13187ec681f3Smrg dst = tx->regs.oPos; 13197ec681f3Smrg break; 13207ec681f3Smrg case 1: 13217ec681f3Smrg if (ureg_dst_is_undef(tx->regs.oFog)) 13227ec681f3Smrg tx->regs.oFog = 13237ec681f3Smrg ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16)); 13247ec681f3Smrg dst = tx->regs.oFog; 13257ec681f3Smrg break; 13267ec681f3Smrg case 2: 13277ec681f3Smrg if (ureg_dst_is_undef(tx->regs.oPts)) 13287ec681f3Smrg tx->regs.oPts = ureg_DECL_temporary(tx->ureg); 13297ec681f3Smrg dst = tx->regs.oPts; 13307ec681f3Smrg break; 13317ec681f3Smrg default: 13327ec681f3Smrg assert(0); 13337ec681f3Smrg break; 13347ec681f3Smrg } 13357ec681f3Smrg break; 13367ec681f3Smrg /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */ 13377ec681f3Smrg case D3DSPR_OUTPUT: 13387ec681f3Smrg if (tx->version.major < 3) { 13397ec681f3Smrg assert(!param->rel); 13407ec681f3Smrg dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx); 13417ec681f3Smrg } else { 
13427ec681f3Smrg assert(!param->rel); /* TODO */ 13437ec681f3Smrg assert(param->idx < ARRAY_SIZE(tx->regs.o)); 13447ec681f3Smrg dst = tx->regs.o[param->idx]; 13457ec681f3Smrg } 13467ec681f3Smrg break; 13477ec681f3Smrg case D3DSPR_ATTROUT: /* VS */ 13487ec681f3Smrg case D3DSPR_COLOROUT: /* PS */ 13497ec681f3Smrg assert(param->idx >= 0 && param->idx < 4); 13507ec681f3Smrg assert(!param->rel); 13517ec681f3Smrg tx->info->rt_mask |= 1 << param->idx; 13527ec681f3Smrg if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { 13537ec681f3Smrg /* ps < 3: oCol[0] will have fog blending afterward */ 13547ec681f3Smrg if (!IS_VS && tx->version.major < 3 && param->idx == 0) { 13557ec681f3Smrg tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); 13567ec681f3Smrg } else { 13577ec681f3Smrg tx->regs.oCol[param->idx] = 13587ec681f3Smrg ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); 13597ec681f3Smrg } 13607ec681f3Smrg } 13617ec681f3Smrg dst = tx->regs.oCol[param->idx]; 13627ec681f3Smrg if (IS_VS && tx->version.major < 3) 13637ec681f3Smrg dst = ureg_saturate(dst); 13647ec681f3Smrg break; 13657ec681f3Smrg case D3DSPR_DEPTHOUT: 13667ec681f3Smrg assert(!param->rel); 13677ec681f3Smrg if (ureg_dst_is_undef(tx->regs.oDepth)) 13687ec681f3Smrg tx->regs.oDepth = 13697ec681f3Smrg ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, 13707ec681f3Smrg TGSI_WRITEMASK_Z, 0, 1); 13717ec681f3Smrg dst = tx->regs.oDepth; /* XXX: must write .z component */ 13727ec681f3Smrg break; 13737ec681f3Smrg case D3DSPR_PREDICATE: 13747ec681f3Smrg if (ureg_dst_is_undef(tx->regs.predicate)) 13757ec681f3Smrg tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 13767ec681f3Smrg dst = tx->regs.predicate; 13777ec681f3Smrg break; 13787ec681f3Smrg case D3DSPR_TEMPFLOAT16: 13797ec681f3Smrg DBG("unhandled D3DSPR: %u\n", param->file); 13807ec681f3Smrg break; 13817ec681f3Smrg default: 13827ec681f3Smrg assert(!"invalid dst D3DSPR"); 13837ec681f3Smrg break; 13847ec681f3Smrg } 13857ec681f3Smrg if (param->rel) 
13867ec681f3Smrg dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel)); 13877ec681f3Smrg 13887ec681f3Smrg if (param->mask != NINED3DSP_WRITEMASK_ALL) 13897ec681f3Smrg dst = ureg_writemask(dst, param->mask); 13907ec681f3Smrg if (param->mod & NINED3DSPDM_SATURATE) 13917ec681f3Smrg dst = ureg_saturate(dst); 13927ec681f3Smrg 13937ec681f3Smrg if (tx->predicated_activated) { 13947ec681f3Smrg tx->regs.predicate_dst = dst; 13957ec681f3Smrg dst = tx->regs.predicate_tmp; 13967ec681f3Smrg } 13977ec681f3Smrg 13987ec681f3Smrg return dst; 13997ec681f3Smrg} 14007ec681f3Smrg 14017ec681f3Smrgstatic struct ureg_dst 14027ec681f3Smrgtx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 14037ec681f3Smrg{ 14047ec681f3Smrg if (param->shift) { 14057ec681f3Smrg tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask); 14067ec681f3Smrg return tx->regs.tdst; 14077ec681f3Smrg } 14087ec681f3Smrg return _tx_dst_param(tx, param); 14097ec681f3Smrg} 14107ec681f3Smrg 14117ec681f3Smrgstatic void 14127ec681f3Smrgtx_apply_dst0_modifiers(struct shader_translator *tx) 14137ec681f3Smrg{ 14147ec681f3Smrg struct ureg_dst rdst; 14157ec681f3Smrg float f; 14167ec681f3Smrg 14177ec681f3Smrg if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL) 14187ec681f3Smrg return; 14197ec681f3Smrg rdst = _tx_dst_param(tx, &tx->insn.dst[0]); 14207ec681f3Smrg 14217ec681f3Smrg assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */ 14227ec681f3Smrg 14237ec681f3Smrg if (tx->insn.dst[0].shift < 0) 14247ec681f3Smrg f = 1.0f / (1 << -tx->insn.dst[0].shift); 14257ec681f3Smrg else 14267ec681f3Smrg f = 1 << tx->insn.dst[0].shift; 14277ec681f3Smrg 14287ec681f3Smrg ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f)); 14297ec681f3Smrg} 14307ec681f3Smrg 14317ec681f3Smrgstatic struct ureg_src 14327ec681f3Smrgtx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param) 14337ec681f3Smrg{ 14347ec681f3Smrg struct 
ureg_src src; 14357ec681f3Smrg 14367ec681f3Smrg assert(!param->shift); 14377ec681f3Smrg assert(!(param->mod & NINED3DSPDM_SATURATE)); 14387ec681f3Smrg 14397ec681f3Smrg switch (param->file) { 14407ec681f3Smrg case D3DSPR_INPUT: 14417ec681f3Smrg if (IS_VS) { 14427ec681f3Smrg src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 14437ec681f3Smrg } else { 14447ec681f3Smrg assert(!param->rel); 14457ec681f3Smrg assert(param->idx < ARRAY_SIZE(tx->regs.v)); 14467ec681f3Smrg src = tx->regs.v[param->idx]; 14477ec681f3Smrg } 14487ec681f3Smrg break; 14497ec681f3Smrg default: 14507ec681f3Smrg src = ureg_src(tx_dst_param(tx, param)); 14517ec681f3Smrg break; 14527ec681f3Smrg } 14537ec681f3Smrg if (param->rel) 14547ec681f3Smrg src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 14557ec681f3Smrg 14567ec681f3Smrg if (!param->mask) 14577ec681f3Smrg WARN("mask is 0, using identity swizzle\n"); 14587ec681f3Smrg 14597ec681f3Smrg if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) { 14607ec681f3Smrg char s[4]; 14617ec681f3Smrg int n; 14627ec681f3Smrg int c; 14637ec681f3Smrg for (n = 0, c = 0; c < 4; ++c) 14647ec681f3Smrg if (param->mask & (1 << c)) 14657ec681f3Smrg s[n++] = c; 14667ec681f3Smrg assert(n); 14677ec681f3Smrg for (c = n; c < 4; ++c) 14687ec681f3Smrg s[c] = s[n - 1]; 14697ec681f3Smrg src = ureg_swizzle(src, s[0], s[1], s[2], s[3]); 14707ec681f3Smrg } 14717ec681f3Smrg return src; 14727ec681f3Smrg} 14737ec681f3Smrg 14747ec681f3Smrgstatic HRESULT 14757ec681f3SmrgNineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n) 14767ec681f3Smrg{ 14777ec681f3Smrg struct ureg_program *ureg = tx->ureg; 14787ec681f3Smrg struct ureg_dst dst; 14797ec681f3Smrg struct ureg_src src[2]; 14807ec681f3Smrg struct sm1_src_param *src_mat = &tx->insn.src[1]; 14817ec681f3Smrg unsigned i; 14827ec681f3Smrg 14837ec681f3Smrg dst = tx_dst_param(tx, &tx->insn.dst[0]); 14847ec681f3Smrg src[0] = tx_src_param(tx, &tx->insn.src[0]); 14857ec681f3Smrg 
14867ec681f3Smrg /* One dot product per matrix row: row i of the matrix (consecutive registers starting at src[1]) produces destination channel i. */ for (i = 0; i < n; i++) 14877ec681f3Smrg { 14887ec681f3Smrg const unsigned m = (1 << i); 14897ec681f3Smrg 14907ec681f3Smrg src[1] = tx_src_param(tx, src_mat); 14917ec681f3Smrg src_mat->idx++; 14927ec681f3Smrg 14937ec681f3Smrg if (!(dst.WriteMask & m)) 14947ec681f3Smrg continue; 14957ec681f3Smrg 14967ec681f3Smrg /* XXX: src == dst case ? */ 14977ec681f3Smrg 14987ec681f3Smrg switch (k) { 14997ec681f3Smrg case 3: 15007ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]); 15017ec681f3Smrg break; 15027ec681f3Smrg case 4: 15037ec681f3Smrg ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]); 15047ec681f3Smrg break; 15057ec681f3Smrg default: 15067ec681f3Smrg /* Report the requested MkxN shape (k, n); m is only the write-mask bit of the current row. */ DBG("invalid operation: M%ux%u\n", k, n); 15077ec681f3Smrg break; 15087ec681f3Smrg } 15097ec681f3Smrg } 15107ec681f3Smrg 15117ec681f3Smrg return D3D_OK; 15127ec681f3Smrg} 15137ec681f3Smrg 15147ec681f3Smrg#define VNOTSUPPORTED 0, 0 15157ec681f3Smrg#define V(maj, min) (((maj) << 8) | (min)) 15167ec681f3Smrg 15177ec681f3Smrgstatic inline const char * 15187ec681f3Smrgd3dsio_to_string( unsigned opcode ) 15197ec681f3Smrg{ 15207ec681f3Smrg static const char *names[] = { 15217ec681f3Smrg "NOP", 15227ec681f3Smrg "MOV", 15237ec681f3Smrg "ADD", 15247ec681f3Smrg "SUB", 15257ec681f3Smrg "MAD", 15267ec681f3Smrg "MUL", 15277ec681f3Smrg "RCP", 15287ec681f3Smrg "RSQ", 15297ec681f3Smrg "DP3", 15307ec681f3Smrg "DP4", 15317ec681f3Smrg "MIN", 15327ec681f3Smrg "MAX", 15337ec681f3Smrg "SLT", 15347ec681f3Smrg "SGE", 15357ec681f3Smrg "EXP", 15367ec681f3Smrg "LOG", 15377ec681f3Smrg "LIT", 15387ec681f3Smrg "DST", 15397ec681f3Smrg "LRP", 15407ec681f3Smrg "FRC", 15417ec681f3Smrg "M4x4", 15427ec681f3Smrg "M4x3", 15437ec681f3Smrg "M3x4", 15447ec681f3Smrg "M3x3", 15457ec681f3Smrg "M3x2", 15467ec681f3Smrg "CALL", 15477ec681f3Smrg "CALLNZ", 15487ec681f3Smrg "LOOP", 15497ec681f3Smrg "RET", 15507ec681f3Smrg "ENDLOOP", 15517ec681f3Smrg "LABEL", 15527ec681f3Smrg "DCL", 15537ec681f3Smrg "POW", 15547ec681f3Smrg "CRS", 
15557ec681f3Smrg "SGN", 15567ec681f3Smrg "ABS", 15577ec681f3Smrg "NRM", 15587ec681f3Smrg "SINCOS", 15597ec681f3Smrg "REP", 15607ec681f3Smrg "ENDREP", 15617ec681f3Smrg "IF", 15627ec681f3Smrg "IFC", 15637ec681f3Smrg "ELSE", 15647ec681f3Smrg "ENDIF", 15657ec681f3Smrg "BREAK", 15667ec681f3Smrg "BREAKC", 15677ec681f3Smrg "MOVA", 15687ec681f3Smrg "DEFB", 15697ec681f3Smrg "DEFI", 15707ec681f3Smrg NULL, 15717ec681f3Smrg NULL, 15727ec681f3Smrg NULL, 15737ec681f3Smrg NULL, 15747ec681f3Smrg NULL, 15757ec681f3Smrg NULL, 15767ec681f3Smrg NULL, 15777ec681f3Smrg NULL, 15787ec681f3Smrg NULL, 15797ec681f3Smrg NULL, 15807ec681f3Smrg NULL, 15817ec681f3Smrg NULL, 15827ec681f3Smrg NULL, 15837ec681f3Smrg NULL, 15847ec681f3Smrg NULL, 15857ec681f3Smrg "TEXCOORD", 15867ec681f3Smrg "TEXKILL", 15877ec681f3Smrg "TEX", 15887ec681f3Smrg "TEXBEM", 15897ec681f3Smrg "TEXBEML", 15907ec681f3Smrg "TEXREG2AR", 15917ec681f3Smrg "TEXREG2GB", 15927ec681f3Smrg "TEXM3x2PAD", 15937ec681f3Smrg "TEXM3x2TEX", 15947ec681f3Smrg "TEXM3x3PAD", 15957ec681f3Smrg "TEXM3x3TEX", 15967ec681f3Smrg NULL, 15977ec681f3Smrg "TEXM3x3SPEC", 15987ec681f3Smrg "TEXM3x3VSPEC", 15997ec681f3Smrg "EXPP", 16007ec681f3Smrg "LOGP", 16017ec681f3Smrg "CND", 16027ec681f3Smrg "DEF", 16037ec681f3Smrg "TEXREG2RGB", 16047ec681f3Smrg "TEXDP3TEX", 16057ec681f3Smrg "TEXM3x2DEPTH", 16067ec681f3Smrg "TEXDP3", 16077ec681f3Smrg "TEXM3x3", 16087ec681f3Smrg "TEXDEPTH", 16097ec681f3Smrg "CMP", 16107ec681f3Smrg "BEM", 16117ec681f3Smrg "DP2ADD", 16127ec681f3Smrg "DSX", 16137ec681f3Smrg "DSY", 16147ec681f3Smrg "TEXLDD", 16157ec681f3Smrg "SETP", 16167ec681f3Smrg "TEXLDL", 16177ec681f3Smrg "BREAKP" 16187ec681f3Smrg }; 16197ec681f3Smrg 16207ec681f3Smrg if (opcode < ARRAY_SIZE(names)) return names[opcode]; 16217ec681f3Smrg 16227ec681f3Smrg switch (opcode) { 16237ec681f3Smrg case D3DSIO_PHASE: return "PHASE"; 16247ec681f3Smrg case D3DSIO_COMMENT: return "COMMENT"; 16257ec681f3Smrg case D3DSIO_END: return "END"; 16267ec681f3Smrg default: 16277ec681f3Smrg return 
NULL; 16287ec681f3Smrg } 16297ec681f3Smrg} 16307ec681f3Smrg 16317ec681f3Smrg#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL } 16327ec681f3Smrg#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \ 16337ec681f3Smrg (inst).vert_version.max | \ 16347ec681f3Smrg (inst).frag_version.min | \ 16357ec681f3Smrg (inst).frag_version.max) 16367ec681f3Smrg 16377ec681f3Smrg#define SPECIAL(name) \ 16387ec681f3Smrg NineTranslateInstruction_##name 16397ec681f3Smrg 16407ec681f3Smrg#define DECL_SPECIAL(name) \ 16417ec681f3Smrg static HRESULT \ 16427ec681f3Smrg NineTranslateInstruction_##name( struct shader_translator *tx ) 16437ec681f3Smrg 16447ec681f3Smrgstatic HRESULT 16457ec681f3SmrgNineTranslateInstruction_Generic(struct shader_translator *); 16467ec681f3Smrg 16477ec681f3SmrgDECL_SPECIAL(NOP) 16487ec681f3Smrg{ 16497ec681f3Smrg /* Nothing to do. NOP was used to avoid hangs 16507ec681f3Smrg * with very old d3d drivers. */ 16517ec681f3Smrg return D3D_OK; 16527ec681f3Smrg} 16537ec681f3Smrg 16547ec681f3SmrgDECL_SPECIAL(SUB) 16557ec681f3Smrg{ 16567ec681f3Smrg struct ureg_program *ureg = tx->ureg; 16577ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 16587ec681f3Smrg struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 16597ec681f3Smrg struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 16607ec681f3Smrg 16617ec681f3Smrg ureg_ADD(ureg, dst, src0, ureg_negate(src1)); 16627ec681f3Smrg return D3D_OK; 16637ec681f3Smrg} 16647ec681f3Smrg 16657ec681f3SmrgDECL_SPECIAL(ABS) 16667ec681f3Smrg{ 16677ec681f3Smrg struct ureg_program *ureg = tx->ureg; 16687ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 16697ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 16707ec681f3Smrg 16717ec681f3Smrg ureg_MOV(ureg, dst, ureg_abs(src)); 16727ec681f3Smrg return D3D_OK; 16737ec681f3Smrg} 16747ec681f3Smrg 16757ec681f3SmrgDECL_SPECIAL(XPD) 16767ec681f3Smrg{ 16777ec681f3Smrg struct ureg_program *ureg = tx->ureg; 
16787ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 16797ec681f3Smrg struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 16807ec681f3Smrg struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 16817ec681f3Smrg 16827ec681f3Smrg ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 16837ec681f3Smrg ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, 16847ec681f3Smrg TGSI_SWIZZLE_X, 0), 16857ec681f3Smrg ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 16867ec681f3Smrg TGSI_SWIZZLE_Y, 0)); 16877ec681f3Smrg ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 16887ec681f3Smrg ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 16897ec681f3Smrg TGSI_SWIZZLE_Y, 0), 16907ec681f3Smrg ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y, 16917ec681f3Smrg TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)), 16927ec681f3Smrg ureg_src(dst)); 16937ec681f3Smrg ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), 16947ec681f3Smrg ureg_imm1f(ureg, 1)); 16957ec681f3Smrg return D3D_OK; 16967ec681f3Smrg} 16977ec681f3Smrg 16987ec681f3SmrgDECL_SPECIAL(M4x4) 16997ec681f3Smrg{ 17007ec681f3Smrg return NineTranslateInstruction_Mkxn(tx, 4, 4); 17017ec681f3Smrg} 17027ec681f3Smrg 17037ec681f3SmrgDECL_SPECIAL(M4x3) 17047ec681f3Smrg{ 17057ec681f3Smrg return NineTranslateInstruction_Mkxn(tx, 4, 3); 17067ec681f3Smrg} 17077ec681f3Smrg 17087ec681f3SmrgDECL_SPECIAL(M3x4) 17097ec681f3Smrg{ 17107ec681f3Smrg return NineTranslateInstruction_Mkxn(tx, 3, 4); 17117ec681f3Smrg} 17127ec681f3Smrg 17137ec681f3SmrgDECL_SPECIAL(M3x3) 17147ec681f3Smrg{ 17157ec681f3Smrg return NineTranslateInstruction_Mkxn(tx, 3, 3); 17167ec681f3Smrg} 17177ec681f3Smrg 17187ec681f3SmrgDECL_SPECIAL(M3x2) 17197ec681f3Smrg{ 17207ec681f3Smrg return NineTranslateInstruction_Mkxn(tx, 3, 2); 17217ec681f3Smrg} 17227ec681f3Smrg 17237ec681f3SmrgDECL_SPECIAL(CMP) 17247ec681f3Smrg{ 17257ec681f3Smrg ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]), 17267ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 17277ec681f3Smrg 
tx_src_param(tx, &tx->insn.src[2]), 17287ec681f3Smrg tx_src_param(tx, &tx->insn.src[1])); 17297ec681f3Smrg return D3D_OK; 17307ec681f3Smrg} 17317ec681f3Smrg 17327ec681f3SmrgDECL_SPECIAL(CND) 17337ec681f3Smrg{ 17347ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 17357ec681f3Smrg struct ureg_dst cgt; 17367ec681f3Smrg struct ureg_src cnd; 17377ec681f3Smrg 17387ec681f3Smrg /* the coissue flag was a tip for compilers to advise to 17397ec681f3Smrg * execute two operations at the same time, in cases 17407ec681f3Smrg * the two executions had same dst with different channels. 17417ec681f3Smrg * It has no effect on current hw. However it seems CND 17427ec681f3Smrg * is affected. The handling of this very specific case 17437ec681f3Smrg * handled below mimick wine behaviour */ 17447ec681f3Smrg if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) { 17457ec681f3Smrg ureg_MOV(tx->ureg, 17467ec681f3Smrg dst, tx_src_param(tx, &tx->insn.src[1])); 17477ec681f3Smrg return D3D_OK; 17487ec681f3Smrg } 17497ec681f3Smrg 17507ec681f3Smrg cnd = tx_src_param(tx, &tx->insn.src[0]); 17517ec681f3Smrg cgt = tx_scratch(tx); 17527ec681f3Smrg 17537ec681f3Smrg if (tx->version.major == 1 && tx->version.minor < 4) 17547ec681f3Smrg cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); 17557ec681f3Smrg 17567ec681f3Smrg ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f)); 17577ec681f3Smrg 17587ec681f3Smrg ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)), 17597ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]), 17607ec681f3Smrg tx_src_param(tx, &tx->insn.src[2])); 17617ec681f3Smrg return D3D_OK; 17627ec681f3Smrg} 17637ec681f3Smrg 17647ec681f3SmrgDECL_SPECIAL(CALL) 17657ec681f3Smrg{ 17667ec681f3Smrg assert(tx->insn.src[0].idx < tx->num_inst_labels); 17677ec681f3Smrg ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]); 17687ec681f3Smrg return D3D_OK; 17697ec681f3Smrg} 17707ec681f3Smrg 17717ec681f3SmrgDECL_SPECIAL(CALLNZ) 
17727ec681f3Smrg{ 17737ec681f3Smrg struct ureg_program *ureg = tx->ureg; 17747ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 17757ec681f3Smrg 17767ec681f3Smrg if (!tx->native_integers) 17777ec681f3Smrg ureg_IF(ureg, src, tx_cond(tx)); 17787ec681f3Smrg else 17797ec681f3Smrg ureg_UIF(ureg, src, tx_cond(tx)); 17807ec681f3Smrg ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]); 17817ec681f3Smrg tx_endcond(tx); 17827ec681f3Smrg ureg_ENDIF(ureg); 17837ec681f3Smrg return D3D_OK; 17847ec681f3Smrg} 17857ec681f3Smrg 17867ec681f3SmrgDECL_SPECIAL(LOOP) 17877ec681f3Smrg{ 17887ec681f3Smrg struct ureg_program *ureg = tx->ureg; 17897ec681f3Smrg unsigned *label; 17907ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 17917ec681f3Smrg struct ureg_dst ctr; 17927ec681f3Smrg struct ureg_dst tmp; 17937ec681f3Smrg struct ureg_src ctrx; 17947ec681f3Smrg 17957ec681f3Smrg label = tx_bgnloop(tx); 17967ec681f3Smrg ctr = tx_get_loopctr(tx, TRUE); 17977ec681f3Smrg ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 17987ec681f3Smrg 17997ec681f3Smrg /* src: num_iterations - start_value of al - step for al - 0 */ 18007ec681f3Smrg ureg_MOV(ureg, ctr, src); 18017ec681f3Smrg ureg_BGNLOOP(tx->ureg, label); 18027ec681f3Smrg tmp = tx_scratch_scalar(tx); 18037ec681f3Smrg /* Initially ctr.x contains the number of iterations. 18047ec681f3Smrg * ctr.y will contain the updated value of al. 18057ec681f3Smrg * We decrease ctr.x at the end of every iteration, 18067ec681f3Smrg * and stop when it reaches 0. 
*/ 18077ec681f3Smrg 18087ec681f3Smrg if (!tx->native_integers) { 18097ec681f3Smrg /* case src and ctr contain floats */ 18107ec681f3Smrg /* to avoid precision issue, we stop when ctr <= 0.5 */ 18117ec681f3Smrg ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 18127ec681f3Smrg ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 18137ec681f3Smrg } else { 18147ec681f3Smrg /* case src and ctr contain integers */ 18157ec681f3Smrg ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 18167ec681f3Smrg ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 18177ec681f3Smrg } 18187ec681f3Smrg ureg_BRK(ureg); 18197ec681f3Smrg tx_endcond(tx); 18207ec681f3Smrg ureg_ENDIF(ureg); 18217ec681f3Smrg return D3D_OK; 18227ec681f3Smrg} 18237ec681f3Smrg 18247ec681f3SmrgDECL_SPECIAL(RET) 18257ec681f3Smrg{ 18267ec681f3Smrg /* RET as a last instruction could be safely ignored. 18277ec681f3Smrg * Remove it to prevent crashes/warnings in case underlying 18287ec681f3Smrg * driver doesn't implement arbitrary returns. 18297ec681f3Smrg */ 18307ec681f3Smrg if (*(tx->parse_next) != NINED3DSP_END) { 18317ec681f3Smrg ureg_RET(tx->ureg); 18327ec681f3Smrg } 18337ec681f3Smrg return D3D_OK; 18347ec681f3Smrg} 18357ec681f3Smrg 18367ec681f3SmrgDECL_SPECIAL(ENDLOOP) 18377ec681f3Smrg{ 18387ec681f3Smrg struct ureg_program *ureg = tx->ureg; 18397ec681f3Smrg struct ureg_dst ctr = tx_get_loopctr(tx, TRUE); 18407ec681f3Smrg struct ureg_dst dst_ctrx, dst_al; 18417ec681f3Smrg struct ureg_src src_ctr, al_counter; 18427ec681f3Smrg 18437ec681f3Smrg dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 18447ec681f3Smrg dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1); 18457ec681f3Smrg src_ctr = ureg_src(ctr); 18467ec681f3Smrg al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z); 18477ec681f3Smrg 18487ec681f3Smrg /* ctr.x -= 1 18497ec681f3Smrg * ctr.y (aL) += step */ 18507ec681f3Smrg if (!tx->native_integers) { 18517ec681f3Smrg ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 18527ec681f3Smrg ureg_ADD(ureg, dst_al, 
src_ctr, al_counter); 18537ec681f3Smrg } else { 18547ec681f3Smrg ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 18557ec681f3Smrg ureg_UADD(ureg, dst_al, src_ctr, al_counter); 18567ec681f3Smrg } 18577ec681f3Smrg ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 18587ec681f3Smrg return D3D_OK; 18597ec681f3Smrg} 18607ec681f3Smrg 18617ec681f3Smrg/* LABEL: record the current ureg instruction number as the target of label src[0].idx, growing the label table when the index is not covered yet. Returns D3D_OK, or E_OUTOFMEMORY if the table cannot be grown. */ DECL_SPECIAL(LABEL) 18627ec681f3Smrg{ 18637ec681f3Smrg unsigned k = tx->num_inst_labels; 18647ec681f3Smrg unsigned n = tx->insn.src[0].idx; unsigned i; 18657ec681f3Smrg assert(n < 2048); 18667ec681f3Smrg if (n >= k) { /* Slot n must exist, so the table needs n + 1 entries; check the result before replacing the table pointer. */ 18677ec681f3Smrg void *grown = REALLOC(tx->inst_labels, 18687ec681f3Smrg k * sizeof(tx->inst_labels[0]), 18697ec681f3Smrg (n + 1) * sizeof(tx->inst_labels[0])); if (!grown) return E_OUTOFMEMORY; tx->inst_labels = grown; /* Labels k..n-1 have not been defined yet: give them a known value. */ for (i = k; i < n; ++i) tx->inst_labels[i] = 0; /* Keep the recorded size in sync with the allocation. */ tx->num_inst_labels = n + 1; } 18707ec681f3Smrg 18717ec681f3Smrg tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg); 18727ec681f3Smrg return D3D_OK; 18737ec681f3Smrg} 18747ec681f3Smrg 18757ec681f3SmrgDECL_SPECIAL(SINCOS) 18767ec681f3Smrg{ 18777ec681f3Smrg struct ureg_program *ureg = tx->ureg; 18787ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 18797ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 18807ec681f3Smrg struct ureg_dst tmp = tx_scratch_scalar(tx); 18817ec681f3Smrg 18827ec681f3Smrg assert(!(dst.WriteMask & 0xc)); 18837ec681f3Smrg 18847ec681f3Smrg /* Copying to a temporary register avoids src/dst aliasing. 18857ec681f3Smrg * src is supposed to have replicated swizzle. 
*/ 18867ec681f3Smrg ureg_MOV(ureg, tmp, src); 18877ec681f3Smrg 18887ec681f3Smrg /* z undefined, w untouched */ 18897ec681f3Smrg ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), 18907ec681f3Smrg tx_src_scalar(tmp)); 18917ec681f3Smrg ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), 18927ec681f3Smrg tx_src_scalar(tmp)); 18937ec681f3Smrg return D3D_OK; 18947ec681f3Smrg} 18957ec681f3Smrg 18967ec681f3SmrgDECL_SPECIAL(SGN) 18977ec681f3Smrg{ 18987ec681f3Smrg ureg_SSG(tx->ureg, 18997ec681f3Smrg tx_dst_param(tx, &tx->insn.dst[0]), 19007ec681f3Smrg tx_src_param(tx, &tx->insn.src[0])); 19017ec681f3Smrg return D3D_OK; 19027ec681f3Smrg} 19037ec681f3Smrg 19047ec681f3SmrgDECL_SPECIAL(REP) 19057ec681f3Smrg{ 19067ec681f3Smrg struct ureg_program *ureg = tx->ureg; 19077ec681f3Smrg unsigned *label; 19087ec681f3Smrg struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); 19097ec681f3Smrg struct ureg_dst ctr; 19107ec681f3Smrg struct ureg_dst tmp; 19117ec681f3Smrg struct ureg_src ctrx; 19127ec681f3Smrg 19137ec681f3Smrg label = tx_bgnloop(tx); 19147ec681f3Smrg ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); 19157ec681f3Smrg ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 19167ec681f3Smrg 19177ec681f3Smrg /* NOTE: rep must be constant, so we don't have to save the count */ 19187ec681f3Smrg assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); 19197ec681f3Smrg 19207ec681f3Smrg /* rep: num_iterations - 0 - 0 - 0 */ 19217ec681f3Smrg ureg_MOV(ureg, ctr, rep); 19227ec681f3Smrg ureg_BGNLOOP(ureg, label); 19237ec681f3Smrg tmp = tx_scratch_scalar(tx); 19247ec681f3Smrg /* Initially ctr.x contains the number of iterations. 19257ec681f3Smrg * We decrease ctr.x at the end of every iteration, 19267ec681f3Smrg * and stop when it reaches 0. 
*/ 19277ec681f3Smrg 19287ec681f3Smrg if (!tx->native_integers) { 19297ec681f3Smrg /* case src and ctr contain floats */ 19307ec681f3Smrg /* to avoid precision issue, we stop when ctr <= 0.5 */ 19317ec681f3Smrg ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 19327ec681f3Smrg ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 19337ec681f3Smrg } else { 19347ec681f3Smrg /* case src and ctr contain integers */ 19357ec681f3Smrg ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 19367ec681f3Smrg ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 19377ec681f3Smrg } 19387ec681f3Smrg ureg_BRK(ureg); 19397ec681f3Smrg tx_endcond(tx); 19407ec681f3Smrg ureg_ENDIF(ureg); 19417ec681f3Smrg 19427ec681f3Smrg return D3D_OK; 19437ec681f3Smrg} 19447ec681f3Smrg 19457ec681f3SmrgDECL_SPECIAL(ENDREP) 19467ec681f3Smrg{ 19477ec681f3Smrg struct ureg_program *ureg = tx->ureg; 19487ec681f3Smrg struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); 19497ec681f3Smrg struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 19507ec681f3Smrg struct ureg_src src_ctr = ureg_src(ctr); 19517ec681f3Smrg 19527ec681f3Smrg /* ctr.x -= 1 */ 19537ec681f3Smrg if (!tx->native_integers) 19547ec681f3Smrg ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 19557ec681f3Smrg else 19567ec681f3Smrg ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 19577ec681f3Smrg 19587ec681f3Smrg ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 19597ec681f3Smrg return D3D_OK; 19607ec681f3Smrg} 19617ec681f3Smrg 19627ec681f3SmrgDECL_SPECIAL(ENDIF) 19637ec681f3Smrg{ 19647ec681f3Smrg tx_endcond(tx); 19657ec681f3Smrg ureg_ENDIF(tx->ureg); 19667ec681f3Smrg return D3D_OK; 19677ec681f3Smrg} 19687ec681f3Smrg 19697ec681f3SmrgDECL_SPECIAL(IF) 19707ec681f3Smrg{ 19717ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 19727ec681f3Smrg 19737ec681f3Smrg if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL) 19747ec681f3Smrg ureg_UIF(tx->ureg, src, tx_cond(tx)); 19757ec681f3Smrg else 19767ec681f3Smrg 
ureg_IF(tx->ureg, src, tx_cond(tx)); 19777ec681f3Smrg 19787ec681f3Smrg return D3D_OK; 19797ec681f3Smrg} 19807ec681f3Smrg 19817ec681f3Smrgstatic inline unsigned 19827ec681f3Smrgsm1_insn_flags_to_tgsi_setop(BYTE flags) 19837ec681f3Smrg{ 19847ec681f3Smrg switch (flags) { 19857ec681f3Smrg case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT; 19867ec681f3Smrg case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ; 19877ec681f3Smrg case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE; 19887ec681f3Smrg case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT; 19897ec681f3Smrg case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE; 19907ec681f3Smrg case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; 19917ec681f3Smrg default: 19927ec681f3Smrg assert(!"invalid comparison flags"); 19937ec681f3Smrg return TGSI_OPCODE_SGT; 19947ec681f3Smrg } 19957ec681f3Smrg} 19967ec681f3Smrg 19977ec681f3SmrgDECL_SPECIAL(IFC) 19987ec681f3Smrg{ 19997ec681f3Smrg const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 20007ec681f3Smrg struct ureg_src src[2]; 20017ec681f3Smrg struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 20027ec681f3Smrg src[0] = tx_src_param(tx, &tx->insn.src[0]); 20037ec681f3Smrg src[1] = tx_src_param(tx, &tx->insn.src[1]); 20047ec681f3Smrg ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 20057ec681f3Smrg ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 20067ec681f3Smrg return D3D_OK; 20077ec681f3Smrg} 20087ec681f3Smrg 20097ec681f3SmrgDECL_SPECIAL(ELSE) 20107ec681f3Smrg{ 20117ec681f3Smrg ureg_ELSE(tx->ureg, tx_elsecond(tx)); 20127ec681f3Smrg return D3D_OK; 20137ec681f3Smrg} 20147ec681f3Smrg 20157ec681f3SmrgDECL_SPECIAL(BREAKC) 20167ec681f3Smrg{ 20177ec681f3Smrg const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 20187ec681f3Smrg struct ureg_src src[2]; 20197ec681f3Smrg struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 20207ec681f3Smrg src[0] = tx_src_param(tx, &tx->insn.src[0]); 
20217ec681f3Smrg src[1] = tx_src_param(tx, &tx->insn.src[1]); 20227ec681f3Smrg ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 20237ec681f3Smrg ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 20247ec681f3Smrg ureg_BRK(tx->ureg); 20257ec681f3Smrg tx_endcond(tx); 20267ec681f3Smrg ureg_ENDIF(tx->ureg); 20277ec681f3Smrg return D3D_OK; 20287ec681f3Smrg} 20297ec681f3Smrg 20307ec681f3Smrgstatic const char *sm1_declusage_names[] = 20317ec681f3Smrg{ 20327ec681f3Smrg [D3DDECLUSAGE_POSITION] = "POSITION", 20337ec681f3Smrg [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT", 20347ec681f3Smrg [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES", 20357ec681f3Smrg [D3DDECLUSAGE_NORMAL] = "NORMAL", 20367ec681f3Smrg [D3DDECLUSAGE_PSIZE] = "PSIZE", 20377ec681f3Smrg [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD", 20387ec681f3Smrg [D3DDECLUSAGE_TANGENT] = "TANGENT", 20397ec681f3Smrg [D3DDECLUSAGE_BINORMAL] = "BINORMAL", 20407ec681f3Smrg [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR", 20417ec681f3Smrg [D3DDECLUSAGE_POSITIONT] = "POSITIONT", 20427ec681f3Smrg [D3DDECLUSAGE_COLOR] = "COLOR", 20437ec681f3Smrg [D3DDECLUSAGE_FOG] = "FOG", 20447ec681f3Smrg [D3DDECLUSAGE_DEPTH] = "DEPTH", 20457ec681f3Smrg [D3DDECLUSAGE_SAMPLE] = "SAMPLE" 20467ec681f3Smrg}; 20477ec681f3Smrg 20487ec681f3Smrgstatic inline unsigned 20497ec681f3Smrgsm1_to_nine_declusage(struct sm1_semantic *dcl) 20507ec681f3Smrg{ 20517ec681f3Smrg return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); 20527ec681f3Smrg} 20537ec681f3Smrg 20547ec681f3Smrgstatic void 20557ec681f3Smrgsm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, 20567ec681f3Smrg boolean tc, 20577ec681f3Smrg struct sm1_semantic *dcl) 20587ec681f3Smrg{ 20597ec681f3Smrg BYTE index = dcl->usage_idx; 20607ec681f3Smrg 20617ec681f3Smrg /* For everything that is not matching to a TGSI_SEMANTIC_****, 20627ec681f3Smrg * we match to a TGSI_SEMANTIC_GENERIC with index. 
20637ec681f3Smrg * 20647ec681f3Smrg * The index can be anything UINT16 and usage_idx is BYTE, 20657ec681f3Smrg * so we can fit everything. It doesn't matter if indices 20667ec681f3Smrg * are close together or low. 20677ec681f3Smrg * 20687ec681f3Smrg * 20697ec681f3Smrg * POSITION >= 1: 10 * index + 7 20707ec681f3Smrg * COLOR >= 2: 10 * (index-1) + 8 20717ec681f3Smrg * FOG: 16 20727ec681f3Smrg * TEXCOORD[0..15]: index 20737ec681f3Smrg * BLENDWEIGHT: 10 * index + 19 20747ec681f3Smrg * BLENDINDICES: 10 * index + 20 20757ec681f3Smrg * NORMAL: 10 * index + 21 20767ec681f3Smrg * TANGENT: 10 * index + 22 20777ec681f3Smrg * BINORMAL: 10 * index + 23 20787ec681f3Smrg * TESSFACTOR: 10 * index + 24 20797ec681f3Smrg */ 20807ec681f3Smrg 20817ec681f3Smrg switch (dcl->usage) { 20827ec681f3Smrg case D3DDECLUSAGE_POSITION: 20837ec681f3Smrg case D3DDECLUSAGE_POSITIONT: 20847ec681f3Smrg case D3DDECLUSAGE_DEPTH: 20857ec681f3Smrg if (index == 0) { 20867ec681f3Smrg sem->Name = TGSI_SEMANTIC_POSITION; 20877ec681f3Smrg sem->Index = 0; 20887ec681f3Smrg } else { 20897ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 20907ec681f3Smrg sem->Index = 10 * index + 7; 20917ec681f3Smrg } 20927ec681f3Smrg break; 20937ec681f3Smrg case D3DDECLUSAGE_COLOR: 20947ec681f3Smrg if (index < 2) { 20957ec681f3Smrg sem->Name = TGSI_SEMANTIC_COLOR; 20967ec681f3Smrg sem->Index = index; 20977ec681f3Smrg } else { 20987ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 20997ec681f3Smrg sem->Index = 10 * (index-1) + 8; 21007ec681f3Smrg } 21017ec681f3Smrg break; 21027ec681f3Smrg case D3DDECLUSAGE_FOG: 21037ec681f3Smrg assert(index == 0); 21047ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21057ec681f3Smrg sem->Index = 16; 21067ec681f3Smrg break; 21077ec681f3Smrg case D3DDECLUSAGE_PSIZE: 21087ec681f3Smrg assert(index == 0); 21097ec681f3Smrg sem->Name = TGSI_SEMANTIC_PSIZE; 21107ec681f3Smrg sem->Index = 0; 21117ec681f3Smrg break; 21127ec681f3Smrg case D3DDECLUSAGE_TEXCOORD: 21137ec681f3Smrg assert(index < 16); 21147ec681f3Smrg 
if (index < 8 && tc) 21157ec681f3Smrg sem->Name = TGSI_SEMANTIC_TEXCOORD; 21167ec681f3Smrg else 21177ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21187ec681f3Smrg sem->Index = index; 21197ec681f3Smrg break; 21207ec681f3Smrg case D3DDECLUSAGE_BLENDWEIGHT: 21217ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21227ec681f3Smrg sem->Index = 10 * index + 19; 21237ec681f3Smrg break; 21247ec681f3Smrg case D3DDECLUSAGE_BLENDINDICES: 21257ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21267ec681f3Smrg sem->Index = 10 * index + 20; 21277ec681f3Smrg break; 21287ec681f3Smrg case D3DDECLUSAGE_NORMAL: 21297ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21307ec681f3Smrg sem->Index = 10 * index + 21; 21317ec681f3Smrg break; 21327ec681f3Smrg case D3DDECLUSAGE_TANGENT: 21337ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21347ec681f3Smrg sem->Index = 10 * index + 22; 21357ec681f3Smrg break; 21367ec681f3Smrg case D3DDECLUSAGE_BINORMAL: 21377ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21387ec681f3Smrg sem->Index = 10 * index + 23; 21397ec681f3Smrg break; 21407ec681f3Smrg case D3DDECLUSAGE_TESSFACTOR: 21417ec681f3Smrg sem->Name = TGSI_SEMANTIC_GENERIC; 21427ec681f3Smrg sem->Index = 10 * index + 24; 21437ec681f3Smrg break; 21447ec681f3Smrg case D3DDECLUSAGE_SAMPLE: 21457ec681f3Smrg sem->Name = TGSI_SEMANTIC_COUNT; 21467ec681f3Smrg sem->Index = 0; 21477ec681f3Smrg break; 21487ec681f3Smrg default: 21497ec681f3Smrg unreachable("Invalid DECLUSAGE."); 21507ec681f3Smrg break; 21517ec681f3Smrg } 21527ec681f3Smrg} 21537ec681f3Smrg 21547ec681f3Smrg#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT) 21557ec681f3Smrg#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) 21567ec681f3Smrg#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) 21577ec681f3Smrg#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) 21587ec681f3Smrgstatic inline unsigned 21597ec681f3Smrgd3dstt_to_tgsi_tex(BYTE sampler_type) 21607ec681f3Smrg{ 21617ec681f3Smrg switch (sampler_type) 
{ 21627ec681f3Smrg case NINED3DSTT_1D: return TGSI_TEXTURE_1D; 21637ec681f3Smrg case NINED3DSTT_2D: return TGSI_TEXTURE_2D; 21647ec681f3Smrg case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D; 21657ec681f3Smrg case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE; 21667ec681f3Smrg default: 21677ec681f3Smrg assert(0); 21687ec681f3Smrg return TGSI_TEXTURE_UNKNOWN; 21697ec681f3Smrg } 21707ec681f3Smrg} 21717ec681f3Smrgstatic inline unsigned 21727ec681f3Smrgd3dstt_to_tgsi_tex_shadow(BYTE sampler_type) 21737ec681f3Smrg{ 21747ec681f3Smrg switch (sampler_type) { 21757ec681f3Smrg case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D; 21767ec681f3Smrg case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D; 21777ec681f3Smrg case NINED3DSTT_VOLUME: 21787ec681f3Smrg case NINED3DSTT_CUBE: 21797ec681f3Smrg default: 21807ec681f3Smrg assert(0); 21817ec681f3Smrg return TGSI_TEXTURE_UNKNOWN; 21827ec681f3Smrg } 21837ec681f3Smrg} 21847ec681f3Smrgstatic inline unsigned 21857ec681f3Smrgps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) 21867ec681f3Smrg{ 21877ec681f3Smrg boolean shadow = !!(info->sampler_mask_shadow & (1 << stage)); 21887ec681f3Smrg switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { 21897ec681f3Smrg case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D; 21907ec681f3Smrg case 0: return shadow ? 
TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D; 21917ec681f3Smrg case 3: return TGSI_TEXTURE_3D; 21927ec681f3Smrg default: 21937ec681f3Smrg return TGSI_TEXTURE_CUBE; 21947ec681f3Smrg } 21957ec681f3Smrg} 21967ec681f3Smrg 21977ec681f3Smrgstatic const char * 21987ec681f3Smrgsm1_sampler_type_name(BYTE sampler_type) 21997ec681f3Smrg{ 22007ec681f3Smrg switch (sampler_type) { 22017ec681f3Smrg case NINED3DSTT_1D: return "1D"; 22027ec681f3Smrg case NINED3DSTT_2D: return "2D"; 22037ec681f3Smrg case NINED3DSTT_VOLUME: return "VOLUME"; 22047ec681f3Smrg case NINED3DSTT_CUBE: return "CUBE"; 22057ec681f3Smrg default: 22067ec681f3Smrg return "(D3DSTT_?)"; 22077ec681f3Smrg } 22087ec681f3Smrg} 22097ec681f3Smrg 22107ec681f3Smrgstatic inline unsigned 22117ec681f3Smrgnine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) 22127ec681f3Smrg{ 22137ec681f3Smrg switch (sem->Name) { 22147ec681f3Smrg case TGSI_SEMANTIC_POSITION: 22157ec681f3Smrg case TGSI_SEMANTIC_NORMAL: 22167ec681f3Smrg return TGSI_INTERPOLATE_LINEAR; 22177ec681f3Smrg case TGSI_SEMANTIC_BCOLOR: 22187ec681f3Smrg case TGSI_SEMANTIC_COLOR: 22197ec681f3Smrg return TGSI_INTERPOLATE_COLOR; 22207ec681f3Smrg case TGSI_SEMANTIC_FOG: 22217ec681f3Smrg case TGSI_SEMANTIC_GENERIC: 22227ec681f3Smrg case TGSI_SEMANTIC_TEXCOORD: 22237ec681f3Smrg case TGSI_SEMANTIC_CLIPDIST: 22247ec681f3Smrg case TGSI_SEMANTIC_CLIPVERTEX: 22257ec681f3Smrg return TGSI_INTERPOLATE_PERSPECTIVE; 22267ec681f3Smrg case TGSI_SEMANTIC_EDGEFLAG: 22277ec681f3Smrg case TGSI_SEMANTIC_FACE: 22287ec681f3Smrg case TGSI_SEMANTIC_INSTANCEID: 22297ec681f3Smrg case TGSI_SEMANTIC_PCOORD: 22307ec681f3Smrg case TGSI_SEMANTIC_PRIMID: 22317ec681f3Smrg case TGSI_SEMANTIC_PSIZE: 22327ec681f3Smrg case TGSI_SEMANTIC_VERTEXID: 22337ec681f3Smrg return TGSI_INTERPOLATE_CONSTANT; 22347ec681f3Smrg default: 22357ec681f3Smrg assert(0); 22367ec681f3Smrg return TGSI_INTERPOLATE_CONSTANT; 22377ec681f3Smrg } 22387ec681f3Smrg} 22397ec681f3Smrg 22407ec681f3SmrgDECL_SPECIAL(DCL) 22417ec681f3Smrg{ 
22427ec681f3Smrg struct ureg_program *ureg = tx->ureg; 22437ec681f3Smrg boolean is_input; 22447ec681f3Smrg boolean is_sampler; 22457ec681f3Smrg struct tgsi_declaration_semantic tgsi; 22467ec681f3Smrg struct sm1_semantic sem; 22477ec681f3Smrg sm1_read_semantic(tx, &sem); 22487ec681f3Smrg 22497ec681f3Smrg is_input = sem.reg.file == D3DSPR_INPUT; 22507ec681f3Smrg is_sampler = 22517ec681f3Smrg sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER; 22527ec681f3Smrg 22537ec681f3Smrg /* Debug dump of the declaration: sampler type, named usage (SM3+), or raw usage numbers. */ DUMP("DCL "); 22547ec681f3Smrg sm1_dump_dst_param(&sem.reg); 22557ec681f3Smrg if (is_sampler) 22567ec681f3Smrg DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type)); 22577ec681f3Smrg else 22587ec681f3Smrg if (tx->version.major >= 3) 22597ec681f3Smrg /* sem.usage is read from the shader token: only index the name table when it is in range. */ DUMP(" %s%i\n", sem.usage < ARRAY_SIZE(sm1_declusage_names) ? sm1_declusage_names[sem.usage] : "(D3DDECLUSAGE_?)", sem.usage_idx); 22607ec681f3Smrg else 22617ec681f3Smrg if (sem.usage | sem.usage_idx) 22627ec681f3Smrg DUMP(" %u[%u]\n", sem.usage, sem.usage_idx); 22637ec681f3Smrg else 22647ec681f3Smrg DUMP("\n"); 22657ec681f3Smrg 22667ec681f3Smrg if (is_sampler) { 22677ec681f3Smrg const unsigned m = 1 << sem.reg.idx; 22687ec681f3Smrg ureg_DECL_sampler(ureg, sem.reg.idx); 22697ec681f3Smrg tx->info->sampler_mask |= m; 22707ec681f3Smrg tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ? 
22717ec681f3Smrg d3dstt_to_tgsi_tex_shadow(sem.sampler_type) : 22727ec681f3Smrg d3dstt_to_tgsi_tex(sem.sampler_type); 22737ec681f3Smrg return D3D_OK; 22747ec681f3Smrg } 22757ec681f3Smrg 22767ec681f3Smrg sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem); 22777ec681f3Smrg if (IS_VS) { 22787ec681f3Smrg if (is_input) { 22797ec681f3Smrg /* linkage outside of shader with vertex declaration */ 22807ec681f3Smrg ureg_DECL_vs_input(ureg, sem.reg.idx); 22817ec681f3Smrg assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); 22827ec681f3Smrg tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); 22837ec681f3Smrg tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); 22847ec681f3Smrg /* NOTE: preserving order in case of indirect access */ 22857ec681f3Smrg } else 22867ec681f3Smrg if (tx->version.major >= 3) { 22877ec681f3Smrg /* SM2 output semantic determined by file */ 22887ec681f3Smrg assert(sem.reg.mask != 0); 22897ec681f3Smrg if (sem.usage == D3DDECLUSAGE_POSITIONT) 22907ec681f3Smrg tx->info->position_t = TRUE; 22917ec681f3Smrg assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o)); 22927ec681f3Smrg assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); 22937ec681f3Smrg tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( 22947ec681f3Smrg ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); 22957ec681f3Smrg nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); 22967ec681f3Smrg if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { 22977ec681f3Smrg tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; 22987ec681f3Smrg tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 22997ec681f3Smrg tx->regs.oPos = tx->regs.o[sem.reg.idx]; 23007ec681f3Smrg } 23017ec681f3Smrg 23027ec681f3Smrg if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { 23037ec681f3Smrg tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 23047ec681f3Smrg tx->regs.oPts = tx->regs.o[sem.reg.idx]; 23057ec681f3Smrg } 23067ec681f3Smrg 
} 23077ec681f3Smrg } else { 23087ec681f3Smrg if (is_input && tx->version.major >= 3) { 23097ec681f3Smrg unsigned interp_location = 0; 23107ec681f3Smrg /* SM3 only, SM2 input semantic determined by file */ 23117ec681f3Smrg assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v)); 23127ec681f3Smrg assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing"); 23137ec681f3Smrg /* PositionT and tessfactor forbidden */ 23147ec681f3Smrg if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR) 23157ec681f3Smrg return D3DERR_INVALIDCALL; 23167ec681f3Smrg 23177ec681f3Smrg if (tgsi.Name == TGSI_SEMANTIC_POSITION) { 23187ec681f3Smrg /* Position0 is forbidden (likely because vPos already does that) */ 23197ec681f3Smrg if (sem.usage == D3DDECLUSAGE_POSITION) 23207ec681f3Smrg return D3DERR_INVALIDCALL; 23217ec681f3Smrg /* Following code is for depth */ 23227ec681f3Smrg tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); 23237ec681f3Smrg return D3D_OK; 23247ec681f3Smrg } 23257ec681f3Smrg 23267ec681f3Smrg if (sem.reg.mod & NINED3DSPDM_CENTROID || 23277ec681f3Smrg (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) 23287ec681f3Smrg interp_location = TGSI_INTERPOLATE_LOC_CENTROID; 23297ec681f3Smrg 23307ec681f3Smrg tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid( 23317ec681f3Smrg ureg, tgsi.Name, tgsi.Index, 23327ec681f3Smrg nine_tgsi_to_interp_mode(&tgsi), 23337ec681f3Smrg interp_location, 0, 1); 23347ec681f3Smrg } else 23357ec681f3Smrg if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ 23367ec681f3Smrg /* FragColor or FragDepth */ 23377ec681f3Smrg assert(sem.reg.mask != 0); 23387ec681f3Smrg ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 23397ec681f3Smrg 0, 1); 23407ec681f3Smrg } 23417ec681f3Smrg } 23427ec681f3Smrg return D3D_OK; 23437ec681f3Smrg} 23447ec681f3Smrg 23457ec681f3SmrgDECL_SPECIAL(DEF) 23467ec681f3Smrg{ 23477ec681f3Smrg tx_set_lconstf(tx, tx->insn.dst[0].idx, 
tx->insn.src[0].imm.f); 23487ec681f3Smrg return D3D_OK; 23497ec681f3Smrg} 23507ec681f3Smrg 23517ec681f3SmrgDECL_SPECIAL(DEFB) 23527ec681f3Smrg{ 23537ec681f3Smrg tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b); 23547ec681f3Smrg return D3D_OK; 23557ec681f3Smrg} 23567ec681f3Smrg 23577ec681f3SmrgDECL_SPECIAL(DEFI) 23587ec681f3Smrg{ 23597ec681f3Smrg tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i); 23607ec681f3Smrg return D3D_OK; 23617ec681f3Smrg} 23627ec681f3Smrg 23637ec681f3SmrgDECL_SPECIAL(POW) 23647ec681f3Smrg{ 23657ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 23667ec681f3Smrg struct ureg_src src[2] = { 23677ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 23687ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]) 23697ec681f3Smrg }; 23707ec681f3Smrg ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]); 23717ec681f3Smrg return D3D_OK; 23727ec681f3Smrg} 23737ec681f3Smrg 23747ec681f3Smrg/* Tests results on Win 10: 23757ec681f3Smrg * NV (NVIDIA GeForce GT 635M) 23767ec681f3Smrg * AMD (AMD Radeon HD 7730M) 23777ec681f3Smrg * INTEL (Intel(R) HD Graphics 4000) 23787ec681f3Smrg * PS2 and PS3: 23797ec681f3Smrg * RCP and RSQ can generate inf on NV and AMD. 23807ec681f3Smrg * RCP and RSQ are clamped on INTEL (+- FLT_MAX), 23817ec681f3Smrg * NV: log not clamped 23827ec681f3Smrg * AMD: log(0) is -FLT_MAX (but log(inf) is inf) 23837ec681f3Smrg * INTEL: log(0) is -FLT_MAX and log(inf) is 127 23847ec681f3Smrg * All devices have 0*anything = 0 23857ec681f3Smrg * 23867ec681f3Smrg * INTEL VS2 and VS3: same behaviour. 23877ec681f3Smrg * Some differences VS2 and VS3 for constants defined with inf/NaN. 23887ec681f3Smrg * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change, 23897ec681f3Smrg * VS2 seems to clamp to zero (may be test failure). 
23907ec681f3Smrg * AMD VS2: unknown, VS3: very likely behaviour of PS3 23917ec681f3Smrg * NV VS2 and VS3: very likely behaviour of PS3 23927ec681f3Smrg * For both, Inf in VS becomes NaN is PS 23937ec681f3Smrg * "Very likely" because the test was less extensive. 23947ec681f3Smrg * 23957ec681f3Smrg * Thus all clamping can be removed for shaders 2 and 3, 23967ec681f3Smrg * as long as 0*anything = 0. 23977ec681f3Smrg * Else clamps to enforce 0*anything = 0 (anything being then 23987ec681f3Smrg * neither inf or NaN, the user being unlikely to pass them 23997ec681f3Smrg * as constant). 24007ec681f3Smrg * The status for VS1 and PS1 is unknown. 24017ec681f3Smrg */ 24027ec681f3Smrg 24037ec681f3SmrgDECL_SPECIAL(RCP) 24047ec681f3Smrg{ 24057ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24067ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24077ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 24087ec681f3Smrg struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx); 24097ec681f3Smrg ureg_RCP(ureg, tmp, src); 24107ec681f3Smrg if (!tx->mul_zero_wins) { 24117ec681f3Smrg /* FLT_MAX has issues with Rayman */ 24127ec681f3Smrg ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp)); 24137ec681f3Smrg ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp)); 24147ec681f3Smrg } 24157ec681f3Smrg return D3D_OK; 24167ec681f3Smrg} 24177ec681f3Smrg 24187ec681f3SmrgDECL_SPECIAL(RSQ) 24197ec681f3Smrg{ 24207ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24217ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24227ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 24237ec681f3Smrg struct ureg_dst tmp = tx->mul_zero_wins ? 
dst : tx_scratch(tx); 24247ec681f3Smrg ureg_RSQ(ureg, tmp, ureg_abs(src)); 24257ec681f3Smrg if (!tx->mul_zero_wins) 24267ec681f3Smrg ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); 24277ec681f3Smrg return D3D_OK; 24287ec681f3Smrg} 24297ec681f3Smrg 24307ec681f3SmrgDECL_SPECIAL(LOG) 24317ec681f3Smrg{ 24327ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24337ec681f3Smrg struct ureg_dst tmp = tx_scratch_scalar(tx); 24347ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24357ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 24367ec681f3Smrg ureg_LG2(ureg, tmp, ureg_abs(src)); 24377ec681f3Smrg if (tx->mul_zero_wins) { 24387ec681f3Smrg ureg_MOV(ureg, dst, tx_src_scalar(tmp)); 24397ec681f3Smrg } else { 24407ec681f3Smrg ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp)); 24417ec681f3Smrg } 24427ec681f3Smrg return D3D_OK; 24437ec681f3Smrg} 24447ec681f3Smrg 24457ec681f3SmrgDECL_SPECIAL(LIT) 24467ec681f3Smrg{ 24477ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24487ec681f3Smrg struct ureg_dst tmp = tx_scratch(tx); 24497ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24507ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 24517ec681f3Smrg ureg_LIT(ureg, tmp, src); 24527ec681f3Smrg /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 24537ec681f3Smrg * states that dst.z is 0 when src.y <= 0. Gallium definition can assign 24547ec681f3Smrg * it 0^0 if src.w=0, which value is driver dependent. 
*/ 24557ec681f3Smrg ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 24567ec681f3Smrg ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), 24577ec681f3Smrg ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); 24587ec681f3Smrg ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); 24597ec681f3Smrg return D3D_OK; 24607ec681f3Smrg} 24617ec681f3Smrg 24627ec681f3SmrgDECL_SPECIAL(NRM) 24637ec681f3Smrg{ 24647ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24657ec681f3Smrg struct ureg_dst tmp = tx_scratch_scalar(tx); 24667ec681f3Smrg struct ureg_src nrm = tx_src_scalar(tmp); 24677ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24687ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 24697ec681f3Smrg ureg_DP3(ureg, tmp, src, src); 24707ec681f3Smrg ureg_RSQ(ureg, tmp, nrm); 24717ec681f3Smrg if (!tx->mul_zero_wins) 24727ec681f3Smrg ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm); 24737ec681f3Smrg ureg_MUL(ureg, dst, src, nrm); 24747ec681f3Smrg return D3D_OK; 24757ec681f3Smrg} 24767ec681f3Smrg 24777ec681f3SmrgDECL_SPECIAL(DP2ADD) 24787ec681f3Smrg{ 24797ec681f3Smrg struct ureg_dst tmp = tx_scratch_scalar(tx); 24807ec681f3Smrg struct ureg_src dp2 = tx_src_scalar(tmp); 24817ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24827ec681f3Smrg struct ureg_src src[3]; 24837ec681f3Smrg int i; 24847ec681f3Smrg for (i = 0; i < 3; ++i) 24857ec681f3Smrg src[i] = tx_src_param(tx, &tx->insn.src[i]); 24867ec681f3Smrg assert_replicate_swizzle(&src[2]); 24877ec681f3Smrg 24887ec681f3Smrg ureg_DP2(tx->ureg, tmp, src[0], src[1]); 24897ec681f3Smrg ureg_ADD(tx->ureg, dst, src[2], dp2); 24907ec681f3Smrg 24917ec681f3Smrg return D3D_OK; 24927ec681f3Smrg} 24937ec681f3Smrg 24947ec681f3SmrgDECL_SPECIAL(TEXCOORD) 24957ec681f3Smrg{ 24967ec681f3Smrg struct ureg_program *ureg = tx->ureg; 24977ec681f3Smrg const unsigned s = tx->insn.dst[0].idx; 24987ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 24997ec681f3Smrg 
25007ec681f3Smrg tx_texcoord_alloc(tx, s); 25017ec681f3Smrg ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]); 25027ec681f3Smrg ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f)); 25037ec681f3Smrg 25047ec681f3Smrg return D3D_OK; 25057ec681f3Smrg} 25067ec681f3Smrg 25077ec681f3SmrgDECL_SPECIAL(TEXCOORD_ps14) 25087ec681f3Smrg{ 25097ec681f3Smrg struct ureg_program *ureg = tx->ureg; 25107ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 25117ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 25127ec681f3Smrg 25137ec681f3Smrg assert(tx->insn.src[0].file == D3DSPR_TEXTURE); 25147ec681f3Smrg 25157ec681f3Smrg ureg_MOV(ureg, dst, src); 25167ec681f3Smrg 25177ec681f3Smrg return D3D_OK; 25187ec681f3Smrg} 25197ec681f3Smrg 25207ec681f3SmrgDECL_SPECIAL(TEXKILL) 25217ec681f3Smrg{ 25227ec681f3Smrg struct ureg_src reg; 25237ec681f3Smrg 25247ec681f3Smrg if (tx->version.major > 1 || tx->version.minor > 3) { 25257ec681f3Smrg reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]); 25267ec681f3Smrg } else { 25277ec681f3Smrg tx_texcoord_alloc(tx, tx->insn.dst[0].idx); 25287ec681f3Smrg reg = tx->regs.vT[tx->insn.dst[0].idx]; 25297ec681f3Smrg } 25307ec681f3Smrg if (tx->version.major < 2) 25317ec681f3Smrg reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z)); 25327ec681f3Smrg ureg_KILL_IF(tx->ureg, reg); 25337ec681f3Smrg 25347ec681f3Smrg return D3D_OK; 25357ec681f3Smrg} 25367ec681f3Smrg 25377ec681f3SmrgDECL_SPECIAL(TEXBEM) 25387ec681f3Smrg{ 25397ec681f3Smrg struct ureg_program *ureg = tx->ureg; 25407ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 25417ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 25427ec681f3Smrg struct ureg_dst tmp, tmp2, texcoord; 25437ec681f3Smrg struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2; 25447ec681f3Smrg struct ureg_src bumpenvlscale, bumpenvloffset; 25457ec681f3Smrg const int m = tx->insn.dst[0].idx; 
25467ec681f3Smrg 25477ec681f3Smrg assert(tx->version.major == 1); 25487ec681f3Smrg 25497ec681f3Smrg sample = ureg_DECL_sampler(ureg, m); 25507ec681f3Smrg tx->info->sampler_mask |= 1 << m; 25517ec681f3Smrg 25527ec681f3Smrg tx_texcoord_alloc(tx, m); 25537ec681f3Smrg 25547ec681f3Smrg tmp = tx_scratch(tx); 25557ec681f3Smrg tmp2 = tx_scratch(tx); 25567ec681f3Smrg texcoord = tx_scratch(tx); 25577ec681f3Smrg /* 25587ec681f3Smrg * Bump-env-matrix: 25597ec681f3Smrg * 00 is X 25607ec681f3Smrg * 01 is Y 25617ec681f3Smrg * 10 is Z 25627ec681f3Smrg * 11 is W 25637ec681f3Smrg */ 25647ec681f3Smrg c8m = nine_float_constant_src(tx, 8+m); 25657ec681f3Smrg c16m2 = nine_float_constant_src(tx, 8+8+m/2); 25667ec681f3Smrg 25677ec681f3Smrg m00 = NINE_APPLY_SWIZZLE(c8m, X); 25687ec681f3Smrg m01 = NINE_APPLY_SWIZZLE(c8m, Y); 25697ec681f3Smrg m10 = NINE_APPLY_SWIZZLE(c8m, Z); 25707ec681f3Smrg m11 = NINE_APPLY_SWIZZLE(c8m, W); 25717ec681f3Smrg 25727ec681f3Smrg /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ 25737ec681f3Smrg if (m % 2 == 0) { 25747ec681f3Smrg bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X); 25757ec681f3Smrg bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y); 25767ec681f3Smrg } else { 25777ec681f3Smrg bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z); 25787ec681f3Smrg bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W); 25797ec681f3Smrg } 25807ec681f3Smrg 25817ec681f3Smrg apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); 25827ec681f3Smrg 25837ec681f3Smrg /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ 25847ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 25857ec681f3Smrg NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 25867ec681f3Smrg /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ 25877ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 25887ec681f3Smrg NINE_APPLY_SWIZZLE(src, Y), 25897ec681f3Smrg NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 25907ec681f3Smrg 25917ec681f3Smrg /* v' = 
TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ 25927ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 25937ec681f3Smrg NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 25947ec681f3Smrg /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ 25957ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 25967ec681f3Smrg NINE_APPLY_SWIZZLE(src, Y), 25977ec681f3Smrg NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 25987ec681f3Smrg 25997ec681f3Smrg /* Now the texture coordinates are in tmp.xy */ 26007ec681f3Smrg 26017ec681f3Smrg if (tx->insn.opcode == D3DSIO_TEXBEM) { 26027ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 26037ec681f3Smrg } else if (tx->insn.opcode == D3DSIO_TEXBEML) { 26047ec681f3Smrg /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ 26057ec681f3Smrg ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 26067ec681f3Smrg ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z), 26077ec681f3Smrg bumpenvlscale, bumpenvloffset); 26087ec681f3Smrg ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); 26097ec681f3Smrg } 26107ec681f3Smrg 26117ec681f3Smrg tx->info->bumpenvmat_needed = 1; 26127ec681f3Smrg 26137ec681f3Smrg return D3D_OK; 26147ec681f3Smrg} 26157ec681f3Smrg 26167ec681f3SmrgDECL_SPECIAL(TEXREG2AR) 26177ec681f3Smrg{ 26187ec681f3Smrg struct ureg_program *ureg = tx->ureg; 26197ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 26207ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 26217ec681f3Smrg struct ureg_src sample; 26227ec681f3Smrg const int m = tx->insn.dst[0].idx; 26237ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 26247ec681f3Smrg assert(m >= 0 && m > n); 26257ec681f3Smrg 26267ec681f3Smrg sample = ureg_DECL_sampler(ureg, m); 26277ec681f3Smrg tx->info->sampler_mask |= 1 << m; 26287ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), 
ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample); 26297ec681f3Smrg 26307ec681f3Smrg return D3D_OK; 26317ec681f3Smrg} 26327ec681f3Smrg 26337ec681f3SmrgDECL_SPECIAL(TEXREG2GB) 26347ec681f3Smrg{ 26357ec681f3Smrg struct ureg_program *ureg = tx->ureg; 26367ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 26377ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 26387ec681f3Smrg struct ureg_src sample; 26397ec681f3Smrg const int m = tx->insn.dst[0].idx; 26407ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 26417ec681f3Smrg assert(m >= 0 && m > n); 26427ec681f3Smrg 26437ec681f3Smrg sample = ureg_DECL_sampler(ureg, m); 26447ec681f3Smrg tx->info->sampler_mask |= 1 << m; 26457ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample); 26467ec681f3Smrg 26477ec681f3Smrg return D3D_OK; 26487ec681f3Smrg} 26497ec681f3Smrg 26507ec681f3SmrgDECL_SPECIAL(TEXM3x2PAD) 26517ec681f3Smrg{ 26527ec681f3Smrg return D3D_OK; /* this is just padding */ 26537ec681f3Smrg} 26547ec681f3Smrg 26557ec681f3SmrgDECL_SPECIAL(TEXM3x2TEX) 26567ec681f3Smrg{ 26577ec681f3Smrg struct ureg_program *ureg = tx->ureg; 26587ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 26597ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 26607ec681f3Smrg struct ureg_src sample; 26617ec681f3Smrg const int m = tx->insn.dst[0].idx - 1; 26627ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 26637ec681f3Smrg assert(m >= 0 && m > n); 26647ec681f3Smrg 26657ec681f3Smrg tx_texcoord_alloc(tx, m); 26667ec681f3Smrg tx_texcoord_alloc(tx, m+1); 26677ec681f3Smrg 26687ec681f3Smrg /* performs the matrix multiplication */ 26697ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 26707ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 26717ec681f3Smrg 26727ec681f3Smrg sample = ureg_DECL_sampler(ureg, m + 
1); 26737ec681f3Smrg tx->info->sampler_mask |= 1 << (m + 1); 26747ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample); 26757ec681f3Smrg 26767ec681f3Smrg return D3D_OK; 26777ec681f3Smrg} 26787ec681f3Smrg 26797ec681f3SmrgDECL_SPECIAL(TEXM3x3PAD) 26807ec681f3Smrg{ 26817ec681f3Smrg return D3D_OK; /* this is just padding */ 26827ec681f3Smrg} 26837ec681f3Smrg 26847ec681f3SmrgDECL_SPECIAL(TEXM3x3SPEC) 26857ec681f3Smrg{ 26867ec681f3Smrg struct ureg_program *ureg = tx->ureg; 26877ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 26887ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 26897ec681f3Smrg struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]); 26907ec681f3Smrg struct ureg_src sample; 26917ec681f3Smrg struct ureg_dst tmp; 26927ec681f3Smrg const int m = tx->insn.dst[0].idx - 2; 26937ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 26947ec681f3Smrg assert(m >= 0 && m > n); 26957ec681f3Smrg 26967ec681f3Smrg tx_texcoord_alloc(tx, m); 26977ec681f3Smrg tx_texcoord_alloc(tx, m+1); 26987ec681f3Smrg tx_texcoord_alloc(tx, m+2); 26997ec681f3Smrg 27007ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 27017ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 27027ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 27037ec681f3Smrg 27047ec681f3Smrg sample = ureg_DECL_sampler(ureg, m + 2); 27057ec681f3Smrg tx->info->sampler_mask |= 1 << (m + 2); 27067ec681f3Smrg tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 27077ec681f3Smrg 27087ec681f3Smrg /* At this step, dst = N = (u', w', z'). 
27097ec681f3Smrg * We want dst to be the texture sampled at (u'', w'', z''), with 27107ec681f3Smrg * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 27117ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 27127ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 27137ec681f3Smrg /* at this step tmp.x = 1/N.N */ 27147ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E); 27157ec681f3Smrg /* at this step tmp.y = N.E */ 27167ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 27177ec681f3Smrg /* at this step tmp.x = N.E/N.N */ 27187ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 27197ec681f3Smrg ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 27207ec681f3Smrg /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 27217ec681f3Smrg ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E)); 27227ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 27237ec681f3Smrg 27247ec681f3Smrg return D3D_OK; 27257ec681f3Smrg} 27267ec681f3Smrg 27277ec681f3SmrgDECL_SPECIAL(TEXREG2RGB) 27287ec681f3Smrg{ 27297ec681f3Smrg struct ureg_program *ureg = tx->ureg; 27307ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 27317ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 27327ec681f3Smrg struct ureg_src sample; 27337ec681f3Smrg const int m = tx->insn.dst[0].idx; 27347ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 27357ec681f3Smrg assert(m >= 0 && m > n); 27367ec681f3Smrg 27377ec681f3Smrg sample = ureg_DECL_sampler(ureg, m); 27387ec681f3Smrg tx->info->sampler_mask |= 1 << m; 27397ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample); 
27407ec681f3Smrg 27417ec681f3Smrg return D3D_OK; 27427ec681f3Smrg} 27437ec681f3Smrg 27447ec681f3SmrgDECL_SPECIAL(TEXDP3TEX) 27457ec681f3Smrg{ 27467ec681f3Smrg struct ureg_program *ureg = tx->ureg; 27477ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 27487ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 27497ec681f3Smrg struct ureg_dst tmp; 27507ec681f3Smrg struct ureg_src sample; 27517ec681f3Smrg const int m = tx->insn.dst[0].idx; 27527ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 27537ec681f3Smrg assert(m >= 0 && m > n); 27547ec681f3Smrg 27557ec681f3Smrg tx_texcoord_alloc(tx, m); 27567ec681f3Smrg 27577ec681f3Smrg tmp = tx_scratch(tx); 27587ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 27597ec681f3Smrg ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f)); 27607ec681f3Smrg 27617ec681f3Smrg sample = ureg_DECL_sampler(ureg, m); 27627ec681f3Smrg tx->info->sampler_mask |= 1 << m; 27637ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 27647ec681f3Smrg 27657ec681f3Smrg return D3D_OK; 27667ec681f3Smrg} 27677ec681f3Smrg 27687ec681f3SmrgDECL_SPECIAL(TEXM3x2DEPTH) 27697ec681f3Smrg{ 27707ec681f3Smrg struct ureg_program *ureg = tx->ureg; 27717ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 27727ec681f3Smrg struct ureg_dst tmp; 27737ec681f3Smrg const int m = tx->insn.dst[0].idx - 1; 27747ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 27757ec681f3Smrg assert(m >= 0 && m > n); 27767ec681f3Smrg 27777ec681f3Smrg tx_texcoord_alloc(tx, m); 27787ec681f3Smrg tx_texcoord_alloc(tx, m+1); 27797ec681f3Smrg 27807ec681f3Smrg tmp = tx_scratch(tx); 27817ec681f3Smrg 27827ec681f3Smrg /* performs the matrix multiplication */ 27837ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 27847ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), 
tx->regs.vT[m+1], src); 27857ec681f3Smrg 27867ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 27877ec681f3Smrg /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */ 27887ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z)); 27897ec681f3Smrg /* res = 'w' == 0 ? 1.0 : z/w */ 27907ec681f3Smrg ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), 27917ec681f3Smrg ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); 27927ec681f3Smrg /* replace the depth for depth testing with the result */ 27937ec681f3Smrg tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 27947ec681f3Smrg TGSI_WRITEMASK_Z, 0, 1); 27957ec681f3Smrg ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 27967ec681f3Smrg /* note that we write nothing to the destination, since it's disallowed to use it afterward */ 27977ec681f3Smrg return D3D_OK; 27987ec681f3Smrg} 27997ec681f3Smrg 28007ec681f3SmrgDECL_SPECIAL(TEXDP3) 28017ec681f3Smrg{ 28027ec681f3Smrg struct ureg_program *ureg = tx->ureg; 28037ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 28047ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 28057ec681f3Smrg const int m = tx->insn.dst[0].idx; 28067ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 28077ec681f3Smrg assert(m >= 0 && m > n); 28087ec681f3Smrg 28097ec681f3Smrg tx_texcoord_alloc(tx, m); 28107ec681f3Smrg 28117ec681f3Smrg ureg_DP3(ureg, dst, tx->regs.vT[m], src); 28127ec681f3Smrg 28137ec681f3Smrg return D3D_OK; 28147ec681f3Smrg} 28157ec681f3Smrg 28167ec681f3SmrgDECL_SPECIAL(TEXM3x3) 28177ec681f3Smrg{ 28187ec681f3Smrg struct ureg_program *ureg = tx->ureg; 28197ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 28207ec681f3Smrg struct 
ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 28217ec681f3Smrg struct ureg_src sample; 28227ec681f3Smrg struct ureg_dst E, tmp; 28237ec681f3Smrg const int m = tx->insn.dst[0].idx - 2; 28247ec681f3Smrg ASSERTED const int n = tx->insn.src[0].idx; 28257ec681f3Smrg assert(m >= 0 && m > n); 28267ec681f3Smrg 28277ec681f3Smrg tx_texcoord_alloc(tx, m); 28287ec681f3Smrg tx_texcoord_alloc(tx, m+1); 28297ec681f3Smrg tx_texcoord_alloc(tx, m+2); 28307ec681f3Smrg 28317ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 28327ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 28337ec681f3Smrg ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 28347ec681f3Smrg 28357ec681f3Smrg switch (tx->insn.opcode) { 28367ec681f3Smrg case D3DSIO_TEXM3x3: 28377ec681f3Smrg ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 28387ec681f3Smrg break; 28397ec681f3Smrg case D3DSIO_TEXM3x3TEX: 28407ec681f3Smrg sample = ureg_DECL_sampler(ureg, m + 2); 28417ec681f3Smrg tx->info->sampler_mask |= 1 << (m + 2); 28427ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample); 28437ec681f3Smrg break; 28447ec681f3Smrg case D3DSIO_TEXM3x3VSPEC: 28457ec681f3Smrg sample = ureg_DECL_sampler(ureg, m + 2); 28467ec681f3Smrg tx->info->sampler_mask |= 1 << (m + 2); 28477ec681f3Smrg E = tx_scratch(tx); 28487ec681f3Smrg tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 28497ec681f3Smrg ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W)); 28507ec681f3Smrg ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W)); 28517ec681f3Smrg ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W)); 28527ec681f3Smrg /* At this step, dst = N = (u', w', z'). 
28537ec681f3Smrg * We want dst to be the texture sampled at (u'', w'', z''), with 28547ec681f3Smrg * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 28557ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 28567ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 28577ec681f3Smrg /* at this step tmp.x = 1/N.N */ 28587ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E)); 28597ec681f3Smrg /* at this step tmp.y = N.E */ 28607ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 28617ec681f3Smrg /* at this step tmp.x = N.E/N.N */ 28627ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 28637ec681f3Smrg ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 28647ec681f3Smrg /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 28657ec681f3Smrg ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E))); 28667ec681f3Smrg ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 28677ec681f3Smrg break; 28687ec681f3Smrg default: 28697ec681f3Smrg return D3DERR_INVALIDCALL; 28707ec681f3Smrg } 28717ec681f3Smrg return D3D_OK; 28727ec681f3Smrg} 28737ec681f3Smrg 28747ec681f3SmrgDECL_SPECIAL(TEXDEPTH) 28757ec681f3Smrg{ 28767ec681f3Smrg struct ureg_program *ureg = tx->ureg; 28777ec681f3Smrg struct ureg_dst r5; 28787ec681f3Smrg struct ureg_src r5r, r5g; 28797ec681f3Smrg 28807ec681f3Smrg assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */ 28817ec681f3Smrg 28827ec681f3Smrg /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g. 
28837ec681f3Smrg * r5 won't be used afterward, thus we can use r5.ba */ 28847ec681f3Smrg r5 = tx->regs.r[5]; 28857ec681f3Smrg r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X); 28867ec681f3Smrg r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y); 28877ec681f3Smrg 28887ec681f3Smrg ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g); 28897ec681f3Smrg ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z)); 28907ec681f3Smrg /* r5.r = r/g */ 28917ec681f3Smrg ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), 28927ec681f3Smrg r5r, ureg_imm1f(ureg, 1.0f)); 28937ec681f3Smrg /* replace the depth for depth testing with the result */ 28947ec681f3Smrg tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 28957ec681f3Smrg TGSI_WRITEMASK_Z, 0, 1); 28967ec681f3Smrg ureg_MOV(ureg, tx->regs.oDepth, r5r); 28977ec681f3Smrg 28987ec681f3Smrg return D3D_OK; 28997ec681f3Smrg} 29007ec681f3Smrg 29017ec681f3SmrgDECL_SPECIAL(BEM) 29027ec681f3Smrg{ 29037ec681f3Smrg struct ureg_program *ureg = tx->ureg; 29047ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 29057ec681f3Smrg struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 29067ec681f3Smrg struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 29077ec681f3Smrg struct ureg_src m00, m01, m10, m11, c8m; 29087ec681f3Smrg const int m = tx->insn.dst[0].idx; 29097ec681f3Smrg struct ureg_dst tmp = tx_scratch(tx); 29107ec681f3Smrg /* 29117ec681f3Smrg * Bump-env-matrix: 29127ec681f3Smrg * 00 is X 29137ec681f3Smrg * 01 is Y 29147ec681f3Smrg * 10 is Z 29157ec681f3Smrg * 11 is W 29167ec681f3Smrg */ 29177ec681f3Smrg c8m = nine_float_constant_src(tx, 8+m); 29187ec681f3Smrg m00 = NINE_APPLY_SWIZZLE(c8m, X); 29197ec681f3Smrg m01 = NINE_APPLY_SWIZZLE(c8m, Y); 29207ec681f3Smrg m10 = NINE_APPLY_SWIZZLE(c8m, Z); 29217ec681f3Smrg m11 = NINE_APPLY_SWIZZLE(c8m, W); 29227ec681f3Smrg /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r 
*/ 29237ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 29247ec681f3Smrg NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); 29257ec681f3Smrg /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ 29267ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 29277ec681f3Smrg NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 29287ec681f3Smrg 29297ec681f3Smrg /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ 29307ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 29317ec681f3Smrg NINE_APPLY_SWIZZLE(src1, X), src0); 29327ec681f3Smrg /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ 29337ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 29347ec681f3Smrg NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 29357ec681f3Smrg ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); 29367ec681f3Smrg 29377ec681f3Smrg tx->info->bumpenvmat_needed = 1; 29387ec681f3Smrg 29397ec681f3Smrg return D3D_OK; 29407ec681f3Smrg} 29417ec681f3Smrg 29427ec681f3SmrgDECL_SPECIAL(TEXLD) 29437ec681f3Smrg{ 29447ec681f3Smrg struct ureg_program *ureg = tx->ureg; 29457ec681f3Smrg unsigned target; 29467ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 29477ec681f3Smrg struct ureg_src src[2] = { 29487ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 29497ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]) 29507ec681f3Smrg }; 29517ec681f3Smrg assert(tx->insn.src[1].idx >= 0 && 29527ec681f3Smrg tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 29537ec681f3Smrg target = tx->sampler_targets[tx->insn.src[1].idx]; 29547ec681f3Smrg 29557ec681f3Smrg if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 29567ec681f3Smrg return D3D_OK; 29577ec681f3Smrg 29587ec681f3Smrg switch (tx->insn.flags) { 29597ec681f3Smrg case 0: 29607ec681f3Smrg ureg_TEX(ureg, dst, target, src[0], src[1]); 29617ec681f3Smrg break; 
29627ec681f3Smrg case NINED3DSI_TEXLD_PROJECT: 29637ec681f3Smrg ureg_TXP(ureg, dst, target, src[0], src[1]); 29647ec681f3Smrg break; 29657ec681f3Smrg case NINED3DSI_TEXLD_BIAS: 29667ec681f3Smrg ureg_TXB(ureg, dst, target, src[0], src[1]); 29677ec681f3Smrg break; 29687ec681f3Smrg default: 29697ec681f3Smrg assert(0); 29707ec681f3Smrg return D3DERR_INVALIDCALL; 29717ec681f3Smrg } 29727ec681f3Smrg return D3D_OK; 29737ec681f3Smrg} 29747ec681f3Smrg 29757ec681f3SmrgDECL_SPECIAL(TEXLD_14) 29767ec681f3Smrg{ 29777ec681f3Smrg struct ureg_program *ureg = tx->ureg; 29787ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 29797ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 29807ec681f3Smrg const unsigned s = tx->insn.dst[0].idx; 29817ec681f3Smrg const unsigned t = ps1x_sampler_type(tx->info, s); 29827ec681f3Smrg 29837ec681f3Smrg tx->info->sampler_mask |= 1 << s; 29847ec681f3Smrg ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s)); 29857ec681f3Smrg 29867ec681f3Smrg return D3D_OK; 29877ec681f3Smrg} 29887ec681f3Smrg 29897ec681f3SmrgDECL_SPECIAL(TEX) 29907ec681f3Smrg{ 29917ec681f3Smrg struct ureg_program *ureg = tx->ureg; 29927ec681f3Smrg const unsigned s = tx->insn.dst[0].idx; 29937ec681f3Smrg const unsigned t = ps1x_sampler_type(tx->info, s); 29947ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 29957ec681f3Smrg struct ureg_src src[2]; 29967ec681f3Smrg 29977ec681f3Smrg tx_texcoord_alloc(tx, s); 29987ec681f3Smrg 29997ec681f3Smrg src[0] = tx->regs.vT[s]; 30007ec681f3Smrg src[1] = ureg_DECL_sampler(ureg, s); 30017ec681f3Smrg tx->info->sampler_mask |= 1 << s; 30027ec681f3Smrg 30037ec681f3Smrg TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); 30047ec681f3Smrg 30057ec681f3Smrg return D3D_OK; 30067ec681f3Smrg} 30077ec681f3Smrg 30087ec681f3SmrgDECL_SPECIAL(TEXLDD) 30097ec681f3Smrg{ 30107ec681f3Smrg unsigned target; 30117ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 30127ec681f3Smrg struct 
ureg_src src[4] = { 30137ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 30147ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]), 30157ec681f3Smrg tx_src_param(tx, &tx->insn.src[2]), 30167ec681f3Smrg tx_src_param(tx, &tx->insn.src[3]) 30177ec681f3Smrg }; 30187ec681f3Smrg assert(tx->insn.src[1].idx >= 0 && 30197ec681f3Smrg tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 30207ec681f3Smrg target = tx->sampler_targets[tx->insn.src[1].idx]; 30217ec681f3Smrg 30227ec681f3Smrg if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 30237ec681f3Smrg return D3D_OK; 30247ec681f3Smrg 30257ec681f3Smrg ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); 30267ec681f3Smrg return D3D_OK; 30277ec681f3Smrg} 30287ec681f3Smrg 30297ec681f3SmrgDECL_SPECIAL(TEXLDL) 30307ec681f3Smrg{ 30317ec681f3Smrg unsigned target; 30327ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 30337ec681f3Smrg struct ureg_src src[2] = { 30347ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 30357ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]) 30367ec681f3Smrg }; 30377ec681f3Smrg assert(tx->insn.src[1].idx >= 0 && 30387ec681f3Smrg tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 30397ec681f3Smrg target = tx->sampler_targets[tx->insn.src[1].idx]; 30407ec681f3Smrg 30417ec681f3Smrg if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 30427ec681f3Smrg return D3D_OK; 30437ec681f3Smrg 30447ec681f3Smrg ureg_TXL(tx->ureg, dst, target, src[0], src[1]); 30457ec681f3Smrg return D3D_OK; 30467ec681f3Smrg} 30477ec681f3Smrg 30487ec681f3SmrgDECL_SPECIAL(SETP) 30497ec681f3Smrg{ 30507ec681f3Smrg const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 30517ec681f3Smrg struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 30527ec681f3Smrg struct ureg_src src[2] = { 30537ec681f3Smrg tx_src_param(tx, &tx->insn.src[0]), 30547ec681f3Smrg tx_src_param(tx, &tx->insn.src[1]) 30557ec681f3Smrg }; 30567ec681f3Smrg ureg_insn(tx->ureg, cmp_op, &dst, 
1, src, 2, 0); 30577ec681f3Smrg return D3D_OK; 30587ec681f3Smrg} 30597ec681f3Smrg 30607ec681f3SmrgDECL_SPECIAL(BREAKP) 30617ec681f3Smrg{ 30627ec681f3Smrg struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 30637ec681f3Smrg ureg_IF(tx->ureg, src, tx_cond(tx)); 30647ec681f3Smrg ureg_BRK(tx->ureg); 30657ec681f3Smrg tx_endcond(tx); 30667ec681f3Smrg ureg_ENDIF(tx->ureg); 30677ec681f3Smrg return D3D_OK; 30687ec681f3Smrg} 30697ec681f3Smrg 30707ec681f3SmrgDECL_SPECIAL(PHASE) 30717ec681f3Smrg{ 30727ec681f3Smrg return D3D_OK; /* we don't care about phase */ 30737ec681f3Smrg} 30747ec681f3Smrg 30757ec681f3SmrgDECL_SPECIAL(COMMENT) 30767ec681f3Smrg{ 30777ec681f3Smrg return D3D_OK; /* nothing to do */ 30787ec681f3Smrg} 30797ec681f3Smrg 30807ec681f3Smrg 30817ec681f3Smrg#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \ 30827ec681f3Smrg { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h } 30837ec681f3Smrg 30847ec681f3Smrgstatic const struct sm1_op_info inst_table[] = 30857ec681f3Smrg{ 30867ec681f3Smrg _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */ 30877ec681f3Smrg _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), 30887ec681f3Smrg _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ 30897ec681f3Smrg _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ 30907ec681f3Smrg _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ 30917ec681f3Smrg _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ 30927ec681f3Smrg _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ 30937ec681f3Smrg _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ 30947ec681f3Smrg _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ 30957ec681f3Smrg _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ 30967ec681f3Smrg _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */ 30977ec681f3Smrg _OPI(MAX, MAX, V(0,0), 
V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */ 30987ec681f3Smrg _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */ 30997ec681f3Smrg _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ 31007ec681f3Smrg _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ 31017ec681f3Smrg _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ 31027ec681f3Smrg _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ 31037ec681f3Smrg _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ 31047ec681f3Smrg _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ 31057ec681f3Smrg _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ 31067ec681f3Smrg 31077ec681f3Smrg _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)), 31087ec681f3Smrg _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)), 31097ec681f3Smrg _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)), 31107ec681f3Smrg _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)), 31117ec681f3Smrg _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)), 31127ec681f3Smrg 31137ec681f3Smrg _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)), 31147ec681f3Smrg _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)), 31157ec681f3Smrg _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)), 31167ec681f3Smrg _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)), 31177ec681f3Smrg _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)), 31187ec681f3Smrg _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)), 31197ec681f3Smrg 31207ec681f3Smrg _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)), 31217ec681f3Smrg 31227ec681f3Smrg _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)), 31237ec681f3Smrg _OPI(CRS, NOP, 
V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */ 31247ec681f3Smrg _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ 31257ec681f3Smrg _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)), 31267ec681f3Smrg _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ 31277ec681f3Smrg 31287ec681f3Smrg _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), 31297ec681f3Smrg _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), 31307ec681f3Smrg 31317ec681f3Smrg /* More flow control */ 31327ec681f3Smrg _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)), 31337ec681f3Smrg _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)), 31347ec681f3Smrg _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)), 31357ec681f3Smrg _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)), 31367ec681f3Smrg _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)), 31377ec681f3Smrg _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)), 31387ec681f3Smrg _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL), 31397ec681f3Smrg _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)), 31407ec681f3Smrg /* we don't write to the address register, but a normal register (copied 31417ec681f3Smrg * when needed to the address register), thus we don't use ARR */ 31427ec681f3Smrg _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 31437ec681f3Smrg 31447ec681f3Smrg _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)), 31457ec681f3Smrg _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)), 31467ec681f3Smrg 31477ec681f3Smrg _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)), 31487ec681f3Smrg _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)), 31497ec681f3Smrg _OPI(TEXKILL, 
KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)), 31507ec681f3Smrg _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)), 31517ec681f3Smrg _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), 31527ec681f3Smrg _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), 31537ec681f3Smrg _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 31547ec681f3Smrg _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 31557ec681f3Smrg _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), 31567ec681f3Smrg _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), 31577ec681f3Smrg _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), 31587ec681f3Smrg _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)), 31597ec681f3Smrg _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)), 31607ec681f3Smrg _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 31617ec681f3Smrg _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)), 31627ec681f3Smrg _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 31637ec681f3Smrg 31647ec681f3Smrg _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), 31657ec681f3Smrg _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 31667ec681f3Smrg _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)), 31677ec681f3Smrg _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), 31687ec681f3Smrg 31697ec681f3Smrg _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)), 31707ec681f3Smrg 31717ec681f3Smrg /* More tex stuff */ 31727ec681f3Smrg _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)), 31737ec681f3Smrg _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)), 
31747ec681f3Smrg _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)), 31757ec681f3Smrg _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)), 31767ec681f3Smrg _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 31777ec681f3Smrg _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)), 31787ec681f3Smrg 31797ec681f3Smrg /* Misc */ 31807ec681f3Smrg _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */ 31817ec681f3Smrg _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)), 31827ec681f3Smrg _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), 31837ec681f3Smrg _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 31847ec681f3Smrg _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 31857ec681f3Smrg _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)), 31867ec681f3Smrg _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)), 31877ec681f3Smrg _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)), 31887ec681f3Smrg _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP)) 31897ec681f3Smrg}; 31907ec681f3Smrg 31917ec681f3Smrgstatic const struct sm1_op_info inst_phase = 31927ec681f3Smrg _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE)); 31937ec681f3Smrg 31947ec681f3Smrgstatic const struct sm1_op_info inst_comment = 31957ec681f3Smrg _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT)); 31967ec681f3Smrg 31977ec681f3Smrgstatic void 31987ec681f3Smrgcreate_op_info_map(struct shader_translator *tx) 31997ec681f3Smrg{ 32007ec681f3Smrg const unsigned version = (tx->version.major << 8) | tx->version.minor; 32017ec681f3Smrg unsigned i; 32027ec681f3Smrg 32037ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i) 32047ec681f3Smrg tx->op_info_map[i] = -1; 32057ec681f3Smrg 32067ec681f3Smrg if (tx->processor 
== PIPE_SHADER_VERTEX) { 32077ec681f3Smrg for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 32087ec681f3Smrg assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 32097ec681f3Smrg if (inst_table[i].vert_version.min <= version && 32107ec681f3Smrg inst_table[i].vert_version.max >= version) 32117ec681f3Smrg tx->op_info_map[inst_table[i].sio] = i; 32127ec681f3Smrg } 32137ec681f3Smrg } else { 32147ec681f3Smrg for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 32157ec681f3Smrg assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 32167ec681f3Smrg if (inst_table[i].frag_version.min <= version && 32177ec681f3Smrg inst_table[i].frag_version.max >= version) 32187ec681f3Smrg tx->op_info_map[inst_table[i].sio] = i; 32197ec681f3Smrg } 32207ec681f3Smrg } 32217ec681f3Smrg} 32227ec681f3Smrg 32237ec681f3Smrgstatic inline HRESULT 32247ec681f3SmrgNineTranslateInstruction_Generic(struct shader_translator *tx) 32257ec681f3Smrg{ 32267ec681f3Smrg struct ureg_dst dst[1]; 32277ec681f3Smrg struct ureg_src src[4]; 32287ec681f3Smrg unsigned i; 32297ec681f3Smrg 32307ec681f3Smrg for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i) 32317ec681f3Smrg dst[i] = tx_dst_param(tx, &tx->insn.dst[i]); 32327ec681f3Smrg for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i) 32337ec681f3Smrg src[i] = tx_src_param(tx, &tx->insn.src[i]); 32347ec681f3Smrg 32357ec681f3Smrg ureg_insn(tx->ureg, tx->insn.info->opcode, 32367ec681f3Smrg dst, tx->insn.ndst, 32377ec681f3Smrg src, tx->insn.nsrc, 0); 32387ec681f3Smrg return D3D_OK; 32397ec681f3Smrg} 32407ec681f3Smrg 32417ec681f3Smrgstatic inline DWORD 32427ec681f3SmrgTOKEN_PEEK(struct shader_translator *tx) 32437ec681f3Smrg{ 32447ec681f3Smrg return *(tx->parse); 32457ec681f3Smrg} 32467ec681f3Smrg 32477ec681f3Smrgstatic inline DWORD 32487ec681f3SmrgTOKEN_NEXT(struct shader_translator *tx) 32497ec681f3Smrg{ 32507ec681f3Smrg return *(tx->parse)++; 32517ec681f3Smrg} 32527ec681f3Smrg 32537ec681f3Smrgstatic inline void 32547ec681f3SmrgTOKEN_JUMP(struct 
shader_translator *tx) 32557ec681f3Smrg{ 32567ec681f3Smrg if (tx->parse_next && tx->parse != tx->parse_next) { 32577ec681f3Smrg WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next); 32587ec681f3Smrg tx->parse = tx->parse_next; 32597ec681f3Smrg } 32607ec681f3Smrg} 32617ec681f3Smrg 32627ec681f3Smrgstatic inline boolean 32637ec681f3Smrgsm1_parse_eof(struct shader_translator *tx) 32647ec681f3Smrg{ 32657ec681f3Smrg return TOKEN_PEEK(tx) == NINED3DSP_END; 32667ec681f3Smrg} 32677ec681f3Smrg 32687ec681f3Smrgstatic void 32697ec681f3Smrgsm1_read_version(struct shader_translator *tx) 32707ec681f3Smrg{ 32717ec681f3Smrg const DWORD tok = TOKEN_NEXT(tx); 32727ec681f3Smrg 32737ec681f3Smrg tx->version.major = D3DSHADER_VERSION_MAJOR(tok); 32747ec681f3Smrg tx->version.minor = D3DSHADER_VERSION_MINOR(tok); 32757ec681f3Smrg 32767ec681f3Smrg switch (tok >> 16) { 32777ec681f3Smrg case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break; 32787ec681f3Smrg case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break; 32797ec681f3Smrg default: 32807ec681f3Smrg DBG("Invalid shader type: %x\n", tok); 32817ec681f3Smrg tx->processor = ~0; 32827ec681f3Smrg break; 32837ec681f3Smrg } 32847ec681f3Smrg} 32857ec681f3Smrg 32867ec681f3Smrg/* This is just to check if we parsed the instruction properly. 
*/ 32877ec681f3Smrgstatic void 32887ec681f3Smrgsm1_parse_get_skip(struct shader_translator *tx) 32897ec681f3Smrg{ 32907ec681f3Smrg const DWORD tok = TOKEN_PEEK(tx); 32917ec681f3Smrg 32927ec681f3Smrg if (tx->version.major >= 2) { 32937ec681f3Smrg tx->parse_next = tx->parse + 1 /* this */ + 32947ec681f3Smrg ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT); 32957ec681f3Smrg } else { 32967ec681f3Smrg tx->parse_next = NULL; /* TODO: determine from param count */ 32977ec681f3Smrg } 32987ec681f3Smrg} 32997ec681f3Smrg 33007ec681f3Smrgstatic void 33017ec681f3Smrgsm1_print_comment(const char *comment, UINT size) 33027ec681f3Smrg{ 33037ec681f3Smrg if (!size) 33047ec681f3Smrg return; 33057ec681f3Smrg /* TODO */ 33067ec681f3Smrg} 33077ec681f3Smrg 33087ec681f3Smrgstatic void 33097ec681f3Smrgsm1_parse_comments(struct shader_translator *tx, BOOL print) 33107ec681f3Smrg{ 33117ec681f3Smrg DWORD tok = TOKEN_PEEK(tx); 33127ec681f3Smrg 33137ec681f3Smrg while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT) 33147ec681f3Smrg { 33157ec681f3Smrg const char *comment = ""; 33167ec681f3Smrg UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT; 33177ec681f3Smrg tx->parse += size + 1; 33187ec681f3Smrg 33197ec681f3Smrg if (print) 33207ec681f3Smrg sm1_print_comment(comment, size); 33217ec681f3Smrg 33227ec681f3Smrg tok = TOKEN_PEEK(tx); 33237ec681f3Smrg } 33247ec681f3Smrg} 33257ec681f3Smrg 33267ec681f3Smrgstatic void 33277ec681f3Smrgsm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel) 33287ec681f3Smrg{ 33297ec681f3Smrg *reg = TOKEN_NEXT(tx); 33307ec681f3Smrg 33317ec681f3Smrg if (*reg & D3DSHADER_ADDRMODE_RELATIVE) 33327ec681f3Smrg { 33337ec681f3Smrg if (tx->version.major < 2) 33347ec681f3Smrg *rel = (1 << 31) | 33357ec681f3Smrg ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) | 33367ec681f3Smrg ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | 33377ec681f3Smrg D3DSP_NOSWIZZLE; 33387ec681f3Smrg else 33397ec681f3Smrg *rel = 
TOKEN_NEXT(tx); 33407ec681f3Smrg } 33417ec681f3Smrg} 33427ec681f3Smrg 33437ec681f3Smrgstatic void 33447ec681f3Smrgsm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok) 33457ec681f3Smrg{ 33467ec681f3Smrg int8_t shift; 33477ec681f3Smrg dst->file = 33487ec681f3Smrg (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT | 33497ec681f3Smrg (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2; 33507ec681f3Smrg dst->type = TGSI_RETURN_TYPE_FLOAT; 33517ec681f3Smrg dst->idx = tok & D3DSP_REGNUM_MASK; 33527ec681f3Smrg dst->rel = NULL; 33537ec681f3Smrg dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT; 33547ec681f3Smrg dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT; 33557ec681f3Smrg shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT; 33567ec681f3Smrg dst->shift = (shift & 0x7) - (shift & 0x8); 33577ec681f3Smrg} 33587ec681f3Smrg 33597ec681f3Smrgstatic void 33607ec681f3Smrgsm1_parse_src_param(struct sm1_src_param *src, DWORD tok) 33617ec681f3Smrg{ 33627ec681f3Smrg src->file = 33637ec681f3Smrg ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | 33647ec681f3Smrg ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2); 33657ec681f3Smrg src->type = TGSI_RETURN_TYPE_FLOAT; 33667ec681f3Smrg src->idx = tok & D3DSP_REGNUM_MASK; 33677ec681f3Smrg src->rel = NULL; 33687ec681f3Smrg src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT; 33697ec681f3Smrg src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT; 33707ec681f3Smrg 33717ec681f3Smrg switch (src->file) { 33727ec681f3Smrg case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break; 33737ec681f3Smrg case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break; 33747ec681f3Smrg case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break; 33757ec681f3Smrg default: 33767ec681f3Smrg break; 33777ec681f3Smrg } 33787ec681f3Smrg} 33797ec681f3Smrg 33807ec681f3Smrgstatic void 33817ec681f3Smrgsm1_parse_immediate(struct shader_translator *tx, 33827ec681f3Smrg 
struct sm1_src_param *imm) 33837ec681f3Smrg{ 33847ec681f3Smrg imm->file = NINED3DSPR_IMMEDIATE; 33857ec681f3Smrg imm->idx = INT_MIN; 33867ec681f3Smrg imm->rel = NULL; 33877ec681f3Smrg imm->swizzle = NINED3DSP_NOSWIZZLE; 33887ec681f3Smrg imm->mod = 0; 33897ec681f3Smrg switch (tx->insn.opcode) { 33907ec681f3Smrg case D3DSIO_DEF: 33917ec681f3Smrg imm->type = NINED3DSPTYPE_FLOAT4; 33927ec681f3Smrg memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 33937ec681f3Smrg tx->parse += 4; 33947ec681f3Smrg break; 33957ec681f3Smrg case D3DSIO_DEFI: 33967ec681f3Smrg imm->type = NINED3DSPTYPE_INT4; 33977ec681f3Smrg memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 33987ec681f3Smrg tx->parse += 4; 33997ec681f3Smrg break; 34007ec681f3Smrg case D3DSIO_DEFB: 34017ec681f3Smrg imm->type = NINED3DSPTYPE_BOOL; 34027ec681f3Smrg memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD)); 34037ec681f3Smrg tx->parse += 1; 34047ec681f3Smrg break; 34057ec681f3Smrg default: 34067ec681f3Smrg assert(0); 34077ec681f3Smrg break; 34087ec681f3Smrg } 34097ec681f3Smrg} 34107ec681f3Smrg 34117ec681f3Smrgstatic void 34127ec681f3Smrgsm1_read_dst_param(struct shader_translator *tx, 34137ec681f3Smrg struct sm1_dst_param *dst, 34147ec681f3Smrg struct sm1_src_param *rel) 34157ec681f3Smrg{ 34167ec681f3Smrg DWORD tok_dst, tok_rel = 0; 34177ec681f3Smrg 34187ec681f3Smrg sm1_parse_get_param(tx, &tok_dst, &tok_rel); 34197ec681f3Smrg sm1_parse_dst_param(dst, tok_dst); 34207ec681f3Smrg if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) { 34217ec681f3Smrg sm1_parse_src_param(rel, tok_rel); 34227ec681f3Smrg dst->rel = rel; 34237ec681f3Smrg } 34247ec681f3Smrg} 34257ec681f3Smrg 34267ec681f3Smrgstatic void 34277ec681f3Smrgsm1_read_src_param(struct shader_translator *tx, 34287ec681f3Smrg struct sm1_src_param *src, 34297ec681f3Smrg struct sm1_src_param *rel) 34307ec681f3Smrg{ 34317ec681f3Smrg DWORD tok_src, tok_rel = 0; 34327ec681f3Smrg 34337ec681f3Smrg sm1_parse_get_param(tx, &tok_src, &tok_rel); 34347ec681f3Smrg 
sm1_parse_src_param(src, tok_src); 34357ec681f3Smrg if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) { 34367ec681f3Smrg assert(rel); 34377ec681f3Smrg sm1_parse_src_param(rel, tok_rel); 34387ec681f3Smrg src->rel = rel; 34397ec681f3Smrg } 34407ec681f3Smrg} 34417ec681f3Smrg 34427ec681f3Smrgstatic void 34437ec681f3Smrgsm1_read_semantic(struct shader_translator *tx, 34447ec681f3Smrg struct sm1_semantic *sem) 34457ec681f3Smrg{ 34467ec681f3Smrg const DWORD tok_usg = TOKEN_NEXT(tx); 34477ec681f3Smrg const DWORD tok_dst = TOKEN_NEXT(tx); 34487ec681f3Smrg 34497ec681f3Smrg sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT; 34507ec681f3Smrg sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT; 34517ec681f3Smrg sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; 34527ec681f3Smrg 34537ec681f3Smrg sm1_parse_dst_param(&sem->reg, tok_dst); 34547ec681f3Smrg} 34557ec681f3Smrg 34567ec681f3Smrgstatic void 34577ec681f3Smrgsm1_parse_instruction(struct shader_translator *tx) 34587ec681f3Smrg{ 34597ec681f3Smrg struct sm1_instruction *insn = &tx->insn; 34607ec681f3Smrg HRESULT hr; 34617ec681f3Smrg DWORD tok; 34627ec681f3Smrg const struct sm1_op_info *info = NULL; 34637ec681f3Smrg unsigned i; 34647ec681f3Smrg 34657ec681f3Smrg sm1_parse_comments(tx, TRUE); 34667ec681f3Smrg sm1_parse_get_skip(tx); 34677ec681f3Smrg 34687ec681f3Smrg tok = TOKEN_NEXT(tx); 34697ec681f3Smrg 34707ec681f3Smrg insn->opcode = tok & D3DSI_OPCODE_MASK; 34717ec681f3Smrg insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT; 34727ec681f3Smrg insn->coissue = !!(tok & D3DSI_COISSUE); 34737ec681f3Smrg insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED); 34747ec681f3Smrg 34757ec681f3Smrg if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) { 34767ec681f3Smrg int k = tx->op_info_map[insn->opcode]; 34777ec681f3Smrg if (k >= 0) { 34787ec681f3Smrg assert(k < ARRAY_SIZE(inst_table)); 34797ec681f3Smrg info = &inst_table[k]; 
34807ec681f3Smrg } 34817ec681f3Smrg } else { 34827ec681f3Smrg if (insn->opcode == D3DSIO_PHASE) info = &inst_phase; 34837ec681f3Smrg if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment; 34847ec681f3Smrg } 34857ec681f3Smrg if (!info) { 34867ec681f3Smrg DBG("illegal or unhandled opcode: %08x\n", insn->opcode); 34877ec681f3Smrg TOKEN_JUMP(tx); 34887ec681f3Smrg return; 34897ec681f3Smrg } 34907ec681f3Smrg insn->info = info; 34917ec681f3Smrg insn->ndst = info->ndst; 34927ec681f3Smrg insn->nsrc = info->nsrc; 34937ec681f3Smrg 34947ec681f3Smrg /* check version */ 34957ec681f3Smrg { 34967ec681f3Smrg unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min; 34977ec681f3Smrg unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max; 34987ec681f3Smrg unsigned ver = (tx->version.major << 8) | tx->version.minor; 34997ec681f3Smrg if (ver < min || ver > max) { 35007ec681f3Smrg DBG("opcode not supported in this shader version: %x <= %x <= %x\n", 35017ec681f3Smrg min, ver, max); 35027ec681f3Smrg return; 35037ec681f3Smrg } 35047ec681f3Smrg } 35057ec681f3Smrg 35067ec681f3Smrg for (i = 0; i < insn->ndst; ++i) 35077ec681f3Smrg sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]); 35087ec681f3Smrg if (insn->predicated) 35097ec681f3Smrg sm1_read_src_param(tx, &insn->pred, NULL); 35107ec681f3Smrg for (i = 0; i < insn->nsrc; ++i) 35117ec681f3Smrg sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]); 35127ec681f3Smrg 35137ec681f3Smrg /* parse here so we can dump them before processing */ 35147ec681f3Smrg if (insn->opcode == D3DSIO_DEF || 35157ec681f3Smrg insn->opcode == D3DSIO_DEFI || 35167ec681f3Smrg insn->opcode == D3DSIO_DEFB) 35177ec681f3Smrg sm1_parse_immediate(tx, &tx->insn.src[0]); 35187ec681f3Smrg 35197ec681f3Smrg sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth); 35207ec681f3Smrg sm1_instruction_check(insn); 35217ec681f3Smrg 35227ec681f3Smrg if (insn->predicated) { 35237ec681f3Smrg tx->predicated_activated = true; 35247ec681f3Smrg if 
(ureg_dst_is_undef(tx->regs.predicate_tmp)) { 35257ec681f3Smrg tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg); 35267ec681f3Smrg tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg); 35277ec681f3Smrg } 35287ec681f3Smrg } 35297ec681f3Smrg 35307ec681f3Smrg if (info->handler) 35317ec681f3Smrg hr = info->handler(tx); 35327ec681f3Smrg else 35337ec681f3Smrg hr = NineTranslateInstruction_Generic(tx); 35347ec681f3Smrg tx_apply_dst0_modifiers(tx); 35357ec681f3Smrg 35367ec681f3Smrg if (insn->predicated) { 35377ec681f3Smrg tx->predicated_activated = false; 35387ec681f3Smrg /* TODO: predicate might be allowed on outputs, 35397ec681f3Smrg * which cannot be src. Workaround it. */ 35407ec681f3Smrg ureg_CMP(tx->ureg, tx->regs.predicate_dst, 35417ec681f3Smrg ureg_negate(tx_src_param(tx, &insn->pred)), 35427ec681f3Smrg ureg_src(tx->regs.predicate_tmp), 35437ec681f3Smrg ureg_src(tx->regs.predicate_dst)); 35447ec681f3Smrg } 35457ec681f3Smrg 35467ec681f3Smrg if (hr != D3D_OK) 35477ec681f3Smrg tx->failure = TRUE; 35487ec681f3Smrg tx->num_scratch = 0; /* reset */ 35497ec681f3Smrg 35507ec681f3Smrg TOKEN_JUMP(tx); 35517ec681f3Smrg} 35527ec681f3Smrg 35537ec681f3Smrg#define GET_CAP(n) screen->get_param( \ 35547ec681f3Smrg screen, PIPE_CAP_##n) 35557ec681f3Smrg#define GET_SHADER_CAP(n) screen->get_shader_param( \ 35567ec681f3Smrg screen, info->type, PIPE_SHADER_CAP_##n) 35577ec681f3Smrg 35587ec681f3Smrgstatic HRESULT 35597ec681f3Smrgtx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info) 35607ec681f3Smrg{ 35617ec681f3Smrg unsigned i; 35627ec681f3Smrg 35637ec681f3Smrg memset(tx, 0, sizeof(*tx)); 35647ec681f3Smrg 35657ec681f3Smrg tx->info = info; 35667ec681f3Smrg 35677ec681f3Smrg tx->byte_code = info->byte_code; 35687ec681f3Smrg tx->parse = info->byte_code; 35697ec681f3Smrg 35707ec681f3Smrg for (i = 0; i < ARRAY_SIZE(info->input_map); ++i) 35717ec681f3Smrg info->input_map[i] = NINE_DECLUSAGE_NONE; 35727ec681f3Smrg info->num_inputs = 0; 
35737ec681f3Smrg 35747ec681f3Smrg info->position_t = FALSE; 35757ec681f3Smrg info->point_size = FALSE; 35767ec681f3Smrg 35777ec681f3Smrg memset(tx->slots_used, 0, sizeof(tx->slots_used)); 35787ec681f3Smrg memset(info->int_slots_used, 0, sizeof(info->int_slots_used)); 35797ec681f3Smrg memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used)); 35807ec681f3Smrg 35817ec681f3Smrg tx->info->const_float_slots = 0; 35827ec681f3Smrg tx->info->const_int_slots = 0; 35837ec681f3Smrg tx->info->const_bool_slots = 0; 35847ec681f3Smrg 35857ec681f3Smrg info->sampler_mask = 0x0; 35867ec681f3Smrg info->rt_mask = 0x0; 35877ec681f3Smrg 35887ec681f3Smrg info->lconstf.data = NULL; 35897ec681f3Smrg info->lconstf.ranges = NULL; 35907ec681f3Smrg 35917ec681f3Smrg info->bumpenvmat_needed = 0; 35927ec681f3Smrg 35937ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) { 35947ec681f3Smrg tx->regs.rL[i] = ureg_dst_undef(); 35957ec681f3Smrg } 35967ec681f3Smrg tx->regs.address = ureg_dst_undef(); 35977ec681f3Smrg tx->regs.a0 = ureg_dst_undef(); 35987ec681f3Smrg tx->regs.p = ureg_dst_undef(); 35997ec681f3Smrg tx->regs.oDepth = ureg_dst_undef(); 36007ec681f3Smrg tx->regs.vPos = ureg_src_undef(); 36017ec681f3Smrg tx->regs.vFace = ureg_src_undef(); 36027ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i) 36037ec681f3Smrg tx->regs.o[i] = ureg_dst_undef(); 36047ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i) 36057ec681f3Smrg tx->regs.oCol[i] = ureg_dst_undef(); 36067ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i) 36077ec681f3Smrg tx->regs.vC[i] = ureg_src_undef(); 36087ec681f3Smrg for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i) 36097ec681f3Smrg tx->regs.vT[i] = ureg_src_undef(); 36107ec681f3Smrg 36117ec681f3Smrg sm1_read_version(tx); 36127ec681f3Smrg 36137ec681f3Smrg info->version = (tx->version.major << 4) | tx->version.minor; 36147ec681f3Smrg 36157ec681f3Smrg tx->num_outputs = 0; 36167ec681f3Smrg 36177ec681f3Smrg create_op_info_map(tx); 36187ec681f3Smrg 
36197ec681f3Smrg tx->ureg = ureg_create(info->type); 36207ec681f3Smrg if (!tx->ureg) { 36217ec681f3Smrg return E_OUTOFMEMORY; 36227ec681f3Smrg } 36237ec681f3Smrg 36247ec681f3Smrg tx->native_integers = GET_SHADER_CAP(INTEGERS); 36257ec681f3Smrg tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES); 36267ec681f3Smrg tx->want_texcoord = GET_CAP(TGSI_TEXCOORD); 36277ec681f3Smrg tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 36287ec681f3Smrg tx->texcoord_sn = tx->want_texcoord ? 36297ec681f3Smrg TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; 36307ec681f3Smrg tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL); 36317ec681f3Smrg tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL); 36327ec681f3Smrg 36337ec681f3Smrg if (IS_VS) { 36347ec681f3Smrg tx->num_constf_allowed = NINE_MAX_CONST_F; 36357ec681f3Smrg } else if (tx->version.major < 2) {/* IS_PS v1 */ 36367ec681f3Smrg tx->num_constf_allowed = 8; 36377ec681f3Smrg } else if (tx->version.major == 2) {/* IS_PS v2 */ 36387ec681f3Smrg tx->num_constf_allowed = 32; 36397ec681f3Smrg } else {/* IS_PS v3 */ 36407ec681f3Smrg tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; 36417ec681f3Smrg } 36427ec681f3Smrg 36437ec681f3Smrg if (tx->version.major < 2) { 36447ec681f3Smrg tx->num_consti_allowed = 0; 36457ec681f3Smrg tx->num_constb_allowed = 0; 36467ec681f3Smrg } else { 36477ec681f3Smrg tx->num_consti_allowed = NINE_MAX_CONST_I; 36487ec681f3Smrg tx->num_constb_allowed = NINE_MAX_CONST_B; 36497ec681f3Smrg } 36507ec681f3Smrg 36517ec681f3Smrg if (info->swvp_on) { 36527ec681f3Smrg /* TODO: The values tx->version.major == 1 */ 36537ec681f3Smrg tx->num_constf_allowed = 8192; 36547ec681f3Smrg tx->num_consti_allowed = 2048; 36557ec681f3Smrg tx->num_constb_allowed = 2048; 36567ec681f3Smrg } 36577ec681f3Smrg 36587ec681f3Smrg /* VS must always write position. Declare it here to make it the 1st output. 36597ec681f3Smrg * (Some drivers like nv50 are buggy and rely on that.) 
36607ec681f3Smrg */ 36617ec681f3Smrg if (IS_VS) { 36627ec681f3Smrg tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 36637ec681f3Smrg } else { 36647ec681f3Smrg ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 36657ec681f3Smrg if (!tx->shift_wpos) 36667ec681f3Smrg ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 36677ec681f3Smrg } 36687ec681f3Smrg 36697ec681f3Smrg tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS); 36707ec681f3Smrg if (tx->mul_zero_wins) 36717ec681f3Smrg ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1); 36727ec681f3Smrg 36737ec681f3Smrg /* Add additional definition of constants */ 36747ec681f3Smrg if (info->add_constants_defs.c_combination) { 36757ec681f3Smrg unsigned i; 36767ec681f3Smrg 36777ec681f3Smrg assert(info->add_constants_defs.int_const_added); 36787ec681f3Smrg assert(info->add_constants_defs.bool_const_added); 36797ec681f3Smrg /* We only add constants that are used by the shader 36807ec681f3Smrg * and that are not defined in the shader */ 36817ec681f3Smrg for (i = 0; i < NINE_MAX_CONST_I; ++i) { 36827ec681f3Smrg if ((*info->add_constants_defs.int_const_added)[i]) { 36837ec681f3Smrg DBG("Defining const i%i : { %i %i %i %i }\n", i, 36847ec681f3Smrg info->add_constants_defs.c_combination->const_i[i][0], 36857ec681f3Smrg info->add_constants_defs.c_combination->const_i[i][1], 36867ec681f3Smrg info->add_constants_defs.c_combination->const_i[i][2], 36877ec681f3Smrg info->add_constants_defs.c_combination->const_i[i][3]); 36887ec681f3Smrg tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]); 36897ec681f3Smrg } 36907ec681f3Smrg } 36917ec681f3Smrg for (i = 0; i < NINE_MAX_CONST_B; ++i) { 36927ec681f3Smrg if ((*info->add_constants_defs.bool_const_added)[i]) { 36937ec681f3Smrg DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0)); 36947ec681f3Smrg tx_set_lconstb(tx, i, 
info->add_constants_defs.c_combination->const_b[i]); 36957ec681f3Smrg } 36967ec681f3Smrg } 36977ec681f3Smrg } 36987ec681f3Smrg return D3D_OK; 36997ec681f3Smrg} 37007ec681f3Smrg 37017ec681f3Smrgstatic void 37027ec681f3Smrgtx_dtor(struct shader_translator *tx) 37037ec681f3Smrg{ 37047ec681f3Smrg if (tx->slot_map) 37057ec681f3Smrg FREE(tx->slot_map); 37067ec681f3Smrg if (tx->num_inst_labels) 37077ec681f3Smrg FREE(tx->inst_labels); 37087ec681f3Smrg FREE(tx->lconstf); 37097ec681f3Smrg FREE(tx->regs.r); 37107ec681f3Smrg FREE(tx); 37117ec681f3Smrg} 37127ec681f3Smrg 37137ec681f3Smrg/* CONST[0].xyz = width/2, -height/2, zmax-zmin 37147ec681f3Smrg * CONST[1].xyz = x+width/2, y+height/2, zmin */ 37157ec681f3Smrgstatic void 37167ec681f3Smrgshader_add_vs_viewport_transform(struct shader_translator *tx) 37177ec681f3Smrg{ 37187ec681f3Smrg struct ureg_program *ureg = tx->ureg; 37197ec681f3Smrg struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0); 37207ec681f3Smrg struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1); 37217ec681f3Smrg /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ 37227ec681f3Smrg 37237ec681f3Smrg c0 = ureg_src_dimension(c0, 4); 37247ec681f3Smrg c1 = ureg_src_dimension(c1, 4); 37257ec681f3Smrg /* TODO: find out when we need to apply the viewport transformation or not. 
37267ec681f3Smrg * Likely will be XYZ vs XYZRHW in vdecl_out 37277ec681f3Smrg * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); 37287ec681f3Smrg * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); 37297ec681f3Smrg */ 37307ec681f3Smrg ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); 37317ec681f3Smrg} 37327ec681f3Smrg 37337ec681f3Smrgstatic void 37347ec681f3Smrgshader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) 37357ec681f3Smrg{ 37367ec681f3Smrg struct ureg_program *ureg = tx->ureg; 37377ec681f3Smrg struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 37387ec681f3Smrg struct ureg_src fog_end, fog_coeff, fog_density, fog_params; 37397ec681f3Smrg struct ureg_src fog_vs, fog_color; 37407ec681f3Smrg struct ureg_dst fog_factor, depth; 37417ec681f3Smrg 37427ec681f3Smrg if (!tx->info->fog_enable) { 37437ec681f3Smrg ureg_MOV(ureg, oCol0, src_col); 37447ec681f3Smrg return; 37457ec681f3Smrg } 37467ec681f3Smrg 37477ec681f3Smrg if (tx->info->fog_mode != D3DFOG_NONE) { 37487ec681f3Smrg depth = tx_scratch_scalar(tx); 37497ec681f3Smrg /* Depth used for fog is perspective interpolated */ 37507ec681f3Smrg ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W)); 37517ec681f3Smrg ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z)); 37527ec681f3Smrg } 37537ec681f3Smrg 37547ec681f3Smrg fog_color = nine_float_constant_src(tx, 32); 37557ec681f3Smrg fog_params = nine_float_constant_src(tx, 33); 37567ec681f3Smrg fog_factor = tx_scratch_scalar(tx); 37577ec681f3Smrg 37587ec681f3Smrg if (tx->info->fog_mode == D3DFOG_LINEAR) { 37597ec681f3Smrg fog_end = NINE_APPLY_SWIZZLE(fog_params, X); 37607ec681f3Smrg fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y); 37617ec681f3Smrg ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth))); 37627ec681f3Smrg 
ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); 37637ec681f3Smrg } else if (tx->info->fog_mode == D3DFOG_EXP) { 37647ec681f3Smrg fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 37657ec681f3Smrg ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 37667ec681f3Smrg ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 37677ec681f3Smrg ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 37687ec681f3Smrg } else if (tx->info->fog_mode == D3DFOG_EXP2) { 37697ec681f3Smrg fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 37707ec681f3Smrg ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 37717ec681f3Smrg ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); 37727ec681f3Smrg ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 37737ec681f3Smrg ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 37747ec681f3Smrg } else { 37757ec681f3Smrg fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, 37767ec681f3Smrg TGSI_INTERPOLATE_PERSPECTIVE), 37777ec681f3Smrg TGSI_SWIZZLE_X); 37787ec681f3Smrg ureg_MOV(ureg, fog_factor, fog_vs); 37797ec681f3Smrg } 37807ec681f3Smrg 37817ec681f3Smrg ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), 37827ec681f3Smrg tx_src_scalar(fog_factor), src_col, fog_color); 37837ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); 37847ec681f3Smrg} 37857ec681f3Smrg 37867ec681f3Smrgstatic void parse_shader(struct shader_translator *tx) 37877ec681f3Smrg{ 37887ec681f3Smrg struct nine_shader_info *info = tx->info; 37897ec681f3Smrg 37907ec681f3Smrg while (!sm1_parse_eof(tx) && !tx->failure) 37917ec681f3Smrg sm1_parse_instruction(tx); 37927ec681f3Smrg tx->parse++; /* for byte_size */ 37937ec681f3Smrg 37947ec681f3Smrg if (tx->failure) 37957ec681f3Smrg return; 37967ec681f3Smrg 37977ec681f3Smrg if (IS_PS && tx->version.major < 3) { 37987ec681f3Smrg if (tx->version.major < 2) { 
37997ec681f3Smrg assert(tx->num_temp); /* there must be color output */ 38007ec681f3Smrg info->rt_mask |= 0x1; 38017ec681f3Smrg shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); 38027ec681f3Smrg } else { 38037ec681f3Smrg shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); 38047ec681f3Smrg } 38057ec681f3Smrg } 38067ec681f3Smrg 38077ec681f3Smrg if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { 38087ec681f3Smrg tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16); 38097ec681f3Smrg ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); 38107ec681f3Smrg } 38117ec681f3Smrg 38127ec681f3Smrg if (info->position_t) 38137ec681f3Smrg ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 38147ec681f3Smrg 38157ec681f3Smrg if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { 38167ec681f3Smrg struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); 38177ec681f3Smrg ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); 38187ec681f3Smrg ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); 38197ec681f3Smrg info->point_size = TRUE; 38207ec681f3Smrg } 38217ec681f3Smrg 38227ec681f3Smrg if (info->process_vertices) 38237ec681f3Smrg shader_add_vs_viewport_transform(tx); 38247ec681f3Smrg 38257ec681f3Smrg ureg_END(tx->ureg); 38267ec681f3Smrg} 38277ec681f3Smrg 38287ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0) 38297ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1) 38307ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2) 38317ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3) 38327ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4) 38337ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5) 38347ec681f3Smrg 38357ec681f3Smrgstatic const struct debug_named_value nine_shader_debug_options[] = { 
38367ec681f3Smrg { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." }, 38377ec681f3Smrg { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." }, 38387ec681f3Smrg { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." }, 38397ec681f3Smrg { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." }, 38407ec681f3Smrg { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." }, 38417ec681f3Smrg { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." }, 38427ec681f3Smrg DEBUG_NAMED_VALUE_END /* must be last */ 38437ec681f3Smrg}; 38447ec681f3Smrg 38457ec681f3Smrgstatic inline boolean 38467ec681f3Smrgnine_shader_get_debug_flag(uint64_t flag) 38477ec681f3Smrg{ 38487ec681f3Smrg static uint64_t flags = 0; 38497ec681f3Smrg static boolean first_run = TRUE; 38507ec681f3Smrg 38517ec681f3Smrg if (unlikely(first_run)) { 38527ec681f3Smrg first_run = FALSE; 38537ec681f3Smrg flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0); 38547ec681f3Smrg 38557ec681f3Smrg // Check old TGSI dump envvar too 38567ec681f3Smrg if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { 38577ec681f3Smrg flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI; 38587ec681f3Smrg } 38597ec681f3Smrg } 38607ec681f3Smrg 38617ec681f3Smrg return !!(flags & flag); 38627ec681f3Smrg} 38637ec681f3Smrg 38647ec681f3Smrgstatic void 38657ec681f3Smrgnine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens, 38667ec681f3Smrg struct pipe_screen *screen) 38677ec681f3Smrg{ 38687ec681f3Smrg struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL); 38697ec681f3Smrg 38707ec681f3Smrg if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) { 
38717ec681f3Smrg nir_print_shader(nir, stdout); 38727ec681f3Smrg } 38737ec681f3Smrg 38747ec681f3Smrg state->type = PIPE_SHADER_IR_NIR; 38757ec681f3Smrg state->tokens = NULL; 38767ec681f3Smrg state->ir.nir = nir; 38777ec681f3Smrg memset(&state->stream_output, 0, sizeof(state->stream_output)); 38787ec681f3Smrg} 38797ec681f3Smrg 38807ec681f3Smrgstatic void * 38817ec681f3Smrgnine_ureg_create_shader(struct ureg_program *ureg, 38827ec681f3Smrg struct pipe_context *pipe, 38837ec681f3Smrg const struct pipe_stream_output_info *so) 38847ec681f3Smrg{ 38857ec681f3Smrg struct pipe_shader_state state; 38867ec681f3Smrg const struct tgsi_token *tgsi_tokens; 38877ec681f3Smrg struct pipe_screen *screen = pipe->screen; 38887ec681f3Smrg 38897ec681f3Smrg tgsi_tokens = ureg_finalize(ureg); 38907ec681f3Smrg if (!tgsi_tokens) 38917ec681f3Smrg return NULL; 38927ec681f3Smrg 38937ec681f3Smrg assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2); 38947ec681f3Smrg enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor; 38957ec681f3Smrg 38967ec681f3Smrg int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR); 38977ec681f3Smrg bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR); 38987ec681f3Smrg bool use_nir = prefer_nir || 38997ec681f3Smrg ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) || 39007ec681f3Smrg ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS)); 39017ec681f3Smrg 39027ec681f3Smrg /* Allow user to override preferred IR, this is very useful for debugging */ 39037ec681f3Smrg if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS))) 39047ec681f3Smrg use_nir = false; 39057ec681f3Smrg if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS))) 39067ec681f3Smrg use_nir = false; 
39077ec681f3Smrg 39087ec681f3Smrg DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n", 39097ec681f3Smrg shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS", 39107ec681f3Smrg prefer_nir ? "NIR" : "TGSI", 39117ec681f3Smrg use_nir ? "NIR" : "TGSI"); 39127ec681f3Smrg 39137ec681f3Smrg if (use_nir) { 39147ec681f3Smrg nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen); 39157ec681f3Smrg } else { 39167ec681f3Smrg pipe_shader_state_from_tgsi(&state, tgsi_tokens); 39177ec681f3Smrg } 39187ec681f3Smrg 39197ec681f3Smrg assert(state.tokens || state.ir.nir); 39207ec681f3Smrg 39217ec681f3Smrg if (so) 39227ec681f3Smrg state.stream_output = *so; 39237ec681f3Smrg 39247ec681f3Smrg switch (shader_type) { 39257ec681f3Smrg case PIPE_SHADER_VERTEX: 39267ec681f3Smrg return pipe->create_vs_state(pipe, &state); 39277ec681f3Smrg case PIPE_SHADER_FRAGMENT: 39287ec681f3Smrg return pipe->create_fs_state(pipe, &state); 39297ec681f3Smrg default: 39307ec681f3Smrg unreachable("unsupported shader type"); 39317ec681f3Smrg } 39327ec681f3Smrg} 39337ec681f3Smrg 39347ec681f3Smrg 39357ec681f3Smrgvoid * 39367ec681f3Smrgnine_create_shader_with_so_and_destroy(struct ureg_program *p, 39377ec681f3Smrg struct pipe_context *pipe, 39387ec681f3Smrg const struct pipe_stream_output_info *so) 39397ec681f3Smrg{ 39407ec681f3Smrg void *result = nine_ureg_create_shader(p, pipe, so); 39417ec681f3Smrg ureg_destroy(p); 39427ec681f3Smrg return result; 39437ec681f3Smrg} 39447ec681f3Smrg 39457ec681f3SmrgHRESULT 39467ec681f3Smrgnine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe) 39477ec681f3Smrg{ 39487ec681f3Smrg struct shader_translator *tx; 39497ec681f3Smrg HRESULT hr = D3D_OK; 39507ec681f3Smrg const unsigned processor = info->type; 39517ec681f3Smrg struct pipe_screen *screen = info->process_vertices ? 
device->screen_sw : device->screen; 39527ec681f3Smrg unsigned *const_ranges = NULL; 39537ec681f3Smrg 39547ec681f3Smrg user_assert(processor != ~0, D3DERR_INVALIDCALL); 39557ec681f3Smrg 39567ec681f3Smrg tx = MALLOC_STRUCT(shader_translator); 39577ec681f3Smrg if (!tx) 39587ec681f3Smrg return E_OUTOFMEMORY; 39597ec681f3Smrg 39607ec681f3Smrg if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 39617ec681f3Smrg hr = E_OUTOFMEMORY; 39627ec681f3Smrg goto out; 39637ec681f3Smrg } 39647ec681f3Smrg 39657ec681f3Smrg assert(IS_VS || !info->swvp_on); 39667ec681f3Smrg 39677ec681f3Smrg if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) { 39687ec681f3Smrg hr = D3DERR_INVALIDCALL; 39697ec681f3Smrg DBG("Unsupported shader version: %u.%u !\n", 39707ec681f3Smrg tx->version.major, tx->version.minor); 39717ec681f3Smrg goto out; 39727ec681f3Smrg } 39737ec681f3Smrg if (tx->processor != processor) { 39747ec681f3Smrg hr = D3DERR_INVALIDCALL; 39757ec681f3Smrg DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor); 39767ec681f3Smrg goto out; 39777ec681f3Smrg } 39787ec681f3Smrg DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS", 39797ec681f3Smrg tx->version.major, tx->version.minor); 39807ec681f3Smrg 39817ec681f3Smrg parse_shader(tx); 39827ec681f3Smrg 39837ec681f3Smrg if (tx->failure) { 39847ec681f3Smrg /* For VS shaders, we print the warning later, 39857ec681f3Smrg * we first try with swvp. 
*/ 39867ec681f3Smrg if (IS_PS) 39877ec681f3Smrg ERR("Encountered buggy shader\n"); 39887ec681f3Smrg ureg_destroy(tx->ureg); 39897ec681f3Smrg hr = D3DERR_INVALIDCALL; 39907ec681f3Smrg goto out; 39917ec681f3Smrg } 39927ec681f3Smrg 39937ec681f3Smrg /* Recompile after compacting constant slots if possible */ 39947ec681f3Smrg if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) { 39957ec681f3Smrg unsigned *slot_map; 39967ec681f3Smrg unsigned c; 39977ec681f3Smrg int i, j, num_ranges, prev; 39987ec681f3Smrg 39997ec681f3Smrg DBG("Recompiling shader for constant compaction\n"); 40007ec681f3Smrg ureg_destroy(tx->ureg); 40017ec681f3Smrg 40027ec681f3Smrg if (tx->num_inst_labels) 40037ec681f3Smrg FREE(tx->inst_labels); 40047ec681f3Smrg FREE(tx->lconstf); 40057ec681f3Smrg FREE(tx->regs.r); 40067ec681f3Smrg 40077ec681f3Smrg num_ranges = 0; 40087ec681f3Smrg prev = -2; 40097ec681f3Smrg for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 40107ec681f3Smrg if (tx->slots_used[i]) { 40117ec681f3Smrg if (prev != i - 1) 40127ec681f3Smrg num_ranges++; 40137ec681f3Smrg prev = i; 40147ec681f3Smrg } 40157ec681f3Smrg } 40167ec681f3Smrg slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned)); 40177ec681f3Smrg const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */ 40187ec681f3Smrg if (!slot_map || !const_ranges) { 40197ec681f3Smrg hr = E_OUTOFMEMORY; 40207ec681f3Smrg goto out; 40217ec681f3Smrg } 40227ec681f3Smrg c = 0; 40237ec681f3Smrg j = -1; 40247ec681f3Smrg prev = -2; 40257ec681f3Smrg for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 40267ec681f3Smrg if (tx->slots_used[i]) { 40277ec681f3Smrg if (prev != i - 1) 40287ec681f3Smrg j++; 40297ec681f3Smrg /* Initialize first slot of the range */ 40307ec681f3Smrg if (!const_ranges[2*j+1]) 40317ec681f3Smrg const_ranges[2*j] = i; 40327ec681f3Smrg const_ranges[2*j+1]++; 40337ec681f3Smrg prev = i; 40347ec681f3Smrg slot_map[i] = c++; 40357ec681f3Smrg } 40367ec681f3Smrg } 40377ec681f3Smrg 40387ec681f3Smrg 
if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 40397ec681f3Smrg hr = E_OUTOFMEMORY; 40407ec681f3Smrg goto out; 40417ec681f3Smrg } 40427ec681f3Smrg tx->slot_map = slot_map; 40437ec681f3Smrg parse_shader(tx); 40447ec681f3Smrg assert(!tx->failure); 40457ec681f3Smrg#if !defined(NDEBUG) 40467ec681f3Smrg i = 0; 40477ec681f3Smrg j = 0; 40487ec681f3Smrg while (const_ranges[i*2+1] != 0) { 40497ec681f3Smrg j += const_ranges[i*2+1]; 40507ec681f3Smrg i++; 40517ec681f3Smrg } 40527ec681f3Smrg assert(j == tx->num_slots); 40537ec681f3Smrg#endif 40547ec681f3Smrg } 40557ec681f3Smrg 40567ec681f3Smrg /* record local constants */ 40577ec681f3Smrg if (tx->num_lconstf && tx->indirect_const_access) { 40587ec681f3Smrg struct nine_range *ranges; 40597ec681f3Smrg float *data; 40607ec681f3Smrg int *indices; 40617ec681f3Smrg unsigned i, k, n; 40627ec681f3Smrg 40637ec681f3Smrg hr = E_OUTOFMEMORY; 40647ec681f3Smrg 40657ec681f3Smrg data = MALLOC(tx->num_lconstf * 4 * sizeof(float)); 40667ec681f3Smrg if (!data) 40677ec681f3Smrg goto out; 40687ec681f3Smrg info->lconstf.data = data; 40697ec681f3Smrg 40707ec681f3Smrg indices = MALLOC(tx->num_lconstf * sizeof(indices[0])); 40717ec681f3Smrg if (!indices) 40727ec681f3Smrg goto out; 40737ec681f3Smrg 40747ec681f3Smrg /* lazy sort, num_lconstf should be small */ 40757ec681f3Smrg for (n = 0; n < tx->num_lconstf; ++n) { 40767ec681f3Smrg for (k = 0, i = 0; i < tx->num_lconstf; ++i) { 40777ec681f3Smrg if (tx->lconstf[i].idx < tx->lconstf[k].idx) 40787ec681f3Smrg k = i; 40797ec681f3Smrg } 40807ec681f3Smrg indices[n] = tx->lconstf[k].idx; 40817ec681f3Smrg memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); 40827ec681f3Smrg tx->lconstf[k].idx = INT_MAX; 40837ec681f3Smrg } 40847ec681f3Smrg 40857ec681f3Smrg /* count ranges */ 40867ec681f3Smrg for (n = 1, i = 1; i < tx->num_lconstf; ++i) 40877ec681f3Smrg if (indices[i] != indices[i - 1] + 1) 40887ec681f3Smrg ++n; 40897ec681f3Smrg ranges = MALLOC(n * sizeof(ranges[0])); 40907ec681f3Smrg if (!ranges) { 
40917ec681f3Smrg FREE(indices); 40927ec681f3Smrg goto out; 40937ec681f3Smrg } 40947ec681f3Smrg info->lconstf.ranges = ranges; 40957ec681f3Smrg 40967ec681f3Smrg k = 0; 40977ec681f3Smrg ranges[k].bgn = indices[0]; 40987ec681f3Smrg for (i = 1; i < tx->num_lconstf; ++i) { 40997ec681f3Smrg if (indices[i] != indices[i - 1] + 1) { 41007ec681f3Smrg ranges[k].next = &ranges[k + 1]; 41017ec681f3Smrg ranges[k].end = indices[i - 1] + 1; 41027ec681f3Smrg ++k; 41037ec681f3Smrg ranges[k].bgn = indices[i]; 41047ec681f3Smrg } 41057ec681f3Smrg } 41067ec681f3Smrg ranges[k].end = indices[i - 1] + 1; 41077ec681f3Smrg ranges[k].next = NULL; 41087ec681f3Smrg assert(n == (k + 1)); 41097ec681f3Smrg 41107ec681f3Smrg FREE(indices); 41117ec681f3Smrg hr = D3D_OK; 41127ec681f3Smrg } 41137ec681f3Smrg 41147ec681f3Smrg /* r500 */ 41157ec681f3Smrg if (info->const_float_slots > device->max_vs_const_f && 41167ec681f3Smrg (info->const_int_slots || info->const_bool_slots) && 41177ec681f3Smrg !info->swvp_on) 41187ec681f3Smrg ERR("Overlapping constant slots. 
The shader is likely to be buggy\n"); 41197ec681f3Smrg 41207ec681f3Smrg 41217ec681f3Smrg if (tx->indirect_const_access) { /* vs only */ 41227ec681f3Smrg info->const_float_slots = device->max_vs_const_f; 41237ec681f3Smrg tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f); 41247ec681f3Smrg } 41257ec681f3Smrg 41267ec681f3Smrg if (!info->swvp_on) { 41277ec681f3Smrg info->const_used_size = sizeof(float[4]) * tx->num_slots; 41287ec681f3Smrg if (tx->num_slots) 41297ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0); 41307ec681f3Smrg } else { 41317ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); 41327ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); 41337ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); 41347ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, 511, 3); 41357ec681f3Smrg } 41367ec681f3Smrg 41377ec681f3Smrg if (info->process_vertices) 41387ec681f3Smrg ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ 41397ec681f3Smrg 41407ec681f3Smrg if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) { 41417ec681f3Smrg const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL); 41427ec681f3Smrg tgsi_dump(toks, 0); 41437ec681f3Smrg ureg_free_tokens(toks); 41447ec681f3Smrg } 41457ec681f3Smrg 41467ec681f3Smrg if (info->process_vertices) { 41477ec681f3Smrg NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, 41487ec681f3Smrg tx->output_info, 41497ec681f3Smrg tx->num_outputs, 41507ec681f3Smrg &(info->so)); 41517ec681f3Smrg info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); 41527ec681f3Smrg } else 41537ec681f3Smrg info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL); 41547ec681f3Smrg if (!info->cso) { 41557ec681f3Smrg hr = D3DERR_DRIVERINTERNALERROR; 41567ec681f3Smrg FREE(info->lconstf.data); 41577ec681f3Smrg FREE(info->lconstf.ranges); 41587ec681f3Smrg goto out; 41597ec681f3Smrg } 41607ec681f3Smrg 41617ec681f3Smrg info->const_ranges = 
const_ranges; 41627ec681f3Smrg const_ranges = NULL; 41637ec681f3Smrg info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); 41647ec681f3Smrgout: 41657ec681f3Smrg if (const_ranges) 41667ec681f3Smrg FREE(const_ranges); 41677ec681f3Smrg tx_dtor(tx); 41687ec681f3Smrg return hr; 41697ec681f3Smrg} 4170