17ec681f3Smrg/*
27ec681f3Smrg * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
37ec681f3Smrg * Copyright 2013 Christoph Bumiller
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
97ec681f3Smrg * license, and/or sell copies of the Software, and to permit persons to whom
107ec681f3Smrg * the Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
207ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
217ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
227ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. */
237ec681f3Smrg
247ec681f3Smrg#include "nine_shader.h"
257ec681f3Smrg
267ec681f3Smrg#include "device9.h"
277ec681f3Smrg#include "nine_debug.h"
287ec681f3Smrg#include "nine_state.h"
297ec681f3Smrg#include "vertexdeclaration9.h"
307ec681f3Smrg
317ec681f3Smrg#include "util/macros.h"
327ec681f3Smrg#include "util/u_memory.h"
337ec681f3Smrg#include "util/u_inlines.h"
347ec681f3Smrg#include "pipe/p_shader_tokens.h"
357ec681f3Smrg#include "tgsi/tgsi_ureg.h"
367ec681f3Smrg#include "tgsi/tgsi_dump.h"
377ec681f3Smrg#include "nir/tgsi_to_nir.h"
387ec681f3Smrg
/* Debug channel for this file; DUMP() below passes it to
 * _nine_debug_printf(). */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL.  Forwards its arguments to
 * _nine_debug_printf() with NULL as the second argument.
 * NOTE(review): `args...` is the GNU named-variadic macro form. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
427ec681f3Smrg
437ec681f3Smrg
/* Forward declaration so the handler typedef below can refer to it;
 * the full definition follows further down. */
struct shader_translator;

/* Signature of a special-case instruction handler
 * (stored in sm1_op_info.handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns a string for an opcode; sm1_dump_instruction() prints it with %s.
 * The definition is not part of this section of the file. */
static inline const char *d3dsio_to_string(unsigned opcode);
497ec681f3Smrg
507ec681f3Smrg
/* Shader type tags for vertex and pixel shaders.
 * NOTE(review): presumably the high word of the version token -- confirm
 * at the use site. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Maximum nesting depths; they size the cond_labels, loop_labels,
 * loop_or_rep and regs.rL arrays of struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): by its name, the token value that terminates a shader. */
#define NINED3DSP_END 0x0000ffff

/* Value kinds held in sm1_src_param.type (see sm1_dump_immediate()). */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file marking an immediate operand; its value is one past
 * D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

/* NOTE(review): presumably mask/shift used to extract the write mask from a
 * destination token -- confirm against the parser. */
#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

/* NOTE(review): presumably the instruction-token bit flagging predication. */
#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors (greater, equal, greater-or-equal, less, not-equal,
 * less-or-equal as suggested by the names). */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* An 8-bit flags field starting at bit 16. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Values of sm1_instruction.flags for D3DSIO_TEX
 * (sm1_dump_instruction() prints them as "p" / "b"). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Per-component write mask bits: 0 = x, 1 = y, 2 = z, 3 = w. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle (.xyzw): two selector bits per component, x in bits 0-1. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
907ec681f3Smrg
/* Expands four component letters into the matching comma-separated
 * TGSI_SWIZZLE_x constants, e.g. NINE_SWIZZLE4(X,Y,Z,W). */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component s of src into all four channels via ureg_swizzle(). */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier flags shifted down by D3DSP_DSTMOD_SHIFT; tested
 * against sm1_dst_param.mod in sm1_dump_dst_param(). */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
1007ec681f3Smrg
1017ec681f3Smrg/*
1027ec681f3Smrg * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
1037ec681f3Smrg * BIAS    <= PS 1.4 (x-0.5)
1047ec681f3Smrg * BIASNEG <= PS 1.4 (-(x-0.5))
1057ec681f3Smrg * SIGN    <= PS 1.4 (2(x-0.5))
1067ec681f3Smrg * SIGNNEG <= PS 1.4 (-2(x-0.5))
1077ec681f3Smrg * COMP    <= PS 1.4 (1-x)
1087ec681f3Smrg * X2       = PS 1.4 (2x)
1097ec681f3Smrg * X2NEG    = PS 1.4 (-2x)
1107ec681f3Smrg * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
1117ec681f3Smrg * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
1127ec681f3Smrg * ABS     >= SM 3.0 (abs(x))
1137ec681f3Smrg * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
1157ec681f3Smrg */
/* Source modifiers shifted down by D3DSP_SRCMOD_SHIFT so that they can be
 * used as small integers, e.g. as designators of sm1_mod_str[]. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
1307ec681f3Smrg
1317ec681f3Smrgstatic const char *sm1_mod_str[] =
1327ec681f3Smrg{
1337ec681f3Smrg    [NINED3DSPSM_NONE] = "",
1347ec681f3Smrg    [NINED3DSPSM_NEG] = "-",
1357ec681f3Smrg    [NINED3DSPSM_BIAS] = "bias",
1367ec681f3Smrg    [NINED3DSPSM_BIASNEG] = "biasneg",
1377ec681f3Smrg    [NINED3DSPSM_SIGN] = "sign",
1387ec681f3Smrg    [NINED3DSPSM_SIGNNEG] = "signneg",
1397ec681f3Smrg    [NINED3DSPSM_COMP] = "comp",
1407ec681f3Smrg    [NINED3DSPSM_X2] = "x2",
1417ec681f3Smrg    [NINED3DSPSM_X2NEG] = "x2neg",
1427ec681f3Smrg    [NINED3DSPSM_DZ] = "dz",
1437ec681f3Smrg    [NINED3DSPSM_DW] = "dw",
1447ec681f3Smrg    [NINED3DSPSM_ABS] = "abs",
1457ec681f3Smrg    [NINED3DSPSM_ABSNEG] = "-abs",
1467ec681f3Smrg    [NINED3DSPSM_NOT] = "not"
1477ec681f3Smrg};
1487ec681f3Smrg
1497ec681f3Smrgstatic void
1507ec681f3Smrgsm1_dump_writemask(BYTE mask)
1517ec681f3Smrg{
1527ec681f3Smrg    if (mask & 1) DUMP("x"); else DUMP("_");
1537ec681f3Smrg    if (mask & 2) DUMP("y"); else DUMP("_");
1547ec681f3Smrg    if (mask & 4) DUMP("z"); else DUMP("_");
1557ec681f3Smrg    if (mask & 8) DUMP("w"); else DUMP("_");
1567ec681f3Smrg}
1577ec681f3Smrg
1587ec681f3Smrgstatic void
1597ec681f3Smrgsm1_dump_swizzle(BYTE s)
1607ec681f3Smrg{
1617ec681f3Smrg    char c[4] = { 'x', 'y', 'z', 'w' };
1627ec681f3Smrg    DUMP("%c%c%c%c",
1637ec681f3Smrg         c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
1647ec681f3Smrg}
1657ec681f3Smrg
/* One-letter prefix per D3DSPR_x register file, used by the dump helpers.
 * The table ends at D3DSPR_PREDICATE, so NINED3DSPR_IMMEDIATE (one past it)
 * is not a valid index.  Note that D3DSPR_ATTROUT and D3DSPR_DEPTHOUT both
 * map to 'D'. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
1897ec681f3Smrg
1907ec681f3Smrgstatic void
1917ec681f3Smrgsm1_dump_reg(BYTE file, INT index)
1927ec681f3Smrg{
1937ec681f3Smrg    switch (file) {
1947ec681f3Smrg    case D3DSPR_LOOP:
1957ec681f3Smrg        DUMP("aL");
1967ec681f3Smrg        break;
1977ec681f3Smrg    case D3DSPR_COLOROUT:
1987ec681f3Smrg        DUMP("oC%i", index);
1997ec681f3Smrg        break;
2007ec681f3Smrg    case D3DSPR_DEPTHOUT:
2017ec681f3Smrg        DUMP("oDepth");
2027ec681f3Smrg        break;
2037ec681f3Smrg    case D3DSPR_RASTOUT:
2047ec681f3Smrg        DUMP("oRast%i", index);
2057ec681f3Smrg        break;
2067ec681f3Smrg    case D3DSPR_CONSTINT:
2077ec681f3Smrg        DUMP("iconst[%i]", index);
2087ec681f3Smrg        break;
2097ec681f3Smrg    case D3DSPR_CONSTBOOL:
2107ec681f3Smrg        DUMP("bconst[%i]", index);
2117ec681f3Smrg        break;
2127ec681f3Smrg    default:
2137ec681f3Smrg        DUMP("%c%i", sm1_file_char[file], index);
2147ec681f3Smrg        break;
2157ec681f3Smrg    }
2167ec681f3Smrg}
2177ec681f3Smrg
/* Decoded source operand of an SM1 instruction. */
struct sm1_src_param
{
    INT idx;                   /* register index within `file` */
    struct sm1_src_param *rel; /* relative-addressing operand, or NULL */
    BYTE file;                 /* D3DSPR_x, or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* packed swizzle, 2 bits per component */
    BYTE mod;                  /* NINED3DSPSM_x source modifier */
    BYTE type;                 /* NINED3DSPTYPE_x; selects the imm member */
    /* Immediate payload; read by sm1_dump_immediate() according to `type`. */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Forward declaration; the definition is not in this section of the file.
 * NOTE(review): by its name, fills a source param with an immediate read
 * from the byte code -- confirm at the definition. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
2357ec681f3Smrg
/* Decoded destination operand of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx;                   /* register index within `file` */
    struct sm1_src_param *rel; /* relative-addressing operand, or NULL */
    BYTE file;                 /* D3DSPR_x register file */
    BYTE mask;                 /* NINED3DSP_WRITEMASK_x bits */
    BYTE mod;                  /* NINED3DSPDM_x flags */
    int8_t shift; /* sint4; dumped as "*2^shift" (>0) or "/2^-shift" (<0) */
    BYTE type;
};
2467ec681f3Smrg
2477ec681f3Smrgstatic inline void
2487ec681f3Smrgassert_replicate_swizzle(const struct ureg_src *reg)
2497ec681f3Smrg{
2507ec681f3Smrg    assert(reg->SwizzleY == reg->SwizzleX &&
2517ec681f3Smrg           reg->SwizzleZ == reg->SwizzleX &&
2527ec681f3Smrg           reg->SwizzleW == reg->SwizzleX);
2537ec681f3Smrg}
2547ec681f3Smrg
2557ec681f3Smrgstatic void
2567ec681f3Smrgsm1_dump_immediate(const struct sm1_src_param *param)
2577ec681f3Smrg{
2587ec681f3Smrg    switch (param->type) {
2597ec681f3Smrg    case NINED3DSPTYPE_FLOAT4:
2607ec681f3Smrg        DUMP("{ %f %f %f %f }",
2617ec681f3Smrg             param->imm.f[0], param->imm.f[1],
2627ec681f3Smrg             param->imm.f[2], param->imm.f[3]);
2637ec681f3Smrg        break;
2647ec681f3Smrg    case NINED3DSPTYPE_INT4:
2657ec681f3Smrg        DUMP("{ %i %i %i %i }",
2667ec681f3Smrg             param->imm.i[0], param->imm.i[1],
2677ec681f3Smrg             param->imm.i[2], param->imm.i[3]);
2687ec681f3Smrg        break;
2697ec681f3Smrg    case NINED3DSPTYPE_BOOL:
2707ec681f3Smrg        DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
2717ec681f3Smrg        break;
2727ec681f3Smrg    default:
2737ec681f3Smrg        assert(0);
2747ec681f3Smrg        break;
2757ec681f3Smrg    }
2767ec681f3Smrg}
2777ec681f3Smrg
2787ec681f3Smrgstatic void
2797ec681f3Smrgsm1_dump_src_param(const struct sm1_src_param *param)
2807ec681f3Smrg{
2817ec681f3Smrg    if (param->file == NINED3DSPR_IMMEDIATE) {
2827ec681f3Smrg        assert(!param->mod &&
2837ec681f3Smrg               !param->rel &&
2847ec681f3Smrg               param->swizzle == NINED3DSP_NOSWIZZLE);
2857ec681f3Smrg        sm1_dump_immediate(param);
2867ec681f3Smrg        return;
2877ec681f3Smrg    }
2887ec681f3Smrg
2897ec681f3Smrg    if (param->mod)
2907ec681f3Smrg        DUMP("%s(", sm1_mod_str[param->mod]);
2917ec681f3Smrg    if (param->rel) {
2927ec681f3Smrg        DUMP("%c[", sm1_file_char[param->file]);
2937ec681f3Smrg        sm1_dump_src_param(param->rel);
2947ec681f3Smrg        DUMP("+%i]", param->idx);
2957ec681f3Smrg    } else {
2967ec681f3Smrg        sm1_dump_reg(param->file, param->idx);
2977ec681f3Smrg    }
2987ec681f3Smrg    if (param->mod)
2997ec681f3Smrg       DUMP(")");
3007ec681f3Smrg    if (param->swizzle != NINED3DSP_NOSWIZZLE) {
3017ec681f3Smrg       DUMP(".");
3027ec681f3Smrg       sm1_dump_swizzle(param->swizzle);
3037ec681f3Smrg    }
3047ec681f3Smrg}
3057ec681f3Smrg
3067ec681f3Smrgstatic void
3077ec681f3Smrgsm1_dump_dst_param(const struct sm1_dst_param *param)
3087ec681f3Smrg{
3097ec681f3Smrg   if (param->mod & NINED3DSPDM_SATURATE)
3107ec681f3Smrg      DUMP("sat ");
3117ec681f3Smrg   if (param->mod & NINED3DSPDM_PARTIALP)
3127ec681f3Smrg      DUMP("pp ");
3137ec681f3Smrg   if (param->mod & NINED3DSPDM_CENTROID)
3147ec681f3Smrg      DUMP("centroid ");
3157ec681f3Smrg   if (param->shift < 0)
3167ec681f3Smrg      DUMP("/%u ", 1 << -param->shift);
3177ec681f3Smrg   if (param->shift > 0)
3187ec681f3Smrg      DUMP("*%u ", 1 << param->shift);
3197ec681f3Smrg
3207ec681f3Smrg   if (param->rel) {
3217ec681f3Smrg      DUMP("%c[", sm1_file_char[param->file]);
3227ec681f3Smrg      sm1_dump_src_param(param->rel);
3237ec681f3Smrg      DUMP("+%i]", param->idx);
3247ec681f3Smrg   } else {
3257ec681f3Smrg      sm1_dump_reg(param->file, param->idx);
3267ec681f3Smrg   }
3277ec681f3Smrg   if (param->mask != NINED3DSP_WRITEMASK_ALL) {
3287ec681f3Smrg      DUMP(".");
3297ec681f3Smrg      sm1_dump_writemask(param->mask);
3307ec681f3Smrg   }
3317ec681f3Smrg}
3327ec681f3Smrg
/* A destination register together with its declared usage.
 * NOTE(review): presumably the decoded form of a DCL instruction, filled
 * in by sm1_read_semantic() -- confirm at the definition. */
struct sm1_semantic
{
   struct sm1_dst_param reg; /* register being declared */
   BYTE sampler_type;
   D3DDECLUSAGE usage;
   BYTE usage_idx;
};
3407ec681f3Smrg
/* Static description of one SM1 opcode. */
struct sm1_op_info
{
    unsigned sio; /* presumably the D3DSIO_x value -- confirm at the table */
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
3617ec681f3Smrg
/* One decoded SM1 instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode-specific flags, e.g. NINED3DSI_TEXLD_x */
    BOOL coissue;    /* dumped as the "_co" suffix */
    BOOL predicated; /* when set, `pred` is dumped as the predicate */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably backing storage for src[i].rel */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    /* NOTE(review): presumably backing storage for dst[0].rel */
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    const struct sm1_op_info *info;
};
3787ec681f3Smrg
3797ec681f3Smrgstatic void
3807ec681f3Smrgsm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
3817ec681f3Smrg{
3827ec681f3Smrg    unsigned i;
3837ec681f3Smrg
3847ec681f3Smrg    /* no info stored for these: */
3857ec681f3Smrg    if (insn->opcode == D3DSIO_DCL)
3867ec681f3Smrg        return;
3877ec681f3Smrg    for (i = 0; i < indent; ++i)
3887ec681f3Smrg        DUMP("  ");
3897ec681f3Smrg
3907ec681f3Smrg    if (insn->predicated) {
3917ec681f3Smrg        DUMP("@");
3927ec681f3Smrg        sm1_dump_src_param(&insn->pred);
3937ec681f3Smrg        DUMP(" ");
3947ec681f3Smrg    }
3957ec681f3Smrg    DUMP("%s", d3dsio_to_string(insn->opcode));
3967ec681f3Smrg    if (insn->flags) {
3977ec681f3Smrg        switch (insn->opcode) {
3987ec681f3Smrg        case D3DSIO_TEX:
3997ec681f3Smrg            DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
4007ec681f3Smrg            break;
4017ec681f3Smrg        default:
4027ec681f3Smrg            DUMP("_%x", insn->flags);
4037ec681f3Smrg            break;
4047ec681f3Smrg        }
4057ec681f3Smrg    }
4067ec681f3Smrg    if (insn->coissue)
4077ec681f3Smrg        DUMP("_co");
4087ec681f3Smrg    DUMP(" ");
4097ec681f3Smrg
4107ec681f3Smrg    for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
4117ec681f3Smrg        sm1_dump_dst_param(&insn->dst[i]);
4127ec681f3Smrg        DUMP(" ");
4137ec681f3Smrg    }
4147ec681f3Smrg
4157ec681f3Smrg    for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
4167ec681f3Smrg        sm1_dump_src_param(&insn->src[i]);
4177ec681f3Smrg        DUMP(" ");
4187ec681f3Smrg    }
4197ec681f3Smrg    if (insn->opcode == D3DSIO_DEF ||
4207ec681f3Smrg        insn->opcode == D3DSIO_DEFI ||
4217ec681f3Smrg        insn->opcode == D3DSIO_DEFB)
4227ec681f3Smrg        sm1_dump_immediate(&insn->src[0]);
4237ec681f3Smrg
4247ec681f3Smrg    DUMP("\n");
4257ec681f3Smrg}
4267ec681f3Smrg
/* A constant defined inside the shader itself (see tx_set_lconstf/i/b). */
struct sm1_local_const
{
    INT idx;             /* constant register index */
    struct ureg_src reg; /* ureg immediate holding the value */
    float f[4]; /* for indirect addressing of float constants */
};
4337ec681f3Smrg
/* All state carried while translating one SM1 shader through ureg. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds checked by tx_lconst{f,i,b} and tx_set_lconst{f,i,b} */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval;
    boolean face_is_sysval_integer;
    boolean mul_zero_wins;
    unsigned texcoord_sn;

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst predicate;
        struct ureg_dst predicate_tmp;
        struct ureg_dst predicate_dst;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[8]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* number of entries in regs.r */
    unsigned num_scratch; /* next free slot in regs.t, see tx_scratch() */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
    boolean predicated_activated;

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants; the arrays grow in steps of 8 entries */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    /* constant slots referenced so far (see nine_*_constant_src()) */
    boolean slots_used[NINE_MAX_CONST_ALL];
    unsigned *slot_map; /* optional remapping of constant slots, may be NULL */
    unsigned num_slots; /* highest referenced slot + 1 */

    boolean indirect_const_access;
    boolean failure; /* set when translation hits an error */

    /* filled by nine_record_outputs() */
    struct nine_vs_output_info output_info[16];
    int num_outputs;

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
5267ec681f3Smrg
/* All three macros expect a `struct shader_translator *tx` in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* If cond holds, mark the translation as failed and return from the
 * enclosing void function.
 * NOTE(review): this expands to a bare `if { }` block rather than
 * do { } while (0), so it must not be used as the unbraced body of an
 * if/else. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
5317ec681f3Smrg
/* Forward declaration; the definition is not in this section of the file.
 * NOTE(review): by its name, decodes a semantic declaration into the given
 * struct sm1_semantic -- confirm at the definition. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
5347ec681f3Smrg
5357ec681f3Smrgstatic void
5367ec681f3Smrgsm1_instruction_check(const struct sm1_instruction *insn)
5377ec681f3Smrg{
5387ec681f3Smrg    if (insn->opcode == D3DSIO_CRS)
5397ec681f3Smrg    {
5407ec681f3Smrg        if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
5417ec681f3Smrg        {
5427ec681f3Smrg            DBG("CRS.mask.w\n");
5437ec681f3Smrg        }
5447ec681f3Smrg    }
5457ec681f3Smrg}
5467ec681f3Smrg
5477ec681f3Smrgstatic void
5487ec681f3Smrgnine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
5497ec681f3Smrg                    int mask, int output_index)
5507ec681f3Smrg{
5517ec681f3Smrg    tx->output_info[tx->num_outputs].output_semantic = Usage;
5527ec681f3Smrg    tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
5537ec681f3Smrg    tx->output_info[tx->num_outputs].mask = mask;
5547ec681f3Smrg    tx->output_info[tx->num_outputs].output_index = output_index;
5557ec681f3Smrg    tx->num_outputs++;
5567ec681f3Smrg}
5577ec681f3Smrg
5587ec681f3Smrgstatic struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
5597ec681f3Smrg{
5607ec681f3Smrg    struct ureg_src src;
5617ec681f3Smrg
5627ec681f3Smrg    if (tx->slot_map)
5637ec681f3Smrg        idx = tx->slot_map[idx];
5647ec681f3Smrg    /* vswp constant handling: we use two buffers
5657ec681f3Smrg     * to fit all the float constants. The special handling
5667ec681f3Smrg     * doesn't need to be elsewhere, because all the instructions
5677ec681f3Smrg     * accessing the constants directly are VS1, and swvp
5687ec681f3Smrg     * is VS >= 2 */
5697ec681f3Smrg    if (tx->info->swvp_on && idx >= 4096) {
5707ec681f3Smrg        /* TODO: swvp rel is broken if many constants are used */
5717ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
5727ec681f3Smrg        src = ureg_src_dimension(src, 1);
5737ec681f3Smrg    } else {
5747ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
5757ec681f3Smrg        src = ureg_src_dimension(src, 0);
5767ec681f3Smrg    }
5777ec681f3Smrg
5787ec681f3Smrg    if (!tx->info->swvp_on)
5797ec681f3Smrg        tx->slots_used[idx] = TRUE;
5807ec681f3Smrg    if (tx->info->const_float_slots < (idx + 1))
5817ec681f3Smrg        tx->info->const_float_slots = idx + 1;
5827ec681f3Smrg    if (tx->num_slots < (idx + 1))
5837ec681f3Smrg        tx->num_slots = idx + 1;
5847ec681f3Smrg
5857ec681f3Smrg    return src;
5867ec681f3Smrg}
5877ec681f3Smrg
5887ec681f3Smrgstatic struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
5897ec681f3Smrg{
5907ec681f3Smrg    struct ureg_src src;
5917ec681f3Smrg
5927ec681f3Smrg    if (tx->info->swvp_on) {
5937ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
5947ec681f3Smrg        src = ureg_src_dimension(src, 2);
5957ec681f3Smrg    } else {
5967ec681f3Smrg        unsigned slot_idx = tx->info->const_i_base + idx;
5977ec681f3Smrg        if (tx->slot_map)
5987ec681f3Smrg            slot_idx = tx->slot_map[slot_idx];
5997ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
6007ec681f3Smrg        src = ureg_src_dimension(src, 0);
6017ec681f3Smrg        tx->slots_used[slot_idx] = TRUE;
6027ec681f3Smrg        tx->info->int_slots_used[idx] = TRUE;
6037ec681f3Smrg        if (tx->num_slots < (slot_idx + 1))
6047ec681f3Smrg            tx->num_slots = slot_idx + 1;
6057ec681f3Smrg    }
6067ec681f3Smrg
6077ec681f3Smrg    if (tx->info->const_int_slots < (idx + 1))
6087ec681f3Smrg        tx->info->const_int_slots = idx + 1;
6097ec681f3Smrg
6107ec681f3Smrg    return src;
6117ec681f3Smrg}
6127ec681f3Smrg
6137ec681f3Smrgstatic struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
6147ec681f3Smrg{
6157ec681f3Smrg    struct ureg_src src;
6167ec681f3Smrg
6177ec681f3Smrg    char r = idx / 4;
6187ec681f3Smrg    char s = idx & 3;
6197ec681f3Smrg
6207ec681f3Smrg    if (tx->info->swvp_on) {
6217ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, r);
6227ec681f3Smrg        src = ureg_src_dimension(src, 3);
6237ec681f3Smrg    } else {
6247ec681f3Smrg        unsigned slot_idx = tx->info->const_b_base + r;
6257ec681f3Smrg        if (tx->slot_map)
6267ec681f3Smrg            slot_idx = tx->slot_map[slot_idx];
6277ec681f3Smrg        src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
6287ec681f3Smrg        src = ureg_src_dimension(src, 0);
6297ec681f3Smrg        tx->slots_used[slot_idx] = TRUE;
6307ec681f3Smrg        tx->info->bool_slots_used[idx] = TRUE;
6317ec681f3Smrg        if (tx->num_slots < (slot_idx + 1))
6327ec681f3Smrg            tx->num_slots = slot_idx + 1;
6337ec681f3Smrg    }
6347ec681f3Smrg    src = ureg_swizzle(src, s, s, s, s);
6357ec681f3Smrg
6367ec681f3Smrg    if (tx->info->const_bool_slots < (idx + 1))
6377ec681f3Smrg        tx->info->const_bool_slots = idx + 1;
6387ec681f3Smrg
6397ec681f3Smrg    return src;
6407ec681f3Smrg}
6417ec681f3Smrg
6427ec681f3Smrgstatic boolean
6437ec681f3Smrgtx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
6447ec681f3Smrg{
6457ec681f3Smrg   INT i;
6467ec681f3Smrg
6477ec681f3Smrg   if (index < 0 || index >= tx->num_constf_allowed) {
6487ec681f3Smrg       tx->failure = TRUE;
6497ec681f3Smrg       return FALSE;
6507ec681f3Smrg   }
6517ec681f3Smrg   for (i = 0; i < tx->num_lconstf; ++i) {
6527ec681f3Smrg      if (tx->lconstf[i].idx == index) {
6537ec681f3Smrg         *src = tx->lconstf[i].reg;
6547ec681f3Smrg         return TRUE;
6557ec681f3Smrg      }
6567ec681f3Smrg   }
6577ec681f3Smrg   return FALSE;
6587ec681f3Smrg}
6597ec681f3Smrgstatic boolean
6607ec681f3Smrgtx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
6617ec681f3Smrg{
6627ec681f3Smrg   int i;
6637ec681f3Smrg
6647ec681f3Smrg   if (index < 0 || index >= tx->num_consti_allowed) {
6657ec681f3Smrg       tx->failure = TRUE;
6667ec681f3Smrg       return FALSE;
6677ec681f3Smrg   }
6687ec681f3Smrg   for (i = 0; i < tx->num_lconsti; ++i) {
6697ec681f3Smrg      if (tx->lconsti[i].idx == index) {
6707ec681f3Smrg         *src = tx->lconsti[i].reg;
6717ec681f3Smrg         return TRUE;
6727ec681f3Smrg      }
6737ec681f3Smrg   }
6747ec681f3Smrg   return FALSE;
6757ec681f3Smrg}
6767ec681f3Smrgstatic boolean
6777ec681f3Smrgtx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
6787ec681f3Smrg{
6797ec681f3Smrg   int i;
6807ec681f3Smrg
6817ec681f3Smrg   if (index < 0 || index >= tx->num_constb_allowed) {
6827ec681f3Smrg       tx->failure = TRUE;
6837ec681f3Smrg       return FALSE;
6847ec681f3Smrg   }
6857ec681f3Smrg   for (i = 0; i < tx->num_lconstb; ++i) {
6867ec681f3Smrg      if (tx->lconstb[i].idx == index) {
6877ec681f3Smrg         *src = tx->lconstb[i].reg;
6887ec681f3Smrg         return TRUE;
6897ec681f3Smrg      }
6907ec681f3Smrg   }
6917ec681f3Smrg   return FALSE;
6927ec681f3Smrg}
6937ec681f3Smrg
6947ec681f3Smrgstatic void
6957ec681f3Smrgtx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
6967ec681f3Smrg{
6977ec681f3Smrg    unsigned n;
6987ec681f3Smrg
6997ec681f3Smrg    FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
7007ec681f3Smrg
7017ec681f3Smrg    for (n = 0; n < tx->num_lconstf; ++n)
7027ec681f3Smrg        if (tx->lconstf[n].idx == index)
7037ec681f3Smrg            break;
7047ec681f3Smrg    if (n == tx->num_lconstf) {
7057ec681f3Smrg       if ((n % 8) == 0) {
7067ec681f3Smrg          tx->lconstf = REALLOC(tx->lconstf,
7077ec681f3Smrg                                (n + 0) * sizeof(tx->lconstf[0]),
7087ec681f3Smrg                                (n + 8) * sizeof(tx->lconstf[0]));
7097ec681f3Smrg          assert(tx->lconstf);
7107ec681f3Smrg       }
7117ec681f3Smrg       tx->num_lconstf++;
7127ec681f3Smrg    }
7137ec681f3Smrg    tx->lconstf[n].idx = index;
7147ec681f3Smrg    tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
7157ec681f3Smrg
7167ec681f3Smrg    memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
7177ec681f3Smrg}
7187ec681f3Smrgstatic void
7197ec681f3Smrgtx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
7207ec681f3Smrg{
7217ec681f3Smrg    unsigned n;
7227ec681f3Smrg
7237ec681f3Smrg    FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
7247ec681f3Smrg
7257ec681f3Smrg    for (n = 0; n < tx->num_lconsti; ++n)
7267ec681f3Smrg        if (tx->lconsti[n].idx == index)
7277ec681f3Smrg            break;
7287ec681f3Smrg    if (n == tx->num_lconsti) {
7297ec681f3Smrg       if ((n % 8) == 0) {
7307ec681f3Smrg          tx->lconsti = REALLOC(tx->lconsti,
7317ec681f3Smrg                                (n + 0) * sizeof(tx->lconsti[0]),
7327ec681f3Smrg                                (n + 8) * sizeof(tx->lconsti[0]));
7337ec681f3Smrg          assert(tx->lconsti);
7347ec681f3Smrg       }
7357ec681f3Smrg       tx->num_lconsti++;
7367ec681f3Smrg    }
7377ec681f3Smrg
7387ec681f3Smrg    tx->lconsti[n].idx = index;
7397ec681f3Smrg    tx->lconsti[n].reg = tx->native_integers ?
7407ec681f3Smrg       ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
7417ec681f3Smrg       ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
7427ec681f3Smrg}
7437ec681f3Smrgstatic void
7447ec681f3Smrgtx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
7457ec681f3Smrg{
7467ec681f3Smrg    unsigned n;
7477ec681f3Smrg
7487ec681f3Smrg    FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
7497ec681f3Smrg
7507ec681f3Smrg    for (n = 0; n < tx->num_lconstb; ++n)
7517ec681f3Smrg        if (tx->lconstb[n].idx == index)
7527ec681f3Smrg            break;
7537ec681f3Smrg    if (n == tx->num_lconstb) {
7547ec681f3Smrg       if ((n % 8) == 0) {
7557ec681f3Smrg          tx->lconstb = REALLOC(tx->lconstb,
7567ec681f3Smrg                                (n + 0) * sizeof(tx->lconstb[0]),
7577ec681f3Smrg                                (n + 8) * sizeof(tx->lconstb[0]));
7587ec681f3Smrg          assert(tx->lconstb);
7597ec681f3Smrg       }
7607ec681f3Smrg       tx->num_lconstb++;
7617ec681f3Smrg    }
7627ec681f3Smrg
7637ec681f3Smrg    tx->lconstb[n].idx = index;
7647ec681f3Smrg    tx->lconstb[n].reg = tx->native_integers ?
7657ec681f3Smrg       ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
7667ec681f3Smrg       ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
7677ec681f3Smrg}
7687ec681f3Smrg
7697ec681f3Smrgstatic inline struct ureg_dst
7707ec681f3Smrgtx_scratch(struct shader_translator *tx)
7717ec681f3Smrg{
7727ec681f3Smrg    if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
7737ec681f3Smrg        tx->failure = TRUE;
7747ec681f3Smrg        return tx->regs.t[0];
7757ec681f3Smrg    }
7767ec681f3Smrg    if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
7777ec681f3Smrg        tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
7787ec681f3Smrg    return tx->regs.t[tx->num_scratch++];
7797ec681f3Smrg}
7807ec681f3Smrg
7817ec681f3Smrgstatic inline struct ureg_dst
7827ec681f3Smrgtx_scratch_scalar(struct shader_translator *tx)
7837ec681f3Smrg{
7847ec681f3Smrg    return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
7857ec681f3Smrg}
7867ec681f3Smrg
7877ec681f3Smrgstatic inline struct ureg_src
7887ec681f3Smrgtx_src_scalar(struct ureg_dst dst)
7897ec681f3Smrg{
7907ec681f3Smrg    struct ureg_src src = ureg_src(dst);
7917ec681f3Smrg    int c = ffs(dst.WriteMask) - 1;
7927ec681f3Smrg    if (dst.WriteMask == (1 << c))
7937ec681f3Smrg        src = ureg_scalar(src, c);
7947ec681f3Smrg    return src;
7957ec681f3Smrg}
7967ec681f3Smrg
7977ec681f3Smrgstatic inline void
7987ec681f3Smrgtx_temp_alloc(struct shader_translator *tx, INT idx)
7997ec681f3Smrg{
8007ec681f3Smrg    assert(idx >= 0);
8017ec681f3Smrg    if (idx >= tx->num_temp) {
8027ec681f3Smrg       unsigned k = tx->num_temp;
8037ec681f3Smrg       unsigned n = idx + 1;
8047ec681f3Smrg       tx->regs.r = REALLOC(tx->regs.r,
8057ec681f3Smrg                            k * sizeof(tx->regs.r[0]),
8067ec681f3Smrg                            n * sizeof(tx->regs.r[0]));
8077ec681f3Smrg       for (; k < n; ++k)
8087ec681f3Smrg          tx->regs.r[k] = ureg_dst_undef();
8097ec681f3Smrg       tx->num_temp = n;
8107ec681f3Smrg    }
8117ec681f3Smrg    if (ureg_dst_is_undef(tx->regs.r[idx]))
8127ec681f3Smrg        tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
8137ec681f3Smrg}
8147ec681f3Smrg
8157ec681f3Smrgstatic inline void
8167ec681f3Smrgtx_addr_alloc(struct shader_translator *tx, INT idx)
8177ec681f3Smrg{
8187ec681f3Smrg    assert(idx == 0);
8197ec681f3Smrg    if (ureg_dst_is_undef(tx->regs.address))
8207ec681f3Smrg        tx->regs.address = ureg_DECL_address(tx->ureg);
8217ec681f3Smrg    if (ureg_dst_is_undef(tx->regs.a0))
8227ec681f3Smrg        tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
8237ec681f3Smrg}
8247ec681f3Smrg
8257ec681f3Smrgstatic inline bool
8267ec681f3SmrgTEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
8277ec681f3Smrg              unsigned target, struct ureg_src src0,
8287ec681f3Smrg              struct ureg_src src1, INT idx)
8297ec681f3Smrg{
8307ec681f3Smrg    struct ureg_dst tmp;
8317ec681f3Smrg    struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
8327ec681f3Smrg
8337ec681f3Smrg    if (!(tx->info->fetch4 & (1 << idx)))
8347ec681f3Smrg        return false;
8357ec681f3Smrg
8367ec681f3Smrg    /* TODO: needs more tests, but this feature is not much used at all */
8377ec681f3Smrg
8387ec681f3Smrg    tmp = tx_scratch(tx);
8397ec681f3Smrg    ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
8407ec681f3Smrg                  NULL, 0, src_tg4, 3);
8417ec681f3Smrg    ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
8427ec681f3Smrg    return true;
8437ec681f3Smrg}
8447ec681f3Smrg
8457ec681f3Smrg/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
8467ec681f3Smrg * the projection should be applied on the texture. It doesn't
8477ec681f3Smrg * apply on texkill.
8487ec681f3Smrg * The doc is very imprecise here (it says the projection is done
8497ec681f3Smrg * before rasterization, thus in vs, which seems wrong since ps instructions
8507ec681f3Smrg * are affected differently)
8517ec681f3Smrg * For now we only apply to the ps TEX instruction and TEXBEM.
8527ec681f3Smrg * Perhaps some other instructions would need it */
8537ec681f3Smrgstatic inline void
8547ec681f3Smrgapply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
8557ec681f3Smrg                      struct ureg_src src, INT idx)
8567ec681f3Smrg{
8577ec681f3Smrg    struct ureg_dst tmp;
8587ec681f3Smrg    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
8597ec681f3Smrg
8607ec681f3Smrg    /* no projection */
8617ec681f3Smrg    if (dim == 1) {
8627ec681f3Smrg        ureg_MOV(tx->ureg, dst, src);
8637ec681f3Smrg    } else {
8647ec681f3Smrg        tmp = tx_scratch_scalar(tx);
8657ec681f3Smrg        ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
8667ec681f3Smrg        ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
8677ec681f3Smrg    }
8687ec681f3Smrg}
8697ec681f3Smrg
8707ec681f3Smrgstatic inline void
8717ec681f3SmrgTEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
8727ec681f3Smrg                         unsigned target, struct ureg_src src0,
8737ec681f3Smrg                         struct ureg_src src1, INT idx)
8747ec681f3Smrg{
8757ec681f3Smrg    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
8767ec681f3Smrg    struct ureg_dst tmp;
8777ec681f3Smrg    boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
8787ec681f3Smrg
8797ec681f3Smrg    /* dim == 1: no projection
8807ec681f3Smrg     * Looks like must be disabled when it makes no
8817ec681f3Smrg     * sense according the texture dimensions
8827ec681f3Smrg     */
8837ec681f3Smrg    if (dim == 1 || (dim <= target && !shadow)) {
8847ec681f3Smrg        ureg_TEX(tx->ureg, dst, target, src0, src1);
8857ec681f3Smrg    } else if (dim == 4) {
8867ec681f3Smrg        ureg_TXP(tx->ureg, dst, target, src0, src1);
8877ec681f3Smrg    } else {
8887ec681f3Smrg        tmp = tx_scratch(tx);
8897ec681f3Smrg        apply_ps1x_projection(tx, tmp, src0, idx);
8907ec681f3Smrg        ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
8917ec681f3Smrg    }
8927ec681f3Smrg}
8937ec681f3Smrg
8947ec681f3Smrgstatic inline void
8957ec681f3Smrgtx_texcoord_alloc(struct shader_translator *tx, INT idx)
8967ec681f3Smrg{
8977ec681f3Smrg    assert(IS_PS);
8987ec681f3Smrg    assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
8997ec681f3Smrg    if (ureg_src_is_undef(tx->regs.vT[idx]))
9007ec681f3Smrg       tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
9017ec681f3Smrg                                             TGSI_INTERPOLATE_PERSPECTIVE);
9027ec681f3Smrg}
9037ec681f3Smrg
9047ec681f3Smrgstatic inline unsigned *
9057ec681f3Smrgtx_bgnloop(struct shader_translator *tx)
9067ec681f3Smrg{
9077ec681f3Smrg    tx->loop_depth++;
9087ec681f3Smrg    if (tx->loop_depth_max < tx->loop_depth)
9097ec681f3Smrg        tx->loop_depth_max = tx->loop_depth;
9107ec681f3Smrg    assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
9117ec681f3Smrg    return &tx->loop_labels[tx->loop_depth - 1];
9127ec681f3Smrg}
9137ec681f3Smrg
9147ec681f3Smrgstatic inline unsigned *
9157ec681f3Smrgtx_endloop(struct shader_translator *tx)
9167ec681f3Smrg{
9177ec681f3Smrg    assert(tx->loop_depth);
9187ec681f3Smrg    tx->loop_depth--;
9197ec681f3Smrg    ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
9207ec681f3Smrg                     ureg_get_instruction_number(tx->ureg));
9217ec681f3Smrg    return &tx->loop_labels[tx->loop_depth];
9227ec681f3Smrg}
9237ec681f3Smrg
9247ec681f3Smrgstatic struct ureg_dst
9257ec681f3Smrgtx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
9267ec681f3Smrg{
9277ec681f3Smrg    const unsigned l = tx->loop_depth - 1;
9287ec681f3Smrg
9297ec681f3Smrg    if (!tx->loop_depth)
9307ec681f3Smrg    {
9317ec681f3Smrg        DBG("loop counter requested outside of loop\n");
9327ec681f3Smrg        return ureg_dst_undef();
9337ec681f3Smrg    }
9347ec681f3Smrg
9357ec681f3Smrg    if (ureg_dst_is_undef(tx->regs.rL[l])) {
9367ec681f3Smrg        /* loop or rep ctr creation */
9377ec681f3Smrg        tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
9387ec681f3Smrg        tx->loop_or_rep[l] = loop_or_rep;
9397ec681f3Smrg    }
9407ec681f3Smrg    /* loop - rep - endloop - endrep not allowed */
9417ec681f3Smrg    assert(tx->loop_or_rep[l] == loop_or_rep);
9427ec681f3Smrg
9437ec681f3Smrg    return tx->regs.rL[l];
9447ec681f3Smrg}
9457ec681f3Smrg
9467ec681f3Smrgstatic struct ureg_src
9477ec681f3Smrgtx_get_loopal(struct shader_translator *tx)
9487ec681f3Smrg{
9497ec681f3Smrg    int loop_level = tx->loop_depth - 1;
9507ec681f3Smrg
9517ec681f3Smrg    while (loop_level >= 0) {
9527ec681f3Smrg        /* handle loop - rep - endrep - endloop case */
9537ec681f3Smrg        if (tx->loop_or_rep[loop_level])
9547ec681f3Smrg            /* the value is in the loop counter y component (nine implementation) */
9557ec681f3Smrg            return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
9567ec681f3Smrg        loop_level--;
9577ec681f3Smrg    }
9587ec681f3Smrg
9597ec681f3Smrg    DBG("aL counter requested outside of loop\n");
9607ec681f3Smrg    return ureg_src_undef();
9617ec681f3Smrg}
9627ec681f3Smrg
9637ec681f3Smrgstatic inline unsigned *
9647ec681f3Smrgtx_cond(struct shader_translator *tx)
9657ec681f3Smrg{
9667ec681f3Smrg   assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
9677ec681f3Smrg   tx->cond_depth++;
9687ec681f3Smrg   return &tx->cond_labels[tx->cond_depth - 1];
9697ec681f3Smrg}
9707ec681f3Smrg
9717ec681f3Smrgstatic inline unsigned *
9727ec681f3Smrgtx_elsecond(struct shader_translator *tx)
9737ec681f3Smrg{
9747ec681f3Smrg   assert(tx->cond_depth);
9757ec681f3Smrg   return &tx->cond_labels[tx->cond_depth - 1];
9767ec681f3Smrg}
9777ec681f3Smrg
9787ec681f3Smrgstatic inline void
9797ec681f3Smrgtx_endcond(struct shader_translator *tx)
9807ec681f3Smrg{
9817ec681f3Smrg   assert(tx->cond_depth);
9827ec681f3Smrg   tx->cond_depth--;
9837ec681f3Smrg   ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
9847ec681f3Smrg                    ureg_get_instruction_number(tx->ureg));
9857ec681f3Smrg}
9867ec681f3Smrg
9877ec681f3Smrgstatic inline struct ureg_dst
9887ec681f3Smrgnine_ureg_dst_register(unsigned file, int index)
9897ec681f3Smrg{
9907ec681f3Smrg    return ureg_dst(ureg_src_register(file, index));
9917ec681f3Smrg}
9927ec681f3Smrg
9937ec681f3Smrgstatic inline struct ureg_src
9947ec681f3Smrgnine_get_position_input(struct shader_translator *tx)
9957ec681f3Smrg{
9967ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
9977ec681f3Smrg
9987ec681f3Smrg    if (tx->wpos_is_sysval)
9997ec681f3Smrg        return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
10007ec681f3Smrg    else
10017ec681f3Smrg        return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
10027ec681f3Smrg                                  0, TGSI_INTERPOLATE_LINEAR);
10037ec681f3Smrg}
10047ec681f3Smrg
10057ec681f3Smrgstatic struct ureg_src
10067ec681f3Smrgtx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
10077ec681f3Smrg{
10087ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
10097ec681f3Smrg    struct ureg_src src;
10107ec681f3Smrg    struct ureg_dst tmp;
10117ec681f3Smrg
10127ec681f3Smrg    assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
10137ec681f3Smrg        (param->file == D3DSPR_INPUT && tx->version.major == 3));
10147ec681f3Smrg
10157ec681f3Smrg    switch (param->file)
10167ec681f3Smrg    {
10177ec681f3Smrg    case D3DSPR_TEMP:
10187ec681f3Smrg        tx_temp_alloc(tx, param->idx);
10197ec681f3Smrg        src = ureg_src(tx->regs.r[param->idx]);
10207ec681f3Smrg        break;
10217ec681f3Smrg /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
10227ec681f3Smrg    case D3DSPR_ADDR:
10237ec681f3Smrg        if (IS_VS) {
10247ec681f3Smrg            assert(param->idx == 0);
10257ec681f3Smrg            /* the address register (vs only) must be
10267ec681f3Smrg             * assigned before use */
10277ec681f3Smrg            assert(!ureg_dst_is_undef(tx->regs.a0));
10287ec681f3Smrg            /* Round to lowest for vs1.1 (contrary to the doc), else
10297ec681f3Smrg             * round to nearest */
10307ec681f3Smrg            if (tx->version.major < 2 && tx->version.minor < 2)
10317ec681f3Smrg                ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
10327ec681f3Smrg            else
10337ec681f3Smrg                ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
10347ec681f3Smrg            src = ureg_src(tx->regs.address);
10357ec681f3Smrg        } else {
10367ec681f3Smrg            if (tx->version.major < 2 && tx->version.minor < 4) {
10377ec681f3Smrg                /* no subroutines, so should be defined */
10387ec681f3Smrg                src = ureg_src(tx->regs.tS[param->idx]);
10397ec681f3Smrg            } else {
10407ec681f3Smrg                tx_texcoord_alloc(tx, param->idx);
10417ec681f3Smrg                src = tx->regs.vT[param->idx];
10427ec681f3Smrg            }
10437ec681f3Smrg        }
10447ec681f3Smrg        break;
10457ec681f3Smrg    case D3DSPR_INPUT:
10467ec681f3Smrg        if (IS_VS) {
10477ec681f3Smrg            src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
10487ec681f3Smrg        } else {
10497ec681f3Smrg            if (tx->version.major < 3) {
10507ec681f3Smrg                src = ureg_DECL_fs_input_centroid(
10517ec681f3Smrg                    ureg, TGSI_SEMANTIC_COLOR, param->idx,
10527ec681f3Smrg                    TGSI_INTERPOLATE_COLOR,
10537ec681f3Smrg                    tx->info->force_color_in_centroid ?
10547ec681f3Smrg                      TGSI_INTERPOLATE_LOC_CENTROID : 0,
10557ec681f3Smrg                    0, 1);
10567ec681f3Smrg            } else {
10577ec681f3Smrg                if(param->rel) {
10587ec681f3Smrg                    /* Copy all inputs (non consecutive)
10597ec681f3Smrg                     * to temp array (consecutive).
10607ec681f3Smrg                     * This is not good for performance.
10617ec681f3Smrg                     * A better way would be to have inputs
10627ec681f3Smrg                     * consecutive (would need implement alternative
10637ec681f3Smrg                     * way to match vs outputs and ps inputs).
10647ec681f3Smrg                     * However even with the better way, the temp array
10657ec681f3Smrg                     * copy would need to be used if some inputs
10667ec681f3Smrg                     * are not GENERIC or if they have different
10677ec681f3Smrg                     * interpolation flag. */
10687ec681f3Smrg                    if (ureg_src_is_undef(tx->regs.v_consecutive)) {
10697ec681f3Smrg                        int i;
10707ec681f3Smrg                        tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
10717ec681f3Smrg                        for (i = 0; i < 10; i++) {
10727ec681f3Smrg                            if (!ureg_src_is_undef(tx->regs.v[i]))
10737ec681f3Smrg                                ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
10747ec681f3Smrg                            else
10757ec681f3Smrg                                ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
10767ec681f3Smrg                        }
10777ec681f3Smrg                    }
10787ec681f3Smrg                    src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
10797ec681f3Smrg                } else {
10807ec681f3Smrg                    assert(param->idx < ARRAY_SIZE(tx->regs.v));
10817ec681f3Smrg                    src = tx->regs.v[param->idx];
10827ec681f3Smrg                }
10837ec681f3Smrg            }
10847ec681f3Smrg        }
10857ec681f3Smrg        if (param->rel)
10867ec681f3Smrg            src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
10877ec681f3Smrg        break;
10887ec681f3Smrg    case D3DSPR_PREDICATE:
10897ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.predicate)) {
10907ec681f3Smrg            /* Forbidden to use the predicate register before being set */
10917ec681f3Smrg            tx->failure = TRUE;
10927ec681f3Smrg            tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
10937ec681f3Smrg        }
10947ec681f3Smrg        src = ureg_src(tx->regs.predicate);
10957ec681f3Smrg        break;
10967ec681f3Smrg    case D3DSPR_SAMPLER:
10977ec681f3Smrg        assert(param->mod == NINED3DSPSM_NONE);
10987ec681f3Smrg        /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
10997ec681f3Smrg        src = ureg_DECL_sampler(ureg, param->idx);
11007ec681f3Smrg        break;
11017ec681f3Smrg    case D3DSPR_CONST:
11027ec681f3Smrg        if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
11037ec681f3Smrg            src = nine_float_constant_src(tx, param->idx);
11047ec681f3Smrg            if (param->rel) {
11057ec681f3Smrg                tx->indirect_const_access = TRUE;
11067ec681f3Smrg                src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
11077ec681f3Smrg            }
11087ec681f3Smrg        }
11097ec681f3Smrg        if (!IS_VS && tx->version.major < 2) {
11107ec681f3Smrg            /* ps 1.X clamps constants */
11117ec681f3Smrg            tmp = tx_scratch(tx);
11127ec681f3Smrg            ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
11137ec681f3Smrg            ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
11147ec681f3Smrg            src = ureg_src(tmp);
11157ec681f3Smrg        }
11167ec681f3Smrg        break;
11177ec681f3Smrg    case D3DSPR_CONST2:
11187ec681f3Smrg    case D3DSPR_CONST3:
11197ec681f3Smrg    case D3DSPR_CONST4:
11207ec681f3Smrg        DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
11217ec681f3Smrg        assert(!"CONST2/3/4");
11227ec681f3Smrg        src = ureg_imm1f(ureg, 0.0f);
11237ec681f3Smrg        break;
11247ec681f3Smrg    case D3DSPR_CONSTINT:
11257ec681f3Smrg        /* relative adressing only possible for float constants in vs */
11267ec681f3Smrg        if (!tx_lconsti(tx, &src, param->idx))
11277ec681f3Smrg            src = nine_integer_constant_src(tx, param->idx);
11287ec681f3Smrg        break;
11297ec681f3Smrg    case D3DSPR_CONSTBOOL:
11307ec681f3Smrg        if (!tx_lconstb(tx, &src, param->idx))
11317ec681f3Smrg            src = nine_boolean_constant_src(tx, param->idx);
11327ec681f3Smrg        break;
11337ec681f3Smrg    case D3DSPR_LOOP:
11347ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.address))
11357ec681f3Smrg            tx->regs.address = ureg_DECL_address(ureg);
11367ec681f3Smrg        if (!tx->native_integers)
11377ec681f3Smrg            ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
11387ec681f3Smrg        else
11397ec681f3Smrg            ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
11407ec681f3Smrg        src = ureg_src(tx->regs.address);
11417ec681f3Smrg        break;
11427ec681f3Smrg    case D3DSPR_MISCTYPE:
11437ec681f3Smrg        switch (param->idx) {
11447ec681f3Smrg        case D3DSMO_POSITION:
11457ec681f3Smrg           if (ureg_src_is_undef(tx->regs.vPos))
11467ec681f3Smrg              tx->regs.vPos = nine_get_position_input(tx);
11477ec681f3Smrg           if (tx->shift_wpos) {
11487ec681f3Smrg               /* TODO: do this only once */
11497ec681f3Smrg               struct ureg_dst wpos = tx_scratch(tx);
11507ec681f3Smrg               ureg_ADD(ureg, wpos, tx->regs.vPos,
11517ec681f3Smrg                        ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
11527ec681f3Smrg               src = ureg_src(wpos);
11537ec681f3Smrg           } else {
11547ec681f3Smrg               src = tx->regs.vPos;
11557ec681f3Smrg           }
11567ec681f3Smrg           break;
11577ec681f3Smrg        case D3DSMO_FACE:
11587ec681f3Smrg           if (ureg_src_is_undef(tx->regs.vFace)) {
11597ec681f3Smrg               if (tx->face_is_sysval_integer) {
11607ec681f3Smrg                   tmp = ureg_DECL_temporary(ureg);
11617ec681f3Smrg                   tx->regs.vFace =
11627ec681f3Smrg                       ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
11637ec681f3Smrg
11647ec681f3Smrg                   /* convert bool to float */
11657ec681f3Smrg                   ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
11667ec681f3Smrg                             ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
11677ec681f3Smrg                   tx->regs.vFace = ureg_src(tmp);
11687ec681f3Smrg               } else {
11697ec681f3Smrg                   tx->regs.vFace = ureg_DECL_fs_input(ureg,
11707ec681f3Smrg                                                       TGSI_SEMANTIC_FACE, 0,
11717ec681f3Smrg                                                       TGSI_INTERPOLATE_CONSTANT);
11727ec681f3Smrg               }
11737ec681f3Smrg               tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
11747ec681f3Smrg           }
11757ec681f3Smrg           src = tx->regs.vFace;
11767ec681f3Smrg           break;
11777ec681f3Smrg        default:
11787ec681f3Smrg            assert(!"invalid src D3DSMO");
11797ec681f3Smrg            break;
11807ec681f3Smrg        }
11817ec681f3Smrg        break;
11827ec681f3Smrg    case D3DSPR_TEMPFLOAT16:
11837ec681f3Smrg        break;
11847ec681f3Smrg    default:
11857ec681f3Smrg        assert(!"invalid src D3DSPR");
11867ec681f3Smrg    }
11877ec681f3Smrg
11887ec681f3Smrg    switch (param->mod) {
11897ec681f3Smrg    case NINED3DSPSM_DW:
11907ec681f3Smrg        tmp = tx_scratch(tx);
11917ec681f3Smrg        /* NOTE: app is not allowed to read w with this modifier */
11927ec681f3Smrg        ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
11937ec681f3Smrg        ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
11947ec681f3Smrg        src = ureg_src(tmp);
11957ec681f3Smrg        break;
11967ec681f3Smrg    case NINED3DSPSM_DZ:
11977ec681f3Smrg        tmp = tx_scratch(tx);
11987ec681f3Smrg        /* NOTE: app is not allowed to read z with this modifier */
11997ec681f3Smrg        ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
12007ec681f3Smrg        ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
12017ec681f3Smrg        src = ureg_src(tmp);
12027ec681f3Smrg        break;
12037ec681f3Smrg    default:
12047ec681f3Smrg        break;
12057ec681f3Smrg    }
12067ec681f3Smrg
12077ec681f3Smrg    if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
12087ec681f3Smrg        src = ureg_swizzle(src,
12097ec681f3Smrg                           (param->swizzle >> 0) & 0x3,
12107ec681f3Smrg                           (param->swizzle >> 2) & 0x3,
12117ec681f3Smrg                           (param->swizzle >> 4) & 0x3,
12127ec681f3Smrg                           (param->swizzle >> 6) & 0x3);
12137ec681f3Smrg
12147ec681f3Smrg    switch (param->mod) {
12157ec681f3Smrg    case NINED3DSPSM_ABS:
12167ec681f3Smrg        src = ureg_abs(src);
12177ec681f3Smrg        break;
12187ec681f3Smrg    case NINED3DSPSM_ABSNEG:
12197ec681f3Smrg        src = ureg_negate(ureg_abs(src));
12207ec681f3Smrg        break;
12217ec681f3Smrg    case NINED3DSPSM_NEG:
12227ec681f3Smrg        src = ureg_negate(src);
12237ec681f3Smrg        break;
12247ec681f3Smrg    case NINED3DSPSM_BIAS:
12257ec681f3Smrg        tmp = tx_scratch(tx);
12267ec681f3Smrg        ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
12277ec681f3Smrg        src = ureg_src(tmp);
12287ec681f3Smrg        break;
12297ec681f3Smrg    case NINED3DSPSM_BIASNEG:
12307ec681f3Smrg        tmp = tx_scratch(tx);
12317ec681f3Smrg        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
12327ec681f3Smrg        src = ureg_src(tmp);
12337ec681f3Smrg        break;
12347ec681f3Smrg    case NINED3DSPSM_NOT:
12357ec681f3Smrg        if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
12367ec681f3Smrg            tmp = tx_scratch(tx);
12377ec681f3Smrg            ureg_NOT(ureg, tmp, src);
12387ec681f3Smrg            src = ureg_src(tmp);
12397ec681f3Smrg            break;
12407ec681f3Smrg        } else { /* predicate */
12417ec681f3Smrg            tmp = tx_scratch(tx);
12427ec681f3Smrg            ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
12437ec681f3Smrg            src = ureg_src(tmp);
12447ec681f3Smrg        }
12457ec681f3Smrg        FALLTHROUGH;
12467ec681f3Smrg    case NINED3DSPSM_COMP:
12477ec681f3Smrg        tmp = tx_scratch(tx);
12487ec681f3Smrg        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
12497ec681f3Smrg        src = ureg_src(tmp);
12507ec681f3Smrg        break;
12517ec681f3Smrg    case NINED3DSPSM_DZ:
12527ec681f3Smrg    case NINED3DSPSM_DW:
12537ec681f3Smrg        /* Already handled*/
12547ec681f3Smrg        break;
12557ec681f3Smrg    case NINED3DSPSM_SIGN:
12567ec681f3Smrg        tmp = tx_scratch(tx);
12577ec681f3Smrg        ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
12587ec681f3Smrg        src = ureg_src(tmp);
12597ec681f3Smrg        break;
12607ec681f3Smrg    case NINED3DSPSM_SIGNNEG:
12617ec681f3Smrg        tmp = tx_scratch(tx);
12627ec681f3Smrg        ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
12637ec681f3Smrg        src = ureg_src(tmp);
12647ec681f3Smrg        break;
12657ec681f3Smrg    case NINED3DSPSM_X2:
12667ec681f3Smrg        tmp = tx_scratch(tx);
12677ec681f3Smrg        ureg_ADD(ureg, tmp, src, src);
12687ec681f3Smrg        src = ureg_src(tmp);
12697ec681f3Smrg        break;
12707ec681f3Smrg    case NINED3DSPSM_X2NEG:
12717ec681f3Smrg        tmp = tx_scratch(tx);
12727ec681f3Smrg        ureg_ADD(ureg, tmp, src, src);
12737ec681f3Smrg        src = ureg_negate(ureg_src(tmp));
12747ec681f3Smrg        break;
12757ec681f3Smrg    default:
12767ec681f3Smrg        assert(param->mod == NINED3DSPSM_NONE);
12777ec681f3Smrg        break;
12787ec681f3Smrg    }
12797ec681f3Smrg
12807ec681f3Smrg    return src;
12817ec681f3Smrg}
12827ec681f3Smrg
12837ec681f3Smrgstatic struct ureg_dst
12847ec681f3Smrg_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
12857ec681f3Smrg{
12867ec681f3Smrg    struct ureg_dst dst;
12877ec681f3Smrg
12887ec681f3Smrg    switch (param->file)
12897ec681f3Smrg    {
12907ec681f3Smrg    case D3DSPR_TEMP:
12917ec681f3Smrg        assert(!param->rel);
12927ec681f3Smrg        tx_temp_alloc(tx, param->idx);
12937ec681f3Smrg        dst = tx->regs.r[param->idx];
12947ec681f3Smrg        break;
12957ec681f3Smrg /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
12967ec681f3Smrg    case D3DSPR_ADDR:
12977ec681f3Smrg        assert(!param->rel);
12987ec681f3Smrg        if (tx->version.major < 2 && !IS_VS) {
12997ec681f3Smrg            if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
13007ec681f3Smrg                tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
13017ec681f3Smrg            dst = tx->regs.tS[param->idx];
13027ec681f3Smrg        } else
13037ec681f3Smrg        if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
13047ec681f3Smrg            tx_texcoord_alloc(tx, param->idx);
13057ec681f3Smrg            dst = ureg_dst(tx->regs.vT[param->idx]);
13067ec681f3Smrg        } else {
13077ec681f3Smrg            tx_addr_alloc(tx, param->idx);
13087ec681f3Smrg            dst = tx->regs.a0;
13097ec681f3Smrg        }
13107ec681f3Smrg        break;
13117ec681f3Smrg    case D3DSPR_RASTOUT:
13127ec681f3Smrg        assert(!param->rel);
13137ec681f3Smrg        switch (param->idx) {
13147ec681f3Smrg        case 0:
13157ec681f3Smrg            if (ureg_dst_is_undef(tx->regs.oPos))
13167ec681f3Smrg                tx->regs.oPos =
13177ec681f3Smrg                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
13187ec681f3Smrg            dst = tx->regs.oPos;
13197ec681f3Smrg            break;
13207ec681f3Smrg        case 1:
13217ec681f3Smrg            if (ureg_dst_is_undef(tx->regs.oFog))
13227ec681f3Smrg                tx->regs.oFog =
13237ec681f3Smrg                    ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
13247ec681f3Smrg            dst = tx->regs.oFog;
13257ec681f3Smrg            break;
13267ec681f3Smrg        case 2:
13277ec681f3Smrg            if (ureg_dst_is_undef(tx->regs.oPts))
13287ec681f3Smrg                tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
13297ec681f3Smrg            dst = tx->regs.oPts;
13307ec681f3Smrg            break;
13317ec681f3Smrg        default:
13327ec681f3Smrg            assert(0);
13337ec681f3Smrg            break;
13347ec681f3Smrg        }
13357ec681f3Smrg        break;
13367ec681f3Smrg /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
13377ec681f3Smrg    case D3DSPR_OUTPUT:
13387ec681f3Smrg        if (tx->version.major < 3) {
13397ec681f3Smrg            assert(!param->rel);
13407ec681f3Smrg            dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
13417ec681f3Smrg        } else {
13427ec681f3Smrg            assert(!param->rel); /* TODO */
13437ec681f3Smrg            assert(param->idx < ARRAY_SIZE(tx->regs.o));
13447ec681f3Smrg            dst = tx->regs.o[param->idx];
13457ec681f3Smrg        }
13467ec681f3Smrg        break;
13477ec681f3Smrg    case D3DSPR_ATTROUT: /* VS */
13487ec681f3Smrg    case D3DSPR_COLOROUT: /* PS */
13497ec681f3Smrg        assert(param->idx >= 0 && param->idx < 4);
13507ec681f3Smrg        assert(!param->rel);
13517ec681f3Smrg        tx->info->rt_mask |= 1 << param->idx;
13527ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
13537ec681f3Smrg            /* ps < 3: oCol[0] will have fog blending afterward */
13547ec681f3Smrg            if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
13557ec681f3Smrg                tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
13567ec681f3Smrg            } else {
13577ec681f3Smrg                tx->regs.oCol[param->idx] =
13587ec681f3Smrg                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
13597ec681f3Smrg            }
13607ec681f3Smrg        }
13617ec681f3Smrg        dst = tx->regs.oCol[param->idx];
13627ec681f3Smrg        if (IS_VS && tx->version.major < 3)
13637ec681f3Smrg            dst = ureg_saturate(dst);
13647ec681f3Smrg        break;
13657ec681f3Smrg    case D3DSPR_DEPTHOUT:
13667ec681f3Smrg        assert(!param->rel);
13677ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.oDepth))
13687ec681f3Smrg           tx->regs.oDepth =
13697ec681f3Smrg              ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
13707ec681f3Smrg                                      TGSI_WRITEMASK_Z, 0, 1);
13717ec681f3Smrg        dst = tx->regs.oDepth; /* XXX: must write .z component */
13727ec681f3Smrg        break;
13737ec681f3Smrg    case D3DSPR_PREDICATE:
13747ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.predicate))
13757ec681f3Smrg            tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
13767ec681f3Smrg        dst = tx->regs.predicate;
13777ec681f3Smrg        break;
13787ec681f3Smrg    case D3DSPR_TEMPFLOAT16:
13797ec681f3Smrg        DBG("unhandled D3DSPR: %u\n", param->file);
13807ec681f3Smrg        break;
13817ec681f3Smrg    default:
13827ec681f3Smrg        assert(!"invalid dst D3DSPR");
13837ec681f3Smrg        break;
13847ec681f3Smrg    }
13857ec681f3Smrg    if (param->rel)
13867ec681f3Smrg        dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
13877ec681f3Smrg
13887ec681f3Smrg    if (param->mask != NINED3DSP_WRITEMASK_ALL)
13897ec681f3Smrg        dst = ureg_writemask(dst, param->mask);
13907ec681f3Smrg    if (param->mod & NINED3DSPDM_SATURATE)
13917ec681f3Smrg        dst = ureg_saturate(dst);
13927ec681f3Smrg
13937ec681f3Smrg    if (tx->predicated_activated) {
13947ec681f3Smrg        tx->regs.predicate_dst = dst;
13957ec681f3Smrg        dst = tx->regs.predicate_tmp;
13967ec681f3Smrg    }
13977ec681f3Smrg
13987ec681f3Smrg    return dst;
13997ec681f3Smrg}
14007ec681f3Smrg
14017ec681f3Smrgstatic struct ureg_dst
14027ec681f3Smrgtx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
14037ec681f3Smrg{
14047ec681f3Smrg    if (param->shift) {
14057ec681f3Smrg        tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
14067ec681f3Smrg        return tx->regs.tdst;
14077ec681f3Smrg    }
14087ec681f3Smrg    return _tx_dst_param(tx, param);
14097ec681f3Smrg}
14107ec681f3Smrg
14117ec681f3Smrgstatic void
14127ec681f3Smrgtx_apply_dst0_modifiers(struct shader_translator *tx)
14137ec681f3Smrg{
14147ec681f3Smrg    struct ureg_dst rdst;
14157ec681f3Smrg    float f;
14167ec681f3Smrg
14177ec681f3Smrg    if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
14187ec681f3Smrg        return;
14197ec681f3Smrg    rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
14207ec681f3Smrg
14217ec681f3Smrg    assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
14227ec681f3Smrg
14237ec681f3Smrg    if (tx->insn.dst[0].shift < 0)
14247ec681f3Smrg        f = 1.0f / (1 << -tx->insn.dst[0].shift);
14257ec681f3Smrg    else
14267ec681f3Smrg        f = 1 << tx->insn.dst[0].shift;
14277ec681f3Smrg
14287ec681f3Smrg    ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
14297ec681f3Smrg}
14307ec681f3Smrg
14317ec681f3Smrgstatic struct ureg_src
14327ec681f3Smrgtx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
14337ec681f3Smrg{
14347ec681f3Smrg    struct ureg_src src;
14357ec681f3Smrg
14367ec681f3Smrg    assert(!param->shift);
14377ec681f3Smrg    assert(!(param->mod & NINED3DSPDM_SATURATE));
14387ec681f3Smrg
14397ec681f3Smrg    switch (param->file) {
14407ec681f3Smrg    case D3DSPR_INPUT:
14417ec681f3Smrg        if (IS_VS) {
14427ec681f3Smrg            src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
14437ec681f3Smrg        } else {
14447ec681f3Smrg            assert(!param->rel);
14457ec681f3Smrg            assert(param->idx < ARRAY_SIZE(tx->regs.v));
14467ec681f3Smrg            src = tx->regs.v[param->idx];
14477ec681f3Smrg        }
14487ec681f3Smrg        break;
14497ec681f3Smrg    default:
14507ec681f3Smrg        src = ureg_src(tx_dst_param(tx, param));
14517ec681f3Smrg        break;
14527ec681f3Smrg    }
14537ec681f3Smrg    if (param->rel)
14547ec681f3Smrg        src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
14557ec681f3Smrg
14567ec681f3Smrg    if (!param->mask)
14577ec681f3Smrg        WARN("mask is 0, using identity swizzle\n");
14587ec681f3Smrg
14597ec681f3Smrg    if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
14607ec681f3Smrg        char s[4];
14617ec681f3Smrg        int n;
14627ec681f3Smrg        int c;
14637ec681f3Smrg        for (n = 0, c = 0; c < 4; ++c)
14647ec681f3Smrg            if (param->mask & (1 << c))
14657ec681f3Smrg                s[n++] = c;
14667ec681f3Smrg        assert(n);
14677ec681f3Smrg        for (c = n; c < 4; ++c)
14687ec681f3Smrg            s[c] = s[n - 1];
14697ec681f3Smrg        src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
14707ec681f3Smrg    }
14717ec681f3Smrg    return src;
14727ec681f3Smrg}
14737ec681f3Smrg
14747ec681f3Smrgstatic HRESULT
14757ec681f3SmrgNineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
14767ec681f3Smrg{
14777ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
14787ec681f3Smrg    struct ureg_dst dst;
14797ec681f3Smrg    struct ureg_src src[2];
14807ec681f3Smrg    struct sm1_src_param *src_mat = &tx->insn.src[1];
14817ec681f3Smrg    unsigned i;
14827ec681f3Smrg
14837ec681f3Smrg    dst = tx_dst_param(tx, &tx->insn.dst[0]);
14847ec681f3Smrg    src[0] = tx_src_param(tx, &tx->insn.src[0]);
14857ec681f3Smrg
14867ec681f3Smrg    for (i = 0; i < n; i++)
14877ec681f3Smrg    {
14887ec681f3Smrg        const unsigned m = (1 << i);
14897ec681f3Smrg
14907ec681f3Smrg        src[1] = tx_src_param(tx, src_mat);
14917ec681f3Smrg        src_mat->idx++;
14927ec681f3Smrg
14937ec681f3Smrg        if (!(dst.WriteMask & m))
14947ec681f3Smrg            continue;
14957ec681f3Smrg
14967ec681f3Smrg        /* XXX: src == dst case ? */
14977ec681f3Smrg
14987ec681f3Smrg        switch (k) {
14997ec681f3Smrg        case 3:
15007ec681f3Smrg            ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
15017ec681f3Smrg            break;
15027ec681f3Smrg        case 4:
15037ec681f3Smrg            ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
15047ec681f3Smrg            break;
15057ec681f3Smrg        default:
15067ec681f3Smrg            DBG("invalid operation: M%ux%u\n", m, n);
15077ec681f3Smrg            break;
15087ec681f3Smrg        }
15097ec681f3Smrg    }
15107ec681f3Smrg
15117ec681f3Smrg    return D3D_OK;
15127ec681f3Smrg}
15137ec681f3Smrg
/* Expands to two zero arguments (note: a comma-separated pair, not a single
 * expression).  Presumably used where a min/max version pair is expected for
 * a shader type that does not support an opcode -- confirm at the use sites. */
#define VNOTSUPPORTED       0, 0

/* Pack a version as (major << 8) | minor. */
#define V(major_, minor_)   (((major_) << 8) | (minor_))
15167ec681f3Smrg
15177ec681f3Smrgstatic inline const char *
15187ec681f3Smrgd3dsio_to_string( unsigned opcode )
15197ec681f3Smrg{
15207ec681f3Smrg    static const char *names[] = {
15217ec681f3Smrg        "NOP",
15227ec681f3Smrg        "MOV",
15237ec681f3Smrg        "ADD",
15247ec681f3Smrg        "SUB",
15257ec681f3Smrg        "MAD",
15267ec681f3Smrg        "MUL",
15277ec681f3Smrg        "RCP",
15287ec681f3Smrg        "RSQ",
15297ec681f3Smrg        "DP3",
15307ec681f3Smrg        "DP4",
15317ec681f3Smrg        "MIN",
15327ec681f3Smrg        "MAX",
15337ec681f3Smrg        "SLT",
15347ec681f3Smrg        "SGE",
15357ec681f3Smrg        "EXP",
15367ec681f3Smrg        "LOG",
15377ec681f3Smrg        "LIT",
15387ec681f3Smrg        "DST",
15397ec681f3Smrg        "LRP",
15407ec681f3Smrg        "FRC",
15417ec681f3Smrg        "M4x4",
15427ec681f3Smrg        "M4x3",
15437ec681f3Smrg        "M3x4",
15447ec681f3Smrg        "M3x3",
15457ec681f3Smrg        "M3x2",
15467ec681f3Smrg        "CALL",
15477ec681f3Smrg        "CALLNZ",
15487ec681f3Smrg        "LOOP",
15497ec681f3Smrg        "RET",
15507ec681f3Smrg        "ENDLOOP",
15517ec681f3Smrg        "LABEL",
15527ec681f3Smrg        "DCL",
15537ec681f3Smrg        "POW",
15547ec681f3Smrg        "CRS",
15557ec681f3Smrg        "SGN",
15567ec681f3Smrg        "ABS",
15577ec681f3Smrg        "NRM",
15587ec681f3Smrg        "SINCOS",
15597ec681f3Smrg        "REP",
15607ec681f3Smrg        "ENDREP",
15617ec681f3Smrg        "IF",
15627ec681f3Smrg        "IFC",
15637ec681f3Smrg        "ELSE",
15647ec681f3Smrg        "ENDIF",
15657ec681f3Smrg        "BREAK",
15667ec681f3Smrg        "BREAKC",
15677ec681f3Smrg        "MOVA",
15687ec681f3Smrg        "DEFB",
15697ec681f3Smrg        "DEFI",
15707ec681f3Smrg        NULL,
15717ec681f3Smrg        NULL,
15727ec681f3Smrg        NULL,
15737ec681f3Smrg        NULL,
15747ec681f3Smrg        NULL,
15757ec681f3Smrg        NULL,
15767ec681f3Smrg        NULL,
15777ec681f3Smrg        NULL,
15787ec681f3Smrg        NULL,
15797ec681f3Smrg        NULL,
15807ec681f3Smrg        NULL,
15817ec681f3Smrg        NULL,
15827ec681f3Smrg        NULL,
15837ec681f3Smrg        NULL,
15847ec681f3Smrg        NULL,
15857ec681f3Smrg        "TEXCOORD",
15867ec681f3Smrg        "TEXKILL",
15877ec681f3Smrg        "TEX",
15887ec681f3Smrg        "TEXBEM",
15897ec681f3Smrg        "TEXBEML",
15907ec681f3Smrg        "TEXREG2AR",
15917ec681f3Smrg        "TEXREG2GB",
15927ec681f3Smrg        "TEXM3x2PAD",
15937ec681f3Smrg        "TEXM3x2TEX",
15947ec681f3Smrg        "TEXM3x3PAD",
15957ec681f3Smrg        "TEXM3x3TEX",
15967ec681f3Smrg        NULL,
15977ec681f3Smrg        "TEXM3x3SPEC",
15987ec681f3Smrg        "TEXM3x3VSPEC",
15997ec681f3Smrg        "EXPP",
16007ec681f3Smrg        "LOGP",
16017ec681f3Smrg        "CND",
16027ec681f3Smrg        "DEF",
16037ec681f3Smrg        "TEXREG2RGB",
16047ec681f3Smrg        "TEXDP3TEX",
16057ec681f3Smrg        "TEXM3x2DEPTH",
16067ec681f3Smrg        "TEXDP3",
16077ec681f3Smrg        "TEXM3x3",
16087ec681f3Smrg        "TEXDEPTH",
16097ec681f3Smrg        "CMP",
16107ec681f3Smrg        "BEM",
16117ec681f3Smrg        "DP2ADD",
16127ec681f3Smrg        "DSX",
16137ec681f3Smrg        "DSY",
16147ec681f3Smrg        "TEXLDD",
16157ec681f3Smrg        "SETP",
16167ec681f3Smrg        "TEXLDL",
16177ec681f3Smrg        "BREAKP"
16187ec681f3Smrg    };
16197ec681f3Smrg
16207ec681f3Smrg    if (opcode < ARRAY_SIZE(names)) return names[opcode];
16217ec681f3Smrg
16227ec681f3Smrg    switch (opcode) {
16237ec681f3Smrg    case D3DSIO_PHASE: return "PHASE";
16247ec681f3Smrg    case D3DSIO_COMMENT: return "COMMENT";
16257ec681f3Smrg    case D3DSIO_END: return "END";
16267ec681f3Smrg    default:
16277ec681f3Smrg        return NULL;
16287ec681f3Smrg    }
16297ec681f3Smrg}
16307ec681f3Smrg
/* Initializer with every field zero / NULL, for a table entry that describes
 * no instruction. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }

/* Non-zero when at least one of the vertex/fragment min/max version fields
 * of the entry is non-zero. */
#define IS_VALID_INSTRUCTION(entry) \
    ((entry).vert_version.min | (entry).vert_version.max | \
     (entry).frag_version.min | (entry).frag_version.max)

/* Name of the translation handler for opcode `op`. */
#define SPECIAL(op) NineTranslateInstruction_##op

/* Opens the definition of the translation handler for opcode `op`; the
 * handler receives the translator as `tx` and returns an HRESULT. */
#define DECL_SPECIAL(op) \
    static HRESULT \
    NineTranslateInstruction_##op( struct shader_translator *tx )
16437ec681f3Smrg
16447ec681f3Smrgstatic HRESULT
16457ec681f3SmrgNineTranslateInstruction_Generic(struct shader_translator *);
16467ec681f3Smrg
16477ec681f3SmrgDECL_SPECIAL(NOP)
16487ec681f3Smrg{
16497ec681f3Smrg    /* Nothing to do. NOP was used to avoid hangs
16507ec681f3Smrg     * with very old d3d drivers. */
16517ec681f3Smrg    return D3D_OK;
16527ec681f3Smrg}
16537ec681f3Smrg
16547ec681f3SmrgDECL_SPECIAL(SUB)
16557ec681f3Smrg{
16567ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
16577ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
16587ec681f3Smrg    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
16597ec681f3Smrg    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
16607ec681f3Smrg
16617ec681f3Smrg    ureg_ADD(ureg, dst, src0, ureg_negate(src1));
16627ec681f3Smrg    return D3D_OK;
16637ec681f3Smrg}
16647ec681f3Smrg
16657ec681f3SmrgDECL_SPECIAL(ABS)
16667ec681f3Smrg{
16677ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
16687ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
16697ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
16707ec681f3Smrg
16717ec681f3Smrg    ureg_MOV(ureg, dst, ureg_abs(src));
16727ec681f3Smrg    return D3D_OK;
16737ec681f3Smrg}
16747ec681f3Smrg
16757ec681f3SmrgDECL_SPECIAL(XPD)
16767ec681f3Smrg{
16777ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
16787ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
16797ec681f3Smrg    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
16807ec681f3Smrg    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
16817ec681f3Smrg
16827ec681f3Smrg    ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
16837ec681f3Smrg             ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
16847ec681f3Smrg                          TGSI_SWIZZLE_X, 0),
16857ec681f3Smrg             ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
16867ec681f3Smrg                          TGSI_SWIZZLE_Y, 0));
16877ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
16887ec681f3Smrg             ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
16897ec681f3Smrg                          TGSI_SWIZZLE_Y, 0),
16907ec681f3Smrg             ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
16917ec681f3Smrg                                      TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
16927ec681f3Smrg             ureg_src(dst));
16937ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
16947ec681f3Smrg             ureg_imm1f(ureg, 1));
16957ec681f3Smrg    return D3D_OK;
16967ec681f3Smrg}
16977ec681f3Smrg
16987ec681f3SmrgDECL_SPECIAL(M4x4)
16997ec681f3Smrg{
17007ec681f3Smrg    return NineTranslateInstruction_Mkxn(tx, 4, 4);
17017ec681f3Smrg}
17027ec681f3Smrg
17037ec681f3SmrgDECL_SPECIAL(M4x3)
17047ec681f3Smrg{
17057ec681f3Smrg    return NineTranslateInstruction_Mkxn(tx, 4, 3);
17067ec681f3Smrg}
17077ec681f3Smrg
17087ec681f3SmrgDECL_SPECIAL(M3x4)
17097ec681f3Smrg{
17107ec681f3Smrg    return NineTranslateInstruction_Mkxn(tx, 3, 4);
17117ec681f3Smrg}
17127ec681f3Smrg
17137ec681f3SmrgDECL_SPECIAL(M3x3)
17147ec681f3Smrg{
17157ec681f3Smrg    return NineTranslateInstruction_Mkxn(tx, 3, 3);
17167ec681f3Smrg}
17177ec681f3Smrg
17187ec681f3SmrgDECL_SPECIAL(M3x2)
17197ec681f3Smrg{
17207ec681f3Smrg    return NineTranslateInstruction_Mkxn(tx, 3, 2);
17217ec681f3Smrg}
17227ec681f3Smrg
17237ec681f3SmrgDECL_SPECIAL(CMP)
17247ec681f3Smrg{
17257ec681f3Smrg    ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
17267ec681f3Smrg             tx_src_param(tx, &tx->insn.src[0]),
17277ec681f3Smrg             tx_src_param(tx, &tx->insn.src[2]),
17287ec681f3Smrg             tx_src_param(tx, &tx->insn.src[1]));
17297ec681f3Smrg    return D3D_OK;
17307ec681f3Smrg}
17317ec681f3Smrg
17327ec681f3SmrgDECL_SPECIAL(CND)
17337ec681f3Smrg{
17347ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
17357ec681f3Smrg    struct ureg_dst cgt;
17367ec681f3Smrg    struct ureg_src cnd;
17377ec681f3Smrg
17387ec681f3Smrg    /* the coissue flag was a tip for compilers to advise to
17397ec681f3Smrg     * execute two operations at the same time, in cases
17407ec681f3Smrg     * the two executions had same dst with different channels.
17417ec681f3Smrg     * It has no effect on current hw. However it seems CND
17427ec681f3Smrg     * is affected. The handling of this very specific case
17437ec681f3Smrg     * handled below mimick wine behaviour */
17447ec681f3Smrg    if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
17457ec681f3Smrg        ureg_MOV(tx->ureg,
17467ec681f3Smrg                 dst, tx_src_param(tx, &tx->insn.src[1]));
17477ec681f3Smrg        return D3D_OK;
17487ec681f3Smrg    }
17497ec681f3Smrg
17507ec681f3Smrg    cnd = tx_src_param(tx, &tx->insn.src[0]);
17517ec681f3Smrg    cgt = tx_scratch(tx);
17527ec681f3Smrg
17537ec681f3Smrg    if (tx->version.major == 1 && tx->version.minor < 4)
17547ec681f3Smrg        cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
17557ec681f3Smrg
17567ec681f3Smrg    ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
17577ec681f3Smrg
17587ec681f3Smrg    ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
17597ec681f3Smrg             tx_src_param(tx, &tx->insn.src[1]),
17607ec681f3Smrg             tx_src_param(tx, &tx->insn.src[2]));
17617ec681f3Smrg    return D3D_OK;
17627ec681f3Smrg}
17637ec681f3Smrg
17647ec681f3SmrgDECL_SPECIAL(CALL)
17657ec681f3Smrg{
17667ec681f3Smrg    assert(tx->insn.src[0].idx < tx->num_inst_labels);
17677ec681f3Smrg    ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
17687ec681f3Smrg    return D3D_OK;
17697ec681f3Smrg}
17707ec681f3Smrg
17717ec681f3SmrgDECL_SPECIAL(CALLNZ)
17727ec681f3Smrg{
17737ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
17747ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
17757ec681f3Smrg
17767ec681f3Smrg    if (!tx->native_integers)
17777ec681f3Smrg        ureg_IF(ureg, src, tx_cond(tx));
17787ec681f3Smrg    else
17797ec681f3Smrg        ureg_UIF(ureg, src, tx_cond(tx));
17807ec681f3Smrg    ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
17817ec681f3Smrg    tx_endcond(tx);
17827ec681f3Smrg    ureg_ENDIF(ureg);
17837ec681f3Smrg    return D3D_OK;
17847ec681f3Smrg}
17857ec681f3Smrg
17867ec681f3SmrgDECL_SPECIAL(LOOP)
17877ec681f3Smrg{
17887ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
17897ec681f3Smrg    unsigned *label;
17907ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
17917ec681f3Smrg    struct ureg_dst ctr;
17927ec681f3Smrg    struct ureg_dst tmp;
17937ec681f3Smrg    struct ureg_src ctrx;
17947ec681f3Smrg
17957ec681f3Smrg    label = tx_bgnloop(tx);
17967ec681f3Smrg    ctr = tx_get_loopctr(tx, TRUE);
17977ec681f3Smrg    ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
17987ec681f3Smrg
17997ec681f3Smrg    /* src: num_iterations - start_value of al - step for al - 0 */
18007ec681f3Smrg    ureg_MOV(ureg, ctr, src);
18017ec681f3Smrg    ureg_BGNLOOP(tx->ureg, label);
18027ec681f3Smrg    tmp = tx_scratch_scalar(tx);
18037ec681f3Smrg    /* Initially ctr.x contains the number of iterations.
18047ec681f3Smrg     * ctr.y will contain the updated value of al.
18057ec681f3Smrg     * We decrease ctr.x at the end of every iteration,
18067ec681f3Smrg     * and stop when it reaches 0. */
18077ec681f3Smrg
18087ec681f3Smrg    if (!tx->native_integers) {
18097ec681f3Smrg        /* case src and ctr contain floats */
18107ec681f3Smrg        /* to avoid precision issue, we stop when ctr <= 0.5 */
18117ec681f3Smrg        ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
18127ec681f3Smrg        ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
18137ec681f3Smrg    } else {
18147ec681f3Smrg        /* case src and ctr contain integers */
18157ec681f3Smrg        ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
18167ec681f3Smrg        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
18177ec681f3Smrg    }
18187ec681f3Smrg    ureg_BRK(ureg);
18197ec681f3Smrg    tx_endcond(tx);
18207ec681f3Smrg    ureg_ENDIF(ureg);
18217ec681f3Smrg    return D3D_OK;
18227ec681f3Smrg}
18237ec681f3Smrg
18247ec681f3SmrgDECL_SPECIAL(RET)
18257ec681f3Smrg{
18267ec681f3Smrg    /* RET as a last instruction could be safely ignored.
18277ec681f3Smrg     * Remove it to prevent crashes/warnings in case underlying
18287ec681f3Smrg     * driver doesn't implement arbitrary returns.
18297ec681f3Smrg     */
18307ec681f3Smrg    if (*(tx->parse_next) != NINED3DSP_END) {
18317ec681f3Smrg        ureg_RET(tx->ureg);
18327ec681f3Smrg    }
18337ec681f3Smrg    return D3D_OK;
18347ec681f3Smrg}
18357ec681f3Smrg
18367ec681f3SmrgDECL_SPECIAL(ENDLOOP)
18377ec681f3Smrg{
18387ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
18397ec681f3Smrg    struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
18407ec681f3Smrg    struct ureg_dst dst_ctrx, dst_al;
18417ec681f3Smrg    struct ureg_src src_ctr, al_counter;
18427ec681f3Smrg
18437ec681f3Smrg    dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
18447ec681f3Smrg    dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
18457ec681f3Smrg    src_ctr = ureg_src(ctr);
18467ec681f3Smrg    al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
18477ec681f3Smrg
18487ec681f3Smrg    /* ctr.x -= 1
18497ec681f3Smrg     * ctr.y (aL) += step */
18507ec681f3Smrg    if (!tx->native_integers) {
18517ec681f3Smrg        ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
18527ec681f3Smrg        ureg_ADD(ureg, dst_al, src_ctr, al_counter);
18537ec681f3Smrg    } else {
18547ec681f3Smrg        ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
18557ec681f3Smrg        ureg_UADD(ureg, dst_al, src_ctr, al_counter);
18567ec681f3Smrg    }
18577ec681f3Smrg    ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
18587ec681f3Smrg    return D3D_OK;
18597ec681f3Smrg}
18607ec681f3Smrg
18617ec681f3SmrgDECL_SPECIAL(LABEL)
18627ec681f3Smrg{
18637ec681f3Smrg    unsigned k = tx->num_inst_labels;
18647ec681f3Smrg    unsigned n = tx->insn.src[0].idx;
18657ec681f3Smrg    assert(n < 2048);
18667ec681f3Smrg    if (n >= k)
18677ec681f3Smrg       tx->inst_labels = REALLOC(tx->inst_labels,
18687ec681f3Smrg                                 k * sizeof(tx->inst_labels[0]),
18697ec681f3Smrg                                 n * sizeof(tx->inst_labels[0]));
18707ec681f3Smrg
18717ec681f3Smrg    tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
18727ec681f3Smrg    return D3D_OK;
18737ec681f3Smrg}
18747ec681f3Smrg
18757ec681f3SmrgDECL_SPECIAL(SINCOS)
18767ec681f3Smrg{
18777ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
18787ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
18797ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
18807ec681f3Smrg    struct ureg_dst tmp = tx_scratch_scalar(tx);
18817ec681f3Smrg
18827ec681f3Smrg    assert(!(dst.WriteMask & 0xc));
18837ec681f3Smrg
18847ec681f3Smrg    /* Copying to a temporary register avoids src/dst aliasing.
18857ec681f3Smrg     * src is supposed to have replicated swizzle. */
18867ec681f3Smrg    ureg_MOV(ureg, tmp, src);
18877ec681f3Smrg
18887ec681f3Smrg    /* z undefined, w untouched */
18897ec681f3Smrg    ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
18907ec681f3Smrg             tx_src_scalar(tmp));
18917ec681f3Smrg    ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
18927ec681f3Smrg             tx_src_scalar(tmp));
18937ec681f3Smrg    return D3D_OK;
18947ec681f3Smrg}
18957ec681f3Smrg
18967ec681f3SmrgDECL_SPECIAL(SGN)
18977ec681f3Smrg{
18987ec681f3Smrg    ureg_SSG(tx->ureg,
18997ec681f3Smrg             tx_dst_param(tx, &tx->insn.dst[0]),
19007ec681f3Smrg             tx_src_param(tx, &tx->insn.src[0]));
19017ec681f3Smrg    return D3D_OK;
19027ec681f3Smrg}
19037ec681f3Smrg
19047ec681f3SmrgDECL_SPECIAL(REP)
19057ec681f3Smrg{
19067ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
19077ec681f3Smrg    unsigned *label;
19087ec681f3Smrg    struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
19097ec681f3Smrg    struct ureg_dst ctr;
19107ec681f3Smrg    struct ureg_dst tmp;
19117ec681f3Smrg    struct ureg_src ctrx;
19127ec681f3Smrg
19137ec681f3Smrg    label = tx_bgnloop(tx);
19147ec681f3Smrg    ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
19157ec681f3Smrg    ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
19167ec681f3Smrg
19177ec681f3Smrg    /* NOTE: rep must be constant, so we don't have to save the count */
19187ec681f3Smrg    assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
19197ec681f3Smrg
19207ec681f3Smrg    /* rep: num_iterations - 0 - 0 - 0 */
19217ec681f3Smrg    ureg_MOV(ureg, ctr, rep);
19227ec681f3Smrg    ureg_BGNLOOP(ureg, label);
19237ec681f3Smrg    tmp = tx_scratch_scalar(tx);
19247ec681f3Smrg    /* Initially ctr.x contains the number of iterations.
19257ec681f3Smrg     * We decrease ctr.x at the end of every iteration,
19267ec681f3Smrg     * and stop when it reaches 0. */
19277ec681f3Smrg
19287ec681f3Smrg    if (!tx->native_integers) {
19297ec681f3Smrg        /* case src and ctr contain floats */
19307ec681f3Smrg        /* to avoid precision issue, we stop when ctr <= 0.5 */
19317ec681f3Smrg        ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
19327ec681f3Smrg        ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
19337ec681f3Smrg    } else {
19347ec681f3Smrg        /* case src and ctr contain integers */
19357ec681f3Smrg        ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
19367ec681f3Smrg        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
19377ec681f3Smrg    }
19387ec681f3Smrg    ureg_BRK(ureg);
19397ec681f3Smrg    tx_endcond(tx);
19407ec681f3Smrg    ureg_ENDIF(ureg);
19417ec681f3Smrg
19427ec681f3Smrg    return D3D_OK;
19437ec681f3Smrg}
19447ec681f3Smrg
19457ec681f3SmrgDECL_SPECIAL(ENDREP)
19467ec681f3Smrg{
19477ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
19487ec681f3Smrg    struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
19497ec681f3Smrg    struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
19507ec681f3Smrg    struct ureg_src src_ctr = ureg_src(ctr);
19517ec681f3Smrg
19527ec681f3Smrg    /* ctr.x -= 1 */
19537ec681f3Smrg    if (!tx->native_integers)
19547ec681f3Smrg        ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
19557ec681f3Smrg    else
19567ec681f3Smrg        ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
19577ec681f3Smrg
19587ec681f3Smrg    ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
19597ec681f3Smrg    return D3D_OK;
19607ec681f3Smrg}
19617ec681f3Smrg
19627ec681f3SmrgDECL_SPECIAL(ENDIF)
19637ec681f3Smrg{
19647ec681f3Smrg    tx_endcond(tx);
19657ec681f3Smrg    ureg_ENDIF(tx->ureg);
19667ec681f3Smrg    return D3D_OK;
19677ec681f3Smrg}
19687ec681f3Smrg
19697ec681f3SmrgDECL_SPECIAL(IF)
19707ec681f3Smrg{
19717ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
19727ec681f3Smrg
19737ec681f3Smrg    if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
19747ec681f3Smrg        ureg_UIF(tx->ureg, src, tx_cond(tx));
19757ec681f3Smrg    else
19767ec681f3Smrg        ureg_IF(tx->ureg, src, tx_cond(tx));
19777ec681f3Smrg
19787ec681f3Smrg    return D3D_OK;
19797ec681f3Smrg}
19807ec681f3Smrg
19817ec681f3Smrgstatic inline unsigned
19827ec681f3Smrgsm1_insn_flags_to_tgsi_setop(BYTE flags)
19837ec681f3Smrg{
19847ec681f3Smrg    switch (flags) {
19857ec681f3Smrg    case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
19867ec681f3Smrg    case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
19877ec681f3Smrg    case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
19887ec681f3Smrg    case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
19897ec681f3Smrg    case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
19907ec681f3Smrg    case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
19917ec681f3Smrg    default:
19927ec681f3Smrg        assert(!"invalid comparison flags");
19937ec681f3Smrg        return TGSI_OPCODE_SGT;
19947ec681f3Smrg    }
19957ec681f3Smrg}
19967ec681f3Smrg
19977ec681f3SmrgDECL_SPECIAL(IFC)
19987ec681f3Smrg{
19997ec681f3Smrg    const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
20007ec681f3Smrg    struct ureg_src src[2];
20017ec681f3Smrg    struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
20027ec681f3Smrg    src[0] = tx_src_param(tx, &tx->insn.src[0]);
20037ec681f3Smrg    src[1] = tx_src_param(tx, &tx->insn.src[1]);
20047ec681f3Smrg    ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
20057ec681f3Smrg    ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
20067ec681f3Smrg    return D3D_OK;
20077ec681f3Smrg}
20087ec681f3Smrg
20097ec681f3SmrgDECL_SPECIAL(ELSE)
20107ec681f3Smrg{
20117ec681f3Smrg    ureg_ELSE(tx->ureg, tx_elsecond(tx));
20127ec681f3Smrg    return D3D_OK;
20137ec681f3Smrg}
20147ec681f3Smrg
20157ec681f3SmrgDECL_SPECIAL(BREAKC)
20167ec681f3Smrg{
20177ec681f3Smrg    const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
20187ec681f3Smrg    struct ureg_src src[2];
20197ec681f3Smrg    struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
20207ec681f3Smrg    src[0] = tx_src_param(tx, &tx->insn.src[0]);
20217ec681f3Smrg    src[1] = tx_src_param(tx, &tx->insn.src[1]);
20227ec681f3Smrg    ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
20237ec681f3Smrg    ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
20247ec681f3Smrg    ureg_BRK(tx->ureg);
20257ec681f3Smrg    tx_endcond(tx);
20267ec681f3Smrg    ureg_ENDIF(tx->ureg);
20277ec681f3Smrg    return D3D_OK;
20287ec681f3Smrg}
20297ec681f3Smrg
20307ec681f3Smrgstatic const char *sm1_declusage_names[] =
20317ec681f3Smrg{
20327ec681f3Smrg    [D3DDECLUSAGE_POSITION] = "POSITION",
20337ec681f3Smrg    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
20347ec681f3Smrg    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
20357ec681f3Smrg    [D3DDECLUSAGE_NORMAL] = "NORMAL",
20367ec681f3Smrg    [D3DDECLUSAGE_PSIZE] = "PSIZE",
20377ec681f3Smrg    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
20387ec681f3Smrg    [D3DDECLUSAGE_TANGENT] = "TANGENT",
20397ec681f3Smrg    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
20407ec681f3Smrg    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
20417ec681f3Smrg    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
20427ec681f3Smrg    [D3DDECLUSAGE_COLOR] = "COLOR",
20437ec681f3Smrg    [D3DDECLUSAGE_FOG] = "FOG",
20447ec681f3Smrg    [D3DDECLUSAGE_DEPTH] = "DEPTH",
20457ec681f3Smrg    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
20467ec681f3Smrg};
20477ec681f3Smrg
20487ec681f3Smrgstatic inline unsigned
20497ec681f3Smrgsm1_to_nine_declusage(struct sm1_semantic *dcl)
20507ec681f3Smrg{
20517ec681f3Smrg    return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
20527ec681f3Smrg}
20537ec681f3Smrg
20547ec681f3Smrgstatic void
20557ec681f3Smrgsm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
20567ec681f3Smrg                      boolean tc,
20577ec681f3Smrg                      struct sm1_semantic *dcl)
20587ec681f3Smrg{
20597ec681f3Smrg    BYTE index = dcl->usage_idx;
20607ec681f3Smrg
20617ec681f3Smrg    /* For everything that is not matching to a TGSI_SEMANTIC_****,
20627ec681f3Smrg     * we match to a TGSI_SEMANTIC_GENERIC with index.
20637ec681f3Smrg     *
20647ec681f3Smrg     * The index can be anything UINT16 and usage_idx is BYTE,
20657ec681f3Smrg     * so we can fit everything. It doesn't matter if indices
20667ec681f3Smrg     * are close together or low.
20677ec681f3Smrg     *
20687ec681f3Smrg     *
20697ec681f3Smrg     * POSITION >= 1: 10 * index + 7
20707ec681f3Smrg     * COLOR >= 2: 10 * (index-1) + 8
20717ec681f3Smrg     * FOG: 16
20727ec681f3Smrg     * TEXCOORD[0..15]: index
20737ec681f3Smrg     * BLENDWEIGHT: 10 * index + 19
20747ec681f3Smrg     * BLENDINDICES: 10 * index + 20
20757ec681f3Smrg     * NORMAL: 10 * index + 21
20767ec681f3Smrg     * TANGENT: 10 * index + 22
20777ec681f3Smrg     * BINORMAL: 10 * index + 23
20787ec681f3Smrg     * TESSFACTOR: 10 * index + 24
20797ec681f3Smrg     */
20807ec681f3Smrg
20817ec681f3Smrg    switch (dcl->usage) {
20827ec681f3Smrg    case D3DDECLUSAGE_POSITION:
20837ec681f3Smrg    case D3DDECLUSAGE_POSITIONT:
20847ec681f3Smrg    case D3DDECLUSAGE_DEPTH:
20857ec681f3Smrg        if (index == 0) {
20867ec681f3Smrg            sem->Name = TGSI_SEMANTIC_POSITION;
20877ec681f3Smrg            sem->Index = 0;
20887ec681f3Smrg        } else {
20897ec681f3Smrg            sem->Name = TGSI_SEMANTIC_GENERIC;
20907ec681f3Smrg            sem->Index = 10 * index + 7;
20917ec681f3Smrg        }
20927ec681f3Smrg        break;
20937ec681f3Smrg    case D3DDECLUSAGE_COLOR:
20947ec681f3Smrg        if (index < 2) {
20957ec681f3Smrg            sem->Name = TGSI_SEMANTIC_COLOR;
20967ec681f3Smrg            sem->Index = index;
20977ec681f3Smrg        } else {
20987ec681f3Smrg            sem->Name = TGSI_SEMANTIC_GENERIC;
20997ec681f3Smrg            sem->Index = 10 * (index-1) + 8;
21007ec681f3Smrg        }
21017ec681f3Smrg        break;
21027ec681f3Smrg    case D3DDECLUSAGE_FOG:
21037ec681f3Smrg        assert(index == 0);
21047ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21057ec681f3Smrg        sem->Index = 16;
21067ec681f3Smrg        break;
21077ec681f3Smrg    case D3DDECLUSAGE_PSIZE:
21087ec681f3Smrg        assert(index == 0);
21097ec681f3Smrg        sem->Name = TGSI_SEMANTIC_PSIZE;
21107ec681f3Smrg        sem->Index = 0;
21117ec681f3Smrg        break;
21127ec681f3Smrg    case D3DDECLUSAGE_TEXCOORD:
21137ec681f3Smrg        assert(index < 16);
21147ec681f3Smrg        if (index < 8 && tc)
21157ec681f3Smrg            sem->Name = TGSI_SEMANTIC_TEXCOORD;
21167ec681f3Smrg        else
21177ec681f3Smrg            sem->Name = TGSI_SEMANTIC_GENERIC;
21187ec681f3Smrg        sem->Index = index;
21197ec681f3Smrg        break;
21207ec681f3Smrg    case D3DDECLUSAGE_BLENDWEIGHT:
21217ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21227ec681f3Smrg        sem->Index = 10 * index + 19;
21237ec681f3Smrg        break;
21247ec681f3Smrg    case D3DDECLUSAGE_BLENDINDICES:
21257ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21267ec681f3Smrg        sem->Index = 10 * index + 20;
21277ec681f3Smrg        break;
21287ec681f3Smrg    case D3DDECLUSAGE_NORMAL:
21297ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21307ec681f3Smrg        sem->Index = 10 * index + 21;
21317ec681f3Smrg        break;
21327ec681f3Smrg    case D3DDECLUSAGE_TANGENT:
21337ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21347ec681f3Smrg        sem->Index = 10 * index + 22;
21357ec681f3Smrg        break;
21367ec681f3Smrg    case D3DDECLUSAGE_BINORMAL:
21377ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21387ec681f3Smrg        sem->Index = 10 * index + 23;
21397ec681f3Smrg        break;
21407ec681f3Smrg    case D3DDECLUSAGE_TESSFACTOR:
21417ec681f3Smrg        sem->Name = TGSI_SEMANTIC_GENERIC;
21427ec681f3Smrg        sem->Index = 10 * index + 24;
21437ec681f3Smrg        break;
21447ec681f3Smrg    case D3DDECLUSAGE_SAMPLE:
21457ec681f3Smrg        sem->Name = TGSI_SEMANTIC_COUNT;
21467ec681f3Smrg        sem->Index = 0;
21477ec681f3Smrg        break;
21487ec681f3Smrg    default:
21497ec681f3Smrg        unreachable("Invalid DECLUSAGE.");
21507ec681f3Smrg        break;
21517ec681f3Smrg    }
21527ec681f3Smrg}
21537ec681f3Smrg
21547ec681f3Smrg#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
21557ec681f3Smrg#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
21567ec681f3Smrg#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
21577ec681f3Smrg#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
21587ec681f3Smrgstatic inline unsigned
21597ec681f3Smrgd3dstt_to_tgsi_tex(BYTE sampler_type)
21607ec681f3Smrg{
21617ec681f3Smrg    switch (sampler_type) {
21627ec681f3Smrg    case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
21637ec681f3Smrg    case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
21647ec681f3Smrg    case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
21657ec681f3Smrg    case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
21667ec681f3Smrg    default:
21677ec681f3Smrg        assert(0);
21687ec681f3Smrg        return TGSI_TEXTURE_UNKNOWN;
21697ec681f3Smrg    }
21707ec681f3Smrg}
21717ec681f3Smrgstatic inline unsigned
21727ec681f3Smrgd3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
21737ec681f3Smrg{
21747ec681f3Smrg    switch (sampler_type) {
21757ec681f3Smrg    case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
21767ec681f3Smrg    case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
21777ec681f3Smrg    case NINED3DSTT_VOLUME:
21787ec681f3Smrg    case NINED3DSTT_CUBE:
21797ec681f3Smrg    default:
21807ec681f3Smrg        assert(0);
21817ec681f3Smrg        return TGSI_TEXTURE_UNKNOWN;
21827ec681f3Smrg    }
21837ec681f3Smrg}
21847ec681f3Smrgstatic inline unsigned
21857ec681f3Smrgps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
21867ec681f3Smrg{
21877ec681f3Smrg    boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
21887ec681f3Smrg    switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
21897ec681f3Smrg    case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
21907ec681f3Smrg    case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
21917ec681f3Smrg    case 3: return TGSI_TEXTURE_3D;
21927ec681f3Smrg    default:
21937ec681f3Smrg        return TGSI_TEXTURE_CUBE;
21947ec681f3Smrg    }
21957ec681f3Smrg}
21967ec681f3Smrg
21977ec681f3Smrgstatic const char *
21987ec681f3Smrgsm1_sampler_type_name(BYTE sampler_type)
21997ec681f3Smrg{
22007ec681f3Smrg    switch (sampler_type) {
22017ec681f3Smrg    case NINED3DSTT_1D:     return "1D";
22027ec681f3Smrg    case NINED3DSTT_2D:     return "2D";
22037ec681f3Smrg    case NINED3DSTT_VOLUME: return "VOLUME";
22047ec681f3Smrg    case NINED3DSTT_CUBE:   return "CUBE";
22057ec681f3Smrg    default:
22067ec681f3Smrg        return "(D3DSTT_?)";
22077ec681f3Smrg    }
22087ec681f3Smrg}
22097ec681f3Smrg
22107ec681f3Smrgstatic inline unsigned
22117ec681f3Smrgnine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
22127ec681f3Smrg{
22137ec681f3Smrg    switch (sem->Name) {
22147ec681f3Smrg    case TGSI_SEMANTIC_POSITION:
22157ec681f3Smrg    case TGSI_SEMANTIC_NORMAL:
22167ec681f3Smrg        return TGSI_INTERPOLATE_LINEAR;
22177ec681f3Smrg    case TGSI_SEMANTIC_BCOLOR:
22187ec681f3Smrg    case TGSI_SEMANTIC_COLOR:
22197ec681f3Smrg        return TGSI_INTERPOLATE_COLOR;
22207ec681f3Smrg    case TGSI_SEMANTIC_FOG:
22217ec681f3Smrg    case TGSI_SEMANTIC_GENERIC:
22227ec681f3Smrg    case TGSI_SEMANTIC_TEXCOORD:
22237ec681f3Smrg    case TGSI_SEMANTIC_CLIPDIST:
22247ec681f3Smrg    case TGSI_SEMANTIC_CLIPVERTEX:
22257ec681f3Smrg        return TGSI_INTERPOLATE_PERSPECTIVE;
22267ec681f3Smrg    case TGSI_SEMANTIC_EDGEFLAG:
22277ec681f3Smrg    case TGSI_SEMANTIC_FACE:
22287ec681f3Smrg    case TGSI_SEMANTIC_INSTANCEID:
22297ec681f3Smrg    case TGSI_SEMANTIC_PCOORD:
22307ec681f3Smrg    case TGSI_SEMANTIC_PRIMID:
22317ec681f3Smrg    case TGSI_SEMANTIC_PSIZE:
22327ec681f3Smrg    case TGSI_SEMANTIC_VERTEXID:
22337ec681f3Smrg        return TGSI_INTERPOLATE_CONSTANT;
22347ec681f3Smrg    default:
22357ec681f3Smrg        assert(0);
22367ec681f3Smrg        return TGSI_INTERPOLATE_CONSTANT;
22377ec681f3Smrg    }
22387ec681f3Smrg}
22397ec681f3Smrg
22407ec681f3SmrgDECL_SPECIAL(DCL)
22417ec681f3Smrg{
22427ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
22437ec681f3Smrg    boolean is_input;
22447ec681f3Smrg    boolean is_sampler;
22457ec681f3Smrg    struct tgsi_declaration_semantic tgsi;
22467ec681f3Smrg    struct sm1_semantic sem;
22477ec681f3Smrg    sm1_read_semantic(tx, &sem);
22487ec681f3Smrg
22497ec681f3Smrg    is_input = sem.reg.file == D3DSPR_INPUT;
22507ec681f3Smrg    is_sampler =
22517ec681f3Smrg        sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
22527ec681f3Smrg
22537ec681f3Smrg    DUMP("DCL ");
22547ec681f3Smrg    sm1_dump_dst_param(&sem.reg);
22557ec681f3Smrg    if (is_sampler)
22567ec681f3Smrg        DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
22577ec681f3Smrg    else
22587ec681f3Smrg    if (tx->version.major >= 3)
22597ec681f3Smrg        DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
22607ec681f3Smrg    else
22617ec681f3Smrg    if (sem.usage | sem.usage_idx)
22627ec681f3Smrg        DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
22637ec681f3Smrg    else
22647ec681f3Smrg        DUMP("\n");
22657ec681f3Smrg
22667ec681f3Smrg    if (is_sampler) {
22677ec681f3Smrg        const unsigned m = 1 << sem.reg.idx;
22687ec681f3Smrg        ureg_DECL_sampler(ureg, sem.reg.idx);
22697ec681f3Smrg        tx->info->sampler_mask |= m;
22707ec681f3Smrg        tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
22717ec681f3Smrg            d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
22727ec681f3Smrg            d3dstt_to_tgsi_tex(sem.sampler_type);
22737ec681f3Smrg        return D3D_OK;
22747ec681f3Smrg    }
22757ec681f3Smrg
22767ec681f3Smrg    sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
22777ec681f3Smrg    if (IS_VS) {
22787ec681f3Smrg        if (is_input) {
22797ec681f3Smrg            /* linkage outside of shader with vertex declaration */
22807ec681f3Smrg            ureg_DECL_vs_input(ureg, sem.reg.idx);
22817ec681f3Smrg            assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
22827ec681f3Smrg            tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
22837ec681f3Smrg            tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
22847ec681f3Smrg            /* NOTE: preserving order in case of indirect access */
22857ec681f3Smrg        } else
22867ec681f3Smrg        if (tx->version.major >= 3) {
22877ec681f3Smrg            /* SM2 output semantic determined by file */
22887ec681f3Smrg            assert(sem.reg.mask != 0);
22897ec681f3Smrg            if (sem.usage == D3DDECLUSAGE_POSITIONT)
22907ec681f3Smrg                tx->info->position_t = TRUE;
22917ec681f3Smrg            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
22927ec681f3Smrg            assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
22937ec681f3Smrg            tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
22947ec681f3Smrg                ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
22957ec681f3Smrg            nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
22967ec681f3Smrg            if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
22977ec681f3Smrg                tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
22987ec681f3Smrg                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
22997ec681f3Smrg                tx->regs.oPos = tx->regs.o[sem.reg.idx];
23007ec681f3Smrg            }
23017ec681f3Smrg
23027ec681f3Smrg            if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
23037ec681f3Smrg                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
23047ec681f3Smrg                tx->regs.oPts = tx->regs.o[sem.reg.idx];
23057ec681f3Smrg            }
23067ec681f3Smrg        }
23077ec681f3Smrg    } else {
23087ec681f3Smrg        if (is_input && tx->version.major >= 3) {
23097ec681f3Smrg            unsigned interp_location = 0;
23107ec681f3Smrg            /* SM3 only, SM2 input semantic determined by file */
23117ec681f3Smrg            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
23127ec681f3Smrg            assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
23137ec681f3Smrg            /* PositionT and tessfactor forbidden */
23147ec681f3Smrg            if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
23157ec681f3Smrg                return D3DERR_INVALIDCALL;
23167ec681f3Smrg
23177ec681f3Smrg            if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
23187ec681f3Smrg                /* Position0 is forbidden (likely because vPos already does that) */
23197ec681f3Smrg                if (sem.usage == D3DDECLUSAGE_POSITION)
23207ec681f3Smrg                    return D3DERR_INVALIDCALL;
23217ec681f3Smrg                /* Following code is for depth */
23227ec681f3Smrg                tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
23237ec681f3Smrg                return D3D_OK;
23247ec681f3Smrg            }
23257ec681f3Smrg
23267ec681f3Smrg            if (sem.reg.mod & NINED3DSPDM_CENTROID ||
23277ec681f3Smrg                (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
23287ec681f3Smrg                interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
23297ec681f3Smrg
23307ec681f3Smrg            tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
23317ec681f3Smrg                ureg, tgsi.Name, tgsi.Index,
23327ec681f3Smrg                nine_tgsi_to_interp_mode(&tgsi),
23337ec681f3Smrg                interp_location, 0, 1);
23347ec681f3Smrg        } else
23357ec681f3Smrg        if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
23367ec681f3Smrg            /* FragColor or FragDepth */
23377ec681f3Smrg            assert(sem.reg.mask != 0);
23387ec681f3Smrg            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
23397ec681f3Smrg                                    0, 1);
23407ec681f3Smrg        }
23417ec681f3Smrg    }
23427ec681f3Smrg    return D3D_OK;
23437ec681f3Smrg}
23447ec681f3Smrg
23457ec681f3SmrgDECL_SPECIAL(DEF)
23467ec681f3Smrg{
23477ec681f3Smrg    tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
23487ec681f3Smrg    return D3D_OK;
23497ec681f3Smrg}
23507ec681f3Smrg
23517ec681f3SmrgDECL_SPECIAL(DEFB)
23527ec681f3Smrg{
23537ec681f3Smrg    tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
23547ec681f3Smrg    return D3D_OK;
23557ec681f3Smrg}
23567ec681f3Smrg
23577ec681f3SmrgDECL_SPECIAL(DEFI)
23587ec681f3Smrg{
23597ec681f3Smrg    tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
23607ec681f3Smrg    return D3D_OK;
23617ec681f3Smrg}
23627ec681f3Smrg
23637ec681f3SmrgDECL_SPECIAL(POW)
23647ec681f3Smrg{
23657ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
23667ec681f3Smrg    struct ureg_src src[2] = {
23677ec681f3Smrg        tx_src_param(tx, &tx->insn.src[0]),
23687ec681f3Smrg        tx_src_param(tx, &tx->insn.src[1])
23697ec681f3Smrg    };
23707ec681f3Smrg    ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
23717ec681f3Smrg    return D3D_OK;
23727ec681f3Smrg}
23737ec681f3Smrg
23747ec681f3Smrg/* Tests results on Win 10:
23757ec681f3Smrg * NV (NVIDIA GeForce GT 635M)
23767ec681f3Smrg * AMD (AMD Radeon HD 7730M)
23777ec681f3Smrg * INTEL (Intel(R) HD Graphics 4000)
23787ec681f3Smrg * PS2 and PS3:
23797ec681f3Smrg * RCP and RSQ can generate inf on NV and AMD.
23807ec681f3Smrg * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
23817ec681f3Smrg * NV: log not clamped
23827ec681f3Smrg * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
23837ec681f3Smrg * INTEL: log(0) is -FLT_MAX and log(inf) is 127
23847ec681f3Smrg * All devices have 0*anything = 0
23857ec681f3Smrg *
23867ec681f3Smrg * INTEL VS2 and VS3: same behaviour.
23877ec681f3Smrg * Some differences VS2 and VS3 for constants defined with inf/NaN.
23887ec681f3Smrg * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
23897ec681f3Smrg * VS2 seems to clamp to zero (may be test failure).
23907ec681f3Smrg * AMD VS2: unknown, VS3: very likely behaviour of PS3
23917ec681f3Smrg * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
23937ec681f3Smrg * "Very likely" because the test was less extensive.
23947ec681f3Smrg *
23957ec681f3Smrg * Thus all clamping can be removed for shaders 2 and 3,
23967ec681f3Smrg * as long as 0*anything = 0.
23977ec681f3Smrg * Else clamps to enforce 0*anything = 0 (anything being then
 * neither inf nor NaN, the user being unlikely to pass them
23997ec681f3Smrg * as constant).
24007ec681f3Smrg * The status for VS1 and PS1 is unknown.
24017ec681f3Smrg */
24027ec681f3Smrg
24037ec681f3SmrgDECL_SPECIAL(RCP)
24047ec681f3Smrg{
24057ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24067ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24077ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
24087ec681f3Smrg    struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
24097ec681f3Smrg    ureg_RCP(ureg, tmp, src);
24107ec681f3Smrg    if (!tx->mul_zero_wins) {
24117ec681f3Smrg        /* FLT_MAX has issues with Rayman */
24127ec681f3Smrg        ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
24137ec681f3Smrg        ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
24147ec681f3Smrg    }
24157ec681f3Smrg    return D3D_OK;
24167ec681f3Smrg}
24177ec681f3Smrg
24187ec681f3SmrgDECL_SPECIAL(RSQ)
24197ec681f3Smrg{
24207ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24217ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24227ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
24237ec681f3Smrg    struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
24247ec681f3Smrg    ureg_RSQ(ureg, tmp, ureg_abs(src));
24257ec681f3Smrg    if (!tx->mul_zero_wins)
24267ec681f3Smrg        ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
24277ec681f3Smrg    return D3D_OK;
24287ec681f3Smrg}
24297ec681f3Smrg
24307ec681f3SmrgDECL_SPECIAL(LOG)
24317ec681f3Smrg{
24327ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24337ec681f3Smrg    struct ureg_dst tmp = tx_scratch_scalar(tx);
24347ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24357ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
24367ec681f3Smrg    ureg_LG2(ureg, tmp, ureg_abs(src));
24377ec681f3Smrg    if (tx->mul_zero_wins) {
24387ec681f3Smrg        ureg_MOV(ureg, dst, tx_src_scalar(tmp));
24397ec681f3Smrg    } else {
24407ec681f3Smrg        ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
24417ec681f3Smrg    }
24427ec681f3Smrg    return D3D_OK;
24437ec681f3Smrg}
24447ec681f3Smrg
24457ec681f3SmrgDECL_SPECIAL(LIT)
24467ec681f3Smrg{
24477ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24487ec681f3Smrg    struct ureg_dst tmp = tx_scratch(tx);
24497ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24507ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
24517ec681f3Smrg    ureg_LIT(ureg, tmp, src);
24527ec681f3Smrg    /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
24537ec681f3Smrg     * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
24547ec681f3Smrg     * it 0^0 if src.w=0, which value is driver dependent. */
24557ec681f3Smrg    ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
24567ec681f3Smrg             ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
24577ec681f3Smrg             ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
24587ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
24597ec681f3Smrg    return D3D_OK;
24607ec681f3Smrg}
24617ec681f3Smrg
24627ec681f3SmrgDECL_SPECIAL(NRM)
24637ec681f3Smrg{
24647ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24657ec681f3Smrg    struct ureg_dst tmp = tx_scratch_scalar(tx);
24667ec681f3Smrg    struct ureg_src nrm = tx_src_scalar(tmp);
24677ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24687ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
24697ec681f3Smrg    ureg_DP3(ureg, tmp, src, src);
24707ec681f3Smrg    ureg_RSQ(ureg, tmp, nrm);
24717ec681f3Smrg    if (!tx->mul_zero_wins)
24727ec681f3Smrg        ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
24737ec681f3Smrg    ureg_MUL(ureg, dst, src, nrm);
24747ec681f3Smrg    return D3D_OK;
24757ec681f3Smrg}
24767ec681f3Smrg
24777ec681f3SmrgDECL_SPECIAL(DP2ADD)
24787ec681f3Smrg{
24797ec681f3Smrg    struct ureg_dst tmp = tx_scratch_scalar(tx);
24807ec681f3Smrg    struct ureg_src dp2 = tx_src_scalar(tmp);
24817ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24827ec681f3Smrg    struct ureg_src src[3];
24837ec681f3Smrg    int i;
24847ec681f3Smrg    for (i = 0; i < 3; ++i)
24857ec681f3Smrg        src[i] = tx_src_param(tx, &tx->insn.src[i]);
24867ec681f3Smrg    assert_replicate_swizzle(&src[2]);
24877ec681f3Smrg
24887ec681f3Smrg    ureg_DP2(tx->ureg, tmp, src[0], src[1]);
24897ec681f3Smrg    ureg_ADD(tx->ureg, dst, src[2], dp2);
24907ec681f3Smrg
24917ec681f3Smrg    return D3D_OK;
24927ec681f3Smrg}
24937ec681f3Smrg
24947ec681f3SmrgDECL_SPECIAL(TEXCOORD)
24957ec681f3Smrg{
24967ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
24977ec681f3Smrg    const unsigned s = tx->insn.dst[0].idx;
24987ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
24997ec681f3Smrg
25007ec681f3Smrg    tx_texcoord_alloc(tx, s);
25017ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
25027ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
25037ec681f3Smrg
25047ec681f3Smrg    return D3D_OK;
25057ec681f3Smrg}
25067ec681f3Smrg
25077ec681f3SmrgDECL_SPECIAL(TEXCOORD_ps14)
25087ec681f3Smrg{
25097ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
25107ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
25117ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
25127ec681f3Smrg
25137ec681f3Smrg    assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
25147ec681f3Smrg
25157ec681f3Smrg    ureg_MOV(ureg, dst, src);
25167ec681f3Smrg
25177ec681f3Smrg    return D3D_OK;
25187ec681f3Smrg}
25197ec681f3Smrg
25207ec681f3SmrgDECL_SPECIAL(TEXKILL)
25217ec681f3Smrg{
25227ec681f3Smrg    struct ureg_src reg;
25237ec681f3Smrg
25247ec681f3Smrg    if (tx->version.major > 1 || tx->version.minor > 3) {
25257ec681f3Smrg        reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
25267ec681f3Smrg    } else {
25277ec681f3Smrg        tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
25287ec681f3Smrg        reg = tx->regs.vT[tx->insn.dst[0].idx];
25297ec681f3Smrg    }
25307ec681f3Smrg    if (tx->version.major < 2)
25317ec681f3Smrg        reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
25327ec681f3Smrg    ureg_KILL_IF(tx->ureg, reg);
25337ec681f3Smrg
25347ec681f3Smrg    return D3D_OK;
25357ec681f3Smrg}
25367ec681f3Smrg
25377ec681f3SmrgDECL_SPECIAL(TEXBEM)
25387ec681f3Smrg{
25397ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
25407ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
25417ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
25427ec681f3Smrg    struct ureg_dst tmp, tmp2, texcoord;
25437ec681f3Smrg    struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
25447ec681f3Smrg    struct ureg_src bumpenvlscale, bumpenvloffset;
25457ec681f3Smrg    const int m = tx->insn.dst[0].idx;
25467ec681f3Smrg
25477ec681f3Smrg    assert(tx->version.major == 1);
25487ec681f3Smrg
25497ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m);
25507ec681f3Smrg    tx->info->sampler_mask |= 1 << m;
25517ec681f3Smrg
25527ec681f3Smrg    tx_texcoord_alloc(tx, m);
25537ec681f3Smrg
25547ec681f3Smrg    tmp = tx_scratch(tx);
25557ec681f3Smrg    tmp2 = tx_scratch(tx);
25567ec681f3Smrg    texcoord = tx_scratch(tx);
25577ec681f3Smrg    /*
25587ec681f3Smrg     * Bump-env-matrix:
25597ec681f3Smrg     * 00 is X
25607ec681f3Smrg     * 01 is Y
25617ec681f3Smrg     * 10 is Z
25627ec681f3Smrg     * 11 is W
25637ec681f3Smrg     */
25647ec681f3Smrg    c8m = nine_float_constant_src(tx, 8+m);
25657ec681f3Smrg    c16m2 = nine_float_constant_src(tx, 8+8+m/2);
25667ec681f3Smrg
25677ec681f3Smrg    m00 = NINE_APPLY_SWIZZLE(c8m, X);
25687ec681f3Smrg    m01 = NINE_APPLY_SWIZZLE(c8m, Y);
25697ec681f3Smrg    m10 = NINE_APPLY_SWIZZLE(c8m, Z);
25707ec681f3Smrg    m11 = NINE_APPLY_SWIZZLE(c8m, W);
25717ec681f3Smrg
25727ec681f3Smrg    /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
25737ec681f3Smrg    if (m % 2 == 0) {
25747ec681f3Smrg        bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
25757ec681f3Smrg        bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
25767ec681f3Smrg    } else {
25777ec681f3Smrg        bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
25787ec681f3Smrg        bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
25797ec681f3Smrg    }
25807ec681f3Smrg
25817ec681f3Smrg    apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
25827ec681f3Smrg
25837ec681f3Smrg    /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
25847ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
25857ec681f3Smrg             NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
25867ec681f3Smrg    /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
25877ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
25887ec681f3Smrg             NINE_APPLY_SWIZZLE(src, Y),
25897ec681f3Smrg             NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
25907ec681f3Smrg
25917ec681f3Smrg    /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
25927ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
25937ec681f3Smrg             NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
25947ec681f3Smrg    /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
25957ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
25967ec681f3Smrg             NINE_APPLY_SWIZZLE(src, Y),
25977ec681f3Smrg             NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
25987ec681f3Smrg
25997ec681f3Smrg    /* Now the texture coordinates are in tmp.xy */
26007ec681f3Smrg
26017ec681f3Smrg    if (tx->insn.opcode == D3DSIO_TEXBEM) {
26027ec681f3Smrg        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
26037ec681f3Smrg    } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
26047ec681f3Smrg        /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
26057ec681f3Smrg        ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
26067ec681f3Smrg        ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
26077ec681f3Smrg                 bumpenvlscale, bumpenvloffset);
26087ec681f3Smrg        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
26097ec681f3Smrg    }
26107ec681f3Smrg
26117ec681f3Smrg    tx->info->bumpenvmat_needed = 1;
26127ec681f3Smrg
26137ec681f3Smrg    return D3D_OK;
26147ec681f3Smrg}
26157ec681f3Smrg
26167ec681f3SmrgDECL_SPECIAL(TEXREG2AR)
26177ec681f3Smrg{
26187ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
26197ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
26207ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
26217ec681f3Smrg    struct ureg_src sample;
26227ec681f3Smrg    const int m = tx->insn.dst[0].idx;
26237ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
26247ec681f3Smrg    assert(m >= 0 && m > n);
26257ec681f3Smrg
26267ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m);
26277ec681f3Smrg    tx->info->sampler_mask |= 1 << m;
26287ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
26297ec681f3Smrg
26307ec681f3Smrg    return D3D_OK;
26317ec681f3Smrg}
26327ec681f3Smrg
26337ec681f3SmrgDECL_SPECIAL(TEXREG2GB)
26347ec681f3Smrg{
26357ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
26367ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
26377ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
26387ec681f3Smrg    struct ureg_src sample;
26397ec681f3Smrg    const int m = tx->insn.dst[0].idx;
26407ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
26417ec681f3Smrg    assert(m >= 0 && m > n);
26427ec681f3Smrg
26437ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m);
26447ec681f3Smrg    tx->info->sampler_mask |= 1 << m;
26457ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
26467ec681f3Smrg
26477ec681f3Smrg    return D3D_OK;
26487ec681f3Smrg}
26497ec681f3Smrg
26507ec681f3SmrgDECL_SPECIAL(TEXM3x2PAD)
26517ec681f3Smrg{
26527ec681f3Smrg    return D3D_OK; /* this is just padding */
26537ec681f3Smrg}
26547ec681f3Smrg
26557ec681f3SmrgDECL_SPECIAL(TEXM3x2TEX)
26567ec681f3Smrg{
26577ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
26587ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
26597ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
26607ec681f3Smrg    struct ureg_src sample;
26617ec681f3Smrg    const int m = tx->insn.dst[0].idx - 1;
26627ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
26637ec681f3Smrg    assert(m >= 0 && m > n);
26647ec681f3Smrg
26657ec681f3Smrg    tx_texcoord_alloc(tx, m);
26667ec681f3Smrg    tx_texcoord_alloc(tx, m+1);
26677ec681f3Smrg
26687ec681f3Smrg    /* performs the matrix multiplication */
26697ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
26707ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
26717ec681f3Smrg
26727ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m + 1);
26737ec681f3Smrg    tx->info->sampler_mask |= 1 << (m + 1);
26747ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
26757ec681f3Smrg
26767ec681f3Smrg    return D3D_OK;
26777ec681f3Smrg}
26787ec681f3Smrg
26797ec681f3SmrgDECL_SPECIAL(TEXM3x3PAD)
26807ec681f3Smrg{
26817ec681f3Smrg    return D3D_OK; /* this is just padding */
26827ec681f3Smrg}
26837ec681f3Smrg
26847ec681f3SmrgDECL_SPECIAL(TEXM3x3SPEC)
26857ec681f3Smrg{
26867ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
26877ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
26887ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
26897ec681f3Smrg    struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
26907ec681f3Smrg    struct ureg_src sample;
26917ec681f3Smrg    struct ureg_dst tmp;
26927ec681f3Smrg    const int m = tx->insn.dst[0].idx - 2;
26937ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
26947ec681f3Smrg    assert(m >= 0 && m > n);
26957ec681f3Smrg
26967ec681f3Smrg    tx_texcoord_alloc(tx, m);
26977ec681f3Smrg    tx_texcoord_alloc(tx, m+1);
26987ec681f3Smrg    tx_texcoord_alloc(tx, m+2);
26997ec681f3Smrg
27007ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
27017ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
27027ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
27037ec681f3Smrg
27047ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m + 2);
27057ec681f3Smrg    tx->info->sampler_mask |= 1 << (m + 2);
27067ec681f3Smrg    tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
27077ec681f3Smrg
27087ec681f3Smrg    /* At this step, dst = N = (u', w', z').
27097ec681f3Smrg     * We want dst to be the texture sampled at (u'', w'', z''), with
27107ec681f3Smrg     * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
27117ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
27127ec681f3Smrg    ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
27137ec681f3Smrg    /* at this step tmp.x = 1/N.N */
27147ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
27157ec681f3Smrg    /* at this step tmp.y = N.E */
27167ec681f3Smrg    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
27177ec681f3Smrg    /* at this step tmp.x = N.E/N.N */
27187ec681f3Smrg    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
27197ec681f3Smrg    ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
27207ec681f3Smrg    /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
27217ec681f3Smrg    ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
27227ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
27237ec681f3Smrg
27247ec681f3Smrg    return D3D_OK;
27257ec681f3Smrg}
27267ec681f3Smrg
27277ec681f3SmrgDECL_SPECIAL(TEXREG2RGB)
27287ec681f3Smrg{
27297ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
27307ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
27317ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
27327ec681f3Smrg    struct ureg_src sample;
27337ec681f3Smrg    const int m = tx->insn.dst[0].idx;
27347ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
27357ec681f3Smrg    assert(m >= 0 && m > n);
27367ec681f3Smrg
27377ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m);
27387ec681f3Smrg    tx->info->sampler_mask |= 1 << m;
27397ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
27407ec681f3Smrg
27417ec681f3Smrg    return D3D_OK;
27427ec681f3Smrg}
27437ec681f3Smrg
27447ec681f3SmrgDECL_SPECIAL(TEXDP3TEX)
27457ec681f3Smrg{
27467ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
27477ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
27487ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
27497ec681f3Smrg    struct ureg_dst tmp;
27507ec681f3Smrg    struct ureg_src sample;
27517ec681f3Smrg    const int m = tx->insn.dst[0].idx;
27527ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
27537ec681f3Smrg    assert(m >= 0 && m > n);
27547ec681f3Smrg
27557ec681f3Smrg    tx_texcoord_alloc(tx, m);
27567ec681f3Smrg
27577ec681f3Smrg    tmp = tx_scratch(tx);
27587ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
27597ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
27607ec681f3Smrg
27617ec681f3Smrg    sample = ureg_DECL_sampler(ureg, m);
27627ec681f3Smrg    tx->info->sampler_mask |= 1 << m;
27637ec681f3Smrg    ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
27647ec681f3Smrg
27657ec681f3Smrg    return D3D_OK;
27667ec681f3Smrg}
27677ec681f3Smrg
27687ec681f3SmrgDECL_SPECIAL(TEXM3x2DEPTH)
27697ec681f3Smrg{
27707ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
27717ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
27727ec681f3Smrg    struct ureg_dst tmp;
27737ec681f3Smrg    const int m = tx->insn.dst[0].idx - 1;
27747ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
27757ec681f3Smrg    assert(m >= 0 && m > n);
27767ec681f3Smrg
27777ec681f3Smrg    tx_texcoord_alloc(tx, m);
27787ec681f3Smrg    tx_texcoord_alloc(tx, m+1);
27797ec681f3Smrg
27807ec681f3Smrg    tmp = tx_scratch(tx);
27817ec681f3Smrg
27827ec681f3Smrg    /* performs the matrix multiplication */
27837ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
27847ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
27857ec681f3Smrg
27867ec681f3Smrg    ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
27877ec681f3Smrg    /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
27887ec681f3Smrg    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
27897ec681f3Smrg    /* res = 'w' == 0 ? 1.0 : z/w */
27907ec681f3Smrg    ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
27917ec681f3Smrg             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
27927ec681f3Smrg    /* replace the depth for depth testing with the result */
27937ec681f3Smrg    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
27947ec681f3Smrg                                              TGSI_WRITEMASK_Z, 0, 1);
27957ec681f3Smrg    ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
27967ec681f3Smrg    /* note that we write nothing to the destination, since it's disallowed to use it afterward */
27977ec681f3Smrg    return D3D_OK;
27987ec681f3Smrg}
27997ec681f3Smrg
28007ec681f3SmrgDECL_SPECIAL(TEXDP3)
28017ec681f3Smrg{
28027ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
28037ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
28047ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
28057ec681f3Smrg    const int m = tx->insn.dst[0].idx;
28067ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
28077ec681f3Smrg    assert(m >= 0 && m > n);
28087ec681f3Smrg
28097ec681f3Smrg    tx_texcoord_alloc(tx, m);
28107ec681f3Smrg
28117ec681f3Smrg    ureg_DP3(ureg, dst, tx->regs.vT[m], src);
28127ec681f3Smrg
28137ec681f3Smrg    return D3D_OK;
28147ec681f3Smrg}
28157ec681f3Smrg
28167ec681f3SmrgDECL_SPECIAL(TEXM3x3)
28177ec681f3Smrg{
28187ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
28197ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
28207ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
28217ec681f3Smrg    struct ureg_src sample;
28227ec681f3Smrg    struct ureg_dst E, tmp;
28237ec681f3Smrg    const int m = tx->insn.dst[0].idx - 2;
28247ec681f3Smrg    ASSERTED const int n = tx->insn.src[0].idx;
28257ec681f3Smrg    assert(m >= 0 && m > n);
28267ec681f3Smrg
28277ec681f3Smrg    tx_texcoord_alloc(tx, m);
28287ec681f3Smrg    tx_texcoord_alloc(tx, m+1);
28297ec681f3Smrg    tx_texcoord_alloc(tx, m+2);
28307ec681f3Smrg
28317ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
28327ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
28337ec681f3Smrg    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
28347ec681f3Smrg
28357ec681f3Smrg    switch (tx->insn.opcode) {
28367ec681f3Smrg    case D3DSIO_TEXM3x3:
28377ec681f3Smrg        ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
28387ec681f3Smrg        break;
28397ec681f3Smrg    case D3DSIO_TEXM3x3TEX:
28407ec681f3Smrg        sample = ureg_DECL_sampler(ureg, m + 2);
28417ec681f3Smrg        tx->info->sampler_mask |= 1 << (m + 2);
28427ec681f3Smrg        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
28437ec681f3Smrg        break;
28447ec681f3Smrg    case D3DSIO_TEXM3x3VSPEC:
28457ec681f3Smrg        sample = ureg_DECL_sampler(ureg, m + 2);
28467ec681f3Smrg        tx->info->sampler_mask |= 1 << (m + 2);
28477ec681f3Smrg        E = tx_scratch(tx);
28487ec681f3Smrg        tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
28497ec681f3Smrg        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
28507ec681f3Smrg        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
28517ec681f3Smrg        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
28527ec681f3Smrg        /* At this step, dst = N = (u', w', z').
28537ec681f3Smrg         * We want dst to be the texture sampled at (u'', w'', z''), with
28547ec681f3Smrg         * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
28557ec681f3Smrg        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
28567ec681f3Smrg        ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
28577ec681f3Smrg        /* at this step tmp.x = 1/N.N */
28587ec681f3Smrg        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
28597ec681f3Smrg        /* at this step tmp.y = N.E */
28607ec681f3Smrg        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
28617ec681f3Smrg        /* at this step tmp.x = N.E/N.N */
28627ec681f3Smrg        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
28637ec681f3Smrg        ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
28647ec681f3Smrg        /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
28657ec681f3Smrg        ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
28667ec681f3Smrg        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
28677ec681f3Smrg        break;
28687ec681f3Smrg    default:
28697ec681f3Smrg        return D3DERR_INVALIDCALL;
28707ec681f3Smrg    }
28717ec681f3Smrg    return D3D_OK;
28727ec681f3Smrg}
28737ec681f3Smrg
28747ec681f3SmrgDECL_SPECIAL(TEXDEPTH)
28757ec681f3Smrg{
28767ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
28777ec681f3Smrg    struct ureg_dst r5;
28787ec681f3Smrg    struct ureg_src r5r, r5g;
28797ec681f3Smrg
28807ec681f3Smrg    assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
28817ec681f3Smrg
28827ec681f3Smrg    /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
28837ec681f3Smrg     * r5 won't be used afterward, thus we can use r5.ba */
28847ec681f3Smrg    r5 = tx->regs.r[5];
28857ec681f3Smrg    r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
28867ec681f3Smrg    r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
28877ec681f3Smrg
28887ec681f3Smrg    ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
28897ec681f3Smrg    ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
28907ec681f3Smrg    /* r5.r = r/g */
28917ec681f3Smrg    ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
28927ec681f3Smrg             r5r, ureg_imm1f(ureg, 1.0f));
28937ec681f3Smrg    /* replace the depth for depth testing with the result */
28947ec681f3Smrg    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
28957ec681f3Smrg                                              TGSI_WRITEMASK_Z, 0, 1);
28967ec681f3Smrg    ureg_MOV(ureg, tx->regs.oDepth, r5r);
28977ec681f3Smrg
28987ec681f3Smrg    return D3D_OK;
28997ec681f3Smrg}
29007ec681f3Smrg
29017ec681f3SmrgDECL_SPECIAL(BEM)
29027ec681f3Smrg{
29037ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
29047ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
29057ec681f3Smrg    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
29067ec681f3Smrg    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
29077ec681f3Smrg    struct ureg_src m00, m01, m10, m11, c8m;
29087ec681f3Smrg    const int m = tx->insn.dst[0].idx;
29097ec681f3Smrg    struct ureg_dst tmp = tx_scratch(tx);
29107ec681f3Smrg    /*
29117ec681f3Smrg     * Bump-env-matrix:
29127ec681f3Smrg     * 00 is X
29137ec681f3Smrg     * 01 is Y
29147ec681f3Smrg     * 10 is Z
29157ec681f3Smrg     * 11 is W
29167ec681f3Smrg     */
29177ec681f3Smrg    c8m = nine_float_constant_src(tx, 8+m);
29187ec681f3Smrg    m00 = NINE_APPLY_SWIZZLE(c8m, X);
29197ec681f3Smrg    m01 = NINE_APPLY_SWIZZLE(c8m, Y);
29207ec681f3Smrg    m10 = NINE_APPLY_SWIZZLE(c8m, Z);
29217ec681f3Smrg    m11 = NINE_APPLY_SWIZZLE(c8m, W);
29227ec681f3Smrg    /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
29237ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
29247ec681f3Smrg             NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
29257ec681f3Smrg    /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
29267ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
29277ec681f3Smrg             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
29287ec681f3Smrg
29297ec681f3Smrg    /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
29307ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
29317ec681f3Smrg             NINE_APPLY_SWIZZLE(src1, X), src0);
29327ec681f3Smrg    /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
29337ec681f3Smrg    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
29347ec681f3Smrg             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
29357ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
29367ec681f3Smrg
29377ec681f3Smrg    tx->info->bumpenvmat_needed = 1;
29387ec681f3Smrg
29397ec681f3Smrg    return D3D_OK;
29407ec681f3Smrg}
29417ec681f3Smrg
29427ec681f3SmrgDECL_SPECIAL(TEXLD)
29437ec681f3Smrg{
29447ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
29457ec681f3Smrg    unsigned target;
29467ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
29477ec681f3Smrg    struct ureg_src src[2] = {
29487ec681f3Smrg        tx_src_param(tx, &tx->insn.src[0]),
29497ec681f3Smrg        tx_src_param(tx, &tx->insn.src[1])
29507ec681f3Smrg    };
29517ec681f3Smrg    assert(tx->insn.src[1].idx >= 0 &&
29527ec681f3Smrg           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
29537ec681f3Smrg    target = tx->sampler_targets[tx->insn.src[1].idx];
29547ec681f3Smrg
29557ec681f3Smrg    if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
29567ec681f3Smrg        return D3D_OK;
29577ec681f3Smrg
29587ec681f3Smrg    switch (tx->insn.flags) {
29597ec681f3Smrg    case 0:
29607ec681f3Smrg        ureg_TEX(ureg, dst, target, src[0], src[1]);
29617ec681f3Smrg        break;
29627ec681f3Smrg    case NINED3DSI_TEXLD_PROJECT:
29637ec681f3Smrg        ureg_TXP(ureg, dst, target, src[0], src[1]);
29647ec681f3Smrg        break;
29657ec681f3Smrg    case NINED3DSI_TEXLD_BIAS:
29667ec681f3Smrg        ureg_TXB(ureg, dst, target, src[0], src[1]);
29677ec681f3Smrg        break;
29687ec681f3Smrg    default:
29697ec681f3Smrg        assert(0);
29707ec681f3Smrg        return D3DERR_INVALIDCALL;
29717ec681f3Smrg    }
29727ec681f3Smrg    return D3D_OK;
29737ec681f3Smrg}
29747ec681f3Smrg
29757ec681f3SmrgDECL_SPECIAL(TEXLD_14)
29767ec681f3Smrg{
29777ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
29787ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
29797ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
29807ec681f3Smrg    const unsigned s = tx->insn.dst[0].idx;
29817ec681f3Smrg    const unsigned t = ps1x_sampler_type(tx->info, s);
29827ec681f3Smrg
29837ec681f3Smrg    tx->info->sampler_mask |= 1 << s;
29847ec681f3Smrg    ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
29857ec681f3Smrg
29867ec681f3Smrg    return D3D_OK;
29877ec681f3Smrg}
29887ec681f3Smrg
29897ec681f3SmrgDECL_SPECIAL(TEX)
29907ec681f3Smrg{
29917ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
29927ec681f3Smrg    const unsigned s = tx->insn.dst[0].idx;
29937ec681f3Smrg    const unsigned t = ps1x_sampler_type(tx->info, s);
29947ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
29957ec681f3Smrg    struct ureg_src src[2];
29967ec681f3Smrg
29977ec681f3Smrg    tx_texcoord_alloc(tx, s);
29987ec681f3Smrg
29997ec681f3Smrg    src[0] = tx->regs.vT[s];
30007ec681f3Smrg    src[1] = ureg_DECL_sampler(ureg, s);
30017ec681f3Smrg    tx->info->sampler_mask |= 1 << s;
30027ec681f3Smrg
30037ec681f3Smrg    TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
30047ec681f3Smrg
30057ec681f3Smrg    return D3D_OK;
30067ec681f3Smrg}
30077ec681f3Smrg
30087ec681f3SmrgDECL_SPECIAL(TEXLDD)
30097ec681f3Smrg{
30107ec681f3Smrg    unsigned target;
30117ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
30127ec681f3Smrg    struct ureg_src src[4] = {
30137ec681f3Smrg        tx_src_param(tx, &tx->insn.src[0]),
30147ec681f3Smrg        tx_src_param(tx, &tx->insn.src[1]),
30157ec681f3Smrg        tx_src_param(tx, &tx->insn.src[2]),
30167ec681f3Smrg        tx_src_param(tx, &tx->insn.src[3])
30177ec681f3Smrg    };
30187ec681f3Smrg    assert(tx->insn.src[1].idx >= 0 &&
30197ec681f3Smrg           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
30207ec681f3Smrg    target = tx->sampler_targets[tx->insn.src[1].idx];
30217ec681f3Smrg
30227ec681f3Smrg    if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
30237ec681f3Smrg        return D3D_OK;
30247ec681f3Smrg
30257ec681f3Smrg    ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
30267ec681f3Smrg    return D3D_OK;
30277ec681f3Smrg}
30287ec681f3Smrg
30297ec681f3SmrgDECL_SPECIAL(TEXLDL)
30307ec681f3Smrg{
30317ec681f3Smrg    unsigned target;
30327ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
30337ec681f3Smrg    struct ureg_src src[2] = {
30347ec681f3Smrg       tx_src_param(tx, &tx->insn.src[0]),
30357ec681f3Smrg       tx_src_param(tx, &tx->insn.src[1])
30367ec681f3Smrg    };
30377ec681f3Smrg    assert(tx->insn.src[1].idx >= 0 &&
30387ec681f3Smrg           tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
30397ec681f3Smrg    target = tx->sampler_targets[tx->insn.src[1].idx];
30407ec681f3Smrg
30417ec681f3Smrg    if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
30427ec681f3Smrg        return D3D_OK;
30437ec681f3Smrg
30447ec681f3Smrg    ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
30457ec681f3Smrg    return D3D_OK;
30467ec681f3Smrg}
30477ec681f3Smrg
30487ec681f3SmrgDECL_SPECIAL(SETP)
30497ec681f3Smrg{
30507ec681f3Smrg    const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
30517ec681f3Smrg    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
30527ec681f3Smrg    struct ureg_src src[2] = {
30537ec681f3Smrg       tx_src_param(tx, &tx->insn.src[0]),
30547ec681f3Smrg       tx_src_param(tx, &tx->insn.src[1])
30557ec681f3Smrg    };
30567ec681f3Smrg    ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
30577ec681f3Smrg    return D3D_OK;
30587ec681f3Smrg}
30597ec681f3Smrg
30607ec681f3SmrgDECL_SPECIAL(BREAKP)
30617ec681f3Smrg{
30627ec681f3Smrg    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
30637ec681f3Smrg    ureg_IF(tx->ureg, src, tx_cond(tx));
30647ec681f3Smrg    ureg_BRK(tx->ureg);
30657ec681f3Smrg    tx_endcond(tx);
30667ec681f3Smrg    ureg_ENDIF(tx->ureg);
30677ec681f3Smrg    return D3D_OK;
30687ec681f3Smrg}
30697ec681f3Smrg
30707ec681f3SmrgDECL_SPECIAL(PHASE)
30717ec681f3Smrg{
30727ec681f3Smrg    return D3D_OK; /* we don't care about phase */
30737ec681f3Smrg}
30747ec681f3Smrg
30757ec681f3SmrgDECL_SPECIAL(COMMENT)
30767ec681f3Smrg{
30777ec681f3Smrg    return D3D_OK; /* nothing to do */
30787ec681f3Smrg}
30797ec681f3Smrg
30807ec681f3Smrg
/* Build one struct sm1_op_info initializer:
 *   sio            - D3D opcode name, pasted onto D3DSIO_
 *   tgsi           - TGSI opcode name, pasted onto TGSI_OPCODE_
 *   vs_min, vs_max - vertex shader version range
 *   ps_min, ps_max - pixel shader version range
 *   ndst, nsrc     - appear to be the destination / source parameter counts
 *   handler        - SPECIAL(...) translation handler, or NULL */
#define _OPI(sio, tgsi, vs_min, vs_max, ps_min, ps_max, ndst, nsrc, handler) \
    { D3DSIO_##sio, TGSI_OPCODE_##tgsi, \
      { vs_min, vs_max }, { ps_min, ps_max }, \
      ndst, nsrc, handler }
30837ec681f3Smrg
/* Table of the SM1 instructions known to the translator.
 *
 * Each _OPI() row holds, in order: the D3DSIO opcode, the TGSI opcode, the
 * { min, max } vertex shader version range, the { min, max } pixel shader
 * version range, two counts (apparently the number of destination and
 * source parameters), and a SPECIAL() handler or NULL.
 *
 * create_op_info_map() walks this table in order and, for every D3DSIO
 * opcode, remembers the index of the last row whose version range (both
 * bounds inclusive) contains the shader's version. That is why one opcode
 * may appear on several rows with different version ranges (e.g. SINCOS,
 * TEX, TEXCOORD, EXPP).
 *
 * NOTE(review): V(major, minor) is defined outside this view; it presumably
 * packs a version the same way create_op_info_map() packs tx->version. */
static const struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    /* Matrix multiplies, all expanded by dedicated handlers */
    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    /* Subroutines and loops */
    _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    /* SINCOS takes three sources in 2.x shaders and one in 3.0 */
    _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    /* Texture instructions; rows are pixel-shader only (the vertex range is
     * V(0,0)..V(0,0)) */
    _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
31907ec681f3Smrg
/* Op info for the PHASE instruction: pixel shader version 1.4 only, no
 * destination or source parameters, handled by SPECIAL(PHASE).
 * NOTE(review): kept outside inst_table, presumably because D3DSIO_PHASE
 * does not fit in tx->op_info_map; confirm against the opcode values. */
static const struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
31937ec681f3Smrg
/* Op info for COMMENT tokens: listed for all vertex and pixel shader
 * versions up to 3.0, no destination or source parameters, handled by
 * SPECIAL(COMMENT).
 * NOTE(review): kept outside inst_table, presumably because D3DSIO_COMMENT
 * does not fit in tx->op_info_map; confirm against the opcode values. */
static const struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
31967ec681f3Smrg
31977ec681f3Smrgstatic void
31987ec681f3Smrgcreate_op_info_map(struct shader_translator *tx)
31997ec681f3Smrg{
32007ec681f3Smrg    const unsigned version = (tx->version.major << 8) | tx->version.minor;
32017ec681f3Smrg    unsigned i;
32027ec681f3Smrg
32037ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
32047ec681f3Smrg        tx->op_info_map[i] = -1;
32057ec681f3Smrg
32067ec681f3Smrg    if (tx->processor == PIPE_SHADER_VERTEX) {
32077ec681f3Smrg        for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
32087ec681f3Smrg            assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
32097ec681f3Smrg            if (inst_table[i].vert_version.min <= version &&
32107ec681f3Smrg                inst_table[i].vert_version.max >= version)
32117ec681f3Smrg                tx->op_info_map[inst_table[i].sio] = i;
32127ec681f3Smrg        }
32137ec681f3Smrg    } else {
32147ec681f3Smrg        for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
32157ec681f3Smrg            assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
32167ec681f3Smrg            if (inst_table[i].frag_version.min <= version &&
32177ec681f3Smrg                inst_table[i].frag_version.max >= version)
32187ec681f3Smrg                tx->op_info_map[inst_table[i].sio] = i;
32197ec681f3Smrg        }
32207ec681f3Smrg    }
32217ec681f3Smrg}
32227ec681f3Smrg
32237ec681f3Smrgstatic inline HRESULT
32247ec681f3SmrgNineTranslateInstruction_Generic(struct shader_translator *tx)
32257ec681f3Smrg{
32267ec681f3Smrg    struct ureg_dst dst[1];
32277ec681f3Smrg    struct ureg_src src[4];
32287ec681f3Smrg    unsigned i;
32297ec681f3Smrg
32307ec681f3Smrg    for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
32317ec681f3Smrg        dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
32327ec681f3Smrg    for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
32337ec681f3Smrg        src[i] = tx_src_param(tx, &tx->insn.src[i]);
32347ec681f3Smrg
32357ec681f3Smrg    ureg_insn(tx->ureg, tx->insn.info->opcode,
32367ec681f3Smrg              dst, tx->insn.ndst,
32377ec681f3Smrg              src, tx->insn.nsrc, 0);
32387ec681f3Smrg    return D3D_OK;
32397ec681f3Smrg}
32407ec681f3Smrg
32417ec681f3Smrgstatic inline DWORD
32427ec681f3SmrgTOKEN_PEEK(struct shader_translator *tx)
32437ec681f3Smrg{
32447ec681f3Smrg    return *(tx->parse);
32457ec681f3Smrg}
32467ec681f3Smrg
32477ec681f3Smrgstatic inline DWORD
32487ec681f3SmrgTOKEN_NEXT(struct shader_translator *tx)
32497ec681f3Smrg{
32507ec681f3Smrg    return *(tx->parse)++;
32517ec681f3Smrg}
32527ec681f3Smrg
32537ec681f3Smrgstatic inline void
32547ec681f3SmrgTOKEN_JUMP(struct shader_translator *tx)
32557ec681f3Smrg{
32567ec681f3Smrg    if (tx->parse_next && tx->parse != tx->parse_next) {
32577ec681f3Smrg        WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
32587ec681f3Smrg        tx->parse = tx->parse_next;
32597ec681f3Smrg    }
32607ec681f3Smrg}
32617ec681f3Smrg
32627ec681f3Smrgstatic inline boolean
32637ec681f3Smrgsm1_parse_eof(struct shader_translator *tx)
32647ec681f3Smrg{
32657ec681f3Smrg    return TOKEN_PEEK(tx) == NINED3DSP_END;
32667ec681f3Smrg}
32677ec681f3Smrg
32687ec681f3Smrgstatic void
32697ec681f3Smrgsm1_read_version(struct shader_translator *tx)
32707ec681f3Smrg{
32717ec681f3Smrg    const DWORD tok = TOKEN_NEXT(tx);
32727ec681f3Smrg
32737ec681f3Smrg    tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
32747ec681f3Smrg    tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
32757ec681f3Smrg
32767ec681f3Smrg    switch (tok >> 16) {
32777ec681f3Smrg    case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
32787ec681f3Smrg    case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
32797ec681f3Smrg    default:
32807ec681f3Smrg       DBG("Invalid shader type: %x\n", tok);
32817ec681f3Smrg       tx->processor = ~0;
32827ec681f3Smrg       break;
32837ec681f3Smrg    }
32847ec681f3Smrg}
32857ec681f3Smrg
32867ec681f3Smrg/* This is just to check if we parsed the instruction properly. */
32877ec681f3Smrgstatic void
32887ec681f3Smrgsm1_parse_get_skip(struct shader_translator *tx)
32897ec681f3Smrg{
32907ec681f3Smrg    const DWORD tok = TOKEN_PEEK(tx);
32917ec681f3Smrg
32927ec681f3Smrg    if (tx->version.major >= 2) {
32937ec681f3Smrg        tx->parse_next = tx->parse + 1 /* this */ +
32947ec681f3Smrg            ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
32957ec681f3Smrg    } else {
32967ec681f3Smrg        tx->parse_next = NULL; /* TODO: determine from param count */
32977ec681f3Smrg    }
32987ec681f3Smrg}
32997ec681f3Smrg
33007ec681f3Smrgstatic void
33017ec681f3Smrgsm1_print_comment(const char *comment, UINT size)
33027ec681f3Smrg{
33037ec681f3Smrg    if (!size)
33047ec681f3Smrg        return;
33057ec681f3Smrg    /* TODO */
33067ec681f3Smrg}
33077ec681f3Smrg
33087ec681f3Smrgstatic void
33097ec681f3Smrgsm1_parse_comments(struct shader_translator *tx, BOOL print)
33107ec681f3Smrg{
33117ec681f3Smrg    DWORD tok = TOKEN_PEEK(tx);
33127ec681f3Smrg
33137ec681f3Smrg    while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
33147ec681f3Smrg    {
33157ec681f3Smrg        const char *comment = "";
33167ec681f3Smrg        UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
33177ec681f3Smrg        tx->parse += size + 1;
33187ec681f3Smrg
33197ec681f3Smrg        if (print)
33207ec681f3Smrg            sm1_print_comment(comment, size);
33217ec681f3Smrg
33227ec681f3Smrg        tok = TOKEN_PEEK(tx);
33237ec681f3Smrg    }
33247ec681f3Smrg}
33257ec681f3Smrg
33267ec681f3Smrgstatic void
33277ec681f3Smrgsm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
33287ec681f3Smrg{
33297ec681f3Smrg    *reg = TOKEN_NEXT(tx);
33307ec681f3Smrg
33317ec681f3Smrg    if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
33327ec681f3Smrg    {
33337ec681f3Smrg        if (tx->version.major < 2)
33347ec681f3Smrg            *rel = (1 << 31) |
33357ec681f3Smrg                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
33367ec681f3Smrg                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
33377ec681f3Smrg                D3DSP_NOSWIZZLE;
33387ec681f3Smrg        else
33397ec681f3Smrg            *rel = TOKEN_NEXT(tx);
33407ec681f3Smrg    }
33417ec681f3Smrg}
33427ec681f3Smrg
33437ec681f3Smrgstatic void
33447ec681f3Smrgsm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
33457ec681f3Smrg{
33467ec681f3Smrg    int8_t shift;
33477ec681f3Smrg    dst->file =
33487ec681f3Smrg        (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
33497ec681f3Smrg        (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
33507ec681f3Smrg    dst->type = TGSI_RETURN_TYPE_FLOAT;
33517ec681f3Smrg    dst->idx = tok & D3DSP_REGNUM_MASK;
33527ec681f3Smrg    dst->rel = NULL;
33537ec681f3Smrg    dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
33547ec681f3Smrg    dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
33557ec681f3Smrg    shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
33567ec681f3Smrg    dst->shift = (shift & 0x7) - (shift & 0x8);
33577ec681f3Smrg}
33587ec681f3Smrg
33597ec681f3Smrgstatic void
33607ec681f3Smrgsm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
33617ec681f3Smrg{
33627ec681f3Smrg    src->file =
33637ec681f3Smrg        ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
33647ec681f3Smrg        ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
33657ec681f3Smrg    src->type = TGSI_RETURN_TYPE_FLOAT;
33667ec681f3Smrg    src->idx = tok & D3DSP_REGNUM_MASK;
33677ec681f3Smrg    src->rel = NULL;
33687ec681f3Smrg    src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
33697ec681f3Smrg    src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
33707ec681f3Smrg
33717ec681f3Smrg    switch (src->file) {
33727ec681f3Smrg    case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
33737ec681f3Smrg    case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
33747ec681f3Smrg    case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
33757ec681f3Smrg    default:
33767ec681f3Smrg        break;
33777ec681f3Smrg    }
33787ec681f3Smrg}
33797ec681f3Smrg
33807ec681f3Smrgstatic void
33817ec681f3Smrgsm1_parse_immediate(struct shader_translator *tx,
33827ec681f3Smrg                    struct sm1_src_param *imm)
33837ec681f3Smrg{
33847ec681f3Smrg    imm->file = NINED3DSPR_IMMEDIATE;
33857ec681f3Smrg    imm->idx = INT_MIN;
33867ec681f3Smrg    imm->rel = NULL;
33877ec681f3Smrg    imm->swizzle = NINED3DSP_NOSWIZZLE;
33887ec681f3Smrg    imm->mod = 0;
33897ec681f3Smrg    switch (tx->insn.opcode) {
33907ec681f3Smrg    case D3DSIO_DEF:
33917ec681f3Smrg        imm->type = NINED3DSPTYPE_FLOAT4;
33927ec681f3Smrg        memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
33937ec681f3Smrg        tx->parse += 4;
33947ec681f3Smrg        break;
33957ec681f3Smrg    case D3DSIO_DEFI:
33967ec681f3Smrg        imm->type = NINED3DSPTYPE_INT4;
33977ec681f3Smrg        memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
33987ec681f3Smrg        tx->parse += 4;
33997ec681f3Smrg        break;
34007ec681f3Smrg    case D3DSIO_DEFB:
34017ec681f3Smrg        imm->type = NINED3DSPTYPE_BOOL;
34027ec681f3Smrg        memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
34037ec681f3Smrg        tx->parse += 1;
34047ec681f3Smrg        break;
34057ec681f3Smrg    default:
34067ec681f3Smrg       assert(0);
34077ec681f3Smrg       break;
34087ec681f3Smrg    }
34097ec681f3Smrg}
34107ec681f3Smrg
34117ec681f3Smrgstatic void
34127ec681f3Smrgsm1_read_dst_param(struct shader_translator *tx,
34137ec681f3Smrg                   struct sm1_dst_param *dst,
34147ec681f3Smrg                   struct sm1_src_param *rel)
34157ec681f3Smrg{
34167ec681f3Smrg    DWORD tok_dst, tok_rel = 0;
34177ec681f3Smrg
34187ec681f3Smrg    sm1_parse_get_param(tx, &tok_dst, &tok_rel);
34197ec681f3Smrg    sm1_parse_dst_param(dst, tok_dst);
34207ec681f3Smrg    if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
34217ec681f3Smrg        sm1_parse_src_param(rel, tok_rel);
34227ec681f3Smrg        dst->rel = rel;
34237ec681f3Smrg    }
34247ec681f3Smrg}
34257ec681f3Smrg
34267ec681f3Smrgstatic void
34277ec681f3Smrgsm1_read_src_param(struct shader_translator *tx,
34287ec681f3Smrg                   struct sm1_src_param *src,
34297ec681f3Smrg                   struct sm1_src_param *rel)
34307ec681f3Smrg{
34317ec681f3Smrg    DWORD tok_src, tok_rel = 0;
34327ec681f3Smrg
34337ec681f3Smrg    sm1_parse_get_param(tx, &tok_src, &tok_rel);
34347ec681f3Smrg    sm1_parse_src_param(src, tok_src);
34357ec681f3Smrg    if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
34367ec681f3Smrg        assert(rel);
34377ec681f3Smrg        sm1_parse_src_param(rel, tok_rel);
34387ec681f3Smrg        src->rel = rel;
34397ec681f3Smrg    }
34407ec681f3Smrg}
34417ec681f3Smrg
34427ec681f3Smrgstatic void
34437ec681f3Smrgsm1_read_semantic(struct shader_translator *tx,
34447ec681f3Smrg                  struct sm1_semantic *sem)
34457ec681f3Smrg{
34467ec681f3Smrg    const DWORD tok_usg = TOKEN_NEXT(tx);
34477ec681f3Smrg    const DWORD tok_dst = TOKEN_NEXT(tx);
34487ec681f3Smrg
34497ec681f3Smrg    sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
34507ec681f3Smrg    sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
34517ec681f3Smrg    sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
34527ec681f3Smrg
34537ec681f3Smrg    sm1_parse_dst_param(&sem->reg, tok_dst);
34547ec681f3Smrg}
34557ec681f3Smrg
34567ec681f3Smrgstatic void
34577ec681f3Smrgsm1_parse_instruction(struct shader_translator *tx)
34587ec681f3Smrg{
34597ec681f3Smrg    struct sm1_instruction *insn = &tx->insn;
34607ec681f3Smrg    HRESULT hr;
34617ec681f3Smrg    DWORD tok;
34627ec681f3Smrg    const struct sm1_op_info *info = NULL;
34637ec681f3Smrg    unsigned i;
34647ec681f3Smrg
34657ec681f3Smrg    sm1_parse_comments(tx, TRUE);
34667ec681f3Smrg    sm1_parse_get_skip(tx);
34677ec681f3Smrg
34687ec681f3Smrg    tok = TOKEN_NEXT(tx);
34697ec681f3Smrg
34707ec681f3Smrg    insn->opcode = tok & D3DSI_OPCODE_MASK;
34717ec681f3Smrg    insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
34727ec681f3Smrg    insn->coissue = !!(tok & D3DSI_COISSUE);
34737ec681f3Smrg    insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
34747ec681f3Smrg
34757ec681f3Smrg    if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
34767ec681f3Smrg        int k = tx->op_info_map[insn->opcode];
34777ec681f3Smrg        if (k >= 0) {
34787ec681f3Smrg            assert(k < ARRAY_SIZE(inst_table));
34797ec681f3Smrg            info = &inst_table[k];
34807ec681f3Smrg        }
34817ec681f3Smrg    } else {
34827ec681f3Smrg       if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
34837ec681f3Smrg       if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
34847ec681f3Smrg    }
34857ec681f3Smrg    if (!info) {
34867ec681f3Smrg       DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
34877ec681f3Smrg       TOKEN_JUMP(tx);
34887ec681f3Smrg       return;
34897ec681f3Smrg    }
34907ec681f3Smrg    insn->info = info;
34917ec681f3Smrg    insn->ndst = info->ndst;
34927ec681f3Smrg    insn->nsrc = info->nsrc;
34937ec681f3Smrg
34947ec681f3Smrg    /* check version */
34957ec681f3Smrg    {
34967ec681f3Smrg        unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
34977ec681f3Smrg        unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
34987ec681f3Smrg        unsigned ver = (tx->version.major << 8) | tx->version.minor;
34997ec681f3Smrg        if (ver < min || ver > max) {
35007ec681f3Smrg            DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
35017ec681f3Smrg                min, ver, max);
35027ec681f3Smrg            return;
35037ec681f3Smrg        }
35047ec681f3Smrg    }
35057ec681f3Smrg
35067ec681f3Smrg    for (i = 0; i < insn->ndst; ++i)
35077ec681f3Smrg        sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
35087ec681f3Smrg    if (insn->predicated)
35097ec681f3Smrg        sm1_read_src_param(tx, &insn->pred, NULL);
35107ec681f3Smrg    for (i = 0; i < insn->nsrc; ++i)
35117ec681f3Smrg        sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
35127ec681f3Smrg
35137ec681f3Smrg    /* parse here so we can dump them before processing */
35147ec681f3Smrg    if (insn->opcode == D3DSIO_DEF ||
35157ec681f3Smrg        insn->opcode == D3DSIO_DEFI ||
35167ec681f3Smrg        insn->opcode == D3DSIO_DEFB)
35177ec681f3Smrg        sm1_parse_immediate(tx, &tx->insn.src[0]);
35187ec681f3Smrg
35197ec681f3Smrg    sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
35207ec681f3Smrg    sm1_instruction_check(insn);
35217ec681f3Smrg
35227ec681f3Smrg    if (insn->predicated) {
35237ec681f3Smrg        tx->predicated_activated = true;
35247ec681f3Smrg        if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
35257ec681f3Smrg            tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
35267ec681f3Smrg            tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
35277ec681f3Smrg        }
35287ec681f3Smrg    }
35297ec681f3Smrg
35307ec681f3Smrg    if (info->handler)
35317ec681f3Smrg        hr = info->handler(tx);
35327ec681f3Smrg    else
35337ec681f3Smrg        hr = NineTranslateInstruction_Generic(tx);
35347ec681f3Smrg    tx_apply_dst0_modifiers(tx);
35357ec681f3Smrg
35367ec681f3Smrg    if (insn->predicated) {
35377ec681f3Smrg        tx->predicated_activated = false;
35387ec681f3Smrg        /* TODO: predicate might be allowed on outputs,
35397ec681f3Smrg         * which cannot be src. Workaround it. */
35407ec681f3Smrg        ureg_CMP(tx->ureg, tx->regs.predicate_dst,
35417ec681f3Smrg                 ureg_negate(tx_src_param(tx, &insn->pred)),
35427ec681f3Smrg                 ureg_src(tx->regs.predicate_tmp),
35437ec681f3Smrg                 ureg_src(tx->regs.predicate_dst));
35447ec681f3Smrg    }
35457ec681f3Smrg
35467ec681f3Smrg    if (hr != D3D_OK)
35477ec681f3Smrg        tx->failure = TRUE;
35487ec681f3Smrg    tx->num_scratch = 0; /* reset */
35497ec681f3Smrg
35507ec681f3Smrg    TOKEN_JUMP(tx);
35517ec681f3Smrg}
35527ec681f3Smrg
/* Capability query shorthands. Both expect a 'screen' variable in scope at
 * the point of use; GET_SHADER_CAP additionally expects 'info' and queries
 * the capability for info->type. */
#define GET_CAP(n) screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
      screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
35577ec681f3Smrg
35587ec681f3Smrgstatic HRESULT
35597ec681f3Smrgtx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
35607ec681f3Smrg{
35617ec681f3Smrg    unsigned i;
35627ec681f3Smrg
35637ec681f3Smrg    memset(tx, 0, sizeof(*tx));
35647ec681f3Smrg
35657ec681f3Smrg    tx->info = info;
35667ec681f3Smrg
35677ec681f3Smrg    tx->byte_code = info->byte_code;
35687ec681f3Smrg    tx->parse = info->byte_code;
35697ec681f3Smrg
35707ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
35717ec681f3Smrg        info->input_map[i] = NINE_DECLUSAGE_NONE;
35727ec681f3Smrg    info->num_inputs = 0;
35737ec681f3Smrg
35747ec681f3Smrg    info->position_t = FALSE;
35757ec681f3Smrg    info->point_size = FALSE;
35767ec681f3Smrg
35777ec681f3Smrg    memset(tx->slots_used, 0, sizeof(tx->slots_used));
35787ec681f3Smrg    memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
35797ec681f3Smrg    memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
35807ec681f3Smrg
35817ec681f3Smrg    tx->info->const_float_slots = 0;
35827ec681f3Smrg    tx->info->const_int_slots = 0;
35837ec681f3Smrg    tx->info->const_bool_slots = 0;
35847ec681f3Smrg
35857ec681f3Smrg    info->sampler_mask = 0x0;
35867ec681f3Smrg    info->rt_mask = 0x0;
35877ec681f3Smrg
35887ec681f3Smrg    info->lconstf.data = NULL;
35897ec681f3Smrg    info->lconstf.ranges = NULL;
35907ec681f3Smrg
35917ec681f3Smrg    info->bumpenvmat_needed = 0;
35927ec681f3Smrg
35937ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
35947ec681f3Smrg        tx->regs.rL[i] = ureg_dst_undef();
35957ec681f3Smrg    }
35967ec681f3Smrg    tx->regs.address = ureg_dst_undef();
35977ec681f3Smrg    tx->regs.a0 = ureg_dst_undef();
35987ec681f3Smrg    tx->regs.p = ureg_dst_undef();
35997ec681f3Smrg    tx->regs.oDepth = ureg_dst_undef();
36007ec681f3Smrg    tx->regs.vPos = ureg_src_undef();
36017ec681f3Smrg    tx->regs.vFace = ureg_src_undef();
36027ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
36037ec681f3Smrg        tx->regs.o[i] = ureg_dst_undef();
36047ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
36057ec681f3Smrg        tx->regs.oCol[i] = ureg_dst_undef();
36067ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
36077ec681f3Smrg        tx->regs.vC[i] = ureg_src_undef();
36087ec681f3Smrg    for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
36097ec681f3Smrg        tx->regs.vT[i] = ureg_src_undef();
36107ec681f3Smrg
36117ec681f3Smrg    sm1_read_version(tx);
36127ec681f3Smrg
36137ec681f3Smrg    info->version = (tx->version.major << 4) | tx->version.minor;
36147ec681f3Smrg
36157ec681f3Smrg    tx->num_outputs = 0;
36167ec681f3Smrg
36177ec681f3Smrg    create_op_info_map(tx);
36187ec681f3Smrg
36197ec681f3Smrg    tx->ureg = ureg_create(info->type);
36207ec681f3Smrg    if (!tx->ureg) {
36217ec681f3Smrg        return E_OUTOFMEMORY;
36227ec681f3Smrg    }
36237ec681f3Smrg
36247ec681f3Smrg    tx->native_integers = GET_SHADER_CAP(INTEGERS);
36257ec681f3Smrg    tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
36267ec681f3Smrg    tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
36277ec681f3Smrg    tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
36287ec681f3Smrg    tx->texcoord_sn = tx->want_texcoord ?
36297ec681f3Smrg        TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
36307ec681f3Smrg    tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
36317ec681f3Smrg    tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
36327ec681f3Smrg
36337ec681f3Smrg    if (IS_VS) {
36347ec681f3Smrg        tx->num_constf_allowed = NINE_MAX_CONST_F;
36357ec681f3Smrg    } else if (tx->version.major < 2) {/* IS_PS v1 */
36367ec681f3Smrg        tx->num_constf_allowed = 8;
36377ec681f3Smrg    } else if (tx->version.major == 2) {/* IS_PS v2 */
36387ec681f3Smrg        tx->num_constf_allowed = 32;
36397ec681f3Smrg    } else {/* IS_PS v3 */
36407ec681f3Smrg        tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
36417ec681f3Smrg    }
36427ec681f3Smrg
36437ec681f3Smrg    if (tx->version.major < 2) {
36447ec681f3Smrg        tx->num_consti_allowed = 0;
36457ec681f3Smrg        tx->num_constb_allowed = 0;
36467ec681f3Smrg    } else {
36477ec681f3Smrg        tx->num_consti_allowed = NINE_MAX_CONST_I;
36487ec681f3Smrg        tx->num_constb_allowed = NINE_MAX_CONST_B;
36497ec681f3Smrg    }
36507ec681f3Smrg
36517ec681f3Smrg    if (info->swvp_on) {
36527ec681f3Smrg        /* TODO: The values tx->version.major == 1 */
36537ec681f3Smrg        tx->num_constf_allowed = 8192;
36547ec681f3Smrg        tx->num_consti_allowed = 2048;
36557ec681f3Smrg        tx->num_constb_allowed = 2048;
36567ec681f3Smrg    }
36577ec681f3Smrg
36587ec681f3Smrg    /* VS must always write position. Declare it here to make it the 1st output.
36597ec681f3Smrg     * (Some drivers like nv50 are buggy and rely on that.)
36607ec681f3Smrg     */
36617ec681f3Smrg    if (IS_VS) {
36627ec681f3Smrg        tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
36637ec681f3Smrg    } else {
36647ec681f3Smrg        ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
36657ec681f3Smrg        if (!tx->shift_wpos)
36667ec681f3Smrg            ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
36677ec681f3Smrg    }
36687ec681f3Smrg
36697ec681f3Smrg    tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
36707ec681f3Smrg    if (tx->mul_zero_wins)
36717ec681f3Smrg       ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
36727ec681f3Smrg
36737ec681f3Smrg    /* Add additional definition of constants */
36747ec681f3Smrg    if (info->add_constants_defs.c_combination) {
36757ec681f3Smrg        unsigned i;
36767ec681f3Smrg
36777ec681f3Smrg        assert(info->add_constants_defs.int_const_added);
36787ec681f3Smrg        assert(info->add_constants_defs.bool_const_added);
36797ec681f3Smrg        /* We only add constants that are used by the shader
36807ec681f3Smrg         * and that are not defined in the shader */
36817ec681f3Smrg        for (i = 0; i < NINE_MAX_CONST_I; ++i) {
36827ec681f3Smrg            if ((*info->add_constants_defs.int_const_added)[i]) {
36837ec681f3Smrg                DBG("Defining const i%i : { %i %i %i %i }\n", i,
36847ec681f3Smrg                    info->add_constants_defs.c_combination->const_i[i][0],
36857ec681f3Smrg                    info->add_constants_defs.c_combination->const_i[i][1],
36867ec681f3Smrg                    info->add_constants_defs.c_combination->const_i[i][2],
36877ec681f3Smrg                    info->add_constants_defs.c_combination->const_i[i][3]);
36887ec681f3Smrg                tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
36897ec681f3Smrg            }
36907ec681f3Smrg        }
36917ec681f3Smrg        for (i = 0; i < NINE_MAX_CONST_B; ++i) {
36927ec681f3Smrg            if ((*info->add_constants_defs.bool_const_added)[i]) {
36937ec681f3Smrg                DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
36947ec681f3Smrg                tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
36957ec681f3Smrg            }
36967ec681f3Smrg        }
36977ec681f3Smrg    }
36987ec681f3Smrg    return D3D_OK;
36997ec681f3Smrg}
37007ec681f3Smrg
37017ec681f3Smrgstatic void
37027ec681f3Smrgtx_dtor(struct shader_translator *tx)
37037ec681f3Smrg{
37047ec681f3Smrg    if (tx->slot_map)
37057ec681f3Smrg        FREE(tx->slot_map);
37067ec681f3Smrg    if (tx->num_inst_labels)
37077ec681f3Smrg        FREE(tx->inst_labels);
37087ec681f3Smrg    FREE(tx->lconstf);
37097ec681f3Smrg    FREE(tx->regs.r);
37107ec681f3Smrg    FREE(tx);
37117ec681f3Smrg}
37127ec681f3Smrg
37137ec681f3Smrg/* CONST[0].xyz = width/2, -height/2, zmax-zmin
37147ec681f3Smrg * CONST[1].xyz = x+width/2, y+height/2, zmin */
37157ec681f3Smrgstatic void
37167ec681f3Smrgshader_add_vs_viewport_transform(struct shader_translator *tx)
37177ec681f3Smrg{
37187ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
37197ec681f3Smrg    struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
37207ec681f3Smrg    struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
37217ec681f3Smrg    /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
37227ec681f3Smrg
37237ec681f3Smrg    c0 = ureg_src_dimension(c0, 4);
37247ec681f3Smrg    c1 = ureg_src_dimension(c1, 4);
37257ec681f3Smrg    /* TODO: find out when we need to apply the viewport transformation or not.
37267ec681f3Smrg     * Likely will be XYZ vs XYZRHW in vdecl_out
37277ec681f3Smrg     * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
37287ec681f3Smrg     * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
37297ec681f3Smrg     */
37307ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
37317ec681f3Smrg}
37327ec681f3Smrg
37337ec681f3Smrgstatic void
37347ec681f3Smrgshader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
37357ec681f3Smrg{
37367ec681f3Smrg    struct ureg_program *ureg = tx->ureg;
37377ec681f3Smrg    struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
37387ec681f3Smrg    struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
37397ec681f3Smrg    struct ureg_src fog_vs, fog_color;
37407ec681f3Smrg    struct ureg_dst fog_factor, depth;
37417ec681f3Smrg
37427ec681f3Smrg    if (!tx->info->fog_enable) {
37437ec681f3Smrg        ureg_MOV(ureg, oCol0, src_col);
37447ec681f3Smrg        return;
37457ec681f3Smrg    }
37467ec681f3Smrg
37477ec681f3Smrg    if (tx->info->fog_mode != D3DFOG_NONE) {
37487ec681f3Smrg        depth = tx_scratch_scalar(tx);
37497ec681f3Smrg        /* Depth used for fog is perspective interpolated */
37507ec681f3Smrg        ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
37517ec681f3Smrg        ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
37527ec681f3Smrg    }
37537ec681f3Smrg
37547ec681f3Smrg    fog_color = nine_float_constant_src(tx, 32);
37557ec681f3Smrg    fog_params = nine_float_constant_src(tx, 33);
37567ec681f3Smrg    fog_factor = tx_scratch_scalar(tx);
37577ec681f3Smrg
37587ec681f3Smrg    if (tx->info->fog_mode == D3DFOG_LINEAR) {
37597ec681f3Smrg        fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
37607ec681f3Smrg        fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
37617ec681f3Smrg        ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
37627ec681f3Smrg        ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
37637ec681f3Smrg    } else if (tx->info->fog_mode == D3DFOG_EXP) {
37647ec681f3Smrg        fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
37657ec681f3Smrg        ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
37667ec681f3Smrg        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
37677ec681f3Smrg        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
37687ec681f3Smrg    } else if (tx->info->fog_mode == D3DFOG_EXP2) {
37697ec681f3Smrg        fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
37707ec681f3Smrg        ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
37717ec681f3Smrg        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
37727ec681f3Smrg        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
37737ec681f3Smrg        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
37747ec681f3Smrg    } else {
37757ec681f3Smrg        fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
37767ec681f3Smrg                                            TGSI_INTERPOLATE_PERSPECTIVE),
37777ec681f3Smrg                                            TGSI_SWIZZLE_X);
37787ec681f3Smrg        ureg_MOV(ureg, fog_factor, fog_vs);
37797ec681f3Smrg    }
37807ec681f3Smrg
37817ec681f3Smrg    ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
37827ec681f3Smrg             tx_src_scalar(fog_factor), src_col, fog_color);
37837ec681f3Smrg    ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
37847ec681f3Smrg}
37857ec681f3Smrg
37867ec681f3Smrgstatic void parse_shader(struct shader_translator *tx)
37877ec681f3Smrg{
37887ec681f3Smrg    struct nine_shader_info *info = tx->info;
37897ec681f3Smrg
37907ec681f3Smrg    while (!sm1_parse_eof(tx) && !tx->failure)
37917ec681f3Smrg        sm1_parse_instruction(tx);
37927ec681f3Smrg    tx->parse++; /* for byte_size */
37937ec681f3Smrg
37947ec681f3Smrg    if (tx->failure)
37957ec681f3Smrg        return;
37967ec681f3Smrg
37977ec681f3Smrg    if (IS_PS && tx->version.major < 3) {
37987ec681f3Smrg        if (tx->version.major < 2) {
37997ec681f3Smrg            assert(tx->num_temp); /* there must be color output */
38007ec681f3Smrg            info->rt_mask |= 0x1;
38017ec681f3Smrg            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
38027ec681f3Smrg        } else {
38037ec681f3Smrg            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
38047ec681f3Smrg        }
38057ec681f3Smrg    }
38067ec681f3Smrg
38077ec681f3Smrg    if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
38087ec681f3Smrg        tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
38097ec681f3Smrg        ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
38107ec681f3Smrg    }
38117ec681f3Smrg
38127ec681f3Smrg    if (info->position_t)
38137ec681f3Smrg        ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
38147ec681f3Smrg
38157ec681f3Smrg    if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
38167ec681f3Smrg        struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
38177ec681f3Smrg        ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
38187ec681f3Smrg        ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
38197ec681f3Smrg        info->point_size = TRUE;
38207ec681f3Smrg    }
38217ec681f3Smrg
38227ec681f3Smrg    if (info->process_vertices)
38237ec681f3Smrg        shader_add_vs_viewport_transform(tx);
38247ec681f3Smrg
38257ec681f3Smrg    ureg_END(tx->ureg);
38267ec681f3Smrg}
38277ec681f3Smrg
38287ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NIR_VS           (1 << 0)
38297ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NIR_PS           (1 << 1)
38307ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
38317ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
38327ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
38337ec681f3Smrg#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)
38347ec681f3Smrg
38357ec681f3Smrgstatic const struct debug_named_value nine_shader_debug_options[] = {
38367ec681f3Smrg    { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
38377ec681f3Smrg    { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
38387ec681f3Smrg    { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
38397ec681f3Smrg    { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
38407ec681f3Smrg    { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
38417ec681f3Smrg    { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
38427ec681f3Smrg    DEBUG_NAMED_VALUE_END /* must be last */
38437ec681f3Smrg};
38447ec681f3Smrg
38457ec681f3Smrgstatic inline boolean
38467ec681f3Smrgnine_shader_get_debug_flag(uint64_t flag)
38477ec681f3Smrg{
38487ec681f3Smrg    static uint64_t flags = 0;
38497ec681f3Smrg    static boolean first_run = TRUE;
38507ec681f3Smrg
38517ec681f3Smrg    if (unlikely(first_run)) {
38527ec681f3Smrg        first_run = FALSE;
38537ec681f3Smrg        flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
38547ec681f3Smrg
38557ec681f3Smrg        // Check old TGSI dump envvar too
38567ec681f3Smrg        if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
38577ec681f3Smrg            flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
38587ec681f3Smrg        }
38597ec681f3Smrg    }
38607ec681f3Smrg
38617ec681f3Smrg    return !!(flags & flag);
38627ec681f3Smrg}
38637ec681f3Smrg
38647ec681f3Smrgstatic void
38657ec681f3Smrgnine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
38667ec681f3Smrg                                     struct pipe_screen *screen)
38677ec681f3Smrg{
38687ec681f3Smrg    struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
38697ec681f3Smrg
38707ec681f3Smrg    if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
38717ec681f3Smrg        nir_print_shader(nir, stdout);
38727ec681f3Smrg    }
38737ec681f3Smrg
38747ec681f3Smrg    state->type = PIPE_SHADER_IR_NIR;
38757ec681f3Smrg    state->tokens = NULL;
38767ec681f3Smrg    state->ir.nir = nir;
38777ec681f3Smrg    memset(&state->stream_output, 0, sizeof(state->stream_output));
38787ec681f3Smrg}
38797ec681f3Smrg
38807ec681f3Smrgstatic void *
38817ec681f3Smrgnine_ureg_create_shader(struct ureg_program                  *ureg,
38827ec681f3Smrg                        struct pipe_context                  *pipe,
38837ec681f3Smrg                        const struct pipe_stream_output_info   *so)
38847ec681f3Smrg{
38857ec681f3Smrg    struct pipe_shader_state state;
38867ec681f3Smrg    const struct tgsi_token *tgsi_tokens;
38877ec681f3Smrg    struct pipe_screen *screen = pipe->screen;
38887ec681f3Smrg
38897ec681f3Smrg    tgsi_tokens = ureg_finalize(ureg);
38907ec681f3Smrg    if (!tgsi_tokens)
38917ec681f3Smrg        return NULL;
38927ec681f3Smrg
38937ec681f3Smrg    assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
38947ec681f3Smrg    enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
38957ec681f3Smrg
38967ec681f3Smrg    int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
38977ec681f3Smrg    bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
38987ec681f3Smrg    bool use_nir = prefer_nir ||
38997ec681f3Smrg        ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
39007ec681f3Smrg        ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
39017ec681f3Smrg
39027ec681f3Smrg    /* Allow user to override preferred IR, this is very useful for debugging */
39037ec681f3Smrg    if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
39047ec681f3Smrg        use_nir = false;
39057ec681f3Smrg    if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
39067ec681f3Smrg        use_nir = false;
39077ec681f3Smrg
39087ec681f3Smrg    DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
39097ec681f3Smrg         shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
39107ec681f3Smrg         prefer_nir ? "NIR" : "TGSI",
39117ec681f3Smrg         use_nir ? "NIR" : "TGSI");
39127ec681f3Smrg
39137ec681f3Smrg    if (use_nir) {
39147ec681f3Smrg        nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
39157ec681f3Smrg    } else {
39167ec681f3Smrg        pipe_shader_state_from_tgsi(&state, tgsi_tokens);
39177ec681f3Smrg    }
39187ec681f3Smrg
39197ec681f3Smrg    assert(state.tokens || state.ir.nir);
39207ec681f3Smrg
39217ec681f3Smrg    if (so)
39227ec681f3Smrg        state.stream_output = *so;
39237ec681f3Smrg
39247ec681f3Smrg    switch (shader_type) {
39257ec681f3Smrg    case PIPE_SHADER_VERTEX:
39267ec681f3Smrg        return pipe->create_vs_state(pipe, &state);
39277ec681f3Smrg    case PIPE_SHADER_FRAGMENT:
39287ec681f3Smrg        return pipe->create_fs_state(pipe, &state);
39297ec681f3Smrg    default:
39307ec681f3Smrg        unreachable("unsupported shader type");
39317ec681f3Smrg    }
39327ec681f3Smrg}
39337ec681f3Smrg
39347ec681f3Smrg
/* Create a driver shader from the ureg program 'p' and then destroy 'p',
 * whether or not the creation succeeded. Returns what
 * nine_ureg_create_shader() returned. */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                   *p,
                                       struct pipe_context                *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *shader_cso = nine_ureg_create_shader(p, pipe, so);

    ureg_destroy(p);

    return shader_cso;
}
39447ec681f3Smrg
39457ec681f3SmrgHRESULT
39467ec681f3Smrgnine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
39477ec681f3Smrg{
39487ec681f3Smrg    struct shader_translator *tx;
39497ec681f3Smrg    HRESULT hr = D3D_OK;
39507ec681f3Smrg    const unsigned processor = info->type;
39517ec681f3Smrg    struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
39527ec681f3Smrg    unsigned *const_ranges = NULL;
39537ec681f3Smrg
39547ec681f3Smrg    user_assert(processor != ~0, D3DERR_INVALIDCALL);
39557ec681f3Smrg
39567ec681f3Smrg    tx = MALLOC_STRUCT(shader_translator);
39577ec681f3Smrg    if (!tx)
39587ec681f3Smrg        return E_OUTOFMEMORY;
39597ec681f3Smrg
39607ec681f3Smrg    if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
39617ec681f3Smrg        hr = E_OUTOFMEMORY;
39627ec681f3Smrg        goto out;
39637ec681f3Smrg    }
39647ec681f3Smrg
39657ec681f3Smrg    assert(IS_VS || !info->swvp_on);
39667ec681f3Smrg
39677ec681f3Smrg    if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
39687ec681f3Smrg        hr = D3DERR_INVALIDCALL;
39697ec681f3Smrg        DBG("Unsupported shader version: %u.%u !\n",
39707ec681f3Smrg            tx->version.major, tx->version.minor);
39717ec681f3Smrg        goto out;
39727ec681f3Smrg    }
39737ec681f3Smrg    if (tx->processor != processor) {
39747ec681f3Smrg        hr = D3DERR_INVALIDCALL;
39757ec681f3Smrg        DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
39767ec681f3Smrg        goto out;
39777ec681f3Smrg    }
39787ec681f3Smrg    DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
39797ec681f3Smrg         tx->version.major, tx->version.minor);
39807ec681f3Smrg
39817ec681f3Smrg    parse_shader(tx);
39827ec681f3Smrg
39837ec681f3Smrg    if (tx->failure) {
39847ec681f3Smrg        /* For VS shaders, we print the warning later,
39857ec681f3Smrg         * we first try with swvp. */
39867ec681f3Smrg        if (IS_PS)
39877ec681f3Smrg            ERR("Encountered buggy shader\n");
39887ec681f3Smrg        ureg_destroy(tx->ureg);
39897ec681f3Smrg        hr = D3DERR_INVALIDCALL;
39907ec681f3Smrg        goto out;
39917ec681f3Smrg    }
39927ec681f3Smrg
39937ec681f3Smrg    /* Recompile after compacting constant slots if possible */
39947ec681f3Smrg    if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
39957ec681f3Smrg        unsigned *slot_map;
39967ec681f3Smrg        unsigned c;
39977ec681f3Smrg        int i, j, num_ranges, prev;
39987ec681f3Smrg
39997ec681f3Smrg        DBG("Recompiling shader for constant compaction\n");
40007ec681f3Smrg        ureg_destroy(tx->ureg);
40017ec681f3Smrg
40027ec681f3Smrg        if (tx->num_inst_labels)
40037ec681f3Smrg            FREE(tx->inst_labels);
40047ec681f3Smrg        FREE(tx->lconstf);
40057ec681f3Smrg        FREE(tx->regs.r);
40067ec681f3Smrg
40077ec681f3Smrg        num_ranges = 0;
40087ec681f3Smrg        prev = -2;
40097ec681f3Smrg        for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
40107ec681f3Smrg            if (tx->slots_used[i]) {
40117ec681f3Smrg                if (prev != i - 1)
40127ec681f3Smrg                    num_ranges++;
40137ec681f3Smrg                prev = i;
40147ec681f3Smrg            }
40157ec681f3Smrg        }
40167ec681f3Smrg        slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
40177ec681f3Smrg        const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
40187ec681f3Smrg        if (!slot_map || !const_ranges) {
40197ec681f3Smrg            hr = E_OUTOFMEMORY;
40207ec681f3Smrg            goto out;
40217ec681f3Smrg        }
40227ec681f3Smrg        c = 0;
40237ec681f3Smrg        j = -1;
40247ec681f3Smrg        prev = -2;
40257ec681f3Smrg        for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
40267ec681f3Smrg            if (tx->slots_used[i]) {
40277ec681f3Smrg                if (prev != i - 1)
40287ec681f3Smrg                    j++;
40297ec681f3Smrg                /* Initialize first slot of the range */
40307ec681f3Smrg                if (!const_ranges[2*j+1])
40317ec681f3Smrg                    const_ranges[2*j] = i;
40327ec681f3Smrg                const_ranges[2*j+1]++;
40337ec681f3Smrg                prev = i;
40347ec681f3Smrg                slot_map[i] = c++;
40357ec681f3Smrg            }
40367ec681f3Smrg        }
40377ec681f3Smrg
40387ec681f3Smrg        if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
40397ec681f3Smrg            hr = E_OUTOFMEMORY;
40407ec681f3Smrg            goto out;
40417ec681f3Smrg        }
40427ec681f3Smrg        tx->slot_map = slot_map;
40437ec681f3Smrg        parse_shader(tx);
40447ec681f3Smrg        assert(!tx->failure);
40457ec681f3Smrg#if !defined(NDEBUG)
40467ec681f3Smrg        i = 0;
40477ec681f3Smrg        j = 0;
40487ec681f3Smrg        while (const_ranges[i*2+1] != 0) {
40497ec681f3Smrg            j += const_ranges[i*2+1];
40507ec681f3Smrg            i++;
40517ec681f3Smrg        }
40527ec681f3Smrg        assert(j == tx->num_slots);
40537ec681f3Smrg#endif
40547ec681f3Smrg    }
40557ec681f3Smrg
40567ec681f3Smrg    /* record local constants */
40577ec681f3Smrg    if (tx->num_lconstf && tx->indirect_const_access) {
40587ec681f3Smrg        struct nine_range *ranges;
40597ec681f3Smrg        float *data;
40607ec681f3Smrg        int *indices;
40617ec681f3Smrg        unsigned i, k, n;
40627ec681f3Smrg
40637ec681f3Smrg        hr = E_OUTOFMEMORY;
40647ec681f3Smrg
40657ec681f3Smrg        data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
40667ec681f3Smrg        if (!data)
40677ec681f3Smrg            goto out;
40687ec681f3Smrg        info->lconstf.data = data;
40697ec681f3Smrg
40707ec681f3Smrg        indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
40717ec681f3Smrg        if (!indices)
40727ec681f3Smrg            goto out;
40737ec681f3Smrg
40747ec681f3Smrg        /* lazy sort, num_lconstf should be small */
40757ec681f3Smrg        for (n = 0; n < tx->num_lconstf; ++n) {
40767ec681f3Smrg            for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
40777ec681f3Smrg                if (tx->lconstf[i].idx < tx->lconstf[k].idx)
40787ec681f3Smrg                    k = i;
40797ec681f3Smrg            }
40807ec681f3Smrg            indices[n] = tx->lconstf[k].idx;
40817ec681f3Smrg            memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
40827ec681f3Smrg            tx->lconstf[k].idx = INT_MAX;
40837ec681f3Smrg        }
40847ec681f3Smrg
40857ec681f3Smrg        /* count ranges */
40867ec681f3Smrg        for (n = 1, i = 1; i < tx->num_lconstf; ++i)
40877ec681f3Smrg            if (indices[i] != indices[i - 1] + 1)
40887ec681f3Smrg                ++n;
40897ec681f3Smrg        ranges = MALLOC(n * sizeof(ranges[0]));
40907ec681f3Smrg        if (!ranges) {
40917ec681f3Smrg            FREE(indices);
40927ec681f3Smrg            goto out;
40937ec681f3Smrg        }
40947ec681f3Smrg        info->lconstf.ranges = ranges;
40957ec681f3Smrg
40967ec681f3Smrg        k = 0;
40977ec681f3Smrg        ranges[k].bgn = indices[0];
40987ec681f3Smrg        for (i = 1; i < tx->num_lconstf; ++i) {
40997ec681f3Smrg            if (indices[i] != indices[i - 1] + 1) {
41007ec681f3Smrg                ranges[k].next = &ranges[k + 1];
41017ec681f3Smrg                ranges[k].end = indices[i - 1] + 1;
41027ec681f3Smrg                ++k;
41037ec681f3Smrg                ranges[k].bgn = indices[i];
41047ec681f3Smrg            }
41057ec681f3Smrg        }
41067ec681f3Smrg        ranges[k].end = indices[i - 1] + 1;
41077ec681f3Smrg        ranges[k].next = NULL;
41087ec681f3Smrg        assert(n == (k + 1));
41097ec681f3Smrg
41107ec681f3Smrg        FREE(indices);
41117ec681f3Smrg        hr = D3D_OK;
41127ec681f3Smrg    }
41137ec681f3Smrg
41147ec681f3Smrg    /* r500 */
41157ec681f3Smrg    if (info->const_float_slots > device->max_vs_const_f &&
41167ec681f3Smrg        (info->const_int_slots || info->const_bool_slots) &&
41177ec681f3Smrg        !info->swvp_on)
41187ec681f3Smrg        ERR("Overlapping constant slots. The shader is likely to be buggy\n");
41197ec681f3Smrg
41207ec681f3Smrg
41217ec681f3Smrg    if (tx->indirect_const_access) { /* vs only */
41227ec681f3Smrg        info->const_float_slots = device->max_vs_const_f;
41237ec681f3Smrg        tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
41247ec681f3Smrg    }
41257ec681f3Smrg
41267ec681f3Smrg    if (!info->swvp_on) {
41277ec681f3Smrg        info->const_used_size = sizeof(float[4]) * tx->num_slots;
41287ec681f3Smrg        if (tx->num_slots)
41297ec681f3Smrg            ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
41307ec681f3Smrg    } else {
41317ec681f3Smrg         ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
41327ec681f3Smrg         ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
41337ec681f3Smrg         ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
41347ec681f3Smrg         ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
41357ec681f3Smrg    }
41367ec681f3Smrg
41377ec681f3Smrg    if (info->process_vertices)
41387ec681f3Smrg        ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
41397ec681f3Smrg
41407ec681f3Smrg    if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
41417ec681f3Smrg        const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
41427ec681f3Smrg        tgsi_dump(toks, 0);
41437ec681f3Smrg        ureg_free_tokens(toks);
41447ec681f3Smrg    }
41457ec681f3Smrg
41467ec681f3Smrg    if (info->process_vertices) {
41477ec681f3Smrg        NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
41487ec681f3Smrg                                                    tx->output_info,
41497ec681f3Smrg                                                    tx->num_outputs,
41507ec681f3Smrg                                                    &(info->so));
41517ec681f3Smrg        info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
41527ec681f3Smrg    } else
41537ec681f3Smrg        info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
41547ec681f3Smrg    if (!info->cso) {
41557ec681f3Smrg        hr = D3DERR_DRIVERINTERNALERROR;
41567ec681f3Smrg        FREE(info->lconstf.data);
41577ec681f3Smrg        FREE(info->lconstf.ranges);
41587ec681f3Smrg        goto out;
41597ec681f3Smrg    }
41607ec681f3Smrg
41617ec681f3Smrg    info->const_ranges = const_ranges;
41627ec681f3Smrg    const_ranges = NULL;
41637ec681f3Smrg    info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
41647ec681f3Smrgout:
41657ec681f3Smrg    if (const_ranges)
41667ec681f3Smrg        FREE(const_ranges);
41677ec681f3Smrg    tx_dtor(tx);
41687ec681f3Smrg    return hr;
41697ec681f3Smrg}
4170