17ec681f3Smrg 27ec681f3Smrg/* FF is big and ugly so feel free to write lines as long as you like. 37ec681f3Smrg * Aieeeeeeeee ! 47ec681f3Smrg * 57ec681f3Smrg * Let me make that clearer: 67ec681f3Smrg * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!! 77ec681f3Smrg */ 87ec681f3Smrg 97ec681f3Smrg#include "device9.h" 107ec681f3Smrg#include "basetexture9.h" 117ec681f3Smrg#include "vertexdeclaration9.h" 127ec681f3Smrg#include "vertexshader9.h" 137ec681f3Smrg#include "pixelshader9.h" 147ec681f3Smrg#include "nine_ff.h" 157ec681f3Smrg#include "nine_defines.h" 167ec681f3Smrg#include "nine_helpers.h" 177ec681f3Smrg#include "nine_pipe.h" 187ec681f3Smrg#include "nine_dump.h" 197ec681f3Smrg 207ec681f3Smrg#include "pipe/p_context.h" 217ec681f3Smrg#include "tgsi/tgsi_ureg.h" 227ec681f3Smrg#include "tgsi/tgsi_dump.h" 237ec681f3Smrg#include "util/u_box.h" 247ec681f3Smrg#include "util/u_hash_table.h" 257ec681f3Smrg#include "util/u_upload_mgr.h" 267ec681f3Smrg 277ec681f3Smrg#define DBG_CHANNEL DBG_FF 287ec681f3Smrg 297ec681f3Smrg#define NINE_FF_NUM_VS_CONST 196 307ec681f3Smrg#define NINE_FF_NUM_PS_CONST 24 317ec681f3Smrg 327ec681f3Smrgstruct fvec4 337ec681f3Smrg{ 347ec681f3Smrg float x, y, z, w; 357ec681f3Smrg}; 367ec681f3Smrg 377ec681f3Smrgstruct nine_ff_vs_key 387ec681f3Smrg{ 397ec681f3Smrg union { 407ec681f3Smrg struct { 417ec681f3Smrg uint32_t position_t : 1; 427ec681f3Smrg uint32_t lighting : 1; 437ec681f3Smrg uint32_t darkness : 1; /* lighting enabled but no active lights */ 447ec681f3Smrg uint32_t localviewer : 1; 457ec681f3Smrg uint32_t vertexpointsize : 1; 467ec681f3Smrg uint32_t pointscale : 1; 477ec681f3Smrg uint32_t vertexblend : 3; 487ec681f3Smrg uint32_t vertexblend_indexed : 1; 497ec681f3Smrg uint32_t vertextween : 1; 507ec681f3Smrg uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */ 517ec681f3Smrg uint32_t mtl_ambient : 2; 527ec681f3Smrg uint32_t mtl_specular : 2; 537ec681f3Smrg uint32_t mtl_emissive : 2; 547ec681f3Smrg uint32_t fog_mode : 2; 557ec681f3Smrg uint32_t fog_range : 1; 567ec681f3Smrg uint32_t color0in_one : 1; 577ec681f3Smrg uint32_t color1in_zero : 1; 587ec681f3Smrg uint32_t has_normal : 1; 597ec681f3Smrg uint32_t fog : 1; 607ec681f3Smrg uint32_t normalizenormals : 1; 617ec681f3Smrg uint32_t ucp : 1; 627ec681f3Smrg uint32_t pad1 : 4; 637ec681f3Smrg uint32_t tc_dim_input: 16; /* 8 * 2 bits */ 647ec681f3Smrg uint32_t pad2 : 16; 657ec681f3Smrg uint32_t tc_dim_output: 24; /* 8 * 3 bits */ 667ec681f3Smrg uint32_t pad3 : 8; 677ec681f3Smrg uint32_t tc_gen : 24; /* 8 * 3 bits */ 687ec681f3Smrg uint32_t pad4 : 8; 697ec681f3Smrg uint32_t tc_idx : 24; 707ec681f3Smrg uint32_t pad5 : 8; 717ec681f3Smrg uint32_t passthrough; 727ec681f3Smrg }; 737ec681f3Smrg uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ 747ec681f3Smrg uint32_t value32[6]; 757ec681f3Smrg }; 767ec681f3Smrg}; 777ec681f3Smrg 787ec681f3Smrg/* Texture stage state: 797ec681f3Smrg * 807ec681f3Smrg * COLOROP D3DTOP 5 bit 817ec681f3Smrg * ALPHAOP D3DTOP 5 bit 827ec681f3Smrg * COLORARG0 D3DTA 3 bit 837ec681f3Smrg * COLORARG1 D3DTA 3 bit 847ec681f3Smrg * COLORARG2 D3DTA 3 bit 857ec681f3Smrg * ALPHAARG0 D3DTA 3 bit 867ec681f3Smrg * ALPHAARG1 D3DTA 3 bit 877ec681f3Smrg * ALPHAARG2 D3DTA 3 bit 887ec681f3Smrg * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1) 897ec681f3Smrg * TEXCOORDINDEX 0 - 7 3 bit 907ec681f3Smrg * =========================== 917ec681f3Smrg * 32 bit per stage 927ec681f3Smrg */ 937ec681f3Smrgstruct nine_ff_ps_key 947ec681f3Smrg{ 957ec681f3Smrg union { 967ec681f3Smrg struct { 977ec681f3Smrg struct { 987ec681f3Smrg uint32_t colorop : 5; 997ec681f3Smrg uint32_t alphaop : 5; 1007ec681f3Smrg uint32_t colorarg0 : 3; 1017ec681f3Smrg uint32_t colorarg1 : 3; 1027ec681f3Smrg uint32_t colorarg2 : 3; 1037ec681f3Smrg uint32_t alphaarg0 : 3; 1047ec681f3Smrg uint32_t alphaarg1 : 3; 1057ec681f3Smrg uint32_t alphaarg2 : 3; 1067ec681f3Smrg uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ 1077ec681f3Smrg uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ 1087ec681f3Smrg uint32_t pad : 1; 1097ec681f3Smrg /* that's 32 bit exactly */ 1107ec681f3Smrg } ts[8]; 1117ec681f3Smrg uint32_t projected : 16; 1127ec681f3Smrg uint32_t fog : 1; /* for vFog coming from VS */ 1137ec681f3Smrg uint32_t fog_mode : 2; 1147ec681f3Smrg uint32_t fog_source : 1; /* 0: Z, 1: W */ 1157ec681f3Smrg uint32_t specular : 1; 1167ec681f3Smrg uint32_t pad1 : 11; /* 9 32-bit words with this */ 1177ec681f3Smrg uint8_t colorarg_b4[3]; 1187ec681f3Smrg uint8_t colorarg_b5[3]; 1197ec681f3Smrg uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ 1207ec681f3Smrg uint8_t pad2[3]; 1217ec681f3Smrg }; 1227ec681f3Smrg uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ 1237ec681f3Smrg uint32_t value32[12]; 1247ec681f3Smrg }; 1257ec681f3Smrg}; 1267ec681f3Smrg 1277ec681f3Smrgstatic uint32_t nine_ff_vs_key_hash(const void *key) 1287ec681f3Smrg{ 1297ec681f3Smrg const struct nine_ff_vs_key *vs = key; 1307ec681f3Smrg unsigned i; 1317ec681f3Smrg uint32_t hash = vs->value32[0]; 1327ec681f3Smrg for (i = 1; i < ARRAY_SIZE(vs->value32); ++i) 1337ec681f3Smrg hash ^= vs->value32[i]; 1347ec681f3Smrg return hash; 1357ec681f3Smrg} 1367ec681f3Smrgstatic bool nine_ff_vs_key_comp(const void *key1, const void *key2) 1377ec681f3Smrg{ 1387ec681f3Smrg struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1; 1397ec681f3Smrg struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2; 1407ec681f3Smrg 1417ec681f3Smrg return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 1427ec681f3Smrg} 1437ec681f3Smrgstatic uint32_t nine_ff_ps_key_hash(const void *key) 1447ec681f3Smrg{ 1457ec681f3Smrg const struct nine_ff_ps_key *ps = key; 1467ec681f3Smrg unsigned i; 1477ec681f3Smrg uint32_t hash = ps->value32[0]; 1487ec681f3Smrg for (i = 1; i < ARRAY_SIZE(ps->value32); ++i) 1497ec681f3Smrg hash ^= ps->value32[i]; 1507ec681f3Smrg return hash; 1517ec681f3Smrg} 1527ec681f3Smrgstatic bool nine_ff_ps_key_comp(const void *key1, const void *key2) 1537ec681f3Smrg{ 1547ec681f3Smrg struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1; 1557ec681f3Smrg struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2; 1567ec681f3Smrg 1577ec681f3Smrg return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 1587ec681f3Smrg} 1597ec681f3Smrgstatic uint32_t nine_ff_fvf_key_hash(const void *key) 1607ec681f3Smrg{ 1617ec681f3Smrg return *(DWORD *)key; 1627ec681f3Smrg} 1637ec681f3Smrgstatic bool nine_ff_fvf_key_comp(const void *key1, const void *key2) 1647ec681f3Smrg{ 1657ec681f3Smrg return *(DWORD *)key1 == *(DWORD *)key2; 1667ec681f3Smrg} 1677ec681f3Smrg 1687ec681f3Smrgstatic void nine_ff_prune_vs(struct NineDevice9 *); 1697ec681f3Smrgstatic void nine_ff_prune_ps(struct NineDevice9 *); 1707ec681f3Smrg 1717ec681f3Smrgstatic void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) 1727ec681f3Smrg{ 1737ec681f3Smrg if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { 1747ec681f3Smrg const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL); 1757ec681f3Smrg tgsi_dump(toks, 0); 1767ec681f3Smrg ureg_free_tokens(toks); 1777ec681f3Smrg } 1787ec681f3Smrg} 1797ec681f3Smrg 1807ec681f3Smrg#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X) 1817ec681f3Smrg#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y) 1827ec681f3Smrg#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z) 1837ec681f3Smrg#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W) 1847ec681f3Smrg 1857ec681f3Smrg#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X) 1867ec681f3Smrg#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y) 1877ec681f3Smrg#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z) 1887ec681f3Smrg#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W) 1897ec681f3Smrg 1907ec681f3Smrg#define _XYZW(r) (r) 1917ec681f3Smrg 1927ec681f3Smrg/* AL should contain base address of lights table. */ 1937ec681f3Smrg#define LIGHT_CONST(i) \ 1947ec681f3Smrg ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL)) 1957ec681f3Smrg 1967ec681f3Smrg#define MATERIAL_CONST(i) \ 1977ec681f3Smrg ureg_DECL_constant(ureg, 19 + (i)) 1987ec681f3Smrg 1997ec681f3Smrg#define _CONST(n) ureg_DECL_constant(ureg, n) 2007ec681f3Smrg 2017ec681f3Smrg/* VS FF constants layout: 2027ec681f3Smrg * 2037ec681f3Smrg * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION 2047ec681f3Smrg * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW 2057ec681f3Smrg * CONST[ 8..11] D3DTS_PROJECTION 2067ec681f3Smrg * CONST[12..15] D3DTS_VIEW^(-1) 2077ec681f3Smrg * CONST[16..18] Normal matrix 2087ec681f3Smrg * 2097ec681f3Smrg * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient 2107ec681f3Smrg * CONST[20] MATERIAL.Diffuse 2117ec681f3Smrg * CONST[21] MATERIAL.Ambient 2127ec681f3Smrg * CONST[22] MATERIAL.Specular 2137ec681f3Smrg * CONST[23].x___ MATERIAL.Power 2147ec681f3Smrg * CONST[24] MATERIAL.Emissive 2157ec681f3Smrg * CONST[25] RS.Ambient 2167ec681f3Smrg * 2177ec681f3Smrg * CONST[26].x___ RS.PointSizeMin 2187ec681f3Smrg * CONST[26]._y__ RS.PointSizeMax 2197ec681f3Smrg * CONST[26].__z_ RS.PointSize 2207ec681f3Smrg * CONST[26].___w RS.PointScaleA 2217ec681f3Smrg * CONST[27].x___ RS.PointScaleB 2227ec681f3Smrg * CONST[27]._y__ RS.PointScaleC 2237ec681f3Smrg * 2247ec681f3Smrg * CONST[28].x___ RS.FogEnd 2257ec681f3Smrg * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 2267ec681f3Smrg * CONST[28].__z_ RS.FogDensity 2277ec681f3Smrg 2287ec681f3Smrg * CONST[30].x___ TWEENFACTOR 2297ec681f3Smrg * 2307ec681f3Smrg * CONST[32].x___ LIGHT[0].Type 2317ec681f3Smrg * CONST[32]._yzw LIGHT[0].Attenuation0,1,2 2327ec681f3Smrg * CONST[33] LIGHT[0].Diffuse 2337ec681f3Smrg * CONST[34] LIGHT[0].Specular 2347ec681f3Smrg * CONST[35] LIGHT[0].Ambient 2357ec681f3Smrg * CONST[36].xyz_ LIGHT[0].Position 2367ec681f3Smrg * CONST[36].___w LIGHT[0].Range 2377ec681f3Smrg * CONST[37].xyz_ LIGHT[0].Direction 2387ec681f3Smrg * CONST[37].___w LIGHT[0].Falloff 2397ec681f3Smrg * CONST[38].x___ cos(LIGHT[0].Theta / 2) 2407ec681f3Smrg * CONST[38]._y__ cos(LIGHT[0].Phi / 2) 2417ec681f3Smrg * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2)) 2427ec681f3Smrg * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights) 2437ec681f3Smrg * CONST[39].___w 1 if this is the last active light, 0 if not 2447ec681f3Smrg * CONST[40] LIGHT[1] 2457ec681f3Smrg * CONST[48] LIGHT[2] 2467ec681f3Smrg * CONST[56] LIGHT[3] 2477ec681f3Smrg * CONST[64] LIGHT[4] 2487ec681f3Smrg * CONST[72] LIGHT[5] 2497ec681f3Smrg * CONST[80] LIGHT[6] 2507ec681f3Smrg * CONST[88] LIGHT[7] 2517ec681f3Smrg * NOTE: no lighting code is generated if there are no active lights 2527ec681f3Smrg * 2537ec681f3Smrg * CONST[100].x___ Viewport 2/width 2547ec681f3Smrg * CONST[100]._y__ Viewport 2/height 2557ec681f3Smrg * CONST[100].__z_ Viewport 1/(zmax - zmin) 2567ec681f3Smrg * CONST[100].___w Viewport width 2577ec681f3Smrg * CONST[101].x___ Viewport x0 2587ec681f3Smrg * CONST[101]._y__ Viewport y0 2597ec681f3Smrg * CONST[101].__z_ Viewport z0 2607ec681f3Smrg * 2617ec681f3Smrg * CONST[128..131] D3DTS_TEXTURE0 2627ec681f3Smrg * CONST[132..135] D3DTS_TEXTURE1 2637ec681f3Smrg * CONST[136..139] D3DTS_TEXTURE2 2647ec681f3Smrg * CONST[140..143] D3DTS_TEXTURE3 2657ec681f3Smrg * CONST[144..147] D3DTS_TEXTURE4 2667ec681f3Smrg * CONST[148..151] D3DTS_TEXTURE5 2677ec681f3Smrg * CONST[152..155] D3DTS_TEXTURE6 2687ec681f3Smrg * CONST[156..159] D3DTS_TEXTURE7 2697ec681f3Smrg * 2707ec681f3Smrg * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW 2717ec681f3Smrg * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW 2727ec681f3Smrg * ... 2737ec681f3Smrg * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW 2747ec681f3Smrg */ 2757ec681f3Smrgstruct vs_build_ctx 2767ec681f3Smrg{ 2777ec681f3Smrg struct ureg_program *ureg; 2787ec681f3Smrg const struct nine_ff_vs_key *key; 2797ec681f3Smrg 2807ec681f3Smrg uint16_t input[PIPE_MAX_ATTRIBS]; 2817ec681f3Smrg unsigned num_inputs; 2827ec681f3Smrg 2837ec681f3Smrg struct ureg_src aVtx; 2847ec681f3Smrg struct ureg_src aNrm; 2857ec681f3Smrg struct ureg_src aCol[2]; 2867ec681f3Smrg struct ureg_src aTex[8]; 2877ec681f3Smrg struct ureg_src aPsz; 2887ec681f3Smrg struct ureg_src aInd; 2897ec681f3Smrg struct ureg_src aWgt; 2907ec681f3Smrg 2917ec681f3Smrg struct ureg_src aVtx1; /* tweening */ 2927ec681f3Smrg struct ureg_src aNrm1; 2937ec681f3Smrg 2947ec681f3Smrg struct ureg_src mtlA; 2957ec681f3Smrg struct ureg_src mtlD; 2967ec681f3Smrg struct ureg_src mtlS; 2977ec681f3Smrg struct ureg_src mtlE; 2987ec681f3Smrg}; 2997ec681f3Smrg 3007ec681f3Smrgstatic inline unsigned 3017ec681f3Smrgget_texcoord_sn(struct pipe_screen *screen) 3027ec681f3Smrg{ 3037ec681f3Smrg if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD)) 3047ec681f3Smrg return TGSI_SEMANTIC_TEXCOORD; 3057ec681f3Smrg return TGSI_SEMANTIC_GENERIC; 3067ec681f3Smrg} 3077ec681f3Smrg 3087ec681f3Smrgstatic inline struct ureg_src 3097ec681f3Smrgbuild_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) 3107ec681f3Smrg{ 3117ec681f3Smrg const unsigned i = vs->num_inputs++; 3127ec681f3Smrg assert(i < PIPE_MAX_ATTRIBS); 3137ec681f3Smrg vs->input[i] = ndecl; 3147ec681f3Smrg return ureg_DECL_vs_input(vs->ureg, i); 3157ec681f3Smrg} 3167ec681f3Smrg 3177ec681f3Smrg/* NOTE: dst may alias src */ 3187ec681f3Smrgstatic inline void 3197ec681f3Smrgureg_normalize3(struct ureg_program *ureg, 3207ec681f3Smrg struct ureg_dst dst, struct ureg_src src) 3217ec681f3Smrg{ 3227ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 3237ec681f3Smrg struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 3247ec681f3Smrg 3257ec681f3Smrg ureg_DP3(ureg, tmp_x, src, src); 3267ec681f3Smrg ureg_RSQ(ureg, tmp_x, _X(tmp)); 3277ec681f3Smrg ureg_MUL(ureg, dst, src, _X(tmp)); 3287ec681f3Smrg ureg_release_temporary(ureg, tmp); 3297ec681f3Smrg} 3307ec681f3Smrg 3317ec681f3Smrgstatic void * 3327ec681f3Smrgnine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) 3337ec681f3Smrg{ 3347ec681f3Smrg const struct nine_ff_vs_key *key = vs->key; 3357ec681f3Smrg struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); 3367ec681f3Smrg struct ureg_dst oPos, oCol[2], oPsz, oFog; 3377ec681f3Smrg struct ureg_dst AR; 3387ec681f3Smrg unsigned i, c; 3397ec681f3Smrg unsigned label[32], l = 0; 3407ec681f3Smrg boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); 3417ec681f3Smrg boolean has_aNrm; 3427ec681f3Smrg boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp; 3437ec681f3Smrg const unsigned texcoord_sn = get_texcoord_sn(device->screen); 3447ec681f3Smrg 3457ec681f3Smrg vs->ureg = ureg; 3467ec681f3Smrg 3477ec681f3Smrg /* Check which inputs we should transform. */ 3487ec681f3Smrg for (i = 0; i < 8 * 3; i += 3) { 3497ec681f3Smrg switch ((key->tc_gen >> i) & 0x7) { 3507ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACENORMAL: 3517ec681f3Smrg need_aNrm = TRUE; 3527ec681f3Smrg break; 3537ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACEPOSITION: 3547ec681f3Smrg need_aVtx = TRUE; 3557ec681f3Smrg break; 3567ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 3577ec681f3Smrg need_aVtx = need_aNrm = TRUE; 3587ec681f3Smrg break; 3597ec681f3Smrg case NINED3DTSS_TCI_SPHEREMAP: 3607ec681f3Smrg need_aVtx = need_aNrm = TRUE; 3617ec681f3Smrg break; 3627ec681f3Smrg default: 3637ec681f3Smrg break; 3647ec681f3Smrg } 3657ec681f3Smrg } 3667ec681f3Smrg 3677ec681f3Smrg has_aNrm = need_aNrm && key->has_normal; 3687ec681f3Smrg 3697ec681f3Smrg /* Declare and record used inputs (needed for linkage with vertex format): 3707ec681f3Smrg * (texture coordinates handled later) 3717ec681f3Smrg */ 3727ec681f3Smrg vs->aVtx = build_vs_add_input(vs, 3737ec681f3Smrg key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); 3747ec681f3Smrg 3757ec681f3Smrg vs->aNrm = ureg_imm1f(ureg, 0.0f); 3767ec681f3Smrg if (has_aNrm) 3777ec681f3Smrg vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); 3787ec681f3Smrg 3797ec681f3Smrg vs->aCol[0] = ureg_imm1f(ureg, 1.0f); 3807ec681f3Smrg vs->aCol[1] = ureg_imm1f(ureg, 0.0f); 3817ec681f3Smrg 3827ec681f3Smrg if (key->lighting || key->darkness) { 3837ec681f3Smrg const unsigned mask = key->mtl_diffuse | key->mtl_specular | 3847ec681f3Smrg key->mtl_ambient | key->mtl_emissive; 3857ec681f3Smrg if ((mask & 0x1) && !key->color0in_one) 3867ec681f3Smrg vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 3877ec681f3Smrg if ((mask & 0x2) && !key->color1in_zero) 3887ec681f3Smrg vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 3897ec681f3Smrg 3907ec681f3Smrg vs->mtlD = MATERIAL_CONST(1); 3917ec681f3Smrg vs->mtlA = MATERIAL_CONST(2); 3927ec681f3Smrg vs->mtlS = MATERIAL_CONST(3); 3937ec681f3Smrg vs->mtlE = MATERIAL_CONST(5); 3947ec681f3Smrg if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else 3957ec681f3Smrg if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1]; 3967ec681f3Smrg if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else 3977ec681f3Smrg if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1]; 3987ec681f3Smrg if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else 3997ec681f3Smrg if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1]; 4007ec681f3Smrg if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else 4017ec681f3Smrg if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1]; 4027ec681f3Smrg } else { 4037ec681f3Smrg if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 4047ec681f3Smrg if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 4057ec681f3Smrg } 4067ec681f3Smrg 4077ec681f3Smrg if (key->vertexpointsize) 4087ec681f3Smrg vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); 4097ec681f3Smrg 4107ec681f3Smrg if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) 4117ec681f3Smrg vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); 4127ec681f3Smrg if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) 4137ec681f3Smrg vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); 4147ec681f3Smrg if (key->vertextween) { 4157ec681f3Smrg vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); 4167ec681f3Smrg vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1)); 4177ec681f3Smrg } 4187ec681f3Smrg 4197ec681f3Smrg /* Declare outputs: 4207ec681f3Smrg */ 4217ec681f3Smrg oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ 4227ec681f3Smrg oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); 4237ec681f3Smrg oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); 4247ec681f3Smrg if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 4257ec681f3Smrg oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16); 4267ec681f3Smrg oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); 4277ec681f3Smrg } 4287ec681f3Smrg 4297ec681f3Smrg if (key->vertexpointsize || key->pointscale) { 4307ec681f3Smrg oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, 4317ec681f3Smrg TGSI_WRITEMASK_X, 0, 1); 4327ec681f3Smrg oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); 4337ec681f3Smrg } 4347ec681f3Smrg 4357ec681f3Smrg if (key->lighting || key->vertexblend) 4367ec681f3Smrg AR = ureg_DECL_address(ureg); 4377ec681f3Smrg 4387ec681f3Smrg /* === Vertex transformation / vertex blending: 4397ec681f3Smrg */ 4407ec681f3Smrg 4417ec681f3Smrg if (key->position_t) { 4427ec681f3Smrg if (device->driver_caps.window_space_position_support) { 4437ec681f3Smrg ureg_MOV(ureg, oPos, vs->aVtx); 4447ec681f3Smrg } else { 4457ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 4467ec681f3Smrg /* vs->aVtx contains the coordinates buffer wise. 4477ec681f3Smrg * later in the pipeline, clipping, viewport and division 4487ec681f3Smrg * by w (rhw = 1/w) are going to be applied, so do the reverse 4497ec681f3Smrg * of these transformations (except clipping) to have the good 4507ec681f3Smrg * position at the end.*/ 4517ec681f3Smrg ureg_MOV(ureg, tmp, vs->aVtx); 4527ec681f3Smrg /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */ 4537ec681f3Smrg ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); 4547ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); 4557ec681f3Smrg ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 4567ec681f3Smrg /* Y needs to be reversed */ 4577ec681f3Smrg ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); 4587ec681f3Smrg /* inverse rhw */ 4597ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp)); 4607ec681f3Smrg /* multiply X, Y, Z by w */ 4617ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); 4627ec681f3Smrg ureg_MOV(ureg, oPos, ureg_src(tmp)); 4637ec681f3Smrg ureg_release_temporary(ureg, tmp); 4647ec681f3Smrg } 4657ec681f3Smrg } else if (key->vertexblend) { 4667ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 4677ec681f3Smrg struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 4687ec681f3Smrg struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 4697ec681f3Smrg struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 4707ec681f3Smrg struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); 4717ec681f3Smrg struct ureg_src cWM[4]; 4727ec681f3Smrg 4737ec681f3Smrg for (i = 160; i <= 195; ++i) 4747ec681f3Smrg ureg_DECL_constant(ureg, i); 4757ec681f3Smrg 4767ec681f3Smrg /* translate world matrix index to constant file index */ 4777ec681f3Smrg if (key->vertexblend_indexed) { 4787ec681f3Smrg ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f)); 4797ec681f3Smrg ureg_ARL(ureg, AR, ureg_src(tmp)); 4807ec681f3Smrg } 4817ec681f3Smrg 4827ec681f3Smrg ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 4837ec681f3Smrg ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 4847ec681f3Smrg ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); 4857ec681f3Smrg 4867ec681f3Smrg for (i = 0; i < key->vertexblend; ++i) { 4877ec681f3Smrg for (c = 0; c < 4; ++c) { 4887ec681f3Smrg cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0); 4897ec681f3Smrg if (key->vertexblend_indexed) 4907ec681f3Smrg cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); 4917ec681f3Smrg } 4927ec681f3Smrg 4937ec681f3Smrg /* multiply by WORLD(index) */ 4947ec681f3Smrg ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); 4957ec681f3Smrg ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); 4967ec681f3Smrg ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); 4977ec681f3Smrg ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); 4987ec681f3Smrg 4997ec681f3Smrg if (has_aNrm) { 5007ec681f3Smrg /* Note: the spec says the transpose of the inverse of the 5017ec681f3Smrg * WorldView matrices should be used, but all tests show 5027ec681f3Smrg * otherwise. 5037ec681f3Smrg * Only case unknown: D3DVBF_0WEIGHTS */ 5047ec681f3Smrg ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]); 5057ec681f3Smrg ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2)); 5067ec681f3Smrg ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2)); 5077ec681f3Smrg } 5087ec681f3Smrg 5097ec681f3Smrg if (i < (key->vertexblend - 1)) { 5107ec681f3Smrg /* accumulate weighted position value */ 5117ec681f3Smrg ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); 5127ec681f3Smrg if (has_aNrm) 5137ec681f3Smrg ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); 5147ec681f3Smrg /* subtract weighted position value for last value */ 5157ec681f3Smrg ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); 5167ec681f3Smrg } 5177ec681f3Smrg } 5187ec681f3Smrg 5197ec681f3Smrg /* the last weighted position is always 1 - sum_of_previous_weights */ 5207ec681f3Smrg ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); 5217ec681f3Smrg if (has_aNrm) 5227ec681f3Smrg ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst)); 5237ec681f3Smrg 5247ec681f3Smrg /* multiply by VIEW_PROJ */ 5257ec681f3Smrg ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); 5267ec681f3Smrg ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); 5277ec681f3Smrg ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); 5287ec681f3Smrg ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); 5297ec681f3Smrg 5307ec681f3Smrg if (need_aVtx) 5317ec681f3Smrg vs->aVtx = ureg_src(aVtx_dst); 5327ec681f3Smrg 5337ec681f3Smrg ureg_release_temporary(ureg, tmp); 5347ec681f3Smrg ureg_release_temporary(ureg, tmp2); 5357ec681f3Smrg ureg_release_temporary(ureg, sum_blendweights); 5367ec681f3Smrg if (!need_aVtx) 5377ec681f3Smrg ureg_release_temporary(ureg, aVtx_dst); 5387ec681f3Smrg 5397ec681f3Smrg if (has_aNrm) { 5407ec681f3Smrg if (key->normalizenormals) 5417ec681f3Smrg ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 5427ec681f3Smrg vs->aNrm = ureg_src(aNrm_dst); 5437ec681f3Smrg } else 5447ec681f3Smrg ureg_release_temporary(ureg, aNrm_dst); 5457ec681f3Smrg } else { 5467ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 5477ec681f3Smrg 5487ec681f3Smrg if (key->vertextween) { 5497ec681f3Smrg struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 5507ec681f3Smrg ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx); 5517ec681f3Smrg vs->aVtx = ureg_src(aVtx_dst); 5527ec681f3Smrg if (has_aNrm) { 5537ec681f3Smrg struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 5547ec681f3Smrg ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm); 5557ec681f3Smrg vs->aNrm = ureg_src(aNrm_dst); 5567ec681f3Smrg } 5577ec681f3Smrg } 5587ec681f3Smrg 5597ec681f3Smrg /* position = vertex * WORLD_VIEW_PROJ */ 5607ec681f3Smrg ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); 5617ec681f3Smrg ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); 5627ec681f3Smrg ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); 5637ec681f3Smrg ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); 5647ec681f3Smrg ureg_release_temporary(ureg, tmp); 5657ec681f3Smrg 5667ec681f3Smrg if (need_aVtx) { 5677ec681f3Smrg struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 5687ec681f3Smrg ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); 5697ec681f3Smrg ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); 5707ec681f3Smrg ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); 5717ec681f3Smrg ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); 5727ec681f3Smrg vs->aVtx = ureg_src(aVtx_dst); 5737ec681f3Smrg } 5747ec681f3Smrg if (has_aNrm) { 5757ec681f3Smrg struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 5767ec681f3Smrg ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); 5777ec681f3Smrg ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); 5787ec681f3Smrg ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst)); 5797ec681f3Smrg if (key->normalizenormals) 5807ec681f3Smrg ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 5817ec681f3Smrg vs->aNrm = ureg_src(aNrm_dst); 5827ec681f3Smrg } 5837ec681f3Smrg } 5847ec681f3Smrg 5857ec681f3Smrg /* === Process point size: 5867ec681f3Smrg */ 5877ec681f3Smrg if (key->vertexpointsize || key->pointscale) { 5887ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 5897ec681f3Smrg struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 5907ec681f3Smrg struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 5917ec681f3Smrg struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 5927ec681f3Smrg if (key->vertexpointsize) { 5937ec681f3Smrg struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 5947ec681f3Smrg ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); 5957ec681f3Smrg ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); 5967ec681f3Smrg } else { 5977ec681f3Smrg struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 5987ec681f3Smrg ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); 5997ec681f3Smrg } 6007ec681f3Smrg 6017ec681f3Smrg if (key->pointscale) { 6027ec681f3Smrg struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 6037ec681f3Smrg struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); 6047ec681f3Smrg 6057ec681f3Smrg ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 6067ec681f3Smrg ureg_RSQ(ureg, tmp_y, _X(tmp)); 6077ec681f3Smrg ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); 6087ec681f3Smrg ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); 6097ec681f3Smrg ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); 6107ec681f3Smrg ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); 6117ec681f3Smrg ureg_RSQ(ureg, tmp_x, _X(tmp)); 6127ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); 6137ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); 6147ec681f3Smrg ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); 6157ec681f3Smrg ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); 6167ec681f3Smrg } 6177ec681f3Smrg 6187ec681f3Smrg ureg_MOV(ureg, oPsz, _Z(tmp)); 6197ec681f3Smrg ureg_release_temporary(ureg, tmp); 6207ec681f3Smrg } 6217ec681f3Smrg 6227ec681f3Smrg for (i = 0; i < 8; ++i) { 6237ec681f3Smrg struct ureg_dst tmp, tmp_x, tmp2; 6247ec681f3Smrg struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed; 6257ec681f3Smrg unsigned c, writemask; 6267ec681f3Smrg const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; 6277ec681f3Smrg const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; 6287ec681f3Smrg unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); 6297ec681f3Smrg const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; 6307ec681f3Smrg 6317ec681f3Smrg /* No texture output of index s */ 6327ec681f3Smrg if (tci == NINED3DTSS_TCI_DISABLE) 6337ec681f3Smrg continue; 6347ec681f3Smrg oTex = ureg_DECL_output(ureg, texcoord_sn, i); 6357ec681f3Smrg tmp = ureg_DECL_temporary(ureg); 6367ec681f3Smrg tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 6377ec681f3Smrg input_coord = ureg_DECL_temporary(ureg); 6387ec681f3Smrg transformed = ureg_DECL_temporary(ureg); 6397ec681f3Smrg 6407ec681f3Smrg /* Get the coordinate */ 6417ec681f3Smrg switch (tci) { 6427ec681f3Smrg case NINED3DTSS_TCI_PASSTHRU: 6437ec681f3Smrg /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * 6447ec681f3Smrg * Else the idx is used only to determine wrapping mode. */ 6457ec681f3Smrg vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); 6467ec681f3Smrg ureg_MOV(ureg, input_coord, vs->aTex[idx]); 6477ec681f3Smrg break; 6487ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACENORMAL: 6497ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); 6507ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 6517ec681f3Smrg dim_input = 4; 6527ec681f3Smrg break; 6537ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACEPOSITION: 6547ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 6557ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 6567ec681f3Smrg dim_input = 4; 6577ec681f3Smrg break; 6587ec681f3Smrg case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 6597ec681f3Smrg tmp.WriteMask = TGSI_WRITEMASK_XYZ; 6607ec681f3Smrg aVtx_normed = ureg_DECL_temporary(ureg); 6617ec681f3Smrg ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 6627ec681f3Smrg ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 6637ec681f3Smrg ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 6647ec681f3Smrg ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 6657ec681f3Smrg ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 6667ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 6677ec681f3Smrg ureg_release_temporary(ureg, aVtx_normed); 6687ec681f3Smrg dim_input = 4; 6697ec681f3Smrg tmp.WriteMask = TGSI_WRITEMASK_XYZW; 6707ec681f3Smrg break; 6717ec681f3Smrg case NINED3DTSS_TCI_SPHEREMAP: 6727ec681f3Smrg /* Implement the formula of GL_SPHERE_MAP */ 6737ec681f3Smrg tmp.WriteMask = TGSI_WRITEMASK_XYZ; 6747ec681f3Smrg aVtx_normed = ureg_DECL_temporary(ureg); 6757ec681f3Smrg tmp2 = ureg_DECL_temporary(ureg); 6767ec681f3Smrg ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 6777ec681f3Smrg ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 6787ec681f3Smrg ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 6797ec681f3Smrg ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 6807ec681f3Smrg ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 6817ec681f3Smrg /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ 6827ec681f3Smrg ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); 6837ec681f3Smrg ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); 6847ec681f3Smrg ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2)); 6857ec681f3Smrg ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2)); 6867ec681f3Smrg ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f)); 6877ec681f3Smrg /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2) 6887ec681f3Smrg * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */ 6897ec681f3Smrg ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2)); 6907ec681f3Smrg ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); 6917ec681f3Smrg ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 6927ec681f3Smrg ureg_release_temporary(ureg, aVtx_normed); 6937ec681f3Smrg ureg_release_temporary(ureg, tmp2); 6947ec681f3Smrg dim_input = 4; 6957ec681f3Smrg tmp.WriteMask = TGSI_WRITEMASK_XYZW; 6967ec681f3Smrg break; 6977ec681f3Smrg default: 6987ec681f3Smrg assert(0); 6997ec681f3Smrg break; 7007ec681f3Smrg } 7017ec681f3Smrg 7027ec681f3Smrg /* Apply the transformation */ 7037ec681f3Smrg /* dim_output == 0 => do not transform the components. 7047ec681f3Smrg * XYZRHW also disables transformation */ 7057ec681f3Smrg if (!dim_output || key->position_t) { 7067ec681f3Smrg ureg_release_temporary(ureg, transformed); 7077ec681f3Smrg transformed = input_coord; 7087ec681f3Smrg writemask = TGSI_WRITEMASK_XYZW; 7097ec681f3Smrg } else { 7107ec681f3Smrg for (c = 0; c < dim_output; c++) { 7117ec681f3Smrg t = ureg_writemask(transformed, 1 << c); 7127ec681f3Smrg switch (dim_input) { 7137ec681f3Smrg /* dim_input = 1 2 3: -> we add trailing 1 to input*/ 7147ec681f3Smrg case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); 7157ec681f3Smrg break; 7167ec681f3Smrg case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 7177ec681f3Smrg ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); 7187ec681f3Smrg break; 7197ec681f3Smrg case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 7207ec681f3Smrg ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); 7217ec681f3Smrg break; 7227ec681f3Smrg case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; 7237ec681f3Smrg default: 7247ec681f3Smrg assert(0); 7257ec681f3Smrg } 7267ec681f3Smrg } 7277ec681f3Smrg writemask = (1 << dim_output) - 1; 7287ec681f3Smrg ureg_release_temporary(ureg, input_coord); 7297ec681f3Smrg } 7307ec681f3Smrg 7317ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); 7327ec681f3Smrg ureg_release_temporary(ureg, transformed); 7337ec681f3Smrg ureg_release_temporary(ureg, tmp); 7347ec681f3Smrg } 7357ec681f3Smrg 7367ec681f3Smrg /* === Lighting: 7377ec681f3Smrg * 7387ec681f3Smrg * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation. 7397ec681f3Smrg * POINT: Finite distance to scene, divergent rays, isotropic, attenuation. 7407ec681f3Smrg * SPOT: Finite distance, divergent rays, angular dependence, attenuation. 7417ec681f3Smrg * 7427ec681f3Smrg * vec3 normal = normalize(in.Normal * NormalMatrix); 7437ec681f3Smrg * vec3 hitDir = light.direction; 7447ec681f3Smrg * float atten = 1.0; 7457ec681f3Smrg * 7467ec681f3Smrg * if (light.type != DIRECTIONAL) 7477ec681f3Smrg * { 7487ec681f3Smrg * vec3 hitVec = light.position - eyeVertex; 7497ec681f3Smrg * float d = length(hitVec); 7507ec681f3Smrg * hitDir = hitVec / d; 7517ec681f3Smrg * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0); 7527ec681f3Smrg * } 7537ec681f3Smrg * 7547ec681f3Smrg * if (light.type == SPOTLIGHT) 7557ec681f3Smrg * { 7567ec681f3Smrg * float rho = dp3(-hitVec, light.direction); 7577ec681f3Smrg * if (rho < cos(light.phi / 2)) 7587ec681f3Smrg * atten = 0; 7597ec681f3Smrg * if (rho < cos(light.theta / 2)) 7607ec681f3Smrg * atten *= pow(some_func(rho), light.falloff); 7617ec681f3Smrg * } 7627ec681f3Smrg * 7637ec681f3Smrg * float nDotHit = dp3_sat(normal, hitVec); 7647ec681f3Smrg * float powFact = 0.0; 7657ec681f3Smrg * 7667ec681f3Smrg * if (nDotHit > 0.0) 7677ec681f3Smrg * { 7687ec681f3Smrg * vec3 midVec = normalize(hitDir + eye); 7697ec681f3Smrg * float nDotMid = dp3_sat(normal, midVec); 7707ec681f3Smrg * pFact = pow(nDotMid, material.power); 7717ec681f3Smrg * } 7727ec681f3Smrg * 7737ec681f3Smrg * ambient += light.ambient * atten; 7747ec681f3Smrg * diffuse += light.diffuse * atten * nDotHit; 7757ec681f3Smrg * specular += light.specular * atten * powFact; 7767ec681f3Smrg */ 7777ec681f3Smrg if (key->lighting) { 7787ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 7797ec681f3Smrg struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 7807ec681f3Smrg struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 7817ec681f3Smrg struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 7827ec681f3Smrg struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 7837ec681f3Smrg struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 7847ec681f3Smrg struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 7857ec681f3Smrg 7867ec681f3Smrg struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 7877ec681f3Smrg 7887ec681f3Smrg struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); 7897ec681f3Smrg 7907ec681f3Smrg /* Light.*.Alpha is not used. */ 7917ec681f3Smrg struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 7927ec681f3Smrg struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 7937ec681f3Smrg struct ureg_dst rS = ureg_DECL_temporary(ureg); 7947ec681f3Smrg 7957ec681f3Smrg struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); 7967ec681f3Smrg 7977ec681f3Smrg struct ureg_src cLKind = _XXXX(LIGHT_CONST(0)); 7987ec681f3Smrg struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0)); 7997ec681f3Smrg struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0)); 8007ec681f3Smrg struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0)); 8017ec681f3Smrg struct ureg_src cLColD = _XYZW(LIGHT_CONST(1)); 8027ec681f3Smrg struct ureg_src cLColS = _XYZW(LIGHT_CONST(2)); 8037ec681f3Smrg struct ureg_src cLColA = _XYZW(LIGHT_CONST(3)); 8047ec681f3Smrg struct ureg_src cLPos = _XYZW(LIGHT_CONST(4)); 8057ec681f3Smrg struct ureg_src cLRng = _WWWW(LIGHT_CONST(4)); 8067ec681f3Smrg struct ureg_src cLDir = _XYZW(LIGHT_CONST(5)); 8077ec681f3Smrg struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5)); 8087ec681f3Smrg struct ureg_src cLTht = _XXXX(LIGHT_CONST(6)); 8097ec681f3Smrg struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6)); 8107ec681f3Smrg struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6)); 8117ec681f3Smrg struct ureg_src cLLast = _WWWW(LIGHT_CONST(7)); 8127ec681f3Smrg 8137ec681f3Smrg const unsigned loop_label = l++; 8147ec681f3Smrg 8157ec681f3Smrg /* Declare all light constants to allow indirect adressing */ 8167ec681f3Smrg for (i = 32; i < 96; i++) 8177ec681f3Smrg ureg_DECL_constant(ureg, i); 8187ec681f3Smrg 8197ec681f3Smrg ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ 8207ec681f3Smrg ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); 8217ec681f3Smrg ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); 8227ec681f3Smrg ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f)); 8237ec681f3Smrg 8247ec681f3Smrg /* loop management */ 8257ec681f3Smrg ureg_BGNLOOP(ureg, &label[loop_label]); 8267ec681f3Smrg ureg_ARL(ureg, AL, _W(rCtr)); 8277ec681f3Smrg 8287ec681f3Smrg /* if (not DIRECTIONAL light): */ 8297ec681f3Smrg ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL)); 8307ec681f3Smrg ureg_MOV(ureg, rHit, ureg_negate(cLDir)); 8317ec681f3Smrg ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f)); 8327ec681f3Smrg ureg_IF(ureg, _X(tmp), &label[l++]); 8337ec681f3Smrg { 8347ec681f3Smrg /* hitDir = light.position - eyeVtx 8357ec681f3Smrg * d = length(hitDir) 8367ec681f3Smrg */ 8377ec681f3Smrg ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); 8387ec681f3Smrg ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); 8397ec681f3Smrg ureg_RSQ(ureg, tmp_y, _X(tmp)); 8407ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ 8417ec681f3Smrg 8427ec681f3Smrg /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */ 8437ec681f3Smrg ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1); 8447ec681f3Smrg ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0); 8457ec681f3Smrg ureg_RCP(ureg, rAtt, _W(rAtt)); 8467ec681f3Smrg /* cut-off if distance exceeds Light.Range */ 8477ec681f3Smrg ureg_SLT(ureg, tmp_x, _X(tmp), cLRng); 8487ec681f3Smrg ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 8497ec681f3Smrg } 8507ec681f3Smrg ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 8517ec681f3Smrg ureg_ENDIF(ureg); 8527ec681f3Smrg 8537ec681f3Smrg /* normalize hitDir */ 8547ec681f3Smrg ureg_normalize3(ureg, rHit, ureg_src(rHit)); 8557ec681f3Smrg 8567ec681f3Smrg /* if (SPOT light) */ 8577ec681f3Smrg ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); 8587ec681f3Smrg ureg_IF(ureg, _X(tmp), &label[l++]); 8597ec681f3Smrg { 8607ec681f3Smrg /* rho = dp3(-hitDir, light.spotDir) 8617ec681f3Smrg * 8627ec681f3Smrg * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi 8637ec681f3Smrg * spotAtt = 1 8647ec681f3Smrg * else 8657ec681f3Smrg * if (rho <= light.cphi2) 8667ec681f3Smrg * spotAtt = 0 8677ec681f3Smrg * else 8687ec681f3Smrg * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff 8697ec681f3Smrg */ 8707ec681f3Smrg ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ 8717ec681f3Smrg ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); 8727ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); 8737ec681f3Smrg ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ 8747ec681f3Smrg ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ 8757ec681f3Smrg ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */ 8767ec681f3Smrg ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp)); 8777ec681f3Smrg ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 8787ec681f3Smrg } 8797ec681f3Smrg ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 8807ec681f3Smrg ureg_ENDIF(ureg); 8817ec681f3Smrg 8827ec681f3Smrg /* directional factors, let's not use LIT because of clarity */ 8837ec681f3Smrg 8847ec681f3Smrg if (has_aNrm) { 8857ec681f3Smrg if (key->localviewer) { 8867ec681f3Smrg ureg_normalize3(ureg, rMid, vs->aVtx); 8877ec681f3Smrg ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); 8887ec681f3Smrg } else { 8897ec681f3Smrg ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); 8907ec681f3Smrg } 8917ec681f3Smrg ureg_normalize3(ureg, rMid, ureg_src(rMid)); 8927ec681f3Smrg ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); 8937ec681f3Smrg ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 8947ec681f3Smrg ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp)); 8957ec681f3Smrg /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0). 8967ec681f3Smrg * For front facing, it is more restrictive than test (dp3(normal,mid) > 0). 8977ec681f3Smrg * No tests were made for backfacing, so add the two conditions */ 8987ec681f3Smrg ureg_IF(ureg, _Z(tmp), &label[l++]); 8997ec681f3Smrg { 9007ec681f3Smrg ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 9017ec681f3Smrg ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); 9027ec681f3Smrg ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */ 9037ec681f3Smrg ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */ 9047ec681f3Smrg } 9057ec681f3Smrg ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 9067ec681f3Smrg ureg_ENDIF(ureg); 9077ec681f3Smrg 9087ec681f3Smrg ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ 9097ec681f3Smrg ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */ 9107ec681f3Smrg } 9117ec681f3Smrg 9127ec681f3Smrg ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */ 9137ec681f3Smrg 9147ec681f3Smrg /* break if this was the last light */ 9157ec681f3Smrg ureg_IF(ureg, cLLast, &label[l++]); 9167ec681f3Smrg ureg_BRK(ureg); 9177ec681f3Smrg ureg_ENDIF(ureg); 9187ec681f3Smrg ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 9197ec681f3Smrg 9207ec681f3Smrg ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f)); 9217ec681f3Smrg ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg)); 9227ec681f3Smrg ureg_ENDLOOP(ureg, &label[loop_label]); 9237ec681f3Smrg 9247ec681f3Smrg /* Apply to material: 9257ec681f3Smrg * 9267ec681f3Smrg * oCol[0] = (material.emissive + material.ambient * rs.ambient) + 9277ec681f3Smrg * material.ambient * ambient + 9287ec681f3Smrg * material.diffuse * diffuse + 9297ec681f3Smrg * oCol[1] = material.specular * specular; 9307ec681f3Smrg */ 9317ec681f3Smrg if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 9327ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19)); 9337ec681f3Smrg else { 9347ec681f3Smrg ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25)); 9357ec681f3Smrg ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); 9367ec681f3Smrg } 9377ec681f3Smrg 9387ec681f3Smrg ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp)); 9397ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 9407ec681f3Smrg ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); 9417ec681f3Smrg ureg_release_temporary(ureg, rAtt); 9427ec681f3Smrg ureg_release_temporary(ureg, rHit); 9437ec681f3Smrg ureg_release_temporary(ureg, rMid); 9447ec681f3Smrg ureg_release_temporary(ureg, rCtr); 9457ec681f3Smrg ureg_release_temporary(ureg, rD); 9467ec681f3Smrg ureg_release_temporary(ureg, rA); 9477ec681f3Smrg ureg_release_temporary(ureg, rS); 9487ec681f3Smrg ureg_release_temporary(ureg, rAtt); 9497ec681f3Smrg ureg_release_temporary(ureg, tmp); 9507ec681f3Smrg } else 9517ec681f3Smrg /* COLOR */ 9527ec681f3Smrg if (key->darkness) { 9537ec681f3Smrg if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 9547ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19)); 9557ec681f3Smrg else 9567ec681f3Smrg ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); 9577ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 9587ec681f3Smrg ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f)); 9597ec681f3Smrg } else { 9607ec681f3Smrg ureg_MOV(ureg, oCol[0], vs->aCol[0]); 9617ec681f3Smrg ureg_MOV(ureg, oCol[1], vs->aCol[1]); 9627ec681f3Smrg } 9637ec681f3Smrg 9647ec681f3Smrg /* === Process fog. 9657ec681f3Smrg * 9667ec681f3Smrg * exp(x) = ex2(log2(e) * x) 9677ec681f3Smrg */ 9687ec681f3Smrg if (key->fog_mode) { 9697ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 9707ec681f3Smrg struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 9717ec681f3Smrg struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 9727ec681f3Smrg if (key->fog_range) { 9737ec681f3Smrg ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 9747ec681f3Smrg ureg_RSQ(ureg, tmp_z, _X(tmp)); 9757ec681f3Smrg ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); 9767ec681f3Smrg } else { 9777ec681f3Smrg ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); 9787ec681f3Smrg } 9797ec681f3Smrg 9807ec681f3Smrg if (key->fog_mode == D3DFOG_EXP) { 9817ec681f3Smrg ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 9827ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 9837ec681f3Smrg ureg_EX2(ureg, tmp_x, _X(tmp)); 9847ec681f3Smrg } else 9857ec681f3Smrg if (key->fog_mode == D3DFOG_EXP2) { 9867ec681f3Smrg ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 9877ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp)); 9887ec681f3Smrg ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 9897ec681f3Smrg ureg_EX2(ureg, tmp_x, _X(tmp)); 9907ec681f3Smrg } else 9917ec681f3Smrg if (key->fog_mode == D3DFOG_LINEAR) { 9927ec681f3Smrg ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); 9937ec681f3Smrg ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); 9947ec681f3Smrg } 9957ec681f3Smrg ureg_MOV(ureg, oFog, _X(tmp)); 9967ec681f3Smrg ureg_release_temporary(ureg, tmp); 9977ec681f3Smrg } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { 9987ec681f3Smrg ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); 9997ec681f3Smrg } 10007ec681f3Smrg 10017ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { 10027ec681f3Smrg struct ureg_src input; 10037ec681f3Smrg struct ureg_dst output; 10047ec681f3Smrg input = vs->aWgt; 10057ec681f3Smrg output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); 10067ec681f3Smrg ureg_MOV(ureg, output, input); 10077ec681f3Smrg } 10087ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { 10097ec681f3Smrg struct ureg_src input; 10107ec681f3Smrg struct ureg_dst output; 10117ec681f3Smrg input = vs->aInd; 10127ec681f3Smrg output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); 10137ec681f3Smrg ureg_MOV(ureg, output, input); 10147ec681f3Smrg } 10157ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { 10167ec681f3Smrg struct ureg_src input; 10177ec681f3Smrg struct ureg_dst output; 10187ec681f3Smrg input = vs->aNrm; 10197ec681f3Smrg output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); 10207ec681f3Smrg ureg_MOV(ureg, output, input); 10217ec681f3Smrg } 10227ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { 10237ec681f3Smrg struct ureg_src input; 10247ec681f3Smrg struct ureg_dst output; 10257ec681f3Smrg input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); 10267ec681f3Smrg output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); 10277ec681f3Smrg ureg_MOV(ureg, output, input); 10287ec681f3Smrg } 10297ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { 10307ec681f3Smrg struct ureg_src input; 10317ec681f3Smrg struct ureg_dst output; 10327ec681f3Smrg input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); 10337ec681f3Smrg output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23); 10347ec681f3Smrg ureg_MOV(ureg, output, input); 10357ec681f3Smrg } 10367ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 10377ec681f3Smrg struct ureg_src input; 10387ec681f3Smrg struct ureg_dst output; 10397ec681f3Smrg input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); 10407ec681f3Smrg input = ureg_scalar(input, TGSI_SWIZZLE_X); 10417ec681f3Smrg output = oFog; 10427ec681f3Smrg ureg_MOV(ureg, output, input); 10437ec681f3Smrg } 10447ec681f3Smrg if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { 10457ec681f3Smrg (void) 0; /* TODO: replace z of position output ? */ 10467ec681f3Smrg } 10477ec681f3Smrg 10487ec681f3Smrg /* ucp for ff applies on world coordinates. 10497ec681f3Smrg * aVtx is in worldview coordinates. */ 10507ec681f3Smrg if (key->ucp) { 10517ec681f3Smrg struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0); 10527ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 10537ec681f3Smrg ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12)); 10547ec681f3Smrg ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp)); 10557ec681f3Smrg ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp)); 10567ec681f3Smrg ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp)); 10577ec681f3Smrg ureg_release_temporary(ureg, tmp); 10587ec681f3Smrg } 10597ec681f3Smrg 10607ec681f3Smrg if (key->position_t && device->driver_caps.window_space_position_support) 10617ec681f3Smrg ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 10627ec681f3Smrg 10637ec681f3Smrg ureg_END(ureg); 10647ec681f3Smrg nine_ureg_tgsi_dump(ureg, FALSE); 10657ec681f3Smrg return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 10667ec681f3Smrg} 10677ec681f3Smrg 10687ec681f3Smrg/* PS FF constants layout: 10697ec681f3Smrg * 10707ec681f3Smrg * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT 10717ec681f3Smrg * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00 10727ec681f3Smrg * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01 10737ec681f3Smrg * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10 10747ec681f3Smrg * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11 10757ec681f3Smrg * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE 10767ec681f3Smrg * CONST[17..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET 10777ec681f3Smrg * 10787ec681f3Smrg * CONST[20] D3DRS_TEXTUREFACTOR 10797ec681f3Smrg * CONST[21] D3DRS_FOGCOLOR 10807ec681f3Smrg * CONST[22].x___ RS.FogEnd 10817ec681f3Smrg * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 10827ec681f3Smrg * CONST[22].__z_ RS.FogDensity 10837ec681f3Smrg */ 10847ec681f3Smrgstruct ps_build_ctx 10857ec681f3Smrg{ 10867ec681f3Smrg struct ureg_program *ureg; 10877ec681f3Smrg 10887ec681f3Smrg struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */ 10897ec681f3Smrg struct ureg_src vT[8]; /* TEXCOORD[i] */ 10907ec681f3Smrg struct ureg_dst rCur; /* D3DTA_CURRENT */ 10917ec681f3Smrg struct ureg_dst rMod; 10927ec681f3Smrg struct ureg_src rCurSrc; 10937ec681f3Smrg struct ureg_dst rTmp; /* D3DTA_TEMP */ 10947ec681f3Smrg struct ureg_src rTmpSrc; 10957ec681f3Smrg struct ureg_dst rTex; 10967ec681f3Smrg struct ureg_src rTexSrc; 10977ec681f3Smrg struct ureg_src cBEM[8]; 10987ec681f3Smrg struct ureg_src s[8]; 10997ec681f3Smrg 11007ec681f3Smrg struct { 11017ec681f3Smrg unsigned index; 11027ec681f3Smrg unsigned index_pre_mod; 11037ec681f3Smrg } stage; 11047ec681f3Smrg}; 11057ec681f3Smrg 11067ec681f3Smrgstatic struct ureg_src 11077ec681f3Smrgps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) 11087ec681f3Smrg{ 11097ec681f3Smrg struct ureg_src reg; 11107ec681f3Smrg 11117ec681f3Smrg switch (ta & D3DTA_SELECTMASK) { 11127ec681f3Smrg case D3DTA_CONSTANT: 11137ec681f3Smrg reg = ureg_DECL_constant(ps->ureg, ps->stage.index); 11147ec681f3Smrg break; 11157ec681f3Smrg case D3DTA_CURRENT: 11167ec681f3Smrg reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; 11177ec681f3Smrg break; 11187ec681f3Smrg case D3DTA_DIFFUSE: 11197ec681f3Smrg reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 11207ec681f3Smrg break; 11217ec681f3Smrg case D3DTA_SPECULAR: 11227ec681f3Smrg reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 11237ec681f3Smrg break; 11247ec681f3Smrg case D3DTA_TEMP: 11257ec681f3Smrg reg = ps->rTmpSrc; 11267ec681f3Smrg break; 11277ec681f3Smrg case D3DTA_TEXTURE: 11287ec681f3Smrg reg = ps->rTexSrc; 11297ec681f3Smrg break; 11307ec681f3Smrg case D3DTA_TFACTOR: 11317ec681f3Smrg reg = ureg_DECL_constant(ps->ureg, 20); 11327ec681f3Smrg break; 11337ec681f3Smrg default: 11347ec681f3Smrg assert(0); 11357ec681f3Smrg reg = ureg_src_undef(); 11367ec681f3Smrg break; 11377ec681f3Smrg } 11387ec681f3Smrg if (ta & D3DTA_COMPLEMENT) { 11397ec681f3Smrg struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); 11407ec681f3Smrg ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); 11417ec681f3Smrg reg = ureg_src(dst); 11427ec681f3Smrg } 11437ec681f3Smrg if (ta & D3DTA_ALPHAREPLICATE) 11447ec681f3Smrg reg = _WWWW(reg); 11457ec681f3Smrg return reg; 11467ec681f3Smrg} 11477ec681f3Smrg 11487ec681f3Smrgstatic struct ureg_dst 11497ec681f3Smrgps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta) 11507ec681f3Smrg{ 11517ec681f3Smrg assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE))); 11527ec681f3Smrg 11537ec681f3Smrg switch (ta & D3DTA_SELECTMASK) { 11547ec681f3Smrg case D3DTA_CURRENT: 11557ec681f3Smrg return ps->rCur; 11567ec681f3Smrg case D3DTA_TEMP: 11577ec681f3Smrg return ps->rTmp; 11587ec681f3Smrg default: 11597ec681f3Smrg assert(0); 11607ec681f3Smrg return ureg_dst_undef(); 11617ec681f3Smrg } 11627ec681f3Smrg} 11637ec681f3Smrg 11647ec681f3Smrgstatic uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top) 11657ec681f3Smrg{ 11667ec681f3Smrg switch (top) { 11677ec681f3Smrg case D3DTOP_DISABLE: 11687ec681f3Smrg return 0x0; 11697ec681f3Smrg case D3DTOP_SELECTARG1: 11707ec681f3Smrg case D3DTOP_PREMODULATE: 11717ec681f3Smrg return 0x2; 11727ec681f3Smrg case D3DTOP_SELECTARG2: 11737ec681f3Smrg return 0x4; 11747ec681f3Smrg case D3DTOP_MULTIPLYADD: 11757ec681f3Smrg case D3DTOP_LERP: 11767ec681f3Smrg return 0x7; 11777ec681f3Smrg default: 11787ec681f3Smrg return 0x6; 11797ec681f3Smrg } 11807ec681f3Smrg} 11817ec681f3Smrg 11827ec681f3Smrgstatic inline boolean 11837ec681f3Smrgis_MOV_no_op(struct ureg_dst dst, struct ureg_src src) 11847ec681f3Smrg{ 11857ec681f3Smrg return !dst.WriteMask || 11867ec681f3Smrg (dst.File == src.File && 11877ec681f3Smrg dst.Index == src.Index && 11887ec681f3Smrg !dst.Indirect && 11897ec681f3Smrg !dst.Saturate && 11907ec681f3Smrg !src.Indirect && 11917ec681f3Smrg !src.Negate && 11927ec681f3Smrg !src.Absolute && 11937ec681f3Smrg (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) && 11947ec681f3Smrg (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) && 11957ec681f3Smrg (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) && 11967ec681f3Smrg (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W))); 11977ec681f3Smrg 11987ec681f3Smrg} 11997ec681f3Smrg 12007ec681f3Smrgstatic void 12017ec681f3Smrgps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg) 12027ec681f3Smrg{ 12037ec681f3Smrg struct ureg_program *ureg = ps->ureg; 12047ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 12057ec681f3Smrg struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 12067ec681f3Smrg struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 12077ec681f3Smrg 12087ec681f3Smrg tmp.WriteMask = dst.WriteMask; 12097ec681f3Smrg 12107ec681f3Smrg if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 && 12117ec681f3Smrg top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE && 12127ec681f3Smrg top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA && 12137ec681f3Smrg top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA && 12147ec681f3Smrg top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE && 12157ec681f3Smrg top != D3DTOP_LERP) 12167ec681f3Smrg dst = ureg_saturate(dst); 12177ec681f3Smrg 12187ec681f3Smrg switch (top) { 12197ec681f3Smrg case D3DTOP_SELECTARG1: 12207ec681f3Smrg if (!is_MOV_no_op(dst, arg[1])) 12217ec681f3Smrg ureg_MOV(ureg, dst, arg[1]); 12227ec681f3Smrg break; 12237ec681f3Smrg case D3DTOP_SELECTARG2: 12247ec681f3Smrg if (!is_MOV_no_op(dst, arg[2])) 12257ec681f3Smrg ureg_MOV(ureg, dst, arg[2]); 12267ec681f3Smrg break; 12277ec681f3Smrg case D3DTOP_MODULATE: 12287ec681f3Smrg ureg_MUL(ureg, dst, arg[1], arg[2]); 12297ec681f3Smrg break; 12307ec681f3Smrg case D3DTOP_MODULATE2X: 12317ec681f3Smrg ureg_MUL(ureg, tmp, arg[1], arg[2]); 12327ec681f3Smrg ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp)); 12337ec681f3Smrg break; 12347ec681f3Smrg case D3DTOP_MODULATE4X: 12357ec681f3Smrg ureg_MUL(ureg, tmp, arg[1], arg[2]); 12367ec681f3Smrg ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f)); 12377ec681f3Smrg break; 12387ec681f3Smrg case D3DTOP_ADD: 12397ec681f3Smrg ureg_ADD(ureg, dst, arg[1], arg[2]); 12407ec681f3Smrg break; 12417ec681f3Smrg case D3DTOP_ADDSIGNED: 12427ec681f3Smrg ureg_ADD(ureg, tmp, arg[1], arg[2]); 12437ec681f3Smrg ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); 12447ec681f3Smrg break; 12457ec681f3Smrg case D3DTOP_ADDSIGNED2X: 12467ec681f3Smrg ureg_ADD(ureg, tmp, arg[1], arg[2]); 12477ec681f3Smrg ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 12487ec681f3Smrg break; 12497ec681f3Smrg case D3DTOP_SUBTRACT: 12507ec681f3Smrg ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); 12517ec681f3Smrg break; 12527ec681f3Smrg case D3DTOP_ADDSMOOTH: 12537ec681f3Smrg ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 12547ec681f3Smrg ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); 12557ec681f3Smrg break; 12567ec681f3Smrg case D3DTOP_BLENDDIFFUSEALPHA: 12577ec681f3Smrg ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]); 12587ec681f3Smrg break; 12597ec681f3Smrg case D3DTOP_BLENDTEXTUREALPHA: 12607ec681f3Smrg /* XXX: alpha taken from previous stage, texture or result ? */ 12617ec681f3Smrg ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]); 12627ec681f3Smrg break; 12637ec681f3Smrg case D3DTOP_BLENDFACTORALPHA: 12647ec681f3Smrg ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); 12657ec681f3Smrg break; 12667ec681f3Smrg case D3DTOP_BLENDTEXTUREALPHAPM: 12677ec681f3Smrg ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); 12687ec681f3Smrg ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); 12697ec681f3Smrg break; 12707ec681f3Smrg case D3DTOP_BLENDCURRENTALPHA: 12717ec681f3Smrg ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]); 12727ec681f3Smrg break; 12737ec681f3Smrg case D3DTOP_PREMODULATE: 12747ec681f3Smrg ureg_MOV(ureg, dst, arg[1]); 12757ec681f3Smrg ps->stage.index_pre_mod = ps->stage.index + 1; 12767ec681f3Smrg break; 12777ec681f3Smrg case D3DTOP_MODULATEALPHA_ADDCOLOR: 12787ec681f3Smrg ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]); 12797ec681f3Smrg break; 12807ec681f3Smrg case D3DTOP_MODULATECOLOR_ADDALPHA: 12817ec681f3Smrg ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); 12827ec681f3Smrg break; 12837ec681f3Smrg case D3DTOP_MODULATEINVALPHA_ADDCOLOR: 12847ec681f3Smrg ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); 12857ec681f3Smrg ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); 12867ec681f3Smrg break; 12877ec681f3Smrg case D3DTOP_MODULATEINVCOLOR_ADDALPHA: 12887ec681f3Smrg ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 12897ec681f3Smrg ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1])); 12907ec681f3Smrg break; 12917ec681f3Smrg case D3DTOP_BUMPENVMAP: 12927ec681f3Smrg break; 12937ec681f3Smrg case D3DTOP_BUMPENVMAPLUMINANCE: 12947ec681f3Smrg break; 12957ec681f3Smrg case D3DTOP_DOTPRODUCT3: 12967ec681f3Smrg ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 12977ec681f3Smrg ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 12987ec681f3Smrg ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); 12997ec681f3Smrg ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); 13007ec681f3Smrg break; 13017ec681f3Smrg case D3DTOP_MULTIPLYADD: 13027ec681f3Smrg ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]); 13037ec681f3Smrg break; 13047ec681f3Smrg case D3DTOP_LERP: 13057ec681f3Smrg ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]); 13067ec681f3Smrg break; 13077ec681f3Smrg case D3DTOP_DISABLE: 13087ec681f3Smrg /* no-op ? */ 13097ec681f3Smrg break; 13107ec681f3Smrg default: 13117ec681f3Smrg assert(!"invalid D3DTOP"); 13127ec681f3Smrg break; 13137ec681f3Smrg } 13147ec681f3Smrg ureg_release_temporary(ureg, tmp); 13157ec681f3Smrg ureg_release_temporary(ureg, tmp2); 13167ec681f3Smrg} 13177ec681f3Smrg 13187ec681f3Smrgstatic void * 13197ec681f3Smrgnine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) 13207ec681f3Smrg{ 13217ec681f3Smrg struct ps_build_ctx ps; 13227ec681f3Smrg struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT); 13237ec681f3Smrg struct ureg_dst oCol; 13247ec681f3Smrg unsigned s; 13257ec681f3Smrg const unsigned texcoord_sn = get_texcoord_sn(device->screen); 13267ec681f3Smrg 13277ec681f3Smrg memset(&ps, 0, sizeof(ps)); 13287ec681f3Smrg ps.ureg = ureg; 13297ec681f3Smrg ps.stage.index_pre_mod = -1; 13307ec681f3Smrg 13317ec681f3Smrg ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 13327ec681f3Smrg 13337ec681f3Smrg ps.rCur = ureg_DECL_temporary(ureg); 13347ec681f3Smrg ps.rTmp = ureg_DECL_temporary(ureg); 13357ec681f3Smrg ps.rTex = ureg_DECL_temporary(ureg); 13367ec681f3Smrg ps.rCurSrc = ureg_src(ps.rCur); 13377ec681f3Smrg ps.rTmpSrc = ureg_src(ps.rTmp); 13387ec681f3Smrg ps.rTexSrc = ureg_src(ps.rTex); 13397ec681f3Smrg 13407ec681f3Smrg /* Initial values */ 13417ec681f3Smrg ureg_MOV(ureg, ps.rCur, ps.vC[0]); 13427ec681f3Smrg ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f)); 13437ec681f3Smrg ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f)); 13447ec681f3Smrg 13457ec681f3Smrg for (s = 0; s < 8; ++s) { 13467ec681f3Smrg ps.s[s] = ureg_src_undef(); 13477ec681f3Smrg 13487ec681f3Smrg if (key->ts[s].colorop != D3DTOP_DISABLE) { 13497ec681f3Smrg if (key->ts[s].colorarg0 == D3DTA_SPECULAR || 13507ec681f3Smrg key->ts[s].colorarg1 == D3DTA_SPECULAR || 13517ec681f3Smrg key->ts[s].colorarg2 == D3DTA_SPECULAR) 13527ec681f3Smrg ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 13537ec681f3Smrg 13547ec681f3Smrg if (key->ts[s].colorarg0 == D3DTA_TEXTURE || 13557ec681f3Smrg key->ts[s].colorarg1 == D3DTA_TEXTURE || 13567ec681f3Smrg key->ts[s].colorarg2 == D3DTA_TEXTURE || 13577ec681f3Smrg key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 13587ec681f3Smrg key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 13597ec681f3Smrg ps.s[s] = ureg_DECL_sampler(ureg, s); 13607ec681f3Smrg ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 13617ec681f3Smrg } 13627ec681f3Smrg if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE || 13637ec681f3Smrg key->ts[s - 1].alphaop == D3DTOP_PREMODULATE)) 13647ec681f3Smrg ps.s[s] = ureg_DECL_sampler(ureg, s); 13657ec681f3Smrg } 13667ec681f3Smrg 13677ec681f3Smrg if (key->ts[s].alphaop != D3DTOP_DISABLE) { 13687ec681f3Smrg if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || 13697ec681f3Smrg key->ts[s].alphaarg1 == D3DTA_SPECULAR || 13707ec681f3Smrg key->ts[s].alphaarg2 == D3DTA_SPECULAR) 13717ec681f3Smrg ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 13727ec681f3Smrg 13737ec681f3Smrg if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || 13747ec681f3Smrg key->ts[s].alphaarg1 == D3DTA_TEXTURE || 13757ec681f3Smrg key->ts[s].alphaarg2 == D3DTA_TEXTURE || 13767ec681f3Smrg key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 13777ec681f3Smrg key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 13787ec681f3Smrg ps.s[s] = ureg_DECL_sampler(ureg, s); 13797ec681f3Smrg ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 13807ec681f3Smrg } 13817ec681f3Smrg } 13827ec681f3Smrg } 13837ec681f3Smrg if (key->specular) 13847ec681f3Smrg ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 13857ec681f3Smrg 13867ec681f3Smrg oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 13877ec681f3Smrg 13887ec681f3Smrg /* Run stages. 13897ec681f3Smrg */ 13907ec681f3Smrg for (s = 0; s < 8; ++s) { 13917ec681f3Smrg unsigned colorarg[3]; 13927ec681f3Smrg unsigned alphaarg[3]; 13937ec681f3Smrg const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop); 13947ec681f3Smrg const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop); 13957ec681f3Smrg struct ureg_dst dst; 13967ec681f3Smrg struct ureg_src arg[3]; 13977ec681f3Smrg 13987ec681f3Smrg if (key->ts[s].colorop == D3DTOP_DISABLE) { 13997ec681f3Smrg assert (key->ts[s].alphaop == D3DTOP_DISABLE); 14007ec681f3Smrg continue; 14017ec681f3Smrg } 14027ec681f3Smrg ps.stage.index = s; 14037ec681f3Smrg 14047ec681f3Smrg DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s, 14057ec681f3Smrg nine_D3DTOP_to_str(key->ts[s].colorop), 14067ec681f3Smrg nine_D3DTOP_to_str(key->ts[s].alphaop)); 14077ec681f3Smrg 14087ec681f3Smrg if (!ureg_src_is_undef(ps.s[s])) { 14097ec681f3Smrg unsigned target; 14107ec681f3Smrg struct ureg_src texture_coord = ps.vT[s]; 14117ec681f3Smrg struct ureg_dst delta; 14127ec681f3Smrg switch (key->ts[s].textarget) { 14137ec681f3Smrg case 0: target = TGSI_TEXTURE_1D; break; 14147ec681f3Smrg case 1: target = TGSI_TEXTURE_2D; break; 14157ec681f3Smrg case 2: target = TGSI_TEXTURE_3D; break; 14167ec681f3Smrg case 3: target = TGSI_TEXTURE_CUBE; break; 14177ec681f3Smrg /* this is a 2 bit bitfield, do I really need a default case ? */ 14187ec681f3Smrg } 14197ec681f3Smrg 14207ec681f3Smrg /* Modify coordinates */ 14217ec681f3Smrg if (s >= 1 && 14227ec681f3Smrg (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP || 14237ec681f3Smrg key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) { 14247ec681f3Smrg delta = ureg_DECL_temporary(ureg); 14257ec681f3Smrg /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */ 14267ec681f3Smrg ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1))); 14277ec681f3Smrg ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta)); 14287ec681f3Smrg /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */ 14297ec681f3Smrg ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1))); 14307ec681f3Smrg ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta)); 14317ec681f3Smrg texture_coord = ureg_src(ureg_DECL_temporary(ureg)); 14327ec681f3Smrg ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]); 14337ec681f3Smrg ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta)); 14347ec681f3Smrg /* Prepare luminance multiplier 14357ec681f3Smrg * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */ 14367ec681f3Smrg if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { 14377ec681f3Smrg struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2)); 14387ec681f3Smrg struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2)); 14397ec681f3Smrg 14407ec681f3Smrg ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset); 14417ec681f3Smrg } 14427ec681f3Smrg } 14437ec681f3Smrg if (key->projected & (3 << (s *2))) { 14447ec681f3Smrg unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); 14457ec681f3Smrg if (dim == 4) 14467ec681f3Smrg ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]); 14477ec681f3Smrg else { 14487ec681f3Smrg struct ureg_dst tmp = ureg_DECL_temporary(ureg); 14497ec681f3Smrg ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1)); 14507ec681f3Smrg ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord); 14517ec681f3Smrg ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); 14527ec681f3Smrg ureg_release_temporary(ureg, tmp); 14537ec681f3Smrg } 14547ec681f3Smrg } else { 14557ec681f3Smrg ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]); 14567ec681f3Smrg } 14577ec681f3Smrg if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 14587ec681f3Smrg ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta)); 14597ec681f3Smrg } 14607ec681f3Smrg 14617ec681f3Smrg if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || 14627ec681f3Smrg key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 14637ec681f3Smrg continue; 14647ec681f3Smrg 14657ec681f3Smrg dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT); 14667ec681f3Smrg 14677ec681f3Smrg if (ps.stage.index_pre_mod == ps.stage.index) { 14687ec681f3Smrg ps.rMod = ureg_DECL_temporary(ureg); 14697ec681f3Smrg ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); 14707ec681f3Smrg } 14717ec681f3Smrg 14727ec681f3Smrg colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; 14737ec681f3Smrg colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; 14747ec681f3Smrg colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; 14757ec681f3Smrg alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; 14767ec681f3Smrg alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; 14777ec681f3Smrg alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; 14787ec681f3Smrg 14797ec681f3Smrg if (key->ts[s].colorop != key->ts[s].alphaop || 14807ec681f3Smrg colorarg[0] != alphaarg[0] || 14817ec681f3Smrg colorarg[1] != alphaarg[1] || 14827ec681f3Smrg colorarg[2] != alphaarg[2]) 14837ec681f3Smrg dst.WriteMask = TGSI_WRITEMASK_XYZ; 14847ec681f3Smrg 14857ec681f3Smrg /* Special DOTPRODUCT behaviour (see wine tests) */ 14867ec681f3Smrg if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) 14877ec681f3Smrg dst.WriteMask = TGSI_WRITEMASK_XYZW; 14887ec681f3Smrg 14897ec681f3Smrg if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); 14907ec681f3Smrg if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); 14917ec681f3Smrg if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); 14927ec681f3Smrg ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg); 14937ec681f3Smrg 14947ec681f3Smrg if (dst.WriteMask != TGSI_WRITEMASK_XYZW) { 14957ec681f3Smrg dst.WriteMask = TGSI_WRITEMASK_W; 14967ec681f3Smrg 14977ec681f3Smrg if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]); 14987ec681f3Smrg if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]); 14997ec681f3Smrg if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]); 15007ec681f3Smrg ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg); 15017ec681f3Smrg } 15027ec681f3Smrg } 15037ec681f3Smrg 15047ec681f3Smrg if (key->specular) 15057ec681f3Smrg ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]); 15067ec681f3Smrg 15077ec681f3Smrg /* Fog. 15087ec681f3Smrg */ 15097ec681f3Smrg if (key->fog_mode) { 15107ec681f3Smrg struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X); 15117ec681f3Smrg struct ureg_src vPos; 15127ec681f3Smrg if (device->screen->get_param(device->screen, 15137ec681f3Smrg PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { 15147ec681f3Smrg vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 15157ec681f3Smrg } else { 15167ec681f3Smrg vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 15177ec681f3Smrg TGSI_INTERPOLATE_LINEAR); 15187ec681f3Smrg } 15197ec681f3Smrg 15207ec681f3Smrg /* Source is either W or Z. 15217ec681f3Smrg * When we use vs ff, 15227ec681f3Smrg * Z is when an orthogonal projection matrix is detected, 15237ec681f3Smrg * W (WFOG) else. 15247ec681f3Smrg * Z is used for programmable vs. 15257ec681f3Smrg * Note: Tests indicate that the projection matrix coefficients do 15267ec681f3Smrg * actually affect pixel fog (and not vertex fog) when vs ff is used, 15277ec681f3Smrg * which justifies taking the position's w instead of taking the z coordinate 15287ec681f3Smrg * before the projection in the vs shader. 15297ec681f3Smrg */ 15307ec681f3Smrg if (!key->fog_source) 15317ec681f3Smrg ureg_MOV(ureg, rFog, _ZZZZ(vPos)); 15327ec681f3Smrg else 15337ec681f3Smrg /* Position's w is 1/w */ 15347ec681f3Smrg ureg_RCP(ureg, rFog, _WWWW(vPos)); 15357ec681f3Smrg 15367ec681f3Smrg if (key->fog_mode == D3DFOG_EXP) { 15377ec681f3Smrg ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 15387ec681f3Smrg ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 15397ec681f3Smrg ureg_EX2(ureg, rFog, _X(rFog)); 15407ec681f3Smrg } else 15417ec681f3Smrg if (key->fog_mode == D3DFOG_EXP2) { 15427ec681f3Smrg ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 15437ec681f3Smrg ureg_MUL(ureg, rFog, _X(rFog), _X(rFog)); 15447ec681f3Smrg ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 15457ec681f3Smrg ureg_EX2(ureg, rFog, _X(rFog)); 15467ec681f3Smrg } else 15477ec681f3Smrg if (key->fog_mode == D3DFOG_LINEAR) { 15487ec681f3Smrg ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); 15497ec681f3Smrg ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); 15507ec681f3Smrg } 15517ec681f3Smrg ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); 15527ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 15537ec681f3Smrg } else 15547ec681f3Smrg if (key->fog) { 15557ec681f3Smrg struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE); 15567ec681f3Smrg ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); 15577ec681f3Smrg ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 15587ec681f3Smrg } else { 15597ec681f3Smrg ureg_MOV(ureg, oCol, ps.rCurSrc); 15607ec681f3Smrg } 15617ec681f3Smrg 15627ec681f3Smrg ureg_END(ureg); 15637ec681f3Smrg nine_ureg_tgsi_dump(ureg, FALSE); 15647ec681f3Smrg return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 15657ec681f3Smrg} 15667ec681f3Smrg 15677ec681f3Smrgstatic struct NineVertexShader9 * 15687ec681f3Smrgnine_ff_get_vs(struct NineDevice9 *device) 15697ec681f3Smrg{ 15707ec681f3Smrg const struct nine_context *context = &device->context; 15717ec681f3Smrg struct NineVertexShader9 *vs; 15727ec681f3Smrg struct vs_build_ctx bld; 15737ec681f3Smrg struct nine_ff_vs_key key; 15747ec681f3Smrg unsigned s, i; 15757ec681f3Smrg boolean has_indexes = false; 15767ec681f3Smrg boolean has_weights = false; 15777ec681f3Smrg char input_texture_coord[8]; 15787ec681f3Smrg 15797ec681f3Smrg assert(sizeof(key) <= sizeof(key.value32)); 15807ec681f3Smrg 15817ec681f3Smrg memset(&key, 0, sizeof(key)); 15827ec681f3Smrg memset(&bld, 0, sizeof(bld)); 15837ec681f3Smrg memset(&input_texture_coord, 0, sizeof(input_texture_coord)); 15847ec681f3Smrg 15857ec681f3Smrg bld.key = &key; 15867ec681f3Smrg 15877ec681f3Smrg /* FIXME: this shouldn't be NULL, but it is on init */ 15887ec681f3Smrg if (context->vdecl) { 15897ec681f3Smrg key.color0in_one = 1; 15907ec681f3Smrg key.color1in_zero = 1; 15917ec681f3Smrg for (i = 0; i < context->vdecl->nelems; i++) { 15927ec681f3Smrg uint16_t usage = context->vdecl->usage_map[i]; 15937ec681f3Smrg if (usage == NINE_DECLUSAGE_POSITIONT) 15947ec681f3Smrg key.position_t = 1; 15957ec681f3Smrg else if (usage == NINE_DECLUSAGE_i(COLOR, 0)) 15967ec681f3Smrg key.color0in_one = 0; 15977ec681f3Smrg else if (usage == NINE_DECLUSAGE_i(COLOR, 1)) 15987ec681f3Smrg key.color1in_zero = 0; 15997ec681f3Smrg else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) { 16007ec681f3Smrg has_indexes = true; 16017ec681f3Smrg key.passthrough |= 1 << usage; 16027ec681f3Smrg } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) { 16037ec681f3Smrg has_weights = true; 16047ec681f3Smrg key.passthrough |= 1 << usage; 16057ec681f3Smrg } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) { 16067ec681f3Smrg key.has_normal = 1; 16077ec681f3Smrg key.passthrough |= 1 << usage; 16087ec681f3Smrg } else if (usage == NINE_DECLUSAGE_PSIZE) 16097ec681f3Smrg key.vertexpointsize = 1; 16107ec681f3Smrg else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { 16117ec681f3Smrg s = usage / NINE_DECLUSAGE_COUNT; 16127ec681f3Smrg if (s < 8) 16137ec681f3Smrg input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type); 16147ec681f3Smrg else 16157ec681f3Smrg DBG("FF given texture coordinate >= 8. Ignoring\n"); 16167ec681f3Smrg } else if (usage < NINE_DECLUSAGE_NONE) 16177ec681f3Smrg key.passthrough |= 1 << usage; 16187ec681f3Smrg } 16197ec681f3Smrg } 16207ec681f3Smrg /* ff vs + ps 3.0: some elements are passed to the ps (wine test). 16217ec681f3Smrg * We do restrict to indices 0 */ 16227ec681f3Smrg key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | 16237ec681f3Smrg (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | 16247ec681f3Smrg (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); 16257ec681f3Smrg if (!key.position_t) 16267ec681f3Smrg key.passthrough = 0; 16277ec681f3Smrg key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE]; 16287ec681f3Smrg 16297ec681f3Smrg key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active; 16307ec681f3Smrg key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active; 16317ec681f3Smrg if (key.position_t) { 16327ec681f3Smrg key.darkness = 0; /* |= key.lighting; */ /* XXX ? */ 16337ec681f3Smrg key.lighting = 0; 16347ec681f3Smrg } 16357ec681f3Smrg if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) { 16367ec681f3Smrg uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2); 16377ec681f3Smrg key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask; 16387ec681f3Smrg key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask; 16397ec681f3Smrg key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask; 16407ec681f3Smrg key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask; 16417ec681f3Smrg } 16427ec681f3Smrg key.fog = !!context->rs[D3DRS_FOGENABLE]; 16437ec681f3Smrg key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0; 16447ec681f3Smrg if (key.fog_mode) 16457ec681f3Smrg key.fog_range = context->rs[D3DRS_RANGEFOGENABLE]; 16467ec681f3Smrg 16477ec681f3Smrg key.localviewer = !!context->rs[D3DRS_LOCALVIEWER]; 16487ec681f3Smrg key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS]; 16497ec681f3Smrg key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE]; 16507ec681f3Smrg 16517ec681f3Smrg if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 16527ec681f3Smrg key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes; 16537ec681f3Smrg 16547ec681f3Smrg switch (context->rs[D3DRS_VERTEXBLEND]) { 16557ec681f3Smrg case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break; 16567ec681f3Smrg case D3DVBF_1WEIGHTS: key.vertexblend = 2; break; 16577ec681f3Smrg case D3DVBF_2WEIGHTS: key.vertexblend = 3; break; 16587ec681f3Smrg case D3DVBF_3WEIGHTS: key.vertexblend = 4; break; 16597ec681f3Smrg case D3DVBF_TWEENING: key.vertextween = 1; break; 16607ec681f3Smrg default: 16617ec681f3Smrg assert(!"invalid D3DVBF"); 16627ec681f3Smrg break; 16637ec681f3Smrg } 16647ec681f3Smrg if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) 16657ec681f3Smrg key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */ 16667ec681f3Smrg } 16677ec681f3Smrg 16687ec681f3Smrg for (s = 0; s < 8; ++s) { 16697ec681f3Smrg unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; 16707ec681f3Smrg unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7; 16717ec681f3Smrg unsigned dim; 16727ec681f3Smrg 16737ec681f3Smrg if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) 16747ec681f3Smrg gen = NINED3DTSS_TCI_PASSTHRU; 16757ec681f3Smrg 16767ec681f3Smrg if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU) 16777ec681f3Smrg gen = NINED3DTSS_TCI_DISABLE; 16787ec681f3Smrg 16797ec681f3Smrg key.tc_gen |= gen << (s * 3); 16807ec681f3Smrg key.tc_idx |= idx << (s * 3); 16817ec681f3Smrg key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2); 16827ec681f3Smrg 16837ec681f3Smrg dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; 16847ec681f3Smrg if (dim > 4) 16857ec681f3Smrg dim = input_texture_coord[idx]; 16867ec681f3Smrg if (dim == 1) /* NV behaviour */ 16877ec681f3Smrg dim = 0; 16887ec681f3Smrg key.tc_dim_output |= dim << (s * 3); 16897ec681f3Smrg } 16907ec681f3Smrg 16917ec681f3Smrg DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key)); 16927ec681f3Smrg vs = util_hash_table_get(device->ff.ht_vs, &key); 16937ec681f3Smrg if (vs) 16947ec681f3Smrg return vs; 16957ec681f3Smrg NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld)); 16967ec681f3Smrg 16977ec681f3Smrg nine_ff_prune_vs(device); 16987ec681f3Smrg if (vs) { 16997ec681f3Smrg unsigned n; 17007ec681f3Smrg 17017ec681f3Smrg memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); 17027ec681f3Smrg 17037ec681f3Smrg _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs); 17047ec681f3Smrg device->ff.num_vs++; 17057ec681f3Smrg 17067ec681f3Smrg vs->num_inputs = bld.num_inputs; 17077ec681f3Smrg for (n = 0; n < bld.num_inputs; ++n) 17087ec681f3Smrg vs->input_map[n].ndecl = bld.input[n]; 17097ec681f3Smrg 17107ec681f3Smrg vs->position_t = key.position_t; 17117ec681f3Smrg vs->point_size = key.vertexpointsize | key.pointscale; 17127ec681f3Smrg } 17137ec681f3Smrg return vs; 17147ec681f3Smrg} 17157ec681f3Smrg 17167ec681f3Smrg#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE) 17177ec681f3Smrg#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32))) 17187ec681f3Smrg 17197ec681f3Smrgstatic struct NinePixelShader9 * 17207ec681f3Smrgnine_ff_get_ps(struct NineDevice9 *device) 17217ec681f3Smrg{ 17227ec681f3Smrg struct nine_context *context = &device->context; 17237ec681f3Smrg D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION); 17247ec681f3Smrg struct NinePixelShader9 *ps; 17257ec681f3Smrg struct nine_ff_ps_key key; 17267ec681f3Smrg unsigned s; 17277ec681f3Smrg uint8_t sampler_mask = 0; 17287ec681f3Smrg 17297ec681f3Smrg assert(sizeof(key) <= sizeof(key.value32)); 17307ec681f3Smrg 17317ec681f3Smrg memset(&key, 0, sizeof(key)); 17327ec681f3Smrg for (s = 0; s < 8; ++s) { 17337ec681f3Smrg key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP]; 17347ec681f3Smrg key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP]; 17357ec681f3Smrg const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop); 17367ec681f3Smrg const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop); 17377ec681f3Smrg /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. 17387ec681f3Smrg * ALPHAOP cannot be enabled if COLOROP is disabled. 17397ec681f3Smrg * Verified on Windows. */ 17407ec681f3Smrg if (key.ts[s].colorop == D3DTOP_DISABLE) { 17417ec681f3Smrg key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */ 17427ec681f3Smrg break; 17437ec681f3Smrg } 17447ec681f3Smrg 17457ec681f3Smrg if (!context->texture[s].enabled && 17467ec681f3Smrg ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && 17477ec681f3Smrg used_c & 0x1) || 17487ec681f3Smrg (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && 17497ec681f3Smrg used_c & 0x2) || 17507ec681f3Smrg (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE && 17517ec681f3Smrg used_c & 0x4))) { 17527ec681f3Smrg /* Tested on Windows: Invalid texture read disables the stage 17537ec681f3Smrg * and the subsequent ones, but only for colorop. For alpha, 17547ec681f3Smrg * it's as if the texture had alpha of 1.0, which is what 17557ec681f3Smrg * has our dummy texture in that case. Invalid color also 17567ec681f3Smrg * disabled the following alpha stages. */ 17577ec681f3Smrg key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 17587ec681f3Smrg break; 17597ec681f3Smrg } 17607ec681f3Smrg 17617ec681f3Smrg if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE || 17627ec681f3Smrg context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE || 17637ec681f3Smrg context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE || 17647ec681f3Smrg context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE || 17657ec681f3Smrg context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE || 17667ec681f3Smrg context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE) 17677ec681f3Smrg sampler_mask |= (1 << s); 17687ec681f3Smrg 17697ec681f3Smrg if (key.ts[s].colorop != D3DTOP_DISABLE) { 17707ec681f3Smrg if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7; 17717ec681f3Smrg if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7; 17727ec681f3Smrg if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7; 17737ec681f3Smrg if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s; 17747ec681f3Smrg if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s; 17757ec681f3Smrg if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s; 17767ec681f3Smrg if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s; 17777ec681f3Smrg if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s; 17787ec681f3Smrg if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s; 17797ec681f3Smrg } 17807ec681f3Smrg if (key.ts[s].alphaop != D3DTOP_DISABLE) { 17817ec681f3Smrg if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7; 17827ec681f3Smrg if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7; 17837ec681f3Smrg if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7; 17847ec681f3Smrg if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s; 17857ec681f3Smrg if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s; 17867ec681f3Smrg if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s; 17877ec681f3Smrg } 17887ec681f3Smrg key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; 17897ec681f3Smrg 17907ec681f3Smrg if (context->texture[s].enabled) { 17917ec681f3Smrg switch (context->texture[s].type) { 17927ec681f3Smrg case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; 17937ec681f3Smrg case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; 17947ec681f3Smrg case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; 17957ec681f3Smrg default: 17967ec681f3Smrg assert(!"unexpected texture type"); 17977ec681f3Smrg break; 17987ec681f3Smrg } 17997ec681f3Smrg } else { 18007ec681f3Smrg key.ts[s].textarget = 1; 18017ec681f3Smrg } 18027ec681f3Smrg } 18037ec681f3Smrg 18047ec681f3Smrg /* Note: If colorop is D3DTOP_DISABLE for the first stage 18057ec681f3Smrg * (which implies alphaop is too), nothing particular happens, 18067ec681f3Smrg * that is, current is equal to diffuse (which is the case anyway, 18077ec681f3Smrg * because it is how it is initialized). 18087ec681f3Smrg * Special case seems if alphaop is D3DTOP_DISABLE and not colorop, 18097ec681f3Smrg * because then if the resultarg is TEMP, then diffuse alpha is written 18107ec681f3Smrg * to it. */ 18117ec681f3Smrg if (key.ts[0].colorop != D3DTOP_DISABLE && 18127ec681f3Smrg key.ts[0].alphaop == D3DTOP_DISABLE && 18137ec681f3Smrg key.ts[0].resultarg != 0) { 18147ec681f3Smrg key.ts[0].alphaop = D3DTOP_SELECTARG1; 18157ec681f3Smrg key.ts[0].alphaarg1 = D3DTA_DIFFUSE; 18167ec681f3Smrg } 18177ec681f3Smrg /* When no alpha stage writes to current, diffuse alpha is taken. 18187ec681f3Smrg * Since we initialize current to diffuse, we have the behaviour. */ 18197ec681f3Smrg 18207ec681f3Smrg /* Last stage always writes to Current */ 18217ec681f3Smrg if (s >= 1) 18227ec681f3Smrg key.ts[s-1].resultarg = 0; 18237ec681f3Smrg 18247ec681f3Smrg key.projected = nine_ff_get_projected_key_ff(context); 18257ec681f3Smrg key.specular = !!context->rs[D3DRS_SPECULARENABLE]; 18267ec681f3Smrg 18277ec681f3Smrg for (; s < 8; ++s) 18287ec681f3Smrg key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 18297ec681f3Smrg if (context->rs[D3DRS_FOGENABLE]) 18307ec681f3Smrg key.fog_mode = context->rs[D3DRS_FOGTABLEMODE]; 18317ec681f3Smrg key.fog = !!context->rs[D3DRS_FOGENABLE]; 18327ec681f3Smrg /* Pixel fog (with WFOG advertised): source is either Z or W. 18337ec681f3Smrg * W is the source if vs ff is used, and the 18347ec681f3Smrg * projection matrix is not orthogonal. 18357ec681f3Smrg * Tests on Win 10 seem to indicate _34 18367ec681f3Smrg * and _33 are checked against 0, 1. */ 18377ec681f3Smrg if (key.fog_mode && key.fog) 18387ec681f3Smrg key.fog_source = !context->programmable_vs && 18397ec681f3Smrg !(projection_matrix->_34 == 0.0f && 18407ec681f3Smrg projection_matrix->_44 == 1.0f); 18417ec681f3Smrg 18427ec681f3Smrg DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key)); 18437ec681f3Smrg ps = util_hash_table_get(device->ff.ht_ps, &key); 18447ec681f3Smrg if (ps) 18457ec681f3Smrg return ps; 18467ec681f3Smrg NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key)); 18477ec681f3Smrg 18487ec681f3Smrg nine_ff_prune_ps(device); 18497ec681f3Smrg if (ps) { 18507ec681f3Smrg memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); 18517ec681f3Smrg 18527ec681f3Smrg _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps); 18537ec681f3Smrg device->ff.num_ps++; 18547ec681f3Smrg 18557ec681f3Smrg ps->rt_mask = 0x1; 18567ec681f3Smrg ps->sampler_mask = sampler_mask; 18577ec681f3Smrg } 18587ec681f3Smrg return ps; 18597ec681f3Smrg} 18607ec681f3Smrg 18617ec681f3Smrgstatic void 18627ec681f3Smrgnine_ff_load_vs_transforms(struct NineDevice9 *device) 18637ec681f3Smrg{ 18647ec681f3Smrg struct nine_context *context = &device->context; 18657ec681f3Smrg D3DMATRIX T; 18667ec681f3Smrg D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 18677ec681f3Smrg unsigned i; 18687ec681f3Smrg 18697ec681f3Smrg /* TODO: make this nicer, and only upload the ones we need */ 18707ec681f3Smrg /* TODO: use ff.vs_const as storage of W, V, P matrices */ 18717ec681f3Smrg 18727ec681f3Smrg if (IS_D3DTS_DIRTY(context, WORLD) || 18737ec681f3Smrg IS_D3DTS_DIRTY(context, VIEW) || 18747ec681f3Smrg IS_D3DTS_DIRTY(context, PROJECTION)) { 18757ec681f3Smrg /* WVP, WV matrices */ 18767ec681f3Smrg nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW)); 18777ec681f3Smrg nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION)); 18787ec681f3Smrg 18797ec681f3Smrg /* normal matrix == transpose(inverse(WV)) */ 18807ec681f3Smrg nine_d3d_matrix_inverse(&T, &M[1]); 18817ec681f3Smrg nine_d3d_matrix_transpose(&M[4], &T); 18827ec681f3Smrg 18837ec681f3Smrg /* P matrix */ 18847ec681f3Smrg M[2] = *GET_D3DTS(PROJECTION); 18857ec681f3Smrg 18867ec681f3Smrg /* V and W matrix */ 18877ec681f3Smrg nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW)); 18887ec681f3Smrg M[40] = M[1]; 18897ec681f3Smrg } 18907ec681f3Smrg 18917ec681f3Smrg if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 18927ec681f3Smrg /* load other world matrices */ 18937ec681f3Smrg for (i = 1; i <= 8; ++i) { 18947ec681f3Smrg nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW)); 18957ec681f3Smrg } 18967ec681f3Smrg } 18977ec681f3Smrg 18987ec681f3Smrg device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]); 18997ec681f3Smrg} 19007ec681f3Smrg 19017ec681f3Smrgstatic void 19027ec681f3Smrgnine_ff_load_lights(struct NineDevice9 *device) 19037ec681f3Smrg{ 19047ec681f3Smrg struct nine_context *context = &device->context; 19057ec681f3Smrg struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 19067ec681f3Smrg unsigned l; 19077ec681f3Smrg 19087ec681f3Smrg if (context->changed.group & NINE_STATE_FF_MATERIAL) { 19097ec681f3Smrg const D3DMATERIAL9 *mtl = &context->ff.material; 19107ec681f3Smrg 19117ec681f3Smrg memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float)); 19127ec681f3Smrg memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float)); 19137ec681f3Smrg memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float)); 19147ec681f3Smrg dst[23].x = mtl->Power; 19157ec681f3Smrg memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float)); 19167ec681f3Smrg d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]); 19177ec681f3Smrg dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r; 19187ec681f3Smrg dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g; 19197ec681f3Smrg dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b; 19207ec681f3Smrg } 19217ec681f3Smrg 19227ec681f3Smrg if (!(context->changed.group & NINE_STATE_FF_LIGHTING)) 19237ec681f3Smrg return; 19247ec681f3Smrg 19257ec681f3Smrg for (l = 0; l < context->ff.num_lights_active; ++l) { 19267ec681f3Smrg const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]]; 19277ec681f3Smrg 19287ec681f3Smrg dst[32 + l * 8].x = light->Type; 19297ec681f3Smrg dst[32 + l * 8].y = light->Attenuation0; 19307ec681f3Smrg dst[32 + l * 8].z = light->Attenuation1; 19317ec681f3Smrg dst[32 + l * 8].w = light->Attenuation2; 19327ec681f3Smrg memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse)); 19337ec681f3Smrg memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular)); 19347ec681f3Smrg memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient)); 19357ec681f3Smrg nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW)); 19367ec681f3Smrg nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW)); 19377ec681f3Smrg dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range; 19387ec681f3Smrg dst[37 + l * 8].w = light->Falloff; 19397ec681f3Smrg dst[38 + l * 8].x = cosf(light->Theta * 0.5f); 19407ec681f3Smrg dst[38 + l * 8].y = cosf(light->Phi * 0.5f); 19417ec681f3Smrg dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); 19427ec681f3Smrg dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active); 19437ec681f3Smrg } 19447ec681f3Smrg} 19457ec681f3Smrg 19467ec681f3Smrgstatic void 19477ec681f3Smrgnine_ff_load_point_and_fog_params(struct NineDevice9 *device) 19487ec681f3Smrg{ 19497ec681f3Smrg struct nine_context *context = &device->context; 19507ec681f3Smrg struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 19517ec681f3Smrg 19527ec681f3Smrg if (!(context->changed.group & NINE_STATE_FF_VS_OTHER)) 19537ec681f3Smrg return; 19547ec681f3Smrg dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); 19557ec681f3Smrg dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); 19567ec681f3Smrg dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]); 19577ec681f3Smrg dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]); 19587ec681f3Smrg dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]); 19597ec681f3Smrg dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]); 19607ec681f3Smrg dst[28].x = asfloat(context->rs[D3DRS_FOGEND]); 19617ec681f3Smrg dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 19627ec681f3Smrg if (isinf(dst[28].y)) 19637ec681f3Smrg dst[28].y = 0.0f; 19647ec681f3Smrg dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 19657ec681f3Smrg} 19667ec681f3Smrg 19677ec681f3Smrgstatic void 19687ec681f3Smrgnine_ff_load_tex_matrices(struct NineDevice9 *device) 19697ec681f3Smrg{ 19707ec681f3Smrg struct nine_context *context = &device->context; 19717ec681f3Smrg D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 19727ec681f3Smrg unsigned s; 19737ec681f3Smrg 19747ec681f3Smrg if (!(context->ff.changed.transform[0] & 0xff0000)) 19757ec681f3Smrg return; 19767ec681f3Smrg for (s = 0; s < 8; ++s) { 19777ec681f3Smrg if (IS_D3DTS_DIRTY(context, TEXTURE0 + s)) 19787ec681f3Smrg nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE)); 19797ec681f3Smrg } 19807ec681f3Smrg} 19817ec681f3Smrg 19827ec681f3Smrgstatic void 19837ec681f3Smrgnine_ff_load_ps_params(struct NineDevice9 *device) 19847ec681f3Smrg{ 19857ec681f3Smrg struct nine_context *context = &device->context; 19867ec681f3Smrg struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; 19877ec681f3Smrg unsigned s; 19887ec681f3Smrg 19897ec681f3Smrg if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS)) 19907ec681f3Smrg return; 19917ec681f3Smrg 19927ec681f3Smrg for (s = 0; s < 8; ++s) 19937ec681f3Smrg d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]); 19947ec681f3Smrg 19957ec681f3Smrg for (s = 0; s < 8; ++s) { 19967ec681f3Smrg dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]); 19977ec681f3Smrg dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]); 19987ec681f3Smrg dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]); 19997ec681f3Smrg dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]); 20007ec681f3Smrg if (s & 1) { 20017ec681f3Smrg dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 20027ec681f3Smrg dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 20037ec681f3Smrg } else { 20047ec681f3Smrg dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 20057ec681f3Smrg dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 20067ec681f3Smrg } 20077ec681f3Smrg } 20087ec681f3Smrg 20097ec681f3Smrg d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]); 20107ec681f3Smrg d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]); 20117ec681f3Smrg dst[22].x = asfloat(context->rs[D3DRS_FOGEND]); 20127ec681f3Smrg dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 20137ec681f3Smrg dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 20147ec681f3Smrg} 20157ec681f3Smrg 20167ec681f3Smrgstatic void 20177ec681f3Smrgnine_ff_load_viewport_info(struct NineDevice9 *device) 20187ec681f3Smrg{ 20197ec681f3Smrg D3DVIEWPORT9 *viewport = &device->context.viewport; 20207ec681f3Smrg struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 20217ec681f3Smrg float diffZ = viewport->MaxZ - viewport->MinZ; 20227ec681f3Smrg 20237ec681f3Smrg /* Note: the other functions avoids to fill the const again if nothing changed. 20247ec681f3Smrg * But we don't have much to fill, and adding code to allow that may be complex 20257ec681f3Smrg * so just fill it always */ 20267ec681f3Smrg dst[100].x = 2.0f / (float)(viewport->Width); 20277ec681f3Smrg dst[100].y = 2.0f / (float)(viewport->Height); 20287ec681f3Smrg dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ); 20297ec681f3Smrg dst[100].w = (float)(viewport->Width); 20307ec681f3Smrg dst[101].x = (float)(viewport->X); 20317ec681f3Smrg dst[101].y = (float)(viewport->Y); 20327ec681f3Smrg dst[101].z = (float)(viewport->MinZ); 20337ec681f3Smrg} 20347ec681f3Smrg 20357ec681f3Smrgvoid 20367ec681f3Smrgnine_ff_update(struct NineDevice9 *device) 20377ec681f3Smrg{ 20387ec681f3Smrg struct nine_context *context = &device->context; 20397ec681f3Smrg struct pipe_constant_buffer cb; 20407ec681f3Smrg 20417ec681f3Smrg DBG("vs=%p ps=%p\n", context->vs, context->ps); 20427ec681f3Smrg 20437ec681f3Smrg /* NOTE: the only reference belongs to the hash table */ 20447ec681f3Smrg if (!context->programmable_vs) { 20457ec681f3Smrg device->ff.vs = nine_ff_get_vs(device); 20467ec681f3Smrg context->changed.group |= NINE_STATE_VS; 20477ec681f3Smrg } 20487ec681f3Smrg if (!context->ps) { 20497ec681f3Smrg device->ff.ps = nine_ff_get_ps(device); 20507ec681f3Smrg context->changed.group |= NINE_STATE_PS; 20517ec681f3Smrg } 20527ec681f3Smrg 20537ec681f3Smrg if (!context->programmable_vs) { 20547ec681f3Smrg nine_ff_load_vs_transforms(device); 20557ec681f3Smrg nine_ff_load_tex_matrices(device); 20567ec681f3Smrg nine_ff_load_lights(device); 20577ec681f3Smrg nine_ff_load_point_and_fog_params(device); 20587ec681f3Smrg nine_ff_load_viewport_info(device); 20597ec681f3Smrg 20607ec681f3Smrg memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform)); 20617ec681f3Smrg 20627ec681f3Smrg cb.buffer_offset = 0; 20637ec681f3Smrg cb.buffer = NULL; 20647ec681f3Smrg cb.user_buffer = device->ff.vs_const; 20657ec681f3Smrg cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); 20667ec681f3Smrg 20677ec681f3Smrg context->pipe_data.cb_vs_ff = cb; 20687ec681f3Smrg context->commit |= NINE_STATE_COMMIT_CONST_VS; 20697ec681f3Smrg 20707ec681f3Smrg context->changed.group &= ~NINE_STATE_FF_VS; 20717ec681f3Smrg } 20727ec681f3Smrg 20737ec681f3Smrg if (!context->ps) { 20747ec681f3Smrg nine_ff_load_ps_params(device); 20757ec681f3Smrg 20767ec681f3Smrg cb.buffer_offset = 0; 20777ec681f3Smrg cb.buffer = NULL; 20787ec681f3Smrg cb.user_buffer = device->ff.ps_const; 20797ec681f3Smrg cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); 20807ec681f3Smrg 20817ec681f3Smrg context->pipe_data.cb_ps_ff = cb; 20827ec681f3Smrg context->commit |= NINE_STATE_COMMIT_CONST_PS; 20837ec681f3Smrg 20847ec681f3Smrg context->changed.group &= ~NINE_STATE_FF_PS; 20857ec681f3Smrg } 20867ec681f3Smrg} 20877ec681f3Smrg 20887ec681f3Smrg 20897ec681f3Smrgboolean 20907ec681f3Smrgnine_ff_init(struct NineDevice9 *device) 20917ec681f3Smrg{ 20927ec681f3Smrg device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash, 20937ec681f3Smrg nine_ff_vs_key_comp); 20947ec681f3Smrg device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash, 20957ec681f3Smrg nine_ff_ps_key_comp); 20967ec681f3Smrg 20977ec681f3Smrg device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash, 20987ec681f3Smrg nine_ff_fvf_key_comp); 20997ec681f3Smrg 21007ec681f3Smrg device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float)); 21017ec681f3Smrg device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float)); 21027ec681f3Smrg 21037ec681f3Smrg return device->ff.ht_vs && device->ff.ht_ps && 21047ec681f3Smrg device->ff.ht_fvf && 21057ec681f3Smrg device->ff.vs_const && device->ff.ps_const; 21067ec681f3Smrg} 21077ec681f3Smrg 21087ec681f3Smrgstatic enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data) 21097ec681f3Smrg{ 21107ec681f3Smrg NineUnknown_Unbind(NineUnknown(value)); 21117ec681f3Smrg return PIPE_OK; 21127ec681f3Smrg} 21137ec681f3Smrg 21147ec681f3Smrgvoid 21157ec681f3Smrgnine_ff_fini(struct NineDevice9 *device) 21167ec681f3Smrg{ 21177ec681f3Smrg if (device->ff.ht_vs) { 21187ec681f3Smrg util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 21197ec681f3Smrg _mesa_hash_table_destroy(device->ff.ht_vs, NULL); 21207ec681f3Smrg } 21217ec681f3Smrg if (device->ff.ht_ps) { 21227ec681f3Smrg util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 21237ec681f3Smrg _mesa_hash_table_destroy(device->ff.ht_ps, NULL); 21247ec681f3Smrg } 21257ec681f3Smrg if (device->ff.ht_fvf) { 21267ec681f3Smrg util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL); 21277ec681f3Smrg _mesa_hash_table_destroy(device->ff.ht_fvf, NULL); 21287ec681f3Smrg } 21297ec681f3Smrg device->ff.vs = NULL; /* destroyed by unbinding from hash table */ 21307ec681f3Smrg device->ff.ps = NULL; 21317ec681f3Smrg 21327ec681f3Smrg FREE(device->ff.vs_const); 21337ec681f3Smrg FREE(device->ff.ps_const); 21347ec681f3Smrg} 21357ec681f3Smrg 21367ec681f3Smrgstatic void 21377ec681f3Smrgnine_ff_prune_vs(struct NineDevice9 *device) 21387ec681f3Smrg{ 21397ec681f3Smrg struct nine_context *context = &device->context; 21407ec681f3Smrg 21417ec681f3Smrg if (device->ff.num_vs > 1024) { 21427ec681f3Smrg /* could destroy the bound one here, so unbind */ 21437ec681f3Smrg context->pipe->bind_vs_state(context->pipe, NULL); 21447ec681f3Smrg util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 21457ec681f3Smrg _mesa_hash_table_clear(device->ff.ht_vs, NULL); 21467ec681f3Smrg device->ff.num_vs = 0; 21477ec681f3Smrg context->changed.group |= NINE_STATE_VS; 21487ec681f3Smrg } 21497ec681f3Smrg} 21507ec681f3Smrgstatic void 21517ec681f3Smrgnine_ff_prune_ps(struct NineDevice9 *device) 21527ec681f3Smrg{ 21537ec681f3Smrg struct nine_context *context = &device->context; 21547ec681f3Smrg 21557ec681f3Smrg if (device->ff.num_ps > 1024) { 21567ec681f3Smrg /* could destroy the bound one here, so unbind */ 21577ec681f3Smrg context->pipe->bind_fs_state(context->pipe, NULL); 21587ec681f3Smrg util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 21597ec681f3Smrg _mesa_hash_table_clear(device->ff.ht_ps, NULL); 21607ec681f3Smrg device->ff.num_ps = 0; 21617ec681f3Smrg context->changed.group |= NINE_STATE_PS; 21627ec681f3Smrg } 21637ec681f3Smrg} 21647ec681f3Smrg 21657ec681f3Smrg/* ========================================================================== */ 21667ec681f3Smrg 21677ec681f3Smrg/* Matrix multiplication: 21687ec681f3Smrg * 21697ec681f3Smrg * in memory: 0 1 2 3 (row major) 21707ec681f3Smrg * 4 5 6 7 21717ec681f3Smrg * 8 9 a b 21727ec681f3Smrg * c d e f 21737ec681f3Smrg * 21747ec681f3Smrg * cA cB cC cD 21757ec681f3Smrg * r0 = (r0 * cA) (r0 * cB) . . 21767ec681f3Smrg * r1 = (r1 * cA) (r1 * cB) 21777ec681f3Smrg * r2 = (r2 * cA) . 21787ec681f3Smrg * r3 = (r3 * cA) . 21797ec681f3Smrg * 21807ec681f3Smrg * r: (11) (12) (13) (14) 21817ec681f3Smrg * (21) (22) (23) (24) 21827ec681f3Smrg * (31) (32) (33) (34) 21837ec681f3Smrg * (41) (42) (43) (44) 21847ec681f3Smrg * l: (11 12 13 14) 21857ec681f3Smrg * (21 22 23 24) 21867ec681f3Smrg * (31 32 33 34) 21877ec681f3Smrg * (41 42 43 44) 21887ec681f3Smrg * 21897ec681f3Smrg * v: (x y z 1 ) 21907ec681f3Smrg * 21917ec681f3Smrg * t.xyzw = MUL(v.xxxx, r[0]); 21927ec681f3Smrg * t.xyzw = MAD(v.yyyy, r[1], t.xyzw); 21937ec681f3Smrg * t.xyzw = MAD(v.zzzz, r[2], t.xyzw); 21947ec681f3Smrg * v.xyzw = MAD(v.wwww, r[3], t.xyzw); 21957ec681f3Smrg * 21967ec681f3Smrg * v.x = DP4(v, c[0]); 21977ec681f3Smrg * v.y = DP4(v, c[1]); 21987ec681f3Smrg * v.z = DP4(v, c[2]); 21997ec681f3Smrg * v.w = DP4(v, c[3]) = 1 22007ec681f3Smrg */ 22017ec681f3Smrg 22027ec681f3Smrg/* 22037ec681f3Smrgstatic void 22047ec681f3Smrgnine_D3DMATRIX_print(const D3DMATRIX *M) 22057ec681f3Smrg{ 22067ec681f3Smrg DBG("\n(%f %f %f %f)\n" 22077ec681f3Smrg "(%f %f %f %f)\n" 22087ec681f3Smrg "(%f %f %f %f)\n" 22097ec681f3Smrg "(%f %f %f %f)\n", 22107ec681f3Smrg M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3], 22117ec681f3Smrg M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3], 22127ec681f3Smrg M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3], 22137ec681f3Smrg M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]); 22147ec681f3Smrg} 22157ec681f3Smrg*/ 22167ec681f3Smrg 22177ec681f3Smrgstatic inline float 22187ec681f3Smrgnine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c) 22197ec681f3Smrg{ 22207ec681f3Smrg return A->m[r][0] * B->m[0][c] + 22217ec681f3Smrg A->m[r][1] * B->m[1][c] + 22227ec681f3Smrg A->m[r][2] * B->m[2][c] + 22237ec681f3Smrg A->m[r][3] * B->m[3][c]; 22247ec681f3Smrg} 22257ec681f3Smrg 22267ec681f3Smrgstatic inline float 22277ec681f3Smrgnine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 22287ec681f3Smrg{ 22297ec681f3Smrg return v->x * M->m[0][c] + 22307ec681f3Smrg v->y * M->m[1][c] + 22317ec681f3Smrg v->z * M->m[2][c] + 22327ec681f3Smrg 1.0f * M->m[3][c]; 22337ec681f3Smrg} 22347ec681f3Smrg 22357ec681f3Smrgstatic inline float 22367ec681f3Smrgnine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 22377ec681f3Smrg{ 22387ec681f3Smrg return v->x * M->m[0][c] + 22397ec681f3Smrg v->y * M->m[1][c] + 22407ec681f3Smrg v->z * M->m[2][c]; 22417ec681f3Smrg} 22427ec681f3Smrg 22437ec681f3Smrgvoid 22447ec681f3Smrgnine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R) 22457ec681f3Smrg{ 22467ec681f3Smrg D->_11 = nine_DP4_row_col(L, 0, R, 0); 22477ec681f3Smrg D->_12 = nine_DP4_row_col(L, 0, R, 1); 22487ec681f3Smrg D->_13 = nine_DP4_row_col(L, 0, R, 2); 22497ec681f3Smrg D->_14 = nine_DP4_row_col(L, 0, R, 3); 22507ec681f3Smrg 22517ec681f3Smrg D->_21 = nine_DP4_row_col(L, 1, R, 0); 22527ec681f3Smrg D->_22 = nine_DP4_row_col(L, 1, R, 1); 22537ec681f3Smrg D->_23 = nine_DP4_row_col(L, 1, R, 2); 22547ec681f3Smrg D->_24 = nine_DP4_row_col(L, 1, R, 3); 22557ec681f3Smrg 22567ec681f3Smrg D->_31 = nine_DP4_row_col(L, 2, R, 0); 22577ec681f3Smrg D->_32 = nine_DP4_row_col(L, 2, R, 1); 22587ec681f3Smrg D->_33 = nine_DP4_row_col(L, 2, R, 2); 22597ec681f3Smrg D->_34 = nine_DP4_row_col(L, 2, R, 3); 22607ec681f3Smrg 22617ec681f3Smrg D->_41 = nine_DP4_row_col(L, 3, R, 0); 22627ec681f3Smrg D->_42 = nine_DP4_row_col(L, 3, R, 1); 22637ec681f3Smrg D->_43 = nine_DP4_row_col(L, 3, R, 2); 22647ec681f3Smrg D->_44 = nine_DP4_row_col(L, 3, R, 3); 22657ec681f3Smrg} 22667ec681f3Smrg 22677ec681f3Smrgvoid 22687ec681f3Smrgnine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 22697ec681f3Smrg{ 22707ec681f3Smrg d->x = nine_DP4_vec_col(v, M, 0); 22717ec681f3Smrg d->y = nine_DP4_vec_col(v, M, 1); 22727ec681f3Smrg d->z = nine_DP4_vec_col(v, M, 2); 22737ec681f3Smrg} 22747ec681f3Smrg 22757ec681f3Smrgvoid 22767ec681f3Smrgnine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 22777ec681f3Smrg{ 22787ec681f3Smrg d->x = nine_DP3_vec_col(v, M, 0); 22797ec681f3Smrg d->y = nine_DP3_vec_col(v, M, 1); 22807ec681f3Smrg d->z = nine_DP3_vec_col(v, M, 2); 22817ec681f3Smrg} 22827ec681f3Smrg 22837ec681f3Smrgvoid 22847ec681f3Smrgnine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M) 22857ec681f3Smrg{ 22867ec681f3Smrg unsigned i, j; 22877ec681f3Smrg for (i = 0; i < 4; ++i) 22887ec681f3Smrg for (j = 0; j < 4; ++j) 22897ec681f3Smrg D->m[i][j] = M->m[j][i]; 22907ec681f3Smrg} 22917ec681f3Smrg 22927ec681f3Smrg#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 22937ec681f3Smrg float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 22947ec681f3Smrg if (t > 0.0f) pos += t; else neg += t; } while(0) 22957ec681f3Smrg 22967ec681f3Smrg#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 22977ec681f3Smrg float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 22987ec681f3Smrg if (t > 0.0f) neg -= t; else pos -= t; } while(0) 22997ec681f3Smrgfloat 23007ec681f3Smrgnine_d3d_matrix_det(const D3DMATRIX *M) 23017ec681f3Smrg{ 23027ec681f3Smrg float pos = 0.0f; 23037ec681f3Smrg float neg = 0.0f; 23047ec681f3Smrg 23057ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4); 23067ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2); 23077ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3); 23087ec681f3Smrg 23097ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3); 23107ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4); 23117ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1); 23127ec681f3Smrg 23137ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4); 23147ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1); 23157ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2); 23167ec681f3Smrg 23177ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2); 23187ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3); 23197ec681f3Smrg _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1); 23207ec681f3Smrg 23217ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3); 23227ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4); 23237ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2); 23247ec681f3Smrg 23257ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4); 23267ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1); 23277ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3); 23287ec681f3Smrg 23297ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2); 23307ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4); 23317ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1); 23327ec681f3Smrg 23337ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3); 23347ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1); 23357ec681f3Smrg _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2); 23367ec681f3Smrg 23377ec681f3Smrg return pos + neg; 23387ec681f3Smrg} 23397ec681f3Smrg 23407ec681f3Smrg/* XXX: Probably better to just use src/mesa/math/m_matrix.c because 23417ec681f3Smrg * I have no idea where this code came from. 23427ec681f3Smrg */ 23437ec681f3Smrgvoid 23447ec681f3Smrgnine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) 23457ec681f3Smrg{ 23467ec681f3Smrg int i, k; 23477ec681f3Smrg float det; 23487ec681f3Smrg 23497ec681f3Smrg D->m[0][0] = 23507ec681f3Smrg M->m[1][1] * M->m[2][2] * M->m[3][3] - 23517ec681f3Smrg M->m[1][1] * M->m[3][2] * M->m[2][3] - 23527ec681f3Smrg M->m[1][2] * M->m[2][1] * M->m[3][3] + 23537ec681f3Smrg M->m[1][2] * M->m[3][1] * M->m[2][3] + 23547ec681f3Smrg M->m[1][3] * M->m[2][1] * M->m[3][2] - 23557ec681f3Smrg M->m[1][3] * M->m[3][1] * M->m[2][2]; 23567ec681f3Smrg 23577ec681f3Smrg D->m[0][1] = 23587ec681f3Smrg -M->m[0][1] * M->m[2][2] * M->m[3][3] + 23597ec681f3Smrg M->m[0][1] * M->m[3][2] * M->m[2][3] + 23607ec681f3Smrg M->m[0][2] * M->m[2][1] * M->m[3][3] - 23617ec681f3Smrg M->m[0][2] * M->m[3][1] * M->m[2][3] - 23627ec681f3Smrg M->m[0][3] * M->m[2][1] * M->m[3][2] + 23637ec681f3Smrg M->m[0][3] * M->m[3][1] * M->m[2][2]; 23647ec681f3Smrg 23657ec681f3Smrg D->m[0][2] = 23667ec681f3Smrg M->m[0][1] * M->m[1][2] * M->m[3][3] - 23677ec681f3Smrg M->m[0][1] * M->m[3][2] * M->m[1][3] - 23687ec681f3Smrg M->m[0][2] * M->m[1][1] * M->m[3][3] + 23697ec681f3Smrg M->m[0][2] * M->m[3][1] * M->m[1][3] + 23707ec681f3Smrg M->m[0][3] * M->m[1][1] * M->m[3][2] - 23717ec681f3Smrg M->m[0][3] * M->m[3][1] * M->m[1][2]; 23727ec681f3Smrg 23737ec681f3Smrg D->m[0][3] = 23747ec681f3Smrg -M->m[0][1] * M->m[1][2] * M->m[2][3] + 23757ec681f3Smrg M->m[0][1] * M->m[2][2] * M->m[1][3] + 23767ec681f3Smrg M->m[0][2] * M->m[1][1] * M->m[2][3] - 23777ec681f3Smrg M->m[0][2] * M->m[2][1] * M->m[1][3] - 23787ec681f3Smrg M->m[0][3] * M->m[1][1] * M->m[2][2] + 23797ec681f3Smrg M->m[0][3] * M->m[2][1] * M->m[1][2]; 23807ec681f3Smrg 23817ec681f3Smrg D->m[1][0] = 23827ec681f3Smrg -M->m[1][0] * M->m[2][2] * M->m[3][3] + 23837ec681f3Smrg M->m[1][0] * M->m[3][2] * M->m[2][3] + 23847ec681f3Smrg M->m[1][2] * M->m[2][0] * M->m[3][3] - 23857ec681f3Smrg M->m[1][2] * M->m[3][0] * M->m[2][3] - 23867ec681f3Smrg M->m[1][3] * M->m[2][0] * M->m[3][2] + 23877ec681f3Smrg M->m[1][3] * M->m[3][0] * M->m[2][2]; 23887ec681f3Smrg 23897ec681f3Smrg D->m[1][1] = 23907ec681f3Smrg M->m[0][0] * M->m[2][2] * M->m[3][3] - 23917ec681f3Smrg M->m[0][0] * M->m[3][2] * M->m[2][3] - 23927ec681f3Smrg M->m[0][2] * M->m[2][0] * M->m[3][3] + 23937ec681f3Smrg M->m[0][2] * M->m[3][0] * M->m[2][3] + 23947ec681f3Smrg M->m[0][3] * M->m[2][0] * M->m[3][2] - 23957ec681f3Smrg M->m[0][3] * M->m[3][0] * M->m[2][2]; 23967ec681f3Smrg 23977ec681f3Smrg D->m[1][2] = 23987ec681f3Smrg -M->m[0][0] * M->m[1][2] * M->m[3][3] + 23997ec681f3Smrg M->m[0][0] * M->m[3][2] * M->m[1][3] + 24007ec681f3Smrg M->m[0][2] * M->m[1][0] * M->m[3][3] - 24017ec681f3Smrg M->m[0][2] * M->m[3][0] * M->m[1][3] - 24027ec681f3Smrg M->m[0][3] * M->m[1][0] * M->m[3][2] + 24037ec681f3Smrg M->m[0][3] * M->m[3][0] * M->m[1][2]; 24047ec681f3Smrg 24057ec681f3Smrg D->m[1][3] = 24067ec681f3Smrg M->m[0][0] * M->m[1][2] * M->m[2][3] - 24077ec681f3Smrg M->m[0][0] * M->m[2][2] * M->m[1][3] - 24087ec681f3Smrg M->m[0][2] * M->m[1][0] * M->m[2][3] + 24097ec681f3Smrg M->m[0][2] * M->m[2][0] * M->m[1][3] + 24107ec681f3Smrg M->m[0][3] * M->m[1][0] * M->m[2][2] - 24117ec681f3Smrg M->m[0][3] * M->m[2][0] * M->m[1][2]; 24127ec681f3Smrg 24137ec681f3Smrg D->m[2][0] = 24147ec681f3Smrg M->m[1][0] * M->m[2][1] * M->m[3][3] - 24157ec681f3Smrg M->m[1][0] * M->m[3][1] * M->m[2][3] - 24167ec681f3Smrg M->m[1][1] * M->m[2][0] * M->m[3][3] + 24177ec681f3Smrg M->m[1][1] * M->m[3][0] * M->m[2][3] + 24187ec681f3Smrg M->m[1][3] * M->m[2][0] * M->m[3][1] - 24197ec681f3Smrg M->m[1][3] * M->m[3][0] * M->m[2][1]; 24207ec681f3Smrg 24217ec681f3Smrg D->m[2][1] = 24227ec681f3Smrg -M->m[0][0] * M->m[2][1] * M->m[3][3] + 24237ec681f3Smrg M->m[0][0] * M->m[3][1] * M->m[2][3] + 24247ec681f3Smrg M->m[0][1] * M->m[2][0] * M->m[3][3] - 24257ec681f3Smrg M->m[0][1] * M->m[3][0] * M->m[2][3] - 24267ec681f3Smrg M->m[0][3] * M->m[2][0] * M->m[3][1] + 24277ec681f3Smrg M->m[0][3] * M->m[3][0] * M->m[2][1]; 24287ec681f3Smrg 24297ec681f3Smrg D->m[2][2] = 24307ec681f3Smrg M->m[0][0] * M->m[1][1] * M->m[3][3] - 24317ec681f3Smrg M->m[0][0] * M->m[3][1] * M->m[1][3] - 24327ec681f3Smrg M->m[0][1] * M->m[1][0] * M->m[3][3] + 24337ec681f3Smrg M->m[0][1] * M->m[3][0] * M->m[1][3] + 24347ec681f3Smrg M->m[0][3] * M->m[1][0] * M->m[3][1] - 24357ec681f3Smrg M->m[0][3] * M->m[3][0] * M->m[1][1]; 24367ec681f3Smrg 24377ec681f3Smrg D->m[2][3] = 24387ec681f3Smrg -M->m[0][0] * M->m[1][1] * M->m[2][3] + 24397ec681f3Smrg M->m[0][0] * M->m[2][1] * M->m[1][3] + 24407ec681f3Smrg M->m[0][1] * M->m[1][0] * M->m[2][3] - 24417ec681f3Smrg M->m[0][1] * M->m[2][0] * M->m[1][3] - 24427ec681f3Smrg M->m[0][3] * M->m[1][0] * M->m[2][1] + 24437ec681f3Smrg M->m[0][3] * M->m[2][0] * M->m[1][1]; 24447ec681f3Smrg 24457ec681f3Smrg D->m[3][0] = 24467ec681f3Smrg -M->m[1][0] * M->m[2][1] * M->m[3][2] + 24477ec681f3Smrg M->m[1][0] * M->m[3][1] * M->m[2][2] + 24487ec681f3Smrg M->m[1][1] * M->m[2][0] * M->m[3][2] - 24497ec681f3Smrg M->m[1][1] * M->m[3][0] * M->m[2][2] - 24507ec681f3Smrg M->m[1][2] * M->m[2][0] * M->m[3][1] + 24517ec681f3Smrg M->m[1][2] * M->m[3][0] * M->m[2][1]; 24527ec681f3Smrg 24537ec681f3Smrg D->m[3][1] = 24547ec681f3Smrg M->m[0][0] * M->m[2][1] * M->m[3][2] - 24557ec681f3Smrg M->m[0][0] * M->m[3][1] * M->m[2][2] - 24567ec681f3Smrg M->m[0][1] * M->m[2][0] * M->m[3][2] + 24577ec681f3Smrg M->m[0][1] * M->m[3][0] * M->m[2][2] + 24587ec681f3Smrg M->m[0][2] * M->m[2][0] * M->m[3][1] - 24597ec681f3Smrg M->m[0][2] * M->m[3][0] * M->m[2][1]; 24607ec681f3Smrg 24617ec681f3Smrg D->m[3][2] = 24627ec681f3Smrg -M->m[0][0] * M->m[1][1] * M->m[3][2] + 24637ec681f3Smrg M->m[0][0] * M->m[3][1] * M->m[1][2] + 24647ec681f3Smrg M->m[0][1] * M->m[1][0] * M->m[3][2] - 24657ec681f3Smrg M->m[0][1] * M->m[3][0] * M->m[1][2] - 24667ec681f3Smrg M->m[0][2] * M->m[1][0] * M->m[3][1] + 24677ec681f3Smrg M->m[0][2] * M->m[3][0] * M->m[1][1]; 24687ec681f3Smrg 24697ec681f3Smrg D->m[3][3] = 24707ec681f3Smrg M->m[0][0] * M->m[1][1] * M->m[2][2] - 24717ec681f3Smrg M->m[0][0] * M->m[2][1] * M->m[1][2] - 24727ec681f3Smrg M->m[0][1] * M->m[1][0] * M->m[2][2] + 24737ec681f3Smrg M->m[0][1] * M->m[2][0] * M->m[1][2] + 24747ec681f3Smrg M->m[0][2] * M->m[1][0] * M->m[2][1] - 24757ec681f3Smrg M->m[0][2] * M->m[2][0] * M->m[1][1]; 24767ec681f3Smrg 24777ec681f3Smrg det = 24787ec681f3Smrg M->m[0][0] * D->m[0][0] + 24797ec681f3Smrg M->m[1][0] * D->m[0][1] + 24807ec681f3Smrg M->m[2][0] * D->m[0][2] + 24817ec681f3Smrg M->m[3][0] * D->m[0][3]; 24827ec681f3Smrg 24837ec681f3Smrg if (fabsf(det) < 1e-30) {/* non inversible */ 24847ec681f3Smrg *D = *M; /* wine tests */ 24857ec681f3Smrg return; 24867ec681f3Smrg } 24877ec681f3Smrg 24887ec681f3Smrg det = 1.0 / det; 24897ec681f3Smrg 24907ec681f3Smrg for (i = 0; i < 4; i++) 24917ec681f3Smrg for (k = 0; k < 4; k++) 24927ec681f3Smrg D->m[i][k] *= det; 24937ec681f3Smrg 24947ec681f3Smrg#if defined(DEBUG) || !defined(NDEBUG) 24957ec681f3Smrg { 24967ec681f3Smrg D3DMATRIX I; 24977ec681f3Smrg 24987ec681f3Smrg nine_d3d_matrix_matrix_mul(&I, D, M); 24997ec681f3Smrg 25007ec681f3Smrg for (i = 0; i < 4; ++i) 25017ec681f3Smrg for (k = 0; k < 4; ++k) 25027ec681f3Smrg if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3) 25037ec681f3Smrg DBG("Matrix inversion check FAILED !\n"); 25047ec681f3Smrg } 25057ec681f3Smrg#endif 25067ec681f3Smrg} 2507